summaryrefslogtreecommitdiff
path: root/src/couchdb/couch_db_updater.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/couchdb/couch_db_updater.erl')
-rw-r--r--src/couchdb/couch_db_updater.erl122
1 files changed, 80 insertions, 42 deletions
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index 1267ffa4..b1cb9037 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -115,13 +115,16 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
|| {#full_doc_info{id=Id}, Revs} <- NewDocInfos],
{DocInfoToUpdate, NewSeq} = lists:mapfoldl(
- fun(FullInfo, SeqAcc) ->
- Info = couch_doc:to_doc_info(FullInfo),
- {Info#doc_info{update_seq=SeqAcc + 1}, SeqAcc + 1}
+ fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
+ Tree2 = couch_key_tree:map_leafs( fun(RevInfo) ->
+ RevInfo#rev_info{seq=SeqAcc + 1}
+ end, Tree),
+ {couch_doc:to_doc_info(FullInfo#full_doc_info{rev_tree=Tree2}),
+ SeqAcc + 1}
end, LastSeq, FullDocInfoToUpdate),
- IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=Tree},_}
- <- NewDocInfos, Tree == []],
+ IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=[]},_}
+ <- NewDocInfos],
{ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree,
DocInfoToUpdate, SeqsToRemove),
@@ -194,33 +197,61 @@ code_change(_OldVsn, State, _Extra) ->
{ok, State}.
-btree_by_seq_split(DocInfo) ->
- #doc_info{
- id = Id,
- rev = Rev,
- update_seq = Seq,
- summary_pointer = Sp,
- conflict_revs = Conflicts,
- deleted_conflict_revs = DelConflicts,
- deleted = Deleted} = DocInfo,
- {Seq,{Id, Rev, Sp, Conflicts, DelConflicts, Deleted}}.
+btree_by_seq_split(#doc_info{id=Id, high_seq=KeySeq, revs=Revs}) ->
+ RevInfos = [{Rev, Seq, Bp} ||
+ #rev_info{rev=Rev,seq=Seq,deleted=false,body_sp=Bp} <- Revs],
+ DeletedRevInfos = [{Rev, Seq, Bp} ||
+ #rev_info{rev=Rev,seq=Seq,deleted=true,body_sp=Bp} <- Revs],
+ {KeySeq,{Id, RevInfos, DeletedRevInfos}}.
-btree_by_seq_join(Seq,{Id, Rev, Sp, Conflicts, DelConflicts, Deleted}) ->
+btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
#doc_info{
id = Id,
- rev = Rev,
- update_seq = Seq,
- summary_pointer = Sp,
- conflict_revs = Conflicts,
- deleted_conflict_revs = DelConflicts,
- deleted = Deleted}.
+ high_seq=KeySeq,
+ revs =
+ [#rev_info{rev=Rev,seq=Seq,deleted=false,body_sp = Bp} ||
+ {Rev, Seq, Bp} <- RevInfos] ++
+ [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
+ {Rev, Seq, Bp} <- DeletedRevInfos]};
+btree_by_seq_join(KeySeq,{Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) ->
+ % this is the 0.9.0 and earlier by_seq record. It's missing the body pointers
+ % and individual seq nums for conflicts that are currently in the index,
+ % meaning the filtered _changes api will not work except for on main docs.
+ % Simply compact a 0.9.0 database to upgrade the index.
+ #doc_info{
+ id=Id,
+ high_seq=KeySeq,
+ revs = [#rev_info{rev=Rev,seq=KeySeq,deleted=Deleted,body_sp=Bp}] ++
+ [#rev_info{rev=Rev1,seq=KeySeq,deleted=false} || Rev1 <- Conflicts] ++
+ [#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}.
btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
deleted=Deleted, rev_tree=Tree}) ->
- {Id, {Seq, case Deleted of true -> 1; false-> 0 end, Tree}}.
-
-btree_by_id_join(Id, {Seq, Deleted, Tree}) ->
- #full_doc_info{id=Id, update_seq=Seq, deleted=Deleted==1, rev_tree=Tree}.
+ DiskTree =
+ couch_key_tree:map(
+ fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
+ {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq};
+ (_RevId, ?REV_MISSING) ->
+ ?REV_MISSING
+ end, Tree),
+ {Id, {Seq, if Deleted -> 1; true -> 0 end, DiskTree}}.
+
+btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
+ Tree =
+ couch_key_tree:map(
+ fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
+ {IsDeleted == 1, BodyPointer, UpdateSeq};
+ (_RevId, ?REV_MISSING) ->
+ ?REV_MISSING;
+ (_RevId, {IsDeleted, BodyPointer}) ->
+ % this is the 0.9.0 and earlier rev info record. It's missing the seq
+ % nums, which means couchdb will sometimes reexamine unchanged
+ % documents with the _changes API.
+ % This is fixed by compacting the database.
+ {IsDeleted, BodyPointer, HighSeq}
+ end, DiskTree),
+
+ #full_doc_info{id=Id, update_seq=HighSeq, deleted=Deleted==1, rev_tree=Tree}.
btree_by_id_reduce(reduce, FullDocInfos) ->
% count the number of not deleted documents
@@ -251,6 +282,7 @@ less_docid(A, B) -> A < B.
init_db(DbName, Filepath, Fd, Header0) ->
case element(2, Header0) of
+ ?DISK_VERSION_0_9 -> ok; % no problem, all records upgrade on the fly
?LATEST_DISK_VERSION -> ok;
_ -> throw({database_disk_version_error, "Incorrect disk header version"})
end,
@@ -320,7 +352,8 @@ refresh_validate_doc_funs(Db) ->
get_design_docs(#db{fulldocinfo_by_id_btree=Btree}=Db) ->
couch_btree:foldl(Btree, <<"_design/">>,
fun(#full_doc_info{id= <<"_design/",_/binary>>}=FullDocInfo, _Reds, AccDocs) ->
- {ok, [couch_db:make_doc(Db, FullDocInfo) | AccDocs]};
+ {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []),
+ {ok, [Doc | AccDocs]};
(_, _Reds, AccDocs) ->
{stop, AccDocs}
end,
@@ -331,8 +364,8 @@ get_design_docs(#db{fulldocinfo_by_id_btree=Btree}=Db) ->
flush_trees(_Db, [], AccFlushedTrees) ->
{ok, lists:reverse(AccFlushedTrees)};
flush_trees(#db{fd=Fd}=Db, [InfoUnflushed | RestUnflushed], AccFlushed) ->
- #full_doc_info{rev_tree=Unflushed} = InfoUnflushed,
- Flushed = couch_key_tree:map(
+ #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed,
+ Flushed = couch_key_tree:map(
fun(_Rev, Value) ->
case Value of
#doc{attachments=Atts,deleted=IsDeleted}=Doc ->
@@ -355,7 +388,7 @@ flush_trees(#db{fd=Fd}=Db, [InfoUnflushed | RestUnflushed], AccFlushed) ->
throw(retry)
end,
{ok, NewSummaryPointer} = couch_stream:write_term(Db#db.summary_stream, {Doc#doc.body, Bins}),
- {IsDeleted, NewSummaryPointer};
+ {IsDeleted, NewSummaryPointer, UpdateSeq};
_ ->
Value
end
@@ -394,10 +427,13 @@ merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList],
[NewInfo|AccNewInfos], RemoveSeqs, NewConflicts, AccSeq+1)
end.
+
+
new_index_entries([], AccById, AccBySeq) ->
- {ok, AccById, AccBySeq};
+ {AccById, AccBySeq};
new_index_entries([FullDocInfo|RestInfos], AccById, AccBySeq) ->
- #doc_info{deleted=Deleted} = DocInfo = couch_doc:to_doc_info(FullDocInfo),
+ #doc_info{revs=[#rev_info{deleted=Deleted}|_]} = DocInfo =
+ couch_doc:to_doc_info(FullDocInfo),
new_index_entries(RestInfos,
[FullDocInfo#full_doc_info{deleted=Deleted}|AccById],
[DocInfo|AccBySeq]).
@@ -436,13 +472,13 @@ update_docs_int(Db, DocsList, Options) ->
#full_doc_info{id=Id}
end,
Ids, OldDocLookups),
-
+
% Merge the new docs into the revision trees.
{ok, NewDocInfos0, RemoveSeqs, Conflicts, NewSeq} = merge_rev_trees(
lists:member(merge_conflicts, Options),
DocsList2, OldDocInfos, [], [], [], LastSeq),
- NewDocInfos = stem_full_doc_infos(Db, NewDocInfos0),
+ NewFullDocInfos = stem_full_doc_infos(Db, NewDocInfos0),
% All documents are now ready to write.
@@ -450,13 +486,14 @@ update_docs_int(Db, DocsList, Options) ->
% Write out the document summaries (the bodies are stored in the nodes of
% the trees, the attachments are already written to disk)
- {ok, FlushedDocInfos} = flush_trees(Db2, NewDocInfos, []),
+ {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
- {ok, InfoById, InfoBySeq} = new_index_entries(FlushedDocInfos, [], []),
+ {IndexFullDocInfos, IndexDocInfos} =
+ new_index_entries(FlushedFullDocInfos, [], []),
% and the indexes
- {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, InfoBySeq, RemoveSeqs),
- {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, InfoById, []),
+ {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []),
+ {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, IndexDocInfos, RemoveSeqs),
Db3 = Db2#db{
fulldocinfo_by_id_btree = DocInfoByIdBTree2,
@@ -474,7 +511,8 @@ update_docs_int(Db, DocsList, Options) ->
{ok, LocalConflicts ++ Conflicts,
commit_data(Db4, not lists:member(full_commit, Options))}.
-
+
+
update_local_docs(#db{local_docs_btree=Btree}=Db, Docs) ->
Ids = [Id || #doc{id=Id} <- Docs],
OldDocLookups = couch_btree:lookup(Btree, Ids),
@@ -558,10 +596,10 @@ copy_rev_tree(SrcFd, DestFd, DestStream, [{Start, Tree} | RestTree]) ->
% root nner node, only copy info/data from leaf nodes
[Tree2] = copy_rev_tree(SrcFd, DestFd, DestStream, [Tree]),
[{Start, Tree2} | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)];
-copy_rev_tree(SrcFd, DestFd, DestStream, [{RevId, {IsDel, Sp}, []} | RestTree]) ->
+copy_rev_tree(SrcFd, DestFd, DestStream, [{RevId, {IsDel, Sp, Seq}, []} | RestTree]) ->
% This is a leaf node, copy it over
NewSp = copy_raw_doc(SrcFd, Sp, DestFd, DestStream),
- [{RevId, {IsDel, NewSp}, []} | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)];
+ [{RevId, {IsDel, NewSp, Seq}, []} | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)];
copy_rev_tree(SrcFd, DestFd, DestStream, [{RevId, _, SubTree} | RestTree]) ->
% inner node, only copy info/data from leaf nodes
[{RevId, ?REV_MISSING, copy_rev_tree(SrcFd, DestFd, DestStream, SubTree)} | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)].
@@ -598,7 +636,7 @@ copy_docs(#db{fd=SrcFd}=Db, #db{fd=DestFd,summary_stream=DestStream}=NewDb, Info
copy_compact(Db, NewDb, Retry) ->
TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
EnumBySeqFun =
- fun(#doc_info{update_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
+ fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
couch_task_status:update("Copied ~p of ~p changes (~p%)",
[TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]),
if TotalCopied rem 1000 == 0 ->