summaryrefslogtreecommitdiff
path: root/src/couchdb/couch_key_tree.erl
diff options
context:
space:
mode:
authorRobert Newson <rnewson@apache.org>2011-06-20 21:22:02 +0000
committerRobert Newson <rnewson@apache.org>2011-06-20 21:22:02 +0000
commit0827cb483ef4cf74123313e8f2d0d2bd5ce8e46b (patch)
tree0368430767ad613748d03dd29686722b4efdb0d1 /src/couchdb/couch_key_tree.erl
parentf0aaa2c3c7f6bbb858b40a55267781f1bcde0f67 (diff)
Fix spurious declarations of new merge conflicts
This patch also adds extra tests of the key tree merging logic as well as edoc-formatted documentation for the module and a few of the merge functions. Closes COUCHDB-902. Thanks Paul Davis, Bob Dionne, Klaus Trainer. backported from trunk@1065471 Conflicts: src/couchdb/couch_key_tree.erl git-svn-id: https://svn.apache.org/repos/asf/couchdb/branches/1.1.x@1137789 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/couchdb/couch_key_tree.erl')
-rw-r--r--src/couchdb/couch_key_tree.erl48
1 files changed, 44 insertions, 4 deletions
diff --git a/src/couchdb/couch_key_tree.erl b/src/couchdb/couch_key_tree.erl
index e5d3549f..48a76b1d 100644
--- a/src/couchdb/couch_key_tree.erl
+++ b/src/couchdb/couch_key_tree.erl
@@ -10,6 +10,41 @@
% License for the specific language governing permissions and limitations under
% the License.
+%% @doc Data structure used to represent document edit histories.
+
+%% A key tree is used to represent the edit history of a document. Each node of
+%% the tree represents a particular version. Relations between nodes represent
+%% the order that these edits were applied. For instance, a set of three edits
+%% would produce a tree of versions A->B->C indicating that edit C was based on
+%% version B which was in turn based on A. In a world without replication (and
+%% no ability to disable MVCC checks), all histories would be forced to be
+%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must
+%% be based on the current version). However, we have replication, so we must
+%% deal with not so easy cases, which lead to trees.
+%%
+%% Consider a document in state A. This doc is replicated to a second node. We
+%% then edit the document on each node leaving it in two different states, B
+%% and C. We now have two key trees, A->B and A->C. When we go to replicate a
+%% second time, the key tree must combine these two trees which gives us
+%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we
+%% say that we have two leaves (B and C) that are not deleted. The presense of
+%% the multiple leaves indicate conflict. To remove a conflict, one of the
+%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an
+%% edit that is specially marked with the a deleted=true flag.
+%%
+%% What makes this a bit more complicated is that there is a limit to the
+%% number of revisions kept, specified in couch_db.hrl (default is 1000). When
+%% this limit is exceeded only the last 1000 are kept. This comes in to play
+%% when branches are merged. The comparison has to begin at the same place in
+%% the branches. A revision id is of the form N-XXXXXXX where N is the current
+%% revision. So each path will have a start number, calculated in
+%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc
+%% was edit 1003 times this start number would be 4, indicating that 3
+%% revisions were truncated.
+%%
+%% This comes into play in @see merge_at/3 which recursively walks down one
+%% tree or the other until they begin at the same revision.
+
-module(couch_key_tree).
-export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]).
@@ -31,6 +66,9 @@ merge(Paths, Path, Depth) ->
{Merged, Conflicts} = merge(Paths, Path),
{stem(Merged, Depth), Conflicts}.
+%% @doc Merge a path with an existing list of paths, returning a new list of
+%% paths. A return of conflicts indicates a new conflict was discovered in this
+%% merge. Conflicts may already exist in the original list of paths.
-spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}.
merge(Paths, Path) ->
{ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false),
@@ -69,6 +107,7 @@ merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 ->
{ok, Merged, Conflicts} ->
{ok, [{Key, Value, Merged} | Sibs], Conflicts};
no ->
+ % first branch didn't merge, move to next branch
case merge_at(Sibs, Place, InsertTree) of
{ok, Merged, Conflicts} ->
{ok, [{Key, Value, SubTree} | Merged], Conflicts};
@@ -111,11 +150,12 @@ merge_simple([{Key, V1, SubA} | NextA], [{Key, V2, SubB} | NextB]) ->
Value = value_pref(V1, V2),
{[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2};
merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B ->
- {Merged, _} = merge_simple(Next, Insert),
- {[Tree | Merged], true};
+ {Merged, Conflict} = merge_simple(Next, Insert),
+ % if Merged has more branches than the input we added a new conflict
+ {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))};
merge_simple(Ours, [Tree | Next]) ->
- {Merged, _} = merge_simple(Ours, Next),
- {[Tree | Merged], true}.
+ {Merged, Conflict} = merge_simple(Ours, Next),
+ {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}.
find_missing(_Tree, []) ->
[];