summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Joseph Davis <davisp@apache.org> 2011-09-28 03:44:25 +0000
committer Paul Joseph Davis <davisp@apache.org> 2011-09-28 03:44:25 +0000
commit 89a5c28775f24f2706f443c76afa3edf9df78ce4 (patch)
tree 9f7379e679162407bb1df8851717ad62d258896b
parent 86f113f9a0f3fdca02dfada0fa9854923b133a67 (diff)
Rest of the fix for COUCHDB-1265
As a follow-up to COUCHDB-1265: I missed the fact that, after a new update_seq is inserted into an internal node, it is quite possible that a compaction runs before the doc is updated again. This is important because compaction removes the information about the largest update seq from the tree itself. The fix is simply to include the update_seq from the #full_doc_info{} record when calculating #doc_info.high_seq. The way to think of this is that high_seq is the maximum of all known values for the update sequence, which can be defined as all values known in the tree or in the #full_doc_info{} record. Backport of r1176701 from trunk. git-svn-id: https://svn.apache.org/repos/asf/couchdb/branches/1.1.x@1176704 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- share/www/script/test/recreate_doc.js 88
-rw-r--r-- src/couchdb/couch_doc.erl 8
2 files changed, 60 insertions, 36 deletions
diff --git a/share/www/script/test/recreate_doc.js b/share/www/script/test/recreate_doc.js
index a1cfb8f8..f9723793 100644
--- a/share/www/script/test/recreate_doc.js
+++ b/share/www/script/test/recreate_doc.js
@@ -81,41 +81,65 @@ couchTests.recreate_doc = function(debug) {
db.deleteDb();
db.createDb();
- // COUCHDB-1265
- // Resuscitate an unavailable old revision and make sure that it
- // doesn't introduce duplicates into the _changes feed.
-
- var doc = {_id: "bar", count: 0};
- T(db.save(doc).ok);
- var ghost = {_id: "bar", _rev: doc._rev, count: doc.count};
- for(var i = 0; i < 2; i++) {
- doc.count += 1;
- T(db.save(doc).ok);
+ // Helper function to create a doc with multiple revisions
+ // that are compacted away to ?REV_MISSING.
+
+ var createDoc = function(docid) {
+ var ret = [{_id: docid, count: 0}];
+ T(db.save(ret[0]).ok);
+ for(var i = 0; i < 2; i++) {
+ ret[ret.length] = {
+ _id: docid,
+ _rev: ret[ret.length-1]._rev,
+ count: ret[ret.length-1].count+1
+ };
+ T(db.save(ret[ret.length-1]).ok);
+ }
+ db.compact();
+ while(db.info().compact_running) {}
+ return ret;
}
- // Compact so that the old revision to be resuscitated will be
- // in the rev_tree as ?REV_MISSING
+ // Helper function to check that there are no duplicates
+ // in the changes feed and that it has proper update
+ // sequence ordering.
+
+ var checkChanges = function() {
+ // Assert that there are no duplicates in _changes.
+ var req = CouchDB.request("GET", "/test_suite_db/_changes");
+ var resp = JSON.parse(req.responseText);
+ var docids = {};
+ var prev_seq = -1;
+ for(var i = 0; i < resp.results.length; i++) {
+ row = resp.results[i];
+ T(row.seq > prev_seq, "Unordered _changes feed.");
+ T(docids[row.id] === undefined, "Duplicates in _changes feed.");
+ prev_seq = row.seq;
+ docids[row.id] = true;
+ }
+ };
+
+ // COUCHDB-1265 - Check that the changes feed remains proper
+ // after we try and break the update_seq tree.
+
+ // This first case is the one originally reported and "fixed"
+ // in COUCHDB-1265. Reinserting an old revision into the
+ // revision tree causes duplicates in the update_seq tree.
+
+ var revs = createDoc("a");
+ T(db.save(revs[1], {new_edits: false}).ok);
+ T(db.save(revs[revs.length-1]).ok);
+ checkChanges();
+
+ // The original fix for COUCHDB-1265 is not entirely correct
+ // as it didn't consider the possibility that a compaction
+ // might run after the original tree screw up.
+
+ revs = createDoc("b");
+ T(db.save(revs[1], {new_edits: false}).ok);
db.compact();
while(db.info().compact_running) {}
+ T(db.save(revs[revs.length-1]).ok);
+ checkChanges();
- // Saving the ghost here puts it back in the rev_tree in such
- // a way as to create a new update_seq but without changing a
- // leaf revision. This would cause the #full_doc_info{} and
- // #doc_info{} records to diverge in their idea of what the
- // doc's update_seq is and end up introducing a duplicate in
- // the _changes feed the next time this doc is updated.
- T(db.save(ghost, {new_edits: false}).ok);
-
- // The duplicate would have been introduce here becuase the #doc_info{}
- // would not have been removed correctly.
- T(db.save(doc).ok);
-
- // And finally assert that there are no duplicates in _changes.
- var req = CouchDB.request("GET", "/test_suite_db/_changes");
- var resp = JSON.parse(req.responseText);
- var docids = {};
- for(var i = 0; i < resp.results.length; i++) {
- T(docids[resp.results[i].id] === undefined, "Duplicates in _changes feed.");
- docids[resp.results[i].id] = true;
- }
};
diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
index a6700d59..01e92836 100644
--- a/src/couchdb/couch_doc.erl
+++ b/src/couchdb/couch_doc.erl
@@ -302,7 +302,7 @@ to_doc_info(FullDocInfo) ->
{DocInfo, _Path} = to_doc_info_path(FullDocInfo),
DocInfo.
-max_seq(Tree) ->
+max_seq(Tree, UpdateSeq) ->
FoldFun = fun({_Pos, _Key}, Value, _Type, MaxOldSeq) ->
case Value of
{_Deleted, _DiskPos, OldTreeSeq} ->
@@ -311,9 +311,9 @@ max_seq(Tree) ->
MaxOldSeq
end
end,
- couch_key_tree:fold(FoldFun, 0, Tree).
+ couch_key_tree:fold(FoldFun, UpdateSeq, Tree).
-to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) ->
+to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=Seq}) ->
RevInfosAndPath =
[{#rev_info{deleted=Del,body_sp=Bp,seq=Seq,rev={Pos,RevId}}, Path} ||
{{Del, Bp, Seq},{Pos, [RevId|_]}=Path} <-
@@ -326,7 +326,7 @@ to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) ->
end, RevInfosAndPath),
[{_RevInfo, WinPath}|_] = SortedRevInfosAndPath,
RevInfos = [RevInfo || {RevInfo, _Path} <- SortedRevInfosAndPath],
- {#doc_info{id=Id, high_seq=max_seq(Tree), revs=RevInfos}, WinPath}.
+ {#doc_info{id=Id, high_seq=max_seq(Tree, Seq), revs=RevInfos}, WinPath}.