author     Adam Kocoloski <kocolosk@apache.org>  2010-12-08 15:48:52 +0000
committer  Adam Kocoloski <kocolosk@apache.org>  2010-12-08 15:48:52 +0000
commit     fd693f972cd987c4fddda25738239582153d2a7c (patch)
tree       475c955e50d62a6adbbbe9fe541626a9c8dd6618
parent     eb3d803e24e3b9f09a037c48cbac15a356067772 (diff)
Usort the infos during compaction to remove dupes, COUCHDB-968
This is not a bulletproof solution; it only removes dupes when they appear in the same batch of 1000 updates. However, for dupes that show up in _all_docs the probability of that happening is quite high. If the dupes are only in _changes a user may need to compact twice, once to get the dupes ordered together and a second time to remove them. A more complete solution would be to trigger the compaction in "retry" mode, but this is significantly slower.

git-svn-id: https://svn.apache.org/repos/asf/couchdb/branches/1.1.x@1043461 13f79535-47bb-0310-9956-ffa450edef68
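As an aside for reviewers, here is a minimal, hypothetical sketch of the pruning step in isolation. lists:usort/2 treats two elements as duplicates when the comparison fun reports them equal in both orders, so comparing #doc_info{} records by id alone collapses entries that share an id. The module name and the simplified record below are illustrative only, not part of this patch.

-module(dedupe_sketch).
-export([demo/0]).

%% Simplified stand-in for couch_db's #doc_info{} record.
-record(doc_info, {id, high_seq}).

demo() ->
    %% Two infos share the id <<"a">>, as a duplicate landing in the
    %% same compaction batch would.
    Infos = [#doc_info{id = <<"a">>, high_seq = 1},
             #doc_info{id = <<"b">>, high_seq = 2},
             #doc_info{id = <<"a">>, high_seq = 3}],
    %% Comparing on id alone makes the two <<"a">> records compare as
    %% equal, so lists:usort/2 keeps only the first of them.
    Deduped = lists:usort(
        fun(#doc_info{id = A}, #doc_info{id = B}) -> A =< B end, Infos),
    %% Returns [<<"a">>, <<"b">>]: one surviving entry per id.
    [Id || #doc_info{id = Id} <- Deduped].

Dupes split across batch boundaries survive this pass, which is why the message above suggests a second compaction for dupes that only appear in _changes.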
-rw-r--r--  src/couchdb/couch_db_updater.erl  5
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index bbba5d4b..8630ff4e 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -775,7 +775,10 @@ copy_rev_tree_attachments(SrcDb, DestFd, Tree) ->
     end, Tree).
 
-copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
+copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq0, Retry) ->
+    % COUCHDB-968, make sure we prune duplicates during compaction
+    InfoBySeq = lists:usort(fun(#doc_info{id=A}, #doc_info{id=B}) -> A =< B end,
+        InfoBySeq0),
     Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
     LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),