diff options
Diffstat (limited to 'apps/couch')
-rw-r--r-- | apps/couch/src/couch_doc.erl | 20 | ||||
-rw-r--r-- | apps/couch/src/couch_httpd_db.erl | 14 | ||||
-rw-r--r-- | apps/couch/src/couch_key_tree.erl | 48 | ||||
-rw-r--r-- | apps/couch/src/couch_rep.erl | 45 | ||||
-rw-r--r-- | apps/couch/src/couch_replication_manager.erl | 41 | ||||
-rw-r--r-- | apps/couch/src/couch_view.erl | 4 | ||||
-rw-r--r-- | apps/couch/src/couch_view_compactor.erl | 6 | ||||
-rwxr-xr-x | apps/couch/test/etap/060-kt-merging.t | 95 |
8 files changed, 232 insertions, 41 deletions
diff --git a/apps/couch/src/couch_doc.erl b/apps/couch/src/couch_doc.erl index c7b9dbb9..33d7e3cf 100644 --- a/apps/couch/src/couch_doc.erl +++ b/apps/couch/src/couch_doc.erl @@ -18,6 +18,7 @@ -export([validate_docid/1]). -export([doc_from_multi_part_stream/2]). -export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]). +-export([abort_multi_part_stream/1]). -include("couch_db.hrl"). @@ -499,7 +500,7 @@ doc_from_multi_part_stream(ContentType, DataFun) -> receive {Parser, finished} -> ok end, erlang:put(mochiweb_request_recv, true) end, - {ok, Doc#doc{atts=Atts2}, WaitFun} + {ok, Doc#doc{atts=Atts2}, WaitFun, Parser} end. mp_parse_doc({headers, H}, []) -> @@ -590,3 +591,20 @@ maybe_send_data({ChunkList, Offset, Counters, Waiting}) -> end end end. + +abort_multi_part_stream(Parser) -> + abort_multi_part_stream(Parser, erlang:monitor(process, Parser)). + +abort_multi_part_stream(Parser, MonRef) -> + case is_process_alive(Parser) of + true -> + Parser ! {get_bytes, self()}, + receive + {bytes, _Bytes} -> + abort_multi_part_stream(Parser, MonRef); + {'DOWN', MonRef, _, _, _} -> + ok + end; + false -> + erlang:demonitor(MonRef, [flush]) + end. diff --git a/apps/couch/src/couch_httpd_db.erl b/apps/couch/src/couch_httpd_db.erl index e3638b25..71204598 100644 --- a/apps/couch/src/couch_httpd_db.erl +++ b/apps/couch/src/couch_httpd_db.erl @@ -692,12 +692,18 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> RespHeaders = [{"Location", Loc}], case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of ("multipart/related;" ++ _) = ContentType -> - {ok, Doc0, WaitFun} = couch_doc:doc_from_multi_part_stream( + {ok, Doc0, WaitFun, Parser} = couch_doc:doc_from_multi_part_stream( ContentType, fun() -> receive_request_data(Req) end), Doc = couch_doc_from_req(Req, DocId, Doc0), - Result = update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType), - WaitFun(), - Result; + try + Result = update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType), + WaitFun(), + Result + catch throw:Err -> + % Document rejected by a validate_doc_update function. + couch_doc:abort_multi_part_stream(Parser), + throw(Err) + end; _Else -> case couch_httpd:qs_value(Req, "batch") of "ok" -> diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl index c5fd1088..2f3c6abf 100644 --- a/apps/couch/src/couch_key_tree.erl +++ b/apps/couch/src/couch_key_tree.erl @@ -10,6 +10,41 @@ % License for the specific language governing permissions and limitations under % the License. +%% @doc Data structure used to represent document edit histories. + +%% A key tree is used to represent the edit history of a document. Each node of +%% the tree represents a particular version. Relations between nodes represent +%% the order that these edits were applied. For instance, a set of three edits +%% would produce a tree of versions A->B->C indicating that edit C was based on +%% version B which was in turn based on A. In a world without replication (and +%% no ability to disable MVCC checks), all histories would be forced to be +%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must +%% be based on the current version). However, we have replication, so we must +%% deal with not so easy cases, which lead to trees. +%% +%% Consider a document in state A. This doc is replicated to a second node. We +%% then edit the document on each node leaving it in two different states, B +%% and C. We now have two key trees, A->B and A->C. When we go to replicate a +%% second time, the key tree must combine these two trees which gives us +%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we +%% say that we have two leaves (B and C) that are not deleted. The presense of +%% the multiple leaves indicate conflict. To remove a conflict, one of the +%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an +%% edit that is specially marked with the a deleted=true flag. +%% +%% What makes this a bit more complicated is that there is a limit to the +%% number of revisions kept, specified in couch_db.hrl (default is 1000). When +%% this limit is exceeded only the last 1000 are kept. This comes in to play +%% when branches are merged. The comparison has to begin at the same place in +%% the branches. A revision id is of the form N-XXXXXXX where N is the current +%% revision. So each path will have a start number, calculated in +%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc +%% was edit 1003 times this start number would be 4, indicating that 3 +%% revisions were truncated. +%% +%% This comes into play in @see merge_at/3 which recursively walks down one +%% tree or the other until they begin at the same revision. + -module(couch_key_tree). -export([merge/3, find_missing/2, get_key_leafs/2, @@ -32,6 +67,9 @@ merge(Paths, Path, Depth) -> {Merged, Conflicts} = merge(Paths, Path), {stem(Merged, Depth), Conflicts}. +%% @doc Merge a path with an existing list of paths, returning a new list of +%% paths. A return of conflicts indicates a new conflict was discovered in this +%% merge. Conflicts may already exist in the original list of paths. -spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}. merge(Paths, Path) -> {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false), @@ -70,6 +108,7 @@ merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 -> {ok, Merged, Conflicts} -> {ok, [{Key, Value, Merged} | Sibs], Conflicts}; no -> + % first branch didn't merge, move to next branch case merge_at(Sibs, Place, InsertTree) of {ok, Merged, Conflicts} -> {ok, [{Key, Value, SubTree} | Merged], Conflicts}; @@ -112,11 +151,12 @@ merge_simple([{Key, V1, SubA} | NextA], [{Key, V2, SubB} | NextB]) -> Value = value_pref(V1, V2), {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B -> - {Merged, _} = merge_simple(Next, Insert), - {[Tree | Merged], true}; + {Merged, Conflict} = merge_simple(Next, Insert), + % if Merged has more branches than the input we added a new conflict + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}; merge_simple(Ours, [Tree | Next]) -> - {Merged, _} = merge_simple(Ours, Next), - {[Tree | Merged], true}. + {Merged, Conflict} = merge_simple(Ours, Next), + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}. find_missing(_Tree, []) -> []; diff --git a/apps/couch/src/couch_rep.erl b/apps/couch/src/couch_rep.erl index 482b84dc..966d6872 100644 --- a/apps/couch/src/couch_rep.erl +++ b/apps/couch/src/couch_rep.erl @@ -486,7 +486,17 @@ make_replication_id(RepProps, UserCtx) -> % add a new clause and increase ?REP_ID_VERSION at the top make_replication_id({Props}, UserCtx, 2) -> {ok, HostName} = inet:gethostname(), - Port = mochiweb_socket_server:get(couch_httpd, port), + Port = case (catch mochiweb_socket_server:get(couch_httpd, port)) of + P when is_number(P) -> + P; + _ -> + % On restart we might be called before the couch_httpd process is + % started. + % TODO: we might be under an SSL socket server only, or both under + % SSL and a non-SSL socket. + % ... mochiweb_socket_server:get(https, port) + list_to_integer(couch_config:get("httpd", "port", "5984")) + end, Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), maybe_append_filters({Props}, [HostName, Port, Src, Tgt], UserCtx); @@ -513,16 +523,37 @@ maybe_append_filters({Props}, Base, UserCtx) -> couch_util:to_hex(couch_util:md5(term_to_binary(Base2))). filter_code(Filter, Props, UserCtx) -> - {match, [DDocName, FilterName]} = - re:run(Filter, "(.*?)/(.*)", [{capture, [1, 2], binary}]), + {DDocName, FilterName} = + case re:run(Filter, "(.*?)/(.*)", [{capture, [1, 2], binary}]) of + {match, [DDocName0, FilterName0]} -> + {DDocName0, FilterName0}; + _ -> + throw({error, <<"Invalid filter. Must match `ddocname/filtername`.">>}) + end, ProxyParams = parse_proxy_params( couch_util:get_value(<<"proxy">>, Props, [])), - Source = open_db( - couch_util:get_value(<<"source">>, Props), UserCtx, ProxyParams), + DbName = couch_util:get_value(<<"source">>, Props), + Source = try + open_db(DbName, UserCtx, ProxyParams) + catch + _Tag:DbError -> + DbErrorMsg = io_lib:format("Could not open source database `~s`: ~s", + [couch_util:url_strip_password(DbName), couch_util:to_binary(DbError)]), + throw({error, iolist_to_binary(DbErrorMsg)}) + end, try - {ok, DDoc} = open_doc(Source, <<"_design/", DDocName/binary>>), + Body = case (catch open_doc(Source, <<"_design/", DDocName/binary>>)) of + {ok, #doc{body = Body0}} -> + Body0; + DocError -> + DocErrorMsg = io_lib:format( + "Couldn't open document `_design/~s` from source " + "database `~s`: ~s", + [dbname(Source), DDocName, couch_util:to_binary(DocError)]), + throw({error, iolist_to_binary(DocErrorMsg)}) + end, Code = couch_util:get_nested_json_value( - DDoc#doc.body, [<<"filters">>, FilterName]), + Body, [<<"filters">>, FilterName]), re:replace(Code, "^\s*(.*?)\s*$", "\\1", [{return, binary}]) after close_db(Source) diff --git a/apps/couch/src/couch_replication_manager.erl b/apps/couch/src/couch_replication_manager.erl index 6537c8b2..e3d97c37 100644 --- a/apps/couch/src/couch_replication_manager.erl +++ b/apps/couch/src/couch_replication_manager.erl @@ -32,7 +32,8 @@ -import(couch_util, [ get_value/2, - get_value/3 + get_value/3, + to_binary/1 ]). @@ -62,8 +63,16 @@ init(_) -> }}. -handle_call({rep_db_update, Change}, _From, State) -> - {reply, ok, process_update(State, Change)}; +handle_call({rep_db_update, {ChangeProps} = Change}, _From, State) -> + NewState = try + process_update(State, Change) + catch + _Tag:Error -> + JsonRepDoc = get_value(doc, ChangeProps), + rep_db_update_error(Error, JsonRepDoc), + State + end, + {reply, ok, NewState}; handle_call({triggered, {BaseId, _}}, _From, State) -> [{BaseId, {DocId, true}}] = ets:lookup(?REP_ID_TO_DOC_ID, BaseId), @@ -250,6 +259,19 @@ process_update(State, {Change}) -> end. +rep_db_update_error(Error, {Props} = JsonRepDoc) -> + case Error of + {bad_rep_doc, Reason} -> + ok; + _ -> + Reason = to_binary(Error) + end, + ?LOG_ERROR("Replication manager, error processing document `~s`: ~s", + [get_value(<<"_id">>, Props), Reason]), + couch_rep:update_rep_doc( + JsonRepDoc, [{<<"_replication_state">>, <<"error">>}]). + + rep_user_ctx({RepDoc}) -> case get_value(<<"user_ctx">>, RepDoc) of undefined -> @@ -265,7 +287,7 @@ rep_user_ctx({RepDoc}) -> maybe_start_replication(#state{max_retries = MaxRetries} = State, DocId, JsonRepDoc) -> UserCtx = rep_user_ctx(JsonRepDoc), - {BaseId, _} = RepId = couch_rep:make_replication_id(JsonRepDoc, UserCtx), + {BaseId, _} = RepId = make_rep_id(JsonRepDoc, UserCtx), case ets:lookup(?REP_ID_TO_DOC_ID, BaseId) of [] -> true = ets:insert(?REP_ID_TO_DOC_ID, {BaseId, {DocId, true}}), @@ -290,6 +312,17 @@ maybe_start_replication(#state{max_retries = MaxRetries} = State, end. +make_rep_id(RepDoc, UserCtx) -> + try + couch_rep:make_replication_id(RepDoc, UserCtx) + catch + throw:{error, Reason} -> + throw({bad_rep_doc, Reason}); + Tag:Err -> + throw({bad_rep_doc, to_binary({Tag, Err})}) + end. + + maybe_tag_rep_doc({Props} = JsonRepDoc, RepId) -> case get_value(<<"_replication_id">>, Props) of RepId -> diff --git a/apps/couch/src/couch_view.erl b/apps/couch/src/couch_view.erl index 2d84b0f9..05174245 100644 --- a/apps/couch/src/couch_view.erl +++ b/apps/couch/src/couch_view.erl @@ -97,8 +97,8 @@ cleanup_index_files(Db) -> if length(Sigs) =:= 0 -> FileList; true -> - % regex that matches all ddocs - RegExp = "("++ string:join(Sigs, "|") ++")", + % regex that matches all ddocs + RegExp = "("++ string:join(Sigs, "|") ++")", % filter out the ones in use [FilePath || FilePath <- FileList, diff --git a/apps/couch/src/couch_view_compactor.erl b/apps/couch/src/couch_view_compactor.erl index 38f63f66..69aaff00 100644 --- a/apps/couch/src/couch_view_compactor.erl +++ b/apps/couch/src/couch_view_compactor.erl @@ -55,8 +55,10 @@ compact_group(Group, EmptyGroup) -> Fun = fun({DocId, _ViewIdKeys} = KV, {Bt, Acc, TotalCopied, LastId}) -> if DocId =:= LastId -> % COUCHDB-999 - Msg = "Duplicates of ~s detected in ~s ~s - rebuild required", - exit(io_lib:format(Msg, [DocId, DbName, GroupId])); + ?LOG_ERROR("Duplicates of document `~s` detected in view group `~s`" + ", database `~s` - view rebuild, from scratch, is required", + [DocId, GroupId, DbName]), + exit({view_duplicated_id, DocId}); true -> ok end, if TotalCopied rem 10000 =:= 0 -> couch_task_status:update("Copied ~p of ~p Ids (~p%)", diff --git a/apps/couch/test/etap/060-kt-merging.t b/apps/couch/test/etap/060-kt-merging.t index 0e481a52..efbdbf69 100755 --- a/apps/couch/test/etap/060-kt-merging.t +++ b/apps/couch/test/etap/060-kt-merging.t @@ -15,7 +15,7 @@ main(_) -> test_util:init_code_path(), - etap:plan(12), + etap:plan(16), case (catch test()) of ok -> etap:end_tests(); @@ -26,25 +26,21 @@ main(_) -> ok. test() -> - One = {0, {"1","foo",[]}}, - TwoSibs = [{0, {"1","foo",[]}}, - {0, {"2","foo",[]}}], - OneChild = {0, {"1","foo",[{"1a", "bar", []}]}}, - TwoChild = {0, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}, - TwoChildSibs = {0, {"1","foo", [{"1a", "bar", []}, - {"1b", "bar", []}]}}, - TwoChildSibs2 = {0, {"1","foo", [{"1a", "bar", []}, - {"1b", "bar", [{"1bb", "boo", []}]}]}}, - Stemmed1b = {1, {"1a", "bar", []}}, - Stemmed1a = {1, {"1a", "bar", [{"1aa", "bar", []}]}}, - Stemmed1aa = {2, {"1aa", "bar", []}}, - Stemmed1bb = {2, {"1bb", "boo", []}}, + One = {1, {"1","foo",[]}}, etap:is( {[One], no_conflicts}, couch_key_tree:merge([], One, 10), "The empty tree is the identity for merge." ), + etap:is( + {[One], no_conflicts}, + couch_key_tree:merge([One], One, 10), + "Merging is reflexive." + ), + + TwoSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}], etap:is( {TwoSibs, no_conflicts}, @@ -52,41 +48,75 @@ test() -> "Merging a prefix of a tree with the tree yields the tree." ), + Three = {1, {"3","foo",[]}}, + ThreeSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}, + {1, {"3","foo",[]}}], + etap:is( - {[One], no_conflicts}, - couch_key_tree:merge([One], One, 10), - "Merging is reflexive." + {ThreeSibs, conflicts}, + couch_key_tree:merge(TwoSibs, Three, 10), + "Merging a third unrelated branch leads to a conflict." ), + + TwoChild = {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}, + etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], TwoChild, 10), "Merging two children is still reflexive." ), + TwoChildSibs = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", []}]}}, etap:is( {[TwoChildSibs], no_conflicts}, couch_key_tree:merge([TwoChildSibs], TwoChildSibs, 10), "Merging a tree to itself is itself."), + TwoChildPlusSibs = + {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}, + {"1b", "bar", []}]}}, + + etap:is( + {[TwoChildPlusSibs], no_conflicts}, + couch_key_tree:merge([TwoChild], TwoChildSibs, 10), + "Merging tree of uneven length at node 2."), + + Stemmed1b = {2, {"1a", "bar", []}}, etap:is( {[TwoChildSibs], no_conflicts}, couch_key_tree:merge([TwoChildSibs], Stemmed1b, 10), "Merging a tree with a stem." ), + TwoChildSibs2 = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", [{"1bb", "boo", []}]}]}}, + Stemmed1bb = {3, {"1bb", "boo", []}}, etap:is( {[TwoChildSibs2], no_conflicts}, couch_key_tree:merge([TwoChildSibs2], Stemmed1bb, 10), "Merging a stem at a deeper level." ), + StemmedTwoChildSibs2 = [{2,{"1a", "bar", []}}, + {2,{"1b", "bar", [{"1bb", "boo", []}]}}], + + etap:is( + {StemmedTwoChildSibs2, no_conflicts}, + couch_key_tree:merge(StemmedTwoChildSibs2, Stemmed1bb, 10), + "Merging a stem at a deeper level against paths at deeper levels." + ), + + Stemmed1aa = {3, {"1aa", "bar", []}}, etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], Stemmed1aa, 10), "Merging a single tree with a deeper stem." ), + Stemmed1a = {2, {"1a", "bar", [{"1aa", "bar", []}]}}, etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], Stemmed1a, 10), @@ -99,6 +129,7 @@ test() -> "More merging." ), + OneChild = {1, {"1","foo",[{"1a", "bar", []}]}}, Expect1 = [OneChild, Stemmed1aa], etap:is( {Expect1, conflicts}, @@ -112,4 +143,34 @@ test() -> "Merge should have no conflicts." ), + %% this test is based on couch-902-test-case2.py + %% foo has conflicts from replication at depth two + %% foo3 is the current value + Foo = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", []} + ]}}, + %% foo now has an attachment added, which leads to foo4 and val4 + %% off foo3 + Bar = {1, {"foo", + [], + [{"foo3", + [], + [{"foo4","val4",[]} + ]}]}}, + %% this is what the merge returns + %% note that it ignore the conflicting branch as there's no match + FooBar = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", [{"foo4","val4",[]}]} + ]}}, + + etap:is( + {[FooBar], no_conflicts}, + couch_key_tree:merge([Foo],Bar,10), + "Merging trees with conflicts ought to behave." + ), + ok. |