From 17333052c495d2015accb7fb57cfcc96d3e7f011 Mon Sep 17 00:00:00 2001 From: "Damien F. Katz" Date: Thu, 14 Aug 2008 17:47:24 +0000 Subject: CouchDB performance work. Faster Btree updates and lookups. git-svn-id: https://svn.apache.org/repos/asf/incubator/couchdb/trunk@685975 13f79535-47bb-0310-9956-ffa450edef68 --- share/www/script/couch_tests.js | 29 ++++--- src/couchdb/couch_btree.erl | 173 +++++++++++++++++++++++----------------- src/couchdb/couch_db.hrl | 2 +- src/couchdb/couch_view.erl | 60 +++++++++++--- 4 files changed, 167 insertions(+), 97 deletions(-) diff --git a/share/www/script/couch_tests.js b/share/www/script/couch_tests.js index 728aad33..287ba6fb 100644 --- a/share/www/script/couch_tests.js +++ b/share/www/script/couch_tests.js @@ -363,29 +363,30 @@ var tests = { T(db.bulkSave(docs).ok); var summate = function(N) {return (N+1)*N/2;}; - var map = function (doc) {emit(doc.integer, doc.integer)}; + var map = function (doc) { + emit(doc.integer, doc.integer); + emit(doc.integer, doc.integer)}; var reduce = function (keys, values) { return sum(values); }; var result = db.query(map, reduce); - T(result.rows[0].value == summate(numDocs)); + T(result.rows[0].value == 2*summate(numDocs)); result = db.query(map, reduce, {startkey: 4, endkey: 4}); - T(result.rows[0].value == 4); + T(result.rows[0].value == 8); result = db.query(map, reduce, {startkey: 4, endkey: 5}); - T(result.rows[0].value == 9); + T(result.rows[0].value == 18); result = db.query(map, reduce, {startkey: 4, endkey: 6}); - T(result.rows[0].value == 15); + T(result.rows[0].value == 30); result = db.query(map, reduce, {group:true, count:3}); - T(result.rows.length == 3); - T(result.rows[0].value == 1); - T(result.rows[1].value == 2); - T(result.rows[2].value == 3); + T(result.rows[0].value == 2); + T(result.rows[1].value == 4); + T(result.rows[2].value == 6); for(var i=1; i {NodeType, NodeList} = get_node(Bt, Pointer), case NodeType of kp_node -> - lookup_kpnode(Bt, NodeList, Keys, []); + lookup_kpnode(Bt, list_to_tuple(NodeList), 1, Keys, []); kv_node -> - lookup_kvnode(Bt, NodeList, Keys, []) + lookup_kvnode(Bt, list_to_tuple(NodeList), 1, Keys, []) end. -lookup_kpnode(_Bt, [], Keys, Output) -> - {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; -lookup_kpnode(_Bt, _KPs, [], Output) -> +lookup_kpnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> {ok, lists:reverse(Output)}; + +lookup_kpnode(_Bt, NodeTuple, LowerBound, Keys, Output) when size(NodeTuple) < LowerBound -> + {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; -lookup_kpnode(Bt, [{Key, PointerInfo} | RestKPs], LookupKeys, Output) -> - % Split the Keys into two lists, queries of values less - % than equals, and greater than the current key +lookup_kpnode(Bt, NodeTuple, LowerBound, [FirstLookupKey | _] = LookupKeys, Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, size(NodeTuple), FirstLookupKey), + {Key, PointerInfo} = element(N, NodeTuple), SplitFun = fun(LookupKey) -> not less(Bt, Key, LookupKey) end, case lists:splitwith(SplitFun, LookupKeys) of {[], GreaterQueries} -> - lookup_kpnode(Bt, RestKPs, GreaterQueries, Output); + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, Output); {LessEqQueries, GreaterQueries} -> {ok, Results} = lookup(Bt, PointerInfo, LessEqQueries), - lookup_kpnode(Bt, RestKPs, GreaterQueries, lists:reverse(Results, Output)) + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, lists:reverse(Results, Output)) end. - -lookup_kvnode(_Bt, _KVs, [], Output) -> +lookup_kvnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> {ok, lists:reverse(Output)}; -lookup_kvnode(_Bt, [], Keys, Output) -> +lookup_kvnode(_Bt, NodeTuple, LowerBound, Keys, Output) when size(NodeTuple) < LowerBound -> % keys not found {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; -lookup_kvnode(Bt, [{Key, Value} | RestKVs], [LookupKey | RestLookupKeys], Output) -> +lookup_kvnode(Bt, NodeTuple, LowerBound, [LookupKey | RestLookupKeys], Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, size(NodeTuple), LookupKey), + {Key, Value} = element(N, NodeTuple), case less(Bt, LookupKey, Key) of true -> - lookup_kvnode(Bt, [{Key, Value} | RestKVs], RestLookupKeys, [{LookupKey, not_found} | Output]); + % LookupKey is less than Key + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, not_found} | Output]); false -> case less(Bt, Key, LookupKey) of true -> % LookupKey is greater than Key - lookup_kvnode(Bt, RestKVs, [LookupKey | RestLookupKeys], Output); + lookup_kvnode(Bt, NodeTuple, N+1, RestLookupKeys, [{LookupKey, not_found} | Output]); false -> % LookupKey is equal to Key - lookup_kvnode(Bt, RestKVs, RestLookupKeys, [{LookupKey, {ok, assemble(Bt, LookupKey, Value)}} | Output]) + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, {ok, assemble(Bt, LookupKey, Value)}} | Output]) end end. @@ -272,17 +276,18 @@ modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> {Pointer, _Reds} -> {NodeType, NodeList} = get_node(Bt, Pointer) end, + NodeTuple = list_to_tuple(NodeList), case NodeType of kp_node -> - {ok, NewNodeList, QueryOutput2, Bt2} = modify_kpnode(Bt, NodeList, Actions, [], QueryOutput); + {ok, NewNodeList, QueryOutput2, Bt2} = modify_kpnode(Bt, NodeTuple, 1, Actions, [], QueryOutput); kv_node -> - {ok, NewNodeList, QueryOutput2, Bt2} = modify_kvnode(Bt, NodeList, Actions, [], QueryOutput) + {ok, NewNodeList, QueryOutput2, Bt2} = modify_kvnode(Bt, NodeTuple, 1, Actions, [], QueryOutput) end, case NewNodeList of [] -> % no nodes remain {ok, [], QueryOutput2, Bt2}; NodeList -> % nothing changed - {LastKey, _LastValue} = lists:last(NodeList), + {LastKey, _LastValue} = element(size(NodeTuple), NodeTuple), {ok, [{LastKey, RootPointerInfo}], QueryOutput2, Bt2}; _Else2 -> {ok, ResultList, Bt3} = write_node(Bt2, NodeType, NewNodeList), @@ -299,17 +304,6 @@ reduce_node(#btree{reduce=R}, kv_node, NodeList) -> get_node(#btree{fd = Fd}, NodePos) -> {ok, {NodeType, NodeList}} = couch_file:pread_term(Fd, NodePos), - case NodeType of - kp_node -> - % Node pointers always point backward on disk. - % Validating this prevents infinite loops should - % a disk corruption occur. - [throw({error, disk_corruption}) - || {_Key, {SubNodePos, _Reds}} - <- NodeList, SubNodePos >= NodePos]; - kv_node -> - ok - end, {NodeType, NodeList}. write_node(Bt, NodeType, NodeList) -> @@ -326,77 +320,108 @@ write_node(Bt, NodeType, NodeList) -> ANodeList <- NodeListList ], {ok, ResultList, Bt}. - - -modify_kpnode(Bt, KPs, [], ResultNode, QueryOutput) -> - % processed all queries for the current tree - {ok, lists:reverse(ResultNode, KPs), QueryOutput, Bt}; - -modify_kpnode(Bt, [], Actions, [], QueryOutput) -> + +modify_kpnode(Bt, {}, _LowerBound, Actions, [], QueryOutput) -> modify_node(Bt, nil, Actions, QueryOutput); - -modify_kpnode(Bt, [], Actions, [{_Key, PointerInfo} | ResultNode], QueryOutput) -> - {ok, ChildKPs, QueryOutput2, Bt2} = modify_node(Bt, PointerInfo, Actions, QueryOutput), - {ok, lists:reverse(ResultNode, ChildKPs), QueryOutput2, Bt2}; - -modify_kpnode(Bt, [{Key,PointerInfo} | RestKPs], Actions, ResultNode, QueryOutput) -> - % Split the actions into two lists, queries of values <= and > than the current key - SplitFun = fun({_ActionType, ActionKey, _ActionValue}) -> - not less(Bt, Key, ActionKey) - end, - case lists:splitwith(SplitFun, Actions) of - {[], GreaterQueries} -> - modify_kpnode(Bt, RestKPs, GreaterQueries, [{Key, PointerInfo} | ResultNode], QueryOutput); - {LessEqQueries, GreaterQueries} -> - {ok, ChildKPs, QueryOutput2, Bt2} = modify_node(Bt, PointerInfo, LessEqQueries, QueryOutput), - modify_kpnode(Bt2, RestKPs, GreaterQueries, lists:reverse(ChildKPs, ResultNode), QueryOutput2) +modify_kpnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + size(NodeTuple), [])), QueryOutput, Bt}; +modify_kpnode(Bt, NodeTuple, LowerBound, + [{_, FirstActionKey, _}|_]=Actions, ResultNode, QueryOutput) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, size(NodeTuple), FirstActionKey), + case N == size(NodeTuple) of + true -> + % perform remaining actions on last node + {_, PointerInfo} = element(size(NodeTuple), NodeTuple), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, Actions, QueryOutput), + NodeList = lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + size(NodeTuple) - 1, ChildKPs)), + {ok, NodeList, QueryOutput2, Bt2}; + false -> + {NodeKey, PointerInfo} = element(N, NodeTuple), + SplitFun = fun({_ActionType, ActionKey, _ActionValue}) -> + not less(Bt, NodeKey, ActionKey) + end, + {LessEqQueries, GreaterQueries} = lists:splitwith(SplitFun, Actions), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, LessEqQueries, QueryOutput), + ResultNode2 = lists:reverse(ChildKPs, bounded_tuple_to_revlist(NodeTuple, + LowerBound, N - 1, ResultNode)), + modify_kpnode(Bt2, NodeTuple, N+1, GreaterQueries, ResultNode2, QueryOutput2) + end. + +bounded_tuple_to_revlist(_Tuple, Start, End, Tail) when Start > End -> + Tail; +bounded_tuple_to_revlist(Tuple, Start, End, Tail) -> + bounded_tuple_to_revlist(Tuple, Start+1, End, [element(Start, Tuple)|Tail]). + +bounded_tuple_to_list(Tuple, Start, End, Tail) -> + bounded_tuple_to_list2(Tuple, Start, End, [], Tail). + +bounded_tuple_to_list2(_Tuple, Start, End, Acc, Tail) when Start > End -> + lists:reverse(Acc, Tail); +bounded_tuple_to_list2(Tuple, Start, End, Acc, Tail) -> + bounded_tuple_to_list2(Tuple, Start + 1, End, [element(Start, Tuple) | Acc], Tail). + +find_first_gteq(_Bt, _Tuple, Start, End, _Key) when Start == End -> + End; +find_first_gteq(Bt, Tuple, Start, End, Key) -> + Mid = Start + ((End - Start) div 2), + {TupleKey, _} = element(Mid, Tuple), + case less(Bt, TupleKey, Key) of + true -> + find_first_gteq(Bt, Tuple, Mid+1, End, Key); + false -> + find_first_gteq(Bt, Tuple, Start, Mid, Key) end. -modify_kvnode(Bt, KVs, [], ResultNode, QueryOutput) -> - {ok, lists:reverse(ResultNode, KVs), QueryOutput, Bt}; -modify_kvnode(Bt, [], [{ActionType, ActionKey, ActionValue} | RestActions], ResultNode, QueryOutput) -> +modify_kvnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, size(NodeTuple), [])), QueryOutput, Bt}; +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], ResultNode, QueryOutput) when LowerBound > size(NodeTuple) -> case ActionType of insert -> - modify_kvnode(Bt, [], RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); remove -> % just drop the action - modify_kvnode(Bt, [], RestActions, ResultNode, QueryOutput); + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, QueryOutput); fetch -> % the key/value must not exist in the tree - modify_kvnode(Bt, [], RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) end; -modify_kvnode(Bt, [{Key, Value} | RestKVs], [{ActionType, ActionKey, ActionValue} | RestActions], ResultNode, QueryOutput) -> +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], AccNode, QueryOutput) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, size(NodeTuple), ActionKey), + {Key, Value} = element(N, NodeTuple), + ResultNode = bounded_tuple_to_revlist(NodeTuple, LowerBound, N - 1, AccNode), case less(Bt, ActionKey, Key) of true -> case ActionType of insert -> % ActionKey is less than the Key, so insert - modify_kvnode(Bt, [{Key, Value} | RestKVs], RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + modify_kvnode(Bt, NodeTuple, N, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); remove -> % ActionKey is less than the Key, just drop the action - modify_kvnode(Bt, [{Key, Value} | RestKVs], RestActions, ResultNode, QueryOutput); + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, QueryOutput); fetch -> % ActionKey is less than the Key, the key/value must not exist in the tree - modify_kvnode(Bt, [{Key, Value} | RestKVs], RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) end; false -> + % ActionKey and Key are maybe equal. case less(Bt, Key, ActionKey) of - true -> - % ActionKey is greater than Key - modify_kvnode(Bt, RestKVs, [{ActionType, ActionKey, ActionValue} | RestActions], [{Key, Value} | ResultNode], QueryOutput); false -> - % InsertKey is equal to Key case ActionType of insert -> - % ActionKey is less than the Key, so insert - modify_kvnode(Bt, RestKVs, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + modify_kvnode(Bt, NodeTuple, N+1, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); remove -> - modify_kvnode(Bt, RestKVs, RestActions, ResultNode, QueryOutput); + modify_kvnode(Bt, NodeTuple, N+1, RestActions, ResultNode, QueryOutput); fetch -> % ActionKey is equal to the Key, insert into the QueryOuput, but re-process the node % since an identical action key can follow it. - modify_kvnode(Bt, [{Key, Value} | RestKVs], RestActions, ResultNode, [{ok, assemble(Bt, Key, Value)} | QueryOutput]) - end + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{ok, assemble(Bt, Key, Value)} | QueryOutput]) + end; + true -> + modify_kvnode(Bt, NodeTuple, N + 1, [{ActionType, ActionKey, ActionValue} | RestActions], [{Key, Value} | ResultNode], QueryOutput) end end. diff --git a/src/couchdb/couch_db.hrl b/src/couchdb/couch_db.hrl index 0c274396..43860a1a 100644 --- a/src/couchdb/couch_db.hrl +++ b/src/couchdb/couch_db.hrl @@ -52,7 +52,7 @@ -record(doc, { id = "", - revs = [], % in the form [{RevId, IsAvailable}, ...] + revs = [], % the json body object. body = {obj, []}, diff --git a/src/couchdb/couch_view.erl b/src/couchdb/couch_view.erl index 3ed51d80..fc304dee 100644 --- a/src/couchdb/couch_view.erl +++ b/src/couchdb/couch_view.erl @@ -96,6 +96,14 @@ get_reduce_view0(Name, Lang, [#view{reduce_funs=RedFuns}=View|Rest]) -> N -> {ok, {reduce, N, Lang, View}} end. +expand_dups([], Acc) -> + lists:reverse(Acc); +expand_dups([{Key, {dups, Vals}} | Rest], Acc) -> + Expanded = [{Key, Val} || Val <- Vals], + expand_dups(Rest, Expanded ++ Acc); +expand_dups([KV | Rest], Acc) -> + expand_dups(Rest, [KV | Acc]). + fold_reduce({temp_reduce, #view{btree=Bt}}, Dir, StartKey, EndKey, GroupFun, Fun, Acc) -> WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> @@ -111,7 +119,7 @@ fold_reduce({reduce, NthRed, Lang, #view{btree=Bt, reduce_funs=RedFuns}}, Dir, S {_Name, FunSrc} = lists:nth(NthRed,RedFuns), ReduceFun = fun(reduce, KVs) -> - {ok, Reduced} = couch_query_servers:reduce(Lang, [FunSrc], KVs), + {ok, Reduced} = couch_query_servers:reduce(Lang, [FunSrc], expand_dups(KVs, [])), {0, PreResultPadding ++ Reduced ++ PostResultPadding}; (rereduce, Reds) -> UserReds = [[lists:nth(NthRed, UserRedsList)] || {_, UserRedsList} <- Reds], @@ -151,7 +159,10 @@ reduce_to_count(Reductions) -> {Count, _} = couch_btree:final_reduce( fun(reduce, KVs) -> - {length(KVs), []}; + Count = lists:sum( + [case V of {dups, Vals} -> length(Vals); _ -> 1 end + || {_,V} <- KVs]), + {Count, []}; (rereduce, Reds) -> {lists:sum([Count0 || {Count0, _} <- Reds]), []} end, Reductions), @@ -190,13 +201,25 @@ design_doc_to_view_group(#doc{id=Id,body={obj, Fields}}) -> Group = #group{name=Id, views=Views, def_lang=Language}, Group#group{sig=erlang:md5(term_to_binary(Group))}. - +fold_fun(_Fun, [], _, Acc) -> + {ok, Acc}; +fold_fun(Fun, [KV|Rest], {KVReds, Reds}, Acc) -> + case Fun(KV, {KVReds, Reds}, Acc) of + {ok, Acc2} -> + fold_fun(Fun, Rest, {[KV|KVReds], Reds}, Acc2); + {stop, Acc2} -> + {stop, Acc2} + end. fold(#view{btree=Btree}, Dir, Fun, Acc) -> - {ok, _AccResult} = couch_btree:fold(Btree, Dir, Fun, Acc). + fold(Btree, nil, Dir, Fun, Acc). fold(#view{btree=Btree}, StartKey, Dir, Fun, Acc) -> - {ok, _AccResult} = couch_btree:fold(Btree, StartKey, Dir, Fun, Acc). + WrapperFun = + fun(KV, Reds, Acc2) -> + fold_fun(Fun, expand_dups([KV],[]), Reds, Acc2) + end, + {ok, _AccResult} = couch_btree:fold(Btree, StartKey, Dir, WrapperFun, Acc). init(RootDir) -> @@ -496,8 +519,9 @@ init_group(Db, Fd, #group{def_lang=Lang,views=Views}=Group, FunSrcs = [FunSrc || {_Name, FunSrc} <- RedFuns], ReduceFun = fun(reduce, KVs) -> - {ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs, KVs), - {length(KVs), Reduced}; + KVs2 = expand_dups(KVs,[]), + {ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs, KVs2), + {length(KVs2), Reduced}; (rereduce, Reds) -> Count = lists:sum([Count0 || {Count0, _} <- Reds]), UserReds = [UserRedsList || {_, UserRedsList} <- Reds], @@ -643,8 +667,25 @@ view_insert_query_results([Doc|RestDocs], [QueryResults | RestResults], ViewKVs, view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) -> {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}; view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{View, KVs}|RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) -> - NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs], - NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs], + % Take any identical keys and combine the values + ResultKVs2 = lists:foldl( + fun({Key,Value}, [{PrevKey,PrevVal}|AccRest]) -> + case Key == PrevKey of + true -> + case PrevVal of + {dups, Dups} -> + [{PrevKey, {dups, [Value|Dups]}} | AccRest]; + _ -> + [{PrevKey, {dups, [Value,PrevVal]}} | AccRest] + end; + false -> + [{Key,Value},{PrevKey,PrevVal}|AccRest] + end; + (KV, []) -> + [KV] + end, [], lists:sort(ResultKVs)), + NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs2], + NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs2], NewViewKVsAcc = [{View, NewKVs ++ KVs} | ViewKVsAcc], NewViewIdKeysAcc = NewViewIdKeys ++ ViewIdKeysAcc, view_insert_doc_query_results(Doc, RestResults, RestViewKVs, NewViewKVsAcc, NewViewIdKeysAcc). @@ -661,6 +702,7 @@ view_compute(#group{def_lang=DefLang, query_server=QueryServerIn}=Group, Docs) - {ok, QueryServerIn} end, {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs), + {Group#group{query_server=QueryServer}, Results}. -- cgit v1.2.3