From dd7583b3a4b418f8778a3f82e4aa3f17d1868b02 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 9 Aug 2010 10:18:11 -0400 Subject: Improve batching of writes in view updater This patch causes the view_updater to wait until either a minimum number of items or a maximum process memory size has been reached before committing the changes to disk. See COUCHDB-700 for discussion BugzID: 11885 --- apps/couch/src/couch_view_updater.erl | 58 +++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'apps') diff --git a/apps/couch/src/couch_view_updater.erl b/apps/couch/src/couch_view_updater.erl index b04a80ce..a07e5dd3 100644 --- a/apps/couch/src/couch_view_updater.erl +++ b/apps/couch/src/couch_view_updater.erl @@ -145,29 +145,47 @@ do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) -> -spec do_writes(pid(), pid() | nil, #group{}, pid(), boolean()) -> any(). do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) -> - case couch_work_queue:dequeue(WriteQueue) of - closed -> + case accumulate_writes(WriteQueue, couch_work_queue:dequeue(WriteQueue), nil) of + stop -> Parent ! {new_group, Group}; - {ok, Queue} -> - {NewSeq, ViewKeyValues, DocIdViewIdKeys} = lists:foldl( - fun({Seq, ViewKVs, DocIdViewIdKeys}, nil) -> - {Seq, ViewKVs, DocIdViewIdKeys}; - ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> - {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, - AccViewKVs2 = lists:zipwith( - fun({View, KVsIn}, {_View, KVsAcc}) -> - {View, KVsIn ++ KVsAcc} - end, ViewKVs, AccViewKVs), - {lists:max([Seq, Seq2]), - AccViewKVs2, DocIdViewIdKeys ++ AccDocIdViewIdKeys} - end, nil, Queue), + {Go, {NewSeq, ViewKeyValues, DocIdViewIdKeys}} -> Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq, InitialBuild), - case Owner of - nil -> ok; - _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) - end, - ?MODULE:do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) + if Go =:= stop -> + Parent ! {new_group, Group2}; + true -> + case Owner of + nil -> ok; + _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) + end, + ?MODULE:do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) + end + end. + +accumulate_writes(_, closed, nil) -> + stop; +accumulate_writes(_, closed, Acc) -> + {stop, Acc}; +accumulate_writes(W, {ok, Queue}, Acc0) -> + {_, _, DocIdViewIdKeys} = NewAcc = lists:foldl( + fun(First, nil) -> First; ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> + {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, + AccViewKVs2 = lists:zipwith( + fun({View, KVsIn}, {_View, KVsAcc}) -> + {View, KVsIn ++ KVsAcc} + end, ViewKVs, AccViewKVs), + {erlang:max(Seq, Seq2), AccViewKVs2, + DocIdViewIdKeys ++ AccDocIdViewIdKeys} + end, Acc0, Queue), + % check if we have enough items now + MinItems = couch_config:get("view_updater", "min_writer_items", "100"), + MinSize = couch_config:get("view_updater", "min_writer_size", "16777216"), + case length(DocIdViewIdKeys) >= list_to_integer(MinItems) orelse + process_info(self(), memory) >= list_to_integer(MinSize) of + true -> + {ok, NewAcc}; + false -> + accumulate_writes(W, couch_work_queue:dequeue(W), NewAcc) end. -spec view_insert_query_results([#doc{}], list(), any(), any()) -> any(). -- cgit v1.2.3