diff options
Diffstat (limited to 'apps/couch/src/couch_view_updater.erl')
-rw-r--r-- | apps/couch/src/couch_view_updater.erl | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/apps/couch/src/couch_view_updater.erl b/apps/couch/src/couch_view_updater.erl new file mode 100644 index 00000000..8238e3e5 --- /dev/null +++ b/apps/couch/src/couch_view_updater.erl @@ -0,0 +1,296 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_updater). + +-export([update/3, do_maps/4, do_writes/5, load_docs/3]). + +-include("couch_db.hrl"). + +-spec update(_, #group{}, Dbname::binary()) -> no_return(). + +update(Owner, Group, DbName) when is_binary(DbName) -> + {ok, Db} = couch_db:open_int(DbName, []), + try + update(Owner, Group, Db) + after + couch_db:close(Db) + end; + +update(Owner, Group, #db{name = DbName} = Db) -> + #group{ + name = GroupName, + current_seq = Seq, + purge_seq = PurgeSeq + } = Group, + couch_task_status:add_task(<<"View Group Indexer">>, <<DbName/binary," ",GroupName/binary>>, <<"Starting index update">>), + + DbPurgeSeq = couch_db:get_purge_seq(Db), + Group2 = + if DbPurgeSeq == PurgeSeq -> + Group; + DbPurgeSeq == PurgeSeq + 1 -> + couch_task_status:update(<<"Removing purged entries from view index.">>), + purge_index(Group, Db); + true -> + couch_task_status:update(<<"Resetting view index due to lost purge entries.">>), + exit(reset) + end, + {ok, MapQueue} = couch_work_queue:new( + [{max_size, 100000}, {max_items, 500}]), + {ok, WriteQueue} = couch_work_queue:new( + [{max_size, 100000}, {max_items, 500}]), + Self = self(), + ViewEmptyKVs = [{View, []} || View <- Group2#group.views], + spawn_link(?MODULE, do_maps, [Group, MapQueue, WriteQueue, ViewEmptyKVs]), + spawn_link(?MODULE, do_writes, [Self, Owner, Group2, WriteQueue, Seq == 0]), + % compute on all docs modified since we last computed. + TotalChanges = couch_db:count_changes_since(Db, Seq), + % update status every half second + couch_task_status:set_update_frequency(500), + #group{ design_options = DesignOptions } = Group, + IncludeDesign = couch_util:get_value(<<"include_design">>, + DesignOptions, false), + LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false), + DocOpts = + case LocalSeq of + true -> [conflicts, deleted_conflicts, local_seq]; + _ -> [conflicts, deleted_conflicts] + end, + EnumFun = fun ?MODULE:load_docs/3, + Acc0 = {0, Db, MapQueue, DocOpts, IncludeDesign, TotalChanges}, + {ok, _, _} = couch_db:enum_docs_since(Db, Seq, EnumFun, Acc0, []), + couch_task_status:set_update_frequency(0), + couch_task_status:update("Finishing."), + couch_work_queue:close(MapQueue), + receive {new_group, NewGroup} -> + exit({new_group, + NewGroup#group{current_seq=couch_db:get_update_seq(Db)}}) + end. + +load_docs(DocInfo, _, {I, Db, MapQueue, DocOpts, IncludeDesign, Total} = Acc) -> + couch_task_status:update("Processed ~p of ~p changes (~p%)", [I, Total, + (I*100) div Total]), + load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign), + {ok, setelement(1, Acc, I+1)}. + +purge_index(#group{views=Views, id_btree=IdBtree}=Group, Db) -> + {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db), + Ids = [Id || {Id, _Revs} <- PurgedIdsRevs], + {ok, Lookups, IdBtree2} = couch_btree:query_modify(IdBtree, Ids, [], Ids), + + % now populate the dictionary with all the keys to delete + ViewKeysToRemoveDict = lists:foldl( + fun({ok,{DocId,ViewNumRowKeys}}, ViewDictAcc) -> + lists:foldl( + fun({ViewNum, RowKey}, ViewDictAcc2) -> + dict:append(ViewNum, {RowKey, DocId}, ViewDictAcc2) + end, ViewDictAcc, ViewNumRowKeys); + ({not_found, _}, ViewDictAcc) -> + ViewDictAcc + end, dict:new(), Lookups), + + % Now remove the values from the btrees + PurgeSeq = couch_db:get_purge_seq(Db), + Views2 = lists:map( + fun(#view{id_num=Num,btree=Btree}=View) -> + case dict:find(Num, ViewKeysToRemoveDict) of + {ok, RemoveKeys} -> + {ok, ViewBtree2} = couch_btree:add_remove(Btree, [], RemoveKeys), + case ViewBtree2 =/= Btree of + true -> + View#view{btree=ViewBtree2, purge_seq=PurgeSeq}; + _ -> + View#view{btree=ViewBtree2} + end; + error -> % no keys to remove in this view + View + end + end, Views), + Group#group{id_btree=IdBtree2, + views=Views2, + purge_seq=PurgeSeq}. + +-spec load_doc(#db{}, #doc_info{}, pid(), [atom()], boolean()) -> ok. +load_doc(Db, DI, MapQueue, DocOpts, IncludeDesign) -> + DocInfo = case DI of + #full_doc_info{id=DocId, update_seq=Seq, deleted=Deleted} -> + couch_doc:to_doc_info(DI); + #doc_info{id=DocId, high_seq=Seq, revs=[#rev_info{deleted=Deleted}|_]} -> + DI + end, + case {IncludeDesign, DocId} of + {false, <<?DESIGN_DOC_PREFIX, _/binary>>} -> % we skip design docs + ok; + _ -> + if Deleted -> + couch_work_queue:queue(MapQueue, {Seq, #doc{id=DocId, deleted=true}}); + true -> + {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts), + couch_work_queue:queue(MapQueue, {Seq, Doc}) + end + end. + +-spec do_maps(#group{}, pid(), pid(), any()) -> any(). +do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) -> + case couch_work_queue:dequeue(MapQueue) of + closed -> + couch_work_queue:close(WriteQueue), + couch_query_servers:stop_doc_map(Group#group.query_server); + {ok, Queue} -> + Docs = [Doc || {_,#doc{deleted=false}=Doc} <- Queue], + DelKVs = [{Id, []} || {_, #doc{deleted=true,id=Id}} <- Queue], + LastSeq = lists:max([Seq || {Seq, _Doc} <- Queue]), + {Group1, Results} = view_compute(Group, Docs), + {ViewKVs, DocIdViewIdKeys} = view_insert_query_results(Docs, + Results, ViewEmptyKVs, DelKVs), + couch_work_queue:queue(WriteQueue, {LastSeq, ViewKVs, DocIdViewIdKeys}), + ?MODULE:do_maps(Group1, MapQueue, WriteQueue, ViewEmptyKVs) + end. + +-spec do_writes(pid(), pid() | nil, #group{}, pid(), boolean()) -> any(). +do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) -> + case accumulate_writes(WriteQueue, couch_work_queue:dequeue(WriteQueue), nil) of + stop -> + Parent ! {new_group, Group}; + {Go, {NewSeq, ViewKeyValues, DocIdViewIdKeys}} -> + Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq, + InitialBuild), + if Go =:= stop -> + Parent ! {new_group, Group2}; + true -> + case Owner of + nil -> ok; + _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) + end, + ?MODULE:do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) + end + end. + +accumulate_writes(_, closed, nil) -> + stop; +accumulate_writes(_, closed, Acc) -> + {stop, Acc}; +accumulate_writes(W, {ok, Queue}, Acc0) -> + {_, _, DocIdViewIdKeys} = NewAcc = lists:foldl( + fun(First, nil) -> First; ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> + {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, + AccViewKVs2 = lists:zipwith( + fun({View, KVsIn}, {_View, KVsAcc}) -> + {View, KVsIn ++ KVsAcc} + end, ViewKVs, AccViewKVs), + {erlang:max(Seq, Seq2), AccViewKVs2, + DocIdViewIdKeys ++ AccDocIdViewIdKeys} + end, Acc0, Queue), + % check if we have enough items now + MinItems = couch_config:get("view_updater", "min_writer_items", "100"), + MinSize = couch_config:get("view_updater", "min_writer_size", "16777216"), + case length(DocIdViewIdKeys) >= list_to_integer(MinItems) orelse + process_info(self(), memory) >= list_to_integer(MinSize) of + true -> + {ok, NewAcc}; + false -> + accumulate_writes(W, couch_work_queue:dequeue(W), NewAcc) + end. + +-spec view_insert_query_results([#doc{}], list(), any(), any()) -> any(). +view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) -> + {ViewKVs, DocIdViewIdKeysAcc}; +view_insert_query_results([Doc|RestDocs], [QueryResults | RestResults], ViewKVs, DocIdViewIdKeysAcc) -> + {NewViewKVs, NewViewIdKeys} = view_insert_doc_query_results(Doc, QueryResults, ViewKVs, [], []), + NewDocIdViewIdKeys = [{Doc#doc.id, NewViewIdKeys} | DocIdViewIdKeysAcc], + view_insert_query_results(RestDocs, RestResults, NewViewKVs, NewDocIdViewIdKeys). + +-spec view_insert_doc_query_results(#doc{}, list(), list(), any(), any()) -> + any(). +view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) -> + {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}; +view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{View, KVs}|RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) -> + % Take any identical keys and combine the values + ResultKVs2 = lists:foldl( + fun({Key,Value}, [{PrevKey,PrevVal}|AccRest]) -> + case Key == PrevKey of + true -> + case PrevVal of + {dups, Dups} -> + [{PrevKey, {dups, [Value|Dups]}} | AccRest]; + _ -> + [{PrevKey, {dups, [Value,PrevVal]}} | AccRest] + end; + false -> + [{Key,Value},{PrevKey,PrevVal}|AccRest] + end; + (KV, []) -> + [KV] + end, [], lists:sort(ResultKVs)), + NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs2], + NewViewKVsAcc = [{View, NewKVs ++ KVs} | ViewKVsAcc], + NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs2], + NewViewIdKeysAcc = NewViewIdKeys ++ ViewIdKeysAcc, + view_insert_doc_query_results(Doc, RestResults, RestViewKVs, NewViewKVsAcc, NewViewIdKeysAcc). + +-spec view_compute(#group{}, [#doc{}]) -> {#group{}, any()}. +view_compute(Group, []) -> + {Group, []}; +view_compute(#group{def_lang=DefLang, lib=Lib, query_server=QueryServerIn}=Group, Docs) -> + {ok, QueryServer} = + case QueryServerIn of + nil -> % doc map not started + Definitions = [View#view.def || View <- Group#group.views], + couch_query_servers:start_doc_map(DefLang, Definitions, Lib); + _ -> + {ok, QueryServerIn} + end, + {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs), + {Group#group{query_server=QueryServer}, Results}. + + +write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) -> + #group{id_btree=IdBtree} = Group, + + AddDocIdViewIdKeys = [{DocId, ViewIdKeys} || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys /= []], + if InitialBuild -> + RemoveDocIds = [], + LookupDocIds = []; + true -> + RemoveDocIds = [DocId || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys == []], + LookupDocIds = [DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys] + end, + {ok, LookupResults, IdBtree2} + = couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds), + KeysToRemoveByView = lists:foldl( + fun(LookupResult, KeysToRemoveByViewAcc) -> + case LookupResult of + {ok, {DocId, ViewIdKeys}} -> + lists:foldl( + fun({ViewId, Key}, KeysToRemoveByViewAcc2) -> + dict:append(ViewId, {Key, DocId}, KeysToRemoveByViewAcc2) + end, + KeysToRemoveByViewAcc, ViewIdKeys); + {not_found, _} -> + KeysToRemoveByViewAcc + end + end, + dict:new(), LookupResults), + Views2 = lists:zipwith(fun(View, {_View, AddKeyValues}) -> + KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []), + {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove), + case ViewBtree2 =/= View#view.btree of + true -> + View#view{btree=ViewBtree2, update_seq=NewSeq}; + _ -> + View#view{btree=ViewBtree2} + end + end, Group#group.views, ViewKeyValuesToAdd), + Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}. + + |