% Licensed under the Apache License, Version 2.0 (the "License"); you may not % use this file except in compliance with the License. You may obtain a copy of % the License at % % http://www.apache.org/licenses/LICENSE-2.0 % % Unless required by applicable law or agreed to in writing, software % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the % License for the specific language governing permissions and limitations under % the License. -module(couch_view_updater). -export([update/3, do_maps/4, do_writes/5, load_docs/3]). -include("couch_db.hrl"). -spec update(_, #group{}, Dbname::binary()) -> no_return(). update(Owner, Group, DbName) when is_binary(DbName) -> {ok, Db} = couch_db:open_int(DbName, []), try update(Owner, Group, Db) after couch_db:close(Db) end; update(Owner, Group, #db{name = DbName} = Db) -> #group{ name = GroupName, current_seq = Seq, purge_seq = PurgeSeq } = Group, % compute on all docs modified since we last computed. TotalChanges = couch_db:count_changes_since(Db, Seq), couch_task_status:add_task([ {type, indexer}, {database, DbName}, {design_document, GroupName}, {progress, 0}, {changes_done, 0}, {total_changes, TotalChanges} ]), couch_task_status:set_update_frequency(500), DbPurgeSeq = couch_db:get_purge_seq(Db), Group2 = if DbPurgeSeq == PurgeSeq -> Group; DbPurgeSeq == PurgeSeq + 1 -> purge_index(Group, Db); true -> exit(reset) end, {ok, MapQueue} = couch_work_queue:new( [{max_size, 100000}, {max_items, 500}]), {ok, WriteQueue} = couch_work_queue:new( [{max_size, 100000}, {max_items, 500}]), Self = self(), ViewEmptyKVs = [{View, []} || View <- Group2#group.views], spawn_link(?MODULE, do_maps, [Group, MapQueue, WriteQueue, ViewEmptyKVs]), spawn_link(?MODULE, do_writes, [Self, Owner, Group2, WriteQueue, Seq == 0]), #group{ design_options = DesignOptions } = Group, IncludeDesign = couch_util:get_value(<<"include_design">>, DesignOptions, false), LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false), DocOpts = case LocalSeq of true -> [conflicts, deleted_conflicts, local_seq]; _ -> [conflicts, deleted_conflicts] end, EnumFun = fun ?MODULE:load_docs/3, Acc0 = {0, Db, MapQueue, DocOpts, IncludeDesign, TotalChanges}, {ok, _, _} = couch_db:enum_docs_since(Db, Seq, EnumFun, Acc0, []), couch_work_queue:close(MapQueue), receive {new_group, NewGroup} -> exit({new_group, NewGroup#group{current_seq=couch_db:get_update_seq(Db)}}) end. load_docs(DocInfo, _, {I, Db, MapQueue, DocOpts, IncludeDesign, Total} = Acc) -> couch_task_status:update([{changes_done, I}, {progress, (I * 100) div Total}]), load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign), {ok, setelement(1, Acc, I+1)}. purge_index(#group{views=Views, id_btree=IdBtree}=Group, Db) -> {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db), Ids = [Id || {Id, _Revs} <- PurgedIdsRevs], {ok, Lookups, IdBtree2} = couch_btree:query_modify(IdBtree, Ids, [], Ids), % now populate the dictionary with all the keys to delete ViewKeysToRemoveDict = lists:foldl( fun({ok,{DocId,ViewNumRowKeys}}, ViewDictAcc) -> lists:foldl( fun({ViewNum, RowKey}, ViewDictAcc2) -> dict:append(ViewNum, {RowKey, DocId}, ViewDictAcc2) end, ViewDictAcc, ViewNumRowKeys); ({not_found, _}, ViewDictAcc) -> ViewDictAcc end, dict:new(), Lookups), % Now remove the values from the btrees PurgeSeq = couch_db:get_purge_seq(Db), Views2 = lists:map( fun(#view{id_num=Num,btree=Btree}=View) -> case dict:find(Num, ViewKeysToRemoveDict) of {ok, RemoveKeys} -> {ok, ViewBtree2} = couch_btree:add_remove(Btree, [], RemoveKeys), case ViewBtree2 =/= Btree of true -> View#view{btree=ViewBtree2, purge_seq=PurgeSeq}; _ -> View#view{btree=ViewBtree2} end; error -> % no keys to remove in this view View end end, Views), Group#group{id_btree=IdBtree2, views=Views2, purge_seq=PurgeSeq}. -spec load_doc(#db{}, #doc_info{}, pid(), [atom()], boolean()) -> ok. load_doc(Db, DI, MapQueue, DocOpts, IncludeDesign) -> DocInfo = case DI of #full_doc_info{id=DocId, update_seq=Seq, deleted=Deleted} -> couch_doc:to_doc_info(DI); #doc_info{id=DocId, high_seq=Seq, revs=[#rev_info{deleted=Deleted}|_]} -> DI end, case {IncludeDesign, DocId} of {false, <>} -> % we skip design docs ok; _ -> if Deleted -> couch_work_queue:queue(MapQueue, {Seq, #doc{id=DocId, deleted=true}}); true -> {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts), couch_work_queue:queue(MapQueue, {Seq, Doc}) end end. -spec do_maps(#group{}, pid(), pid(), any()) -> any(). do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) -> case couch_work_queue:dequeue(MapQueue) of closed -> couch_work_queue:close(WriteQueue), couch_query_servers:stop_doc_map(Group#group.query_server); {ok, Queue} -> Docs = [Doc || {_,#doc{deleted=false}=Doc} <- Queue], DelKVs = [{Id, []} || {_, #doc{deleted=true,id=Id}} <- Queue], LastSeq = lists:max([Seq || {Seq, _Doc} <- Queue]), {Group1, Results} = view_compute(Group, Docs), {ViewKVs, DocIdViewIdKeys} = view_insert_query_results(Docs, Results, ViewEmptyKVs, DelKVs), couch_work_queue:queue(WriteQueue, {LastSeq, ViewKVs, DocIdViewIdKeys}), ?MODULE:do_maps(Group1, MapQueue, WriteQueue, ViewEmptyKVs) end. -spec do_writes(pid(), pid() | nil, #group{}, pid(), boolean()) -> any(). do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) -> case accumulate_writes(WriteQueue, couch_work_queue:dequeue(WriteQueue), nil) of stop -> Parent ! {new_group, Group}; {Go, {NewSeq, ViewKeyValues, DocIdViewIdKeys}} -> Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq, InitialBuild), if Go =:= stop -> Parent ! {new_group, Group2}; true -> case Owner of nil -> ok; _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) end, ?MODULE:do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) end end. accumulate_writes(_, closed, nil) -> stop; accumulate_writes(_, closed, Acc) -> {stop, Acc}; accumulate_writes(W, {ok, Queue}, Acc0) -> {_, _, DocIdViewIdKeys} = NewAcc = lists:foldl( fun(First, nil) -> First; ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, AccViewKVs2 = lists:zipwith( fun({View, KVsIn}, {_View, KVsAcc}) -> {View, KVsIn ++ KVsAcc} end, ViewKVs, AccViewKVs), {erlang:max(Seq, Seq2), AccViewKVs2, DocIdViewIdKeys ++ AccDocIdViewIdKeys} end, Acc0, Queue), % check if we have enough items now MinItems = couch_config:get("view_updater", "min_writer_items", "100"), MinSize = couch_config:get("view_updater", "min_writer_size", "16777216"), case length(DocIdViewIdKeys) >= list_to_integer(MinItems) orelse process_info(self(), memory) >= list_to_integer(MinSize) of true -> {ok, NewAcc}; false -> accumulate_writes(W, couch_work_queue:dequeue(W), NewAcc) end. -spec view_insert_query_results([#doc{}], list(), any(), any()) -> any(). view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) -> {ViewKVs, DocIdViewIdKeysAcc}; view_insert_query_results([Doc|RestDocs], [QueryResults | RestResults], ViewKVs, DocIdViewIdKeysAcc) -> {NewViewKVs, NewViewIdKeys} = view_insert_doc_query_results(Doc, QueryResults, ViewKVs, [], []), NewDocIdViewIdKeys = [{Doc#doc.id, NewViewIdKeys} | DocIdViewIdKeysAcc], view_insert_query_results(RestDocs, RestResults, NewViewKVs, NewDocIdViewIdKeys). -spec view_insert_doc_query_results(#doc{}, list(), list(), any(), any()) -> any(). view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) -> {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}; view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{View, KVs}|RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) -> % Take any identical keys and combine the values ResultKVs2 = lists:foldl( fun({Key,Value}, [{PrevKey,PrevVal}|AccRest]) -> case Key == PrevKey of true -> case PrevVal of {dups, Dups} -> [{PrevKey, {dups, [Value|Dups]}} | AccRest]; _ -> [{PrevKey, {dups, [Value,PrevVal]}} | AccRest] end; false -> [{Key,Value},{PrevKey,PrevVal}|AccRest] end; (KV, []) -> [KV] end, [], lists:sort(ResultKVs)), NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs2], NewViewKVsAcc = [{View, NewKVs ++ KVs} | ViewKVsAcc], NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs2], NewViewIdKeysAcc = NewViewIdKeys ++ ViewIdKeysAcc, view_insert_doc_query_results(Doc, RestResults, RestViewKVs, NewViewKVsAcc, NewViewIdKeysAcc). -spec view_compute(#group{}, [#doc{}]) -> {#group{}, any()}. view_compute(Group, []) -> {Group, []}; view_compute(#group{def_lang=DefLang, lib=Lib, query_server=QueryServerIn}=Group, Docs) -> {ok, QueryServer} = case QueryServerIn of nil -> % doc map not started Definitions = [View#view.def || View <- Group#group.views], couch_query_servers:start_doc_map(DefLang, Definitions, Lib); _ -> {ok, QueryServerIn} end, {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs), {Group#group{query_server=QueryServer}, Results}. write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) -> #group{id_btree=IdBtree} = Group, AddDocIdViewIdKeys = [{DocId, ViewIdKeys} || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys /= []], if InitialBuild -> RemoveDocIds = [], LookupDocIds = []; true -> RemoveDocIds = [DocId || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys == []], LookupDocIds = [DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys] end, {ok, LookupResults, IdBtree2} = couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds), KeysToRemoveByView = lists:foldl( fun(LookupResult, KeysToRemoveByViewAcc) -> case LookupResult of {ok, {DocId, ViewIdKeys}} -> lists:foldl( fun({ViewId, Key}, KeysToRemoveByViewAcc2) -> dict:append(ViewId, {Key, DocId}, KeysToRemoveByViewAcc2) end, KeysToRemoveByViewAcc, ViewIdKeys); {not_found, _} -> KeysToRemoveByViewAcc end end, dict:new(), LookupResults), Views2 = lists:zipwith(fun(View, {_View, AddKeyValues}) -> KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []), {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove), case ViewBtree2 =/= View#view.btree of true -> View#view{btree=ViewBtree2, update_seq=NewSeq}; _ -> View#view{btree=ViewBtree2} end end, Group#group.views, ViewKeyValuesToAdd), Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}.