diff options
Diffstat (limited to 'src/couchdb/couch_view_updater.erl')
-rw-r--r-- | src/couchdb/couch_view_updater.erl | 252 |
1 files changed, 0 insertions, 252 deletions
% diff --git a/src/couchdb/couch_view_updater.erl b/src/couchdb/couch_view_updater.erl
% deleted file mode 100644
% index 2a9c960f..00000000
% --- a/src/couchdb/couch_view_updater.erl
% +++ /dev/null
% @@ -1,252 +0,0 @@

% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_view_updater).

-export([update/2]).

-include("couch_db.hrl").

-spec update(_, #group{}) -> no_return().

% Brings the view group up to date with its database.
%
% Owner is handed on to do_writes/5, which casts partial updates to it
% unless it is nil. Group is the #group{} to update.
%
% A mapper process (do_maps/4) and a writer process (do_writes/5) are
% linked to the caller; this process feeds every document changed since the
% group's current_seq into the map queue, closes the queue, and waits for
% the writer to report back.
%
% Never returns normally: exits with {new_group, UpdatedGroup} on success,
% or with reset when the database purge sequence is more than one step
% ahead of the group's.
update(Owner, Group) ->
    #group{
        db = #db{name = DbName} = Db,
        name = GroupName,
        current_seq = Seq,
        purge_seq = PurgeSeq,
        design_options = DesignOptions
    } = Group,
    TaskName = <<DbName/binary, " ", GroupName/binary>>,
    couch_task_status:add_task(<<"View Group Indexer">>, TaskName,
        <<"Starting index update">>),

    PurgedGroup = catch_up_on_purges(Group, PurgeSeq, couch_db:get_purge_seq(Db)),

    {ok, MapQueue} = couch_work_queue:new(100000, 500),
    {ok, WriteQueue} = couch_work_queue:new(100000, 500),
    Updater = self(),
    EmptyKVs = [{View, []} || View <- PurgedGroup#group.views],
    IsInitialBuild = Seq == 0,
    % The mapper is started with the group as it was passed in; the writer
    % gets the group with purges already applied.
    spawn_link(fun() -> do_maps(Group, MapQueue, WriteQueue, EmptyKVs) end),
    spawn_link(fun() ->
        do_writes(Updater, Owner, PurgedGroup, WriteQueue, IsInitialBuild)
    end),

    % Walk every doc modified since the last indexed sequence.
    TotalChanges = couch_db:count_changes_since(Db, Seq),
    % update status every half second
    couch_task_status:set_update_frequency(500),
    IncludeDesign = couch_util:get_value(<<"include_design">>, DesignOptions, false),
    LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false),
    DocOpts = doc_open_options(LocalSeq),
    EnqueueChange =
        fun(DocInfo, _, Processed) ->
            couch_task_status:update("Processed ~p of ~p changes (~p%)",
                [Processed, TotalChanges, (Processed * 100) div TotalChanges]),
            load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign),
            {ok, Processed + 1}
        end,
    {ok, _, _} = couch_db:enum_docs_since(Db, Seq, EnqueueChange, 0, []),

    couch_task_status:set_update_frequency(0),
    couch_task_status:update("Finishing."),
    couch_work_queue:close(MapQueue),
    % The writer sends {new_group, _} once its queue has been closed.
    receive
        {new_group, NewGroup} ->
            exit({new_group,
                NewGroup#group{current_seq = couch_db:get_update_seq(Db)}})
    end.

% Reconciles the group's purge sequence with the database's: unchanged when
% equal, purged entries removed when the database is exactly one ahead,
% otherwise the process exits with reset.
catch_up_on_purges(Group, PurgeSeq, DbPurgeSeq) when DbPurgeSeq == PurgeSeq ->
    Group;
catch_up_on_purges(Group, PurgeSeq, DbPurgeSeq) when DbPurgeSeq == PurgeSeq + 1 ->
    couch_task_status:update(<<"Removing purged entries from view index.">>),
    purge_index(Group);
catch_up_on_purges(_Group, _PurgeSeq, _DbPurgeSeq) ->
    couch_task_status:update(<<"Resetting view index due to lost purge entries.">>),
    exit(reset).

% Options used when opening documents for mapping; local_seq is added only
% when the design option "local_seq" is exactly true.
doc_open_options(true) ->
    [conflicts, deleted_conflicts, local_seq];
doc_open_options(_) ->
    [conflicts, deleted_conflicts].

% Removes the rows emitted by the most recently purged documents from the
% group's id btree and from every view btree, and returns the group with
% its purge_seq set to the database's current purge sequence.
purge_index(#group{db = Db, views = Views, id_btree = IdBtree} = Group) ->
    {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db),
    PurgedIds = [Id || {Id, _Revs} <- PurgedIdsRevs],
    % The same ids are passed both as lookup keys and as removal keys.
    {ok, Lookups, IdBtree2} =
        couch_btree:query_modify(IdBtree, PurgedIds, [], PurgedIds),

    % Group the {RowKey, DocId} pairs to delete by view number.
    RowsByView = lists:foldl(fun collect_purged_rows/2, dict:new(), Lookups),

    % Now remove those rows from the view btrees.
    PrunedViews = lists:map(
        fun(View) -> drop_purged_rows(View, RowsByView) end,
        Views),
    Group#group{
        id_btree = IdBtree2,
        views = PrunedViews,
        purge_seq = couch_db:get_purge_seq(Db)
    }.

% Fold step: adds every {RowKey, DocId} of one id-btree lookup result to the
% per-view dictionary; lookups that found nothing leave it unchanged.
collect_purged_rows({ok, {DocId, ViewNumRowKeys}}, RowsByView) ->
    lists:foldl(
        fun({ViewNum, RowKey}, Acc) ->
            dict:append(ViewNum, {RowKey, DocId}, Acc)
        end,
        RowsByView, ViewNumRowKeys);
collect_purged_rows({not_found, _}, RowsByView) ->
    RowsByView.

% Removes the rows recorded for this view's id_num from its btree; a view
% with no recorded rows is returned untouched.
drop_purged_rows(#view{id_num = Num, btree = Btree} = View, RowsByView) ->
    case dict:find(Num, RowsByView) of
        error ->
            View;
        {ok, RemoveKeys} ->
            {ok, Btree2} = couch_btree:add_remove(Btree, [], RemoveKeys),
            View#view{btree = Btree2}
    end.


% Queues one changed document on MapQueue as {Seq, Doc}.
%
% Design documents are skipped (returning ok) when IncludeDesign is false.
% A deleted document is queued as a stub holding only its id and
% deleted=true; any other document is opened with DocOpts first.
load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign) ->
    #doc_info{
        id = DocId,
        high_seq = Seq,
        revs = [#rev_info{deleted = Deleted} | _]
    } = DocInfo,
    case is_skipped_design_doc(IncludeDesign, DocId) of
        true ->
            ok;
        false ->
            Doc = doc_to_map(Deleted, Db, DocInfo, DocId, DocOpts),
            couch_work_queue:queue(MapQueue, {Seq, Doc})
    end.

% True only for a design-document id when include_design is exactly false.
is_skipped_design_doc(false, <<?DESIGN_DOC_PREFIX, _/binary>>) ->
    true;
is_skipped_design_doc(_IncludeDesign, _DocId) ->
    false.

% Builds the document to hand to the mapper: a deletion stub when the
% leading revision is deleted, otherwise the document read from the db.
doc_to_map(true, _Db, _DocInfo, DocId, _DocOpts) ->
    #doc{id = DocId, deleted = true};
doc_to_map(_Deleted, Db, DocInfo, _DocId, DocOpts) ->
    {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts),
    Doc.

% Mapper loop: takes batches of {Seq, Doc} off MapQueue, runs the map
% functions over the non-deleted docs, and queues
% {LastSeq, ViewKVs, DocIdViewIdKeys} on WriteQueue.
%
% When MapQueue reports closed, WriteQueue is closed as well and the
% group's query server is stopped.
do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) ->
    case couch_work_queue:dequeue(MapQueue) of
        {ok, Batch} ->
            LiveDocs = [Doc || {_, #doc{deleted = false} = Doc} <- Batch],
            % Deleted docs contribute an empty key list so that the writer
            % removes their previous rows.
            Tombstones = [{Id, []} || {_, #doc{deleted = true, id = Id}} <- Batch],
            HighSeq = lists:max([Seq || {Seq, _Doc} <- Batch]),
            {MappedGroup, MapResults} = view_compute(Group, LiveDocs),
            {ViewKVs, DocIdViewIdKeys} =
                view_insert_query_results(LiveDocs, MapResults, ViewEmptyKVs, Tombstones),
            couch_work_queue:queue(WriteQueue, {HighSeq, ViewKVs, DocIdViewIdKeys}),
            do_maps(MappedGroup, MapQueue, WriteQueue, ViewEmptyKVs);
        closed ->
            couch_work_queue:close(WriteQueue),
            couch_query_servers:stop_doc_map(Group#group.query_server)
    end.

% Writer loop: takes batches off WriteQueue, merges them into one change
% set, writes it with write_changes/5, and reports the new group to Owner
% unless Owner is nil.
%
% When WriteQueue reports closed, the final group is sent to Parent as
% {new_group, Group}.
do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) ->
    case couch_work_queue:dequeue(WriteQueue) of
        {ok, Batches} ->
            {NewSeq, ViewKeyValues, DocIdViewIdKeys} =
                lists:foldl(fun merge_write_batches/2, nil, Batches),
            Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys,
                NewSeq, InitialBuild),
            notify_owner(Owner, Parent, Group2),
            do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild);
        closed ->
            Parent ! {new_group, Group}
    end.

% Fold step over dequeued write batches. The first batch replaces the nil
% seed; each later batch is merged in by keeping the larger sequence and
% prepending its key/values, view by view, to the accumulated ones.
merge_write_batches({_Seq, _ViewKVs, _DocIdViewIdKeys} = First, nil) ->
    First;
merge_write_batches({Seq, ViewKVs, DocIdViewIdKeys},
        {AccSeq, AccViewKVs, AccDocIdViewIdKeys}) ->
    MergedViewKVs = lists:zipwith(
        fun({View, KVsIn}, {_View, KVsAcc}) ->
            {View, KVsIn ++ KVsAcc}
        end,
        ViewKVs, AccViewKVs),
    {lists:max([Seq, AccSeq]),
        MergedViewKVs,
        DocIdViewIdKeys ++ AccDocIdViewIdKeys}.

% Casts {partial_update, Parent, Group} to Owner; does nothing for nil.
notify_owner(nil, _Parent, _Group) ->
    ok;
notify_owner(Owner, Parent, Group) ->
    ok = gen_server:cast(Owner, {partial_update, Parent, Group}).

% Walks the docs and their map results in lockstep, folding each doc's
% rows into ViewKVs and prepending {DocId, ViewIdKeys} to the accumulator.
% Returns {ViewKVs, DocIdViewIdKeys}.
view_insert_query_results([Doc | Docs], [DocResults | Results], ViewKVs,
        DocIdViewIdKeysAcc) ->
    {ViewKVs2, ViewIdKeys} =
        view_insert_doc_query_results(Doc, DocResults, ViewKVs, [], []),
    view_insert_query_results(Docs, Results, ViewKVs2,
        [{Doc#doc.id, ViewIdKeys} | DocIdViewIdKeysAcc]);
view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) ->
    {ViewKVs, DocIdViewIdKeysAcc}.


% Adds one document's map results to the per-view key/value lists.
%
% The result list and the {View, KVs} list are consumed pairwise, one
% element per view. Returns {ViewKVs, ViewIdKeys}; both accumulators are
% reversed at the end.
view_insert_doc_query_results(#doc{id = DocId} = Doc, [ResultKVs | RestResults],
        [{View, KVs} | RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) ->
    % Take any identical keys and combine the values
    Merged = lists:foldl(fun merge_duplicate_key/2, [], lists:sort(ResultKVs)),
    RowsForDoc = [{{Key, DocId}, Value} || {Key, Value} <- Merged],
    KeysForDoc = [{View#view.id_num, Key} || {Key, _Value} <- Merged],
    view_insert_doc_query_results(Doc, RestResults, RestViewKVs,
        [{View, RowsForDoc ++ KVs} | ViewKVsAcc],
        KeysForDoc ++ ViewIdKeysAcc);
view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) ->
    {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}.

% Fold step over sorted {Key, Value} pairs. A pair whose key compares equal
% (==) to the previous one is folded into that entry as {dups, Values};
% any other pair is pushed onto the accumulator.
merge_duplicate_key({Key, Value}, [{PrevKey, {dups, Dups}} | Rest])
        when Key == PrevKey ->
    [{PrevKey, {dups, [Value | Dups]}} | Rest];
merge_duplicate_key({Key, Value}, [{PrevKey, PrevVal} | Rest])
        when Key == PrevKey ->
    [{PrevKey, {dups, [Value, PrevVal]}} | Rest];
merge_duplicate_key({_Key, _Value} = KV, [{_PrevKey, _PrevVal} | _] = Acc) ->
    [KV | Acc];
merge_duplicate_key(KV, []) ->
    [KV].

% Runs the group's map functions over Docs and returns {Group, Results}.
% With no docs the group is returned unchanged and no query server is
% started; otherwise the returned group carries the query server used.
view_compute(Group, []) ->
    {Group, []};
view_compute(#group{} = Group, Docs) ->
    QueryServer = ensure_doc_map_server(Group),
    {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs),
    {Group#group{query_server = QueryServer}, Results}.

% Returns the group's query server, starting one from the view definitions
% when the group has none yet (query_server is nil).
ensure_doc_map_server(#group{query_server = nil, def_lang = DefLang, views = Views}) ->
    Definitions = [View#view.def || View <- Views],
    {ok, QueryServer} = couch_query_servers:start_doc_map(DefLang, Definitions),
    QueryServer;
ensure_doc_map_server(#group{query_server = QueryServer}) ->
    QueryServer.

% Writes one merged batch of map results into the group's btrees.
%
%   Group              - the #group{} whose id btree and views are updated
%   ViewKeyValuesToAdd - one {View, KeyValues} entry per view, in the same
%                        order as the group's views (they are zipped)
%   DocIdViewIdKeys    - [{DocId, ViewIdKeys}]; an empty ViewIdKeys means
%                        the document no longer emits any rows
%   NewSeq             - stored as the group's current_seq
%   InitialBuild       - boolean; when true no ids are looked up or removed
%                        from the id btree, so no existing view rows are
%                        removed either
%
% Returns the updated #group{}.
write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) ->
    #group{id_btree = IdBtree, views = Views} = Group,

    AddDocIdViewIdKeys =
        [Entry || {_DocId, ViewIdKeys} = Entry <- DocIdViewIdKeys, ViewIdKeys =/= []],
    % Bind both lists from the case result rather than inside the branches,
    % so neither variable is exported out of a conditional.
    {LookupDocIds, RemoveDocIds} =
        case InitialBuild of
            true ->
                {[], []};
            false ->
                {[DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys],
                 [DocId || {DocId, []} <- DocIdViewIdKeys]}
        end,
    {ok, LookupResults, IdBtree2} =
        couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds),

    % Rows previously recorded for the looked-up docs, grouped by view id,
    % are the ones to delete from each view btree.
    KeysToRemoveByView =
        lists:foldl(fun collect_stale_view_keys/2, dict:new(), LookupResults),

    Views2 = lists:zipwith(
        fun(View, {_View, AddKeyValues}) ->
            KeysToRemove =
                couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []),
            {ok, ViewBtree2} =
                couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove),
            View#view{btree = ViewBtree2}
        end,
        Views, ViewKeyValuesToAdd),
    Group#group{views = Views2, current_seq = NewSeq, id_btree = IdBtree2}.

% Fold step: adds every {Key, DocId} of one id-btree lookup result to the
% per-view dictionary; lookups that found nothing leave it unchanged.
collect_stale_view_keys({ok, {DocId, ViewIdKeys}}, KeysByView) ->
    lists:foldl(
        fun({ViewId, Key}, Acc) ->
            dict:append(ViewId, {Key, DocId}, Acc)
        end,
        KeysByView, ViewIdKeys);
collect_stale_view_keys({not_found, _}, KeysByView) ->
    KeysByView.