summaryrefslogtreecommitdiff
path: root/apps/couch/src/couch_view_updater.erl
diff options
context:
space:
mode:
authorAdam Kocoloski <adam@cloudant.com>2010-08-11 15:22:33 -0400
committerAdam Kocoloski <adam@cloudant.com>2010-08-11 17:39:37 -0400
commit81bdbed444df2cbcf3cdb32f7d4a74019de06454 (patch)
treeeade7d0d9bb4cac01b55fd8642adfe0f7da35161 /apps/couch/src/couch_view_updater.erl
parentcc1910f73fbd20c5ffc94bd61e7701d7f5e4c92a (diff)
reorganize couch .erl and driver code into rebar layout
Diffstat (limited to 'apps/couch/src/couch_view_updater.erl')
-rw-r--r--apps/couch/src/couch_view_updater.erl252
1 files changed, 252 insertions, 0 deletions
diff --git a/apps/couch/src/couch_view_updater.erl b/apps/couch/src/couch_view_updater.erl
new file mode 100644
index 00000000..2a9c960f
--- /dev/null
+++ b/apps/couch/src/couch_view_updater.erl
@@ -0,0 +1,252 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_view_updater).
+
+-export([update/2]).
+
+-include("couch_db.hrl").
+
+-spec update(_, #group{}) -> no_return().
+
+%% Brings the view group's index up to date with its database. This function
+%% never returns normally: it exits with {new_group, UpdatedGroup} once the
+%% writer reports completion, or with reset when the purge history cannot be
+%% replayed.
+%%
+%% The work is pipelined through two work queues: this process loads changed
+%% docs onto MapQueue, a linked mapper process (do_maps/4) turns them into
+%% view rows on WriteQueue, and a linked writer process (do_writes/5) applies
+%% them and finally sends {new_group, Group} back to this process.
+update(Owner, Group) ->
+    #group{
+        db = #db{name = DbName} = Db,
+        name = GroupName,
+        current_seq = Seq,
+        design_options = DesignOptions
+    } = Group,
+    TaskName = <<DbName/binary," ",GroupName/binary>>,
+    couch_task_status:add_task(<<"View Group Indexer">>, TaskName, <<"Starting index update">>),
+
+    Group2 = reconcile_purges(Group),
+
+    {ok, MapQueue} = couch_work_queue:new(100000, 500),
+    {ok, WriteQueue} = couch_work_queue:new(100000, 500),
+    Self = self(),
+    ViewEmptyKVs = [{View, []} || View <- Group2#group.views],
+    % The mapper is handed the pre-purge Group; do_maps/4 and view_compute/2
+    % only read its def_lang, query_server and view definitions.
+    spawn_link(fun() -> do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) end),
+    % Seq == 0 tells the writer that this is an initial build.
+    spawn_link(fun() -> do_writes(Self, Owner, Group2, WriteQueue, Seq == 0) end),
+
+    % Number of docs modified since the index was last computed; used only
+    % for progress reporting.
+    TotalChanges = couch_db:count_changes_since(Db, Seq),
+    % Update the task status every half second.
+    couch_task_status:set_update_frequency(500),
+
+    IncludeDesign = couch_util:get_value(<<"include_design">>, DesignOptions, false),
+    DocOpts = doc_open_options(
+        couch_util:get_value(<<"local_seq">>, DesignOptions, false)),
+
+    % Feed every change since Seq to the mapper, counting as we go.
+    EnqueueChange = fun(DocInfo, _, Processed) ->
+        couch_task_status:update("Processed ~p of ~p changes (~p%)",
+            [Processed, TotalChanges, (Processed*100) div TotalChanges]),
+        load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign),
+        {ok, Processed+1}
+    end,
+    {ok, _, _} = couch_db:enum_docs_since(Db, Seq, EnqueueChange, 0, []),
+
+    couch_task_status:set_update_frequency(0),
+    couch_task_status:update("Finishing."),
+    % Closing MapQueue lets the mapper drain and close WriteQueue in turn.
+    couch_work_queue:close(MapQueue),
+    receive
+        {new_group, NewGroup} ->
+            FinalSeq = couch_db:get_update_seq(Db),
+            exit({new_group, NewGroup#group{current_seq=FinalSeq}})
+    end.
+
+%% Compares the group's purge_seq with the database's. Returns the group
+%% unchanged when they agree, replays the last purge when the database is
+%% exactly one purge ahead, and otherwise exits the process with reset.
+reconcile_purges(#group{db = Db, purge_seq = PurgeSeq} = Group) ->
+    case couch_db:get_purge_seq(Db) of
+        DbPurgeSeq when DbPurgeSeq == PurgeSeq ->
+            Group;
+        DbPurgeSeq when DbPurgeSeq == PurgeSeq + 1 ->
+            couch_task_status:update(<<"Removing purged entries from view index.">>),
+            purge_index(Group);
+        _ ->
+            couch_task_status:update(<<"Resetting view index due to lost purge entries.">>),
+            exit(reset)
+    end.
+
+%% Options used when opening docs for mapping; local_seq is added only when
+%% the design doc's "local_seq" option is exactly true.
+doc_open_options(true) -> [conflicts, deleted_conflicts, local_seq];
+doc_open_options(_) -> [conflicts, deleted_conflicts].
+
+
+%% Removes the rows of the most recently purged docs from the group's index
+%% and returns the group with its id btree, view btrees and purge_seq updated.
+purge_index(#group{db=Db, views=Views, id_btree=IdBtree}=Group) ->
+    {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db),
+    PurgedIds = [Id || {Id, _Revs} <- PurgedIdsRevs],
+    % The purged ids are passed both as the lookup list and as the removal
+    % list; nothing is added.
+    {ok, Lookups, IdBtree2} = couch_btree:query_modify(IdBtree, PurgedIds, [], PurgedIds),
+
+    % Build a dict ViewNum -> [{RowKey, DocId}] of every row to delete.
+    CollectRowKeys = fun
+        ({ok, {DocId, ViewNumRowKeys}}, DictAcc0) ->
+            lists:foldl(
+                fun({ViewNum, RowKey}, DictAcc) ->
+                    dict:append(ViewNum, {RowKey, DocId}, DictAcc)
+                end, DictAcc0, ViewNumRowKeys);
+        ({not_found, _}, DictAcc0) ->
+            DictAcc0
+    end,
+    RowKeysByView = lists:foldl(CollectRowKeys, dict:new(), Lookups),
+
+    % Drop those rows from each view btree that has any.
+    PurgeView = fun(#view{id_num=Num, btree=Btree}=View) ->
+        case dict:find(Num, RowKeysByView) of
+            error ->
+                % no keys to remove in this view
+                View;
+            {ok, RemoveKeys} ->
+                {ok, Btree2} = couch_btree:add_remove(Btree, [], RemoveKeys),
+                View#view{btree=Btree2}
+        end
+    end,
+    Views2 = [PurgeView(View) || View <- Views],
+
+    Group#group{
+        id_btree = IdBtree2,
+        views = Views2,
+        purge_seq = couch_db:get_purge_seq(Db)
+    }.
+
+
+%% Queues one changed doc onto MapQueue as {Seq, #doc{}}.
+%% Design docs are skipped when IncludeDesign is false. Deleted docs are
+%% queued as a stub carrying only the id and deleted=true; all other docs are
+%% opened from Db with DocOpts first.
+load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign) ->
+    #doc_info{
+        id = DocId,
+        high_seq = Seq,
+        revs = [#rev_info{deleted = Deleted} | _]
+    } = DocInfo,
+    case DocId of
+        <<?DESIGN_DOC_PREFIX, _/binary>> when IncludeDesign =:= false ->
+            % we skip design docs
+            ok;
+        _ when Deleted ->
+            Stub = #doc{id=DocId, deleted=true},
+            couch_work_queue:queue(MapQueue, {Seq, Stub});
+        _ ->
+            {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts),
+            couch_work_queue:queue(MapQueue, {Seq, Doc})
+    end.
+
+%% Mapper loop. Dequeues batches of {Seq, #doc{}} from MapQueue, runs the
+%% non-deleted docs through view_compute/2 and queues
+%% {LastSeq, ViewKVs, DocIdViewIdKeys} onto WriteQueue. When MapQueue is
+%% closed it closes WriteQueue and stops the doc map on the query server.
+do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) ->
+    case couch_work_queue:dequeue(MapQueue) of
+        {ok, Batch} ->
+            LiveDocs = [Doc || {_, #doc{deleted=false}=Doc} <- Batch],
+            % Deleted docs contribute an empty key list for their id.
+            DeletedKVs = [{Id, []} || {_, #doc{deleted=true, id=Id}} <- Batch],
+            BatchSeq = lists:max([Seq || {Seq, _Doc} <- Batch]),
+            {MappedGroup, Results} = view_compute(Group, LiveDocs),
+            {ViewKVs, DocIdViewIdKeys} =
+                view_insert_query_results(LiveDocs, Results, ViewEmptyKVs, DeletedKVs),
+            couch_work_queue:queue(WriteQueue, {BatchSeq, ViewKVs, DocIdViewIdKeys}),
+            % MappedGroup carries the query server started by view_compute/2.
+            do_maps(MappedGroup, MapQueue, WriteQueue, ViewEmptyKVs);
+        closed ->
+            couch_work_queue:close(WriteQueue),
+            couch_query_servers:stop_doc_map(Group#group.query_server)
+    end.
+
+%% Writer loop. Dequeues batches from WriteQueue, merges them into a single
+%% change set, writes it with write_changes/5 and, when Owner is not nil,
+%% casts {partial_update, Parent, Group} to it. When WriteQueue is closed the
+%% final group is sent to Parent as {new_group, Group}.
+do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) ->
+    case couch_work_queue:dequeue(WriteQueue) of
+        closed ->
+            Parent ! {new_group, Group};
+        {ok, Batches} ->
+            {NewSeq, ViewKeyValues, DocIdViewIdKeys} =
+                lists:foldl(fun merge_write_batch/2, nil, Batches),
+            Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq,
+                InitialBuild),
+            notify_owner(Owner, Parent, Group2),
+            do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild)
+    end.
+
+%% Fold step for do_writes/5: the first batch seeds the accumulator (nil);
+%% each later batch is merged in by keeping the highest seq, prepending its
+%% KVs per view (views are paired positionally) and prepending its doc id
+%% entries.
+merge_write_batch({_Seq, _ViewKVs, _DocIdViewIdKeys} = Batch, nil) ->
+    Batch;
+merge_write_batch({Seq, ViewKVs, DocIdViewIdKeys}, {AccSeq, AccViewKVs, AccDocIdViewIdKeys}) ->
+    MergedViewKVs = lists:zipwith(
+        fun({View, KVsIn}, {_View, KVsAcc}) ->
+            {View, KVsIn ++ KVsAcc}
+        end, ViewKVs, AccViewKVs),
+    {erlang:max(Seq, AccSeq), MergedViewKVs, DocIdViewIdKeys ++ AccDocIdViewIdKeys}.
+
+%% Reports a partially updated group to the owner, if there is one.
+notify_owner(nil, _Parent, _Group) ->
+    ok;
+notify_owner(Owner, Parent, Group) ->
+    ok = gen_server:cast(Owner, {partial_update, Parent, Group}).
+
+%% Walks the docs and their map results in lockstep, threading the per-view
+%% KV lists through view_insert_doc_query_results/5 and prepending one
+%% {DocId, ViewIdKeys} entry per doc to the accumulator.
+%% Returns {ViewKVs, DocIdViewIdKeys}.
+view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) ->
+    {ViewKVs, DocIdViewIdKeysAcc};
+view_insert_query_results([Doc | Docs], [DocResults | Results], ViewKVs0, IdKeysAcc) ->
+    {ViewKVs1, ViewIdKeys} =
+        view_insert_doc_query_results(Doc, DocResults, ViewKVs0, [], []),
+    view_insert_query_results(Docs, Results, ViewKVs1,
+        [{Doc#doc.id, ViewIdKeys} | IdKeysAcc]).
+
+
+%% Merges one doc's map results into the per-view KV lists. The results and
+%% the {View, KVs} pairs are consumed positionally, one per view. For each
+%% view the emitted rows are collapsed by key, stored as {{Key, DocId}, Value}
+%% in front of the view's existing KVs, and recorded as {ViewIdNum, Key}.
+%% Returns {ViewKVs, ViewIdKeys}; both accumulators are reversed at the end.
+view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) ->
+    {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)};
+view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs | RestResults],
+        [{View, KVs} | RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) ->
+    Rows = collapse_duplicate_keys(ResultKVs),
+    AddedKVs = [{{Key, DocId}, Value} || {Key, Value} <- Rows],
+    AddedIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- Rows],
+    view_insert_doc_query_results(Doc, RestResults, RestViewKVs,
+        [{View, AddedKVs ++ KVs} | ViewKVsAcc],
+        AddedIdKeys ++ ViewIdKeysAcc).
+
+%% Takes any identical keys and combines their values into {dups, Values}.
+%% The rows are sorted first so equal keys are adjacent; because the fold
+%% prepends, the returned list is in descending key order.
+collapse_duplicate_keys(ResultKVs) ->
+    lists:foldl(fun collapse_row/2, [], lists:sort(ResultKVs)).
+
+%% Fold step for collapse_duplicate_keys/1. Keys are compared with ==.
+collapse_row({Key, Value}, [{PrevKey, {dups, Dups}} | Rest]) when Key == PrevKey ->
+    % Key already has a dups list: add this value to it.
+    [{PrevKey, {dups, [Value | Dups]}} | Rest];
+collapse_row({Key, Value}, [{PrevKey, PrevVal} | Rest]) when Key == PrevKey ->
+    % Second occurrence of the key: start a dups list.
+    [{PrevKey, {dups, [Value, PrevVal]}} | Rest];
+collapse_row({_Key, _Value} = KV, [{_PrevKey, _PrevVal} | _] = Acc) ->
+    [KV | Acc];
+collapse_row(KV, []) ->
+    [KV].
+
+%% Maps Docs through the group's query server, starting the doc map on first
+%% use. Returns {Group, Results} where Group remembers the query server.
+%% With no docs nothing is started and the results are [].
+view_compute(Group, []) ->
+    {Group, []};
+view_compute(#group{def_lang=DefLang, query_server=QueryServerIn}=Group, Docs) ->
+    {ok, QueryServer} = ensure_doc_map_started(QueryServerIn, DefLang, Group),
+    {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs),
+    {Group#group{query_server=QueryServer}, Results}.
+
+%% Returns {ok, QueryServer}, starting a doc map with the group's view
+%% definitions when none is running yet (query_server is nil).
+ensure_doc_map_started(nil, DefLang, Group) ->
+    Definitions = [View#view.def || View <- Group#group.views],
+    couch_query_servers:start_doc_map(DefLang, Definitions);
+ensure_doc_map_started(QueryServer, _DefLang, _Group) ->
+    {ok, QueryServer}.
+
+
+
+%% Applies one merged change set to the group's btrees and returns the group
+%% with updated views, id_btree and current_seq = NewSeq.
+%%
+%% DocIdViewIdKeys entries with a non-empty key list are (re)written to the id
+%% btree. Unless InitialBuild is true, every doc id is also looked up so the
+%% rows it previously emitted can be removed from the views, and ids with an
+%% empty key list are removed from the id btree.
+write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) ->
+    #group{id_btree=IdBtree, views=Views} = Group,
+
+    AddDocIdViewIdKeys =
+        [Entry || {_DocId, ViewIdKeys} = Entry <- DocIdViewIdKeys, ViewIdKeys /= []],
+    % update/2 passes InitialBuild as Seq == 0; in that case no previous rows
+    % are looked up or removed.
+    {LookupDocIds, RemoveDocIds} =
+        case InitialBuild of
+            true ->
+                {[], []};
+            _ ->
+                {[DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys],
+                 [DocId || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys == []]}
+        end,
+    {ok, LookupResults, IdBtree2} =
+        couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds),
+
+    % Build a dict ViewId -> [{Key, DocId}] of the rows previously emitted by
+    % the looked-up docs; these are removed from the view btrees below.
+    CollectOldKeys = fun(LookupResult, DictAcc0) ->
+        case LookupResult of
+            {ok, {DocId, ViewIdKeys}} ->
+                lists:foldl(
+                    fun({ViewId, Key}, DictAcc) ->
+                        dict:append(ViewId, {Key, DocId}, DictAcc)
+                    end, DictAcc0, ViewIdKeys);
+            {not_found, _} ->
+                DictAcc0
+        end
+    end,
+    KeysToRemoveByView = lists:foldl(CollectOldKeys, dict:new(), LookupResults),
+
+    % Views and ViewKeyValuesToAdd are paired positionally.
+    UpdateView = fun(View, {_View, AddKeyValues}) ->
+        KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []),
+        {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove),
+        View#view{btree = ViewBtree2}
+    end,
+    Views2 = lists:zipwith(UpdateView, Views, ViewKeyValuesToAdd),
+
+    Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}.
+
+