summaryrefslogtreecommitdiff
path: root/apps/couch/src/couch_view_compactor.erl
blob: ccb3fe3ae0e36355b4858c7a1e45d7427ac1954c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_view_compactor).

-include ("couch_db.hrl").

-export([start_compact/2]).

%% @spec start_compact(DbName::binary(), GroupId::binary()) -> ok
%% @doc Asks the group server of a design document to start compacting its
%% views, handing it compact_group/3 as the compaction routine. GroupId must
%% be given without the _design/ prefix; the prefix is added here.
start_compact(DbName, GroupId) ->
    DesignDocId = <<"_design/", GroupId/binary>>,
    GroupServer = couch_view:get_group_server(DbName, DesignDocId),
    gen_server:call(GroupServer, {start_compact, fun compact_group/3}).

%%=============================================================================
%% internal functions
%%=============================================================================

%% @spec compact_group(Group, EmptyGroup, DbName::binary()) -> ok
%% @doc Copies the id btree and every view btree of Group into the empty
%% btrees of EmptyGroup, then passes the rebuilt group to
%% maybe_retry_compact/3 and returns its result.
%%
%% Rows are buffered and written in batches; progress is reported through
%% couch_task_status each time a batch is flushed.
%%
%% Exits with {view_duplicated_id, DocId} when the id btree yields the same
%% document id twice in a row (COUCHDB-999).
compact_group(Group, EmptyGroup, DbName) ->
    #group{
        current_seq = Seq,
        id_btree = IdBtree,
        name = GroupId,
        views = Views
    } = Group,

    #group{
        fd = Fd,
        id_btree = EmptyIdBtree,
        views = EmptyViews
    } = EmptyGroup,

    %% The resulting 'DOWN' message is never consumed in this module.
    %% NOTE(review): presumably the monitor only exists to keep the target
    %% file referenced while compaction runs - confirm against couch_file.
    erlang:monitor(process, Fd),

    {ok, Db} = couch_db:open(DbName, []),

    {ok, Count} = couch_db:get_doc_count(Db),

    couch_task_status:add_task([
        {type, view_compaction},
        {database, DbName},
        {design_document, GroupId},
        {progress, 0}
    ]),

    %% Count comes from the database while the rows being copied come from
    %% the index, so the two are not guaranteed to agree. Report 0% instead
    %% of dividing by zero when the database has no documents.
    Progress = fun
        (_Copied) when Count =:= 0 -> 0;
        (Copied) -> (Copied * 100) div Count
    end,

    Fun = fun({DocId, _ViewIdKeys} = KV, {Bt, Acc, TotalCopied, LastId}) ->
        if DocId =:= LastId -> % COUCHDB-999
            ?LOG_ERROR("Duplicates of document `~s` detected in view group `~s`"
                ", database `~s` - view rebuild, from scratch, is required",
                [DocId, GroupId, DbName]),
            exit({view_duplicated_id, DocId});
        true -> ok end,
        %% Flush the buffered rows (oldest first) every 10000 entries; the
        %% very first row is flushed on its own because TotalCopied is 0.
        if TotalCopied rem 10000 =:= 0 ->
            couch_task_status:update([
                {changes_done, TotalCopied},
                {progress, Progress(TotalCopied)}
            ]),
            {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
            {ok, {Bt2, [], TotalCopied+1, DocId}};
        true ->
            {ok, {Bt, [KV|Acc], TotalCopied+1, DocId}}
        end
    end,
    {ok, _, {Bt3, Uncopied, _Total, _LastId}} = couch_btree:foldl(IdBtree, Fun,
        {EmptyIdBtree, [], 0, nil}),
    %% Write whatever is still buffered after the last full batch.
    {ok, NewIdBtree} = couch_btree:add(Bt3, lists:reverse(Uncopied)),

    NewViews = lists:map(fun({View, EmptyView}) ->
        compact_view(View, EmptyView)
    end, lists:zip(Views, EmptyViews)),

    NewGroup = EmptyGroup#group{
        id_btree=NewIdBtree,
        views=NewViews,
        current_seq=Seq
    },
    maybe_retry_compact(Db, GroupId, NewGroup).

%% @spec maybe_retry_compact(Db, GroupId, NewGroup) -> ok
%% @doc Writes the header of the compacted group to its file and offers the
%% group to the group server registered for {DbName, Sig}.
%%
%% When the server replies `ok' the database handle is closed and the
%% function returns. When it replies `update', the database is reopened, a
%% view updater is run on the compacted group in a monitored process, and
%% the whole hand-over is retried with the group the updater exits with.
%%
%% Exits with {view_updater_exit, Reason} if the updater terminates with
%% anything other than {new_group, Group}.
maybe_retry_compact(#db{name = DbName} = Db, GroupId, NewGroup) ->
    #group{sig = Sig, fd = NewFd} = NewGroup,
    Header = {Sig, couch_view_group:get_index_header_data(NewGroup)},
    ok = couch_file:write_header(NewFd, Header),
    Pid = ets:lookup_element(group_servers_by_sig, {DbName, Sig}, 2),
    case gen_server:call(Pid, {compact_done, NewGroup}) of
    ok ->
        couch_db:close(Db);
    update ->
        {ok, Db2} = couch_db:reopen(Db),
        {_, Ref} = erlang:spawn_monitor(fun() ->
            couch_view_updater:update(nil, NewGroup, Db2)
        end),
        receive
        %% The updater hands back its result as its exit reason.
        {'DOWN', Ref, _, _, {new_group, NewGroup2}} ->
            maybe_retry_compact(Db2, GroupId, NewGroup2);
        %% Any other exit reason means no updated group will ever arrive.
        %% Without this clause the receive would block forever and keep
        %% Db2 open, so release the handle and fail explicitly instead.
        {'DOWN', Ref, _, _, Reason} ->
            ?LOG_ERROR("View updater for group `~s`, database `~s` exited"
                " during compaction: ~p", [GroupId, DbName, Reason]),
            couch_db:close(Db2),
            exit({view_updater_exit, Reason})
        end
    end.

%% @spec compact_view(View, EmptyView) -> CompactView
%% @doc Copies every row of View's btree into EmptyView's btree and returns
%% EmptyView with the filled btree. Rows are buffered and added in batches;
%% progress is reported through couch_task_status whenever a batch is
%% flushed.
compact_view(View, EmptyView) ->
    {ok, RowCount} = couch_view:get_row_count(View),

    %% Each row is {{Key, DocId}, Value}; the accumulator carries the target
    %% btree, the rows buffered so far (newest first) and the copy counter.
    CopyRow = fun(Row, {TargetBt, Pending, Copied}) ->
        case Copied rem 10000 of
        0 ->
            couch_task_status:update([{changes_done, Copied}, {progress, (Copied * 100) div RowCount}]),
            Batch = lists:reverse([Row|Pending]),
            {ok, FlushedBt} = couch_btree:add(TargetBt, Batch),
            {ok, {FlushedBt, [], Copied + 1}};
        _ ->
            {ok, {TargetBt, [Row|Pending], Copied + 1}}
        end
    end,

    SourceBt = View#view.btree,
    InitialAcc = {EmptyView#view.btree, [], 0},
    {ok, _, {PartialBt, Leftover, _Copied}} =
        couch_btree:foldl(SourceBt, CopyRow, InitialAcc),

    %% Write the rows still buffered after the last full batch.
    {ok, CompactedBt} = couch_btree:add(PartialBt, lists:reverse(Leftover)),
    EmptyView#view{btree = CompactedBt}.