diff options
author | Robert Dionne <bob@cloudant.com> | 2011-04-19 12:06:37 -0400 |
---|---|---|
committer | Adam Kocoloski <adam@cloudant.com> | 2011-04-19 13:09:18 -0400 |
commit | a9410e622d84c4b6c017d16ea600e9b19e306c59 (patch) | |
tree | 6d5ff8a1a10fef103594072b50d55990ccc43607 | |
parent | 663d04f907ba0f8e23bab0eb2492c431246974fa (diff) |
Track and report size of live data in DBs and views
The #full_doc_info record is extended to include the summed size of
non-deleted leaf revision document bodies and their attachments. Document sizes
are computed on update; accurate sizes of existing databases and
view groups are only available after compaction.
The document size is defined to be the size of the binary
representation (as produced by term_to_binary) of #doc.body. The att_len field is used for
attachments; attachments that are shared by multiple revisions of a
document are only counted once. The size of a view index is defined as
the size of all keys, values, and reductions accessible from the current
root of the tree.
BugzID: 9995
-rw-r--r-- | apps/couch/include/couch_db.hrl | 9 | ||||
-rw-r--r-- | apps/couch/src/couch_btree.erl | 8 | ||||
-rw-r--r-- | apps/couch/src/couch_db.erl | 19 | ||||
-rw-r--r-- | apps/couch/src/couch_db_updater.erl | 116 | ||||
-rw-r--r-- | apps/couch/src/couch_doc.erl | 2 | ||||
-rw-r--r-- | apps/couch/src/couch_key_tree.erl | 52 | ||||
-rw-r--r-- | apps/couch/src/couch_view.erl | 11 | ||||
-rw-r--r-- | apps/couch/src/couch_view_compactor.erl | 2 | ||||
-rw-r--r-- | apps/couch/src/couch_view_group.erl | 25 |
9 files changed, 180 insertions, 64 deletions
diff --git a/apps/couch/include/couch_db.hrl b/apps/couch/include/couch_db.hrl index b2f02cee..6b3d53a1 100644 --- a/apps/couch/include/couch_db.hrl +++ b/apps/couch/include/couch_db.hrl @@ -65,6 +65,7 @@ {id = <<"">>, update_seq = 0, deleted = false, + data_size = 0, rev_tree = [] }). @@ -298,3 +299,11 @@ set_timeout_fun, stop_fun }). + +-record(leaf, { + deleted, + ptr, + seq, + size = 0, + atts = [] +}). diff --git a/apps/couch/src/couch_btree.erl b/apps/couch/src/couch_btree.erl index 5b950dc1..2fcc8ae7 100644 --- a/apps/couch/src/couch_btree.erl +++ b/apps/couch/src/couch_btree.erl @@ -65,8 +65,8 @@ final_reduce(#btree{reduce=Reduce}, Val) -> final_reduce(Reduce, Val); final_reduce(Reduce, {[], []}) -> Reduce(reduce, []); -final_reduce(_Bt, {[], [Red]}) -> - Red; +final_reduce(Reduce, {[], [Red]}) -> + Reduce(rereduce, [Red]); final_reduce(Reduce, {[], Reductions}) -> Reduce(rereduce, Reductions); final_reduce(Reduce, {KVs, Reductions}) -> @@ -104,8 +104,8 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) -> full_reduce(#btree{root=nil,reduce=Reduce}) -> {ok, Reduce(reduce, [])}; -full_reduce(#btree{root={_P, Red}}) -> - {ok, Red}. +full_reduce(#btree{root={_P, Red}, reduce=Reduce}) -> + {ok, Reduce(rereduce, [Red])}. % wraps a 2 arity function with the proper 3 arity function convert_fun_arity(Fun) when is_function(Fun, 2) -> diff --git a/apps/couch/src/couch_db.erl b/apps/couch/src/couch_db.erl index af650414..b9b66f4b 100644 --- a/apps/couch/src/couch_db.erl +++ b/apps/couch/src/couch_db.erl @@ -252,7 +252,7 @@ get_last_purged(#db{fd=Fd, header=#db_header{purged_docs=PurgedPointer}}) -> couch_file:pread_term(Fd, PurgedPointer). get_doc_count(Db) -> - {ok, {Count, _DelCount}} = couch_btree:full_reduce(Db#db.id_tree), + {ok, {Count, _, _}} = couch_btree:full_reduce(Db#db.id_tree), {ok, Count}. 
get_db_info(Db) -> @@ -265,7 +265,7 @@ get_db_info(Db) -> instance_start_time=StartTime, committed_update_seq=CommittedUpdateSeq} = Db, {ok, Size} = couch_file:bytes(Fd), - {ok, {Count, DelCount}} = couch_btree:full_reduce(FullDocBtree), + {ok, {Count, DelCount, DataSize}} = couch_btree:full_reduce(FullDocBtree), InfoList = [ {db_name, Name}, {doc_count, Count}, @@ -274,6 +274,7 @@ get_db_info(Db) -> {purge_seq, couch_db:get_purge_seq(Db)}, {compact_running, Compactor/=nil}, {disk_size, Size}, + {other, {[{data_size, DataSize}]}}, {instance_start_time, StartTime}, {disk_format_version, DiskVersion}, {committed_update_seq, CommittedUpdateSeq} @@ -548,8 +549,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets], [{ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}|RestLookups], AllowConflict, AccPrepped, AccErrors) -> Leafs = couch_key_tree:get_all_leafs(OldRevTree), - LeafRevsDict = dict:from_list([{{Start, RevId}, {Deleted, Sp, Revs}} || - {{Deleted, Sp, _Seq}, {Start, [RevId|_]}=Revs} <- Leafs]), + LeafRevsDict = dict:from_list([{{Start, RevId}, {Del, Ptr, Revs}} || + {#leaf{deleted=Del, ptr=Ptr}, {Start, [RevId|_]}=Revs} <- Leafs]), {PreppedBucket, AccErrors3} = lists:foldl( fun(Doc, {Docs2Acc, AccErrors2}) -> case prep_and_validate_update(Db, Doc, OldFullDocInfo, @@ -776,7 +777,7 @@ make_first_doc_on_disk(_Db, _Id, _Pos, []) -> nil; make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) -> make_first_doc_on_disk(Db, Id, Pos - 1, RestPath); -make_first_doc_on_disk(Db, Id, Pos, [{_Rev, {IsDel, Sp, _Seq}} |_]=DocPath) -> +make_first_doc_on_disk(Db, Id, Pos, [{_, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) -> Revs = [Rev || {Rev, _} <- DocPath], make_doc(Db, Id, IsDel, Sp, {Pos, Revs}). @@ -971,7 +972,7 @@ enum_docs_since_reduce_to_count(Reds) -> fun couch_db_updater:btree_by_seq_reduce/2, Reds). 
enum_docs_reduce_to_count(Reds) -> - {Count, _DelCount} = couch_btree:final_reduce( + {Count, _, _} = couch_btree:final_reduce( fun couch_db_updater:btree_by_id_reduce/2, Reds), Count. @@ -1045,7 +1046,7 @@ open_doc_revs_int(Db, IdRevs, Options) -> ?REV_MISSING -> % we have the rev in our list but know nothing about it {{not_found, missing}, {Pos, Rev}}; - {IsDeleted, SummaryPtr, _UpdateSeq} -> + #leaf{deleted=IsDeleted, ptr=SummaryPtr} -> {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)} end end, FoundRevs), @@ -1092,9 +1093,9 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre couch_key_tree:get_full_key_paths(RevTree, [Rev]), [{revs_info, Pos, lists:map( - fun({Rev1, {true, _Sp, _UpdateSeq}}) -> + fun({Rev1, #leaf{deleted=true}}) -> {Rev1, deleted}; - ({Rev1, {false, _Sp, _UpdateSeq}}) -> + ({Rev1, #leaf{deleted=false}}) -> {Rev1, available}; ({Rev1, ?REV_MISSING}) -> {Rev1, missing} diff --git a/apps/couch/src/couch_db_updater.erl b/apps/couch/src/couch_db_updater.erl index 835d188c..138930f1 100644 --- a/apps/couch/src/couch_db_updater.erl +++ b/apps/couch/src/couch_db_updater.erl @@ -303,17 +303,19 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> rev_tree(DiskTree) -> - couch_key_tree:map(fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> - {IsDeleted == 1, BodyPointer, UpdateSeq}; + couch_key_tree:map(fun(_RevId, {Del, Ptr, Seq}) -> + #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq}; + (_RevId, {Del, Ptr, Seq, Size, Atts}) -> + #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq, size=Size, atts=Atts}; (_RevId, ?REV_MISSING) -> ?REV_MISSING end, DiskTree). 
disk_tree(RevTree) -> - couch_key_tree:map(fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> - {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq}; - (_RevId, ?REV_MISSING) -> - ?REV_MISSING + couch_key_tree:map(fun(_RevId, ?REV_MISSING) -> + ?REV_MISSING; + (_RevId, #leaf{deleted=Del, ptr=Ptr, seq=Seq, size=Size, atts=Atts}) -> + {if Del -> 1; true -> 0 end, Ptr, Seq, Size, Atts} end, RevTree). btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) -> @@ -345,34 +347,37 @@ btree_by_seq_join(KeySeq,{Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) -> [#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}. btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq, - deleted=Deleted, rev_tree=Tree}) -> - {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}. + data_size=Size, deleted=Deleted, rev_tree=Tree}) -> + {Id, {Seq, if Deleted -> 1; true -> 0 end, Size, disk_tree(Tree)}}. +%% handle old formats before `data_size` added btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) -> - Tree = - couch_key_tree:map( - fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> - {IsDeleted == 1, BodyPointer, UpdateSeq}; - (_RevId, ?REV_MISSING) -> - ?REV_MISSING; - (_RevId, {IsDeleted, BodyPointer}) -> - % 09 UPGRADE CODE - % this is the 0.9.0 and earlier rev info record. It's missing the seq - % nums, which means couchdb will sometimes reexamine unchanged - % documents with the _changes API. - % This is fixed by compacting the database. - {IsDeleted == 1, BodyPointer, HighSeq} - end, DiskTree), + btree_by_id_join(Id, {HighSeq, Deleted, 0, DiskTree}); - #full_doc_info{id=Id, update_seq=HighSeq, deleted=Deleted==1, rev_tree=Tree}. +btree_by_id_join(Id, {HighSeq, Deleted, Size, DiskTree}) -> + #full_doc_info{id=Id, update_seq=HighSeq, + deleted=Deleted==1, data_size=Size, + rev_tree=rev_tree(DiskTree)}. 
btree_by_id_reduce(reduce, FullDocInfos) -> - % count the number of not deleted documents - {length([1 || #full_doc_info{deleted=false} <- FullDocInfos]), - length([1 || #full_doc_info{deleted=true} <- FullDocInfos])}; -btree_by_id_reduce(rereduce, Reds) -> - {lists:sum([Count || {Count,_} <- Reds]), - lists:sum([DelCount || {_, DelCount} <- Reds])}. + lists:foldl( + fun(#full_doc_info{deleted = false, data_size=Size}, + {NotDeleted, Deleted, DocSize}) -> + {NotDeleted + 1, Deleted, DocSize + Size}; + (#full_doc_info{deleted = true, data_size=Size}, + {NotDeleted, Deleted, DocSize}) -> + {NotDeleted, Deleted + 1, DocSize + Size} + end, + {0, 0, 0}, FullDocInfos); + +btree_by_id_reduce(rereduce, Reductions) -> + lists:foldl( + fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, AccDocSizes}) -> + {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccDocSizes}; + ({NotDeleted, Deleted, DocSizes}, {AccNotDeleted, AccDeleted, AccDocSizes}) -> + {AccNotDeleted + NotDeleted, AccDeleted + Deleted, DocSizes + AccDocSizes} + end, + {0, 0, 0}, Reductions). btree_by_seq_reduce(reduce, DocInfos) -> % count the number of documents @@ -486,14 +491,17 @@ flush_trees(#db{fd=Fd,header=Header}=Db, % make sure the Fd in the written bins is the same Fd we are % and convert bins, removing the FD. % All bins should have been written to disk already. - DiskAtts = + {DiskAtts, SizeInfo} = case Atts of - [] -> []; + [] -> {[],[]}; [#att{data={BinFd, _Sp}} | _ ] when BinFd == Fd -> - [{N,T,P,AL,DL,R,M,E} + {[{N,T,P,AL,DL,R,M,E} || #att{name=N,type=T,data={_,P},md5=M,revpos=R, att_len=AL,disk_len=DL,encoding=E} - <- Atts]; + <- Atts], + [{P1,AL1} + || #att{data={_,P1},att_len=AL1} + <- Atts]}; _ -> % BinFd must not equal our Fd. 
This can happen when a database % is being switched out during a compaction @@ -508,7 +516,13 @@ flush_trees(#db{fd=Fd,header=Header}=Db, false -> couch_file:append_term_md5(Fd, {Doc#doc.body, DiskAtts}) end, - {IsDeleted, NewSummaryPointer, UpdateSeq}; + #leaf{ + deleted = IsDeleted, + ptr = NewSummaryPointer, + seq = UpdateSeq, + size = size(term_to_binary(Doc#doc.body)), + atts = SizeInfo + }; _ -> Value end @@ -636,9 +650,9 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> % Write out the document summaries (the bodies are stored in the nodes of % the trees, the attachments are already written to disk) {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []), - - IndexInfos = new_index_entries(FlushedFullDocInfos, []), - + IndexInfos = + new_index_entries(compute_data_sizes(FlushedFullDocInfos, []), + []), % and the indexes {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexInfos, []), @@ -661,6 +675,18 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> {ok, commit_data(Db4, not FullCommit)}. +compute_data_sizes([], Acc) -> + lists:reverse(Acc); + +compute_data_sizes([FullDocInfo | RestDocInfos], Acc) -> + #full_doc_info{rev_tree=Tree} = FullDocInfo, + Size = couch_key_tree:compute_data_size(Tree), + compute_data_sizes(RestDocInfos, + [FullDocInfo#full_doc_info{data_size=Size} + | Acc]). 
+ + + update_local_docs(#db{local_tree=Btree}=Db, Docs) -> Ids = [Id || {_Client, #doc{id=Id}} <- Docs], @@ -815,15 +841,21 @@ copy_docs(Db, #db{fd=DestFd}=NewDb, MixedInfos, Retry) -> end, merge_lookups(MixedInfos, LookupResults)), NewInfos1 = [Info#full_doc_info{rev_tree=couch_key_tree:map( - fun(Rev, {IsDel, Sp, Seq}, leaf) -> - DocBody = copy_doc_attachments(Db, Rev, Sp, DestFd), - {ok, Pos} = couch_file:append_term_md5(DestFd, DocBody), - {IsDel, Pos, Seq}; + fun(Rev, #leaf{ptr=Sp, size=Size0}=Leaf, leaf) -> + {Body, AttInfos} = copy_doc_attachments(Db, Rev, Sp, DestFd), + {ok, Pos} = couch_file:append_term_md5(DestFd, {Body, AttInfos}), + if Size0 > 0 -> + Leaf#leaf{ptr=Pos}; + true -> + DocSize = byte_size(term_to_binary(Body)), + AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos], + Leaf#leaf{ptr=Pos, size=DocSize, atts=AttSizes} + end; (_, _, branch) -> ?REV_MISSING end, RevTree)} || #full_doc_info{rev_tree=RevTree}=Info <- Infos], - NewInfos = stem_full_doc_infos(Db, NewInfos1), + NewInfos = stem_full_doc_infos(Db, compute_data_sizes(NewInfos1, [])), RemoveSeqs = case Retry of false -> diff --git a/apps/couch/src/couch_doc.erl b/apps/couch/src/couch_doc.erl index 1c5d797d..9f0dae45 100644 --- a/apps/couch/src/couch_doc.erl +++ b/apps/couch/src/couch_doc.erl @@ -282,7 +282,7 @@ max_seq([#rev_info{seq=Seq}|Rest], Max) -> to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) -> RevInfosAndPath = [{#rev_info{deleted=Del,body_sp=Bp,seq=Seq,rev={Pos,RevId}}, Path} || - {{Del, Bp, Seq},{Pos, [RevId|_]}=Path} <- + {#leaf{deleted=Del, ptr=Bp, seq=Seq},{Pos, [RevId|_]}=Path} <- couch_key_tree:get_all_leafs(Tree)], SortedRevInfosAndPath = lists:sort( fun({#rev_info{deleted=DeletedA,rev=RevA}, _PathA}, diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl index 9ec444ac..2233f3a9 100644 --- a/apps/couch/src/couch_key_tree.erl +++ b/apps/couch/src/couch_key_tree.erl @@ -12,10 +12,13 @@ -module(couch_key_tree). 
--export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]). +-export([merge/3, find_missing/2, get_key_leafs/2, + get_full_key_paths/2, get/2, compute_data_size/1]). -export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2, get_all_leafs_full/1,stem/2,map_leafs/2]). +-include("couch_db.hrl"). + % Tree::term() is really a tree(), but we don't want to require R13B04 yet -type branch() :: {Key::term(), Value::term(), Tree::term()}. -type path() :: {Start::pos_integer(), branch()}. @@ -277,6 +280,53 @@ count_leafs_simple([{_Key, _Value, []} | RestTree]) -> count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) -> count_leafs_simple(SubTree) + count_leafs_simple(RestTree). +compute_data_size(Tree) -> + {TotBodySizes,TotAttSizes} = + tree_fold(fun({_Pos, _Key, _Value},branch,Acc) -> + {ok,Acc}; + ({_Pos, _Key, Value},leaf,Acc) -> + {ok, sum_up_sizes(Value, Acc)} + end,{0,[]},Tree), + SumTotAttSizes = lists:foldl(fun({_K,V},Acc) -> + V + Acc + end,0,TotAttSizes), + TotBodySizes + SumTotAttSizes. + +sum_up_sizes(#leaf{deleted=true}, Acc) -> + Acc; +sum_up_sizes(#leaf{deleted=false, size=DocBodySize, atts=AttSizes},Acc) -> + {TotBodySizes,TotalAttSizes} = Acc, + {TotBodySizes + DocBodySize, add_att_sizes(TotalAttSizes, AttSizes)}. + +add_att_sizes(TotalAttSizes,AttSizes) -> + lists:umerge(TotalAttSizes, lists:sort(AttSizes)). + +tree_fold(_Fun, Acc, []) -> + Acc; + +tree_fold(Fun, Acc, [{Pos, Branch} | Rest]) -> + Acc1 = tree_fold_simple(Fun, Pos, [Branch], Acc), + tree_fold(Fun, Acc1, Rest). 
+ +tree_fold_simple(_Fun, _Pos, [], Acc) -> + Acc; + +tree_fold_simple(Fun, Pos, [{Key, Value, []} | RestTree], Acc) -> + case Fun({Pos, Key, Value}, leaf, Acc) of + {ok, Acc1} -> + tree_fold_simple(Fun, Pos, RestTree, Acc1); + {stop, Acc1} -> + Acc1 + end; + +tree_fold_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree], Acc) -> + Acc1 = tree_fold_simple(Fun, Pos + 1, SubTree, Acc), + case Fun({Pos, Key, Value}, branch, Acc1) of + {ok, Acc2} -> + tree_fold_simple(Fun, Pos, RestTree, Acc2); + {stop, Acc2} -> + Acc2 + end. foldl(_Fun, Acc, []) -> Acc; diff --git a/apps/couch/src/couch_view.erl b/apps/couch/src/couch_view.erl index 8dca17da..65a42c0b 100644 --- a/apps/couch/src/couch_view.erl +++ b/apps/couch/src/couch_view.erl @@ -18,7 +18,7 @@ code_change/3,get_reduce_view/4,get_temp_reduce_view/5,get_temp_map_view/4, get_map_view/4,get_row_count/1,reduce_to_count/1,fold_reduce/4, extract_map_view/1,get_group_server/2,get_group_info/2, - cleanup_index_files/1,config_change/2]). + cleanup_index_files/1,config_change/2, data_size/2]). -include("couch_db.hrl"). @@ -102,7 +102,7 @@ list_index_files(Db) -> get_row_count(#view{btree=Bt}) -> - {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt), + {ok, {Count, _, _}} = couch_btree:full_reduce(Bt), {ok, Count}. get_temp_reduce_view(Db, Language, DesignOptions, MapSrc, RedSrc) -> @@ -150,6 +150,13 @@ expand_dups([{Key, {dups, Vals}} | Rest], Acc) -> expand_dups([KV | Rest], Acc) -> expand_dups(Rest, [KV | Acc]). +data_size(KVList, Reduction) -> + lists:foldl(fun([[Key, _], Value], Acc) -> + size(term_to_binary(Key)) + + size(term_to_binary(Value)) + + Acc + end,size(term_to_binary(Reduction)),KVList). 
+ fold_reduce({temp_reduce, #view{btree=Bt}}, Fun, Acc, Options) -> WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> {_, [Red]} = couch_btree:final_reduce(Bt, PartialReds), diff --git a/apps/couch/src/couch_view_compactor.erl b/apps/couch/src/couch_view_compactor.erl index 43db9036..38f63f66 100644 --- a/apps/couch/src/couch_view_compactor.erl +++ b/apps/couch/src/couch_view_compactor.erl @@ -47,7 +47,7 @@ compact_group(Group, EmptyGroup) -> {ok, Db} = couch_db:open(DbName, []), - {ok, {Count, _}} = couch_btree:full_reduce(Db#db.id_tree), + {ok, Count} = couch_db:get_doc_count(Db), <<"_design", ShortName/binary>> = GroupId, TaskName = <<DbName/binary, ShortName/binary>>, diff --git a/apps/couch/src/couch_view_group.erl b/apps/couch/src/couch_view_group.erl index b0e67db5..de64ef51 100644 --- a/apps/couch/src/couch_view_group.erl +++ b/apps/couch/src/couch_view_group.erl @@ -466,6 +466,7 @@ get_group_info(State) -> fd = Fd, sig = GroupSig, def_lang = Lang, + views = Views, current_seq=CurrentSeq, purge_seq=PurgeSeq } = Group, @@ -474,6 +475,7 @@ get_group_info(State) -> {signature, ?l2b(hex_sig(GroupSig))}, {language, Lang}, {disk_size, Size}, + {data_size, compute_data_size(Views)}, {updater_running, UpdaterPid /= nil}, {compact_running, CompactorPid /= nil}, {waiting_commit, WaitingCommit}, @@ -482,6 +484,13 @@ get_group_info(State) -> {purge_seq, PurgeSeq} ]. +compute_data_size(ViewList) -> + lists:foldl(fun(#view{btree=Btree}, Acc) -> + {ok, {_, _, Size}} = couch_btree:full_reduce(Btree), + Size + Acc + end, 0, ViewList). 
+ + % maybe move to another module design_doc_to_view_group(#doc{id=Id,body={Fields}}) -> Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), @@ -563,13 +572,14 @@ init_group(Fd, #group{def_lang=Lang,views=Views}=Group, IndexHeader) -> KVs3 = couch_view:detuple_kvs(KVs2,[]), {ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs, KVs3), - {length(KVs3), Reduced}; + {length(KVs3), Reduced, couch_view:data_size(KVs3, Reduced)}; (rereduce, Reds) -> - Count = lists:sum([Count0 || {Count0, _} <- Reds]), - UserReds = [UserRedsList || {_, UserRedsList} <- Reds], + Count = lists:sum(extract(Reds, counts)), + DataSize = lists:sum(extract(Reds, data_size)), + UserReds = extract(Reds, user_reds), {ok, Reduced} = couch_query_servers:rereduce(Lang, FunSrcs, UserReds), - {Count, Reduced} + {Count, Reduced, DataSize} end, case couch_util:get_value(<<"collation">>, Options, <<"default">>) of @@ -585,3 +595,10 @@ init_group(Fd, #group{def_lang=Lang,views=Views}=Group, IndexHeader) -> ViewStates, Views), Group#group{fd=Fd, current_seq=Seq, purge_seq=PurgeSeq, id_btree=IdBtree, views=Views2}. + +extract(Reds, counts) -> + [element(1, R) || R <- Reds]; +extract(Reds, user_reds) -> + [element(2, R) || R <- Reds]; +extract(Reds, data_size) -> + lists:map(fun({_, _}) -> 0; ({_, _, Size}) -> Size end, Reds). |