summary | refs | log | tree | commit | diff
path: root/apps/couch/src/couch_key_tree.erl
diff options
context:
space:
mode:
author Robert Dionne <bob@cloudant.com> 2011-04-19 12:06:37 -0400
committer Adam Kocoloski <adam@cloudant.com> 2011-04-19 13:09:18 -0400
commit a9410e622d84c4b6c017d16ea600e9b19e306c59 (patch)
tree 6d5ff8a1a10fef103594072b50d55990ccc43607 /apps/couch/src/couch_key_tree.erl
parent 663d04f907ba0f8e23bab0eb2492c431246974fa (diff)
Track and report size of live data in DBs and views
The #full_doc_info record is extended to include the summed size of leaf revision document bodies and their attachments. Document sizes are computed on update; accurate sizes of existing databases and view groups are only available after compaction. The document size is defined to be the size of the binary representation of #doc.body. The att_len field is used for attachments; attachments that are shared by multiple revisions of a document are only counted once. The size of a view index is defined as the size of all keys, values, and reductions accessible from the current root of the tree. BugzID: 9995
Diffstat (limited to 'apps/couch/src/couch_key_tree.erl')
-rw-r--r-- apps/couch/src/couch_key_tree.erl 52
1 files changed, 51 insertions, 1 deletions
diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl
index 9ec444ac..2233f3a9 100644
--- a/apps/couch/src/couch_key_tree.erl
+++ b/apps/couch/src/couch_key_tree.erl
@@ -12,10 +12,13 @@
-module(couch_key_tree).
--export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]).
+-export([merge/3, find_missing/2, get_key_leafs/2,
+ get_full_key_paths/2, get/2, compute_data_size/1]).
-export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2,
get_all_leafs_full/1,stem/2,map_leafs/2]).
+-include("couch_db.hrl").
+
% Tree::term() is really a tree(), but we don't want to require R13B04 yet
-type branch() :: {Key::term(), Value::term(), Tree::term()}.
-type path() :: {Start::pos_integer(), branch()}.
@@ -277,6 +280,53 @@ count_leafs_simple([{_Key, _Value, []} | RestTree]) ->
count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) ->
count_leafs_simple(SubTree) + count_leafs_simple(RestTree).
+%% Sum the sizes recorded on the leaves of Tree.
+%% The fold accumulates a {BodyTotal, AttEntries} pair: leaf values are
+%% handed to sum_up_sizes/2, branch nodes pass the accumulator through
+%% unchanged. The collected {Key, Size} attachment entries are then summed
+%% and added to the body total.
+compute_data_size(Tree) ->
+    Collect = fun({_Pos, _Key, _Value}, branch, Sizes) ->
+                      {ok, Sizes};
+                 ({_Pos, _Key, LeafValue}, leaf, Sizes) ->
+                      {ok, sum_up_sizes(LeafValue, Sizes)}
+              end,
+    {BodyTotal, AttEntries} = tree_fold(Collect, {0, []}, Tree),
+    AttTotal = lists:foldl(fun({_AttKey, AttSize}, Sum) -> AttSize + Sum end,
+                           0, AttEntries),
+    BodyTotal + AttTotal.
+
+%% Fold one leaf value into the running {BodyTotal, AttEntries} accumulator.
+%% A leaf flagged deleted leaves the accumulator unchanged; any other leaf
+%% adds its body size and merges its attachment entries into the list.
+sum_up_sizes(#leaf{deleted=true}, Totals) ->
+    Totals;
+sum_up_sizes(#leaf{deleted=false, size=BodySize, atts=LeafAtts}, Totals) ->
+    {BodyTotal, AttEntries} = Totals,
+    NewBodyTotal = BodyTotal + BodySize,
+    NewAttEntries = add_att_sizes(AttEntries, LeafAtts),
+    {NewBodyTotal, NewAttEntries}.
+
+%% Merge one leaf's attachment entries into the running list of entries,
+%% keeping the result sorted and free of duplicates so that an entry shared
+%% by several leaves is only present (and later summed) once.
+%%
+%% lists:umerge/2 requires BOTH inputs to be sorted and duplicate-free.
+%% lists:sort/1 keeps repeated elements, so the incoming list is normalised
+%% with lists:usort/1 instead; otherwise a repeated entry in AttSizes could
+%% survive the merge and be counted more than once.
+add_att_sizes(TotalAttSizes, AttSizes) ->
+    lists:umerge(TotalAttSizes, lists:usort(AttSizes)).
+
+%% Fold Fun over every {Pos, Branch} entry of the tree, left to right,
+%% threading the accumulator from one entry to the next. Each branch is
+%% walked by tree_fold_simple/4 starting at that entry's Pos.
+tree_fold(Fun, Acc, Paths) ->
+    lists:foldl(fun({Pos, Branch}, AccIn) ->
+                        tree_fold_simple(Fun, Pos, [Branch], AccIn)
+                end, Acc, Paths).
+
+%% Fold Fun over a list of sibling {Key, Value, SubTree} nodes at depth Pos.
+%%
+%% A node with an empty SubTree is reported as `leaf'. Any other node has its
+%% SubTree folded first (at Pos + 1) and is then reported as `branch', so
+%% children are visited before their parent.
+%%
+%% Fun({Pos, Key, Value}, leaf | branch, Acc) must return {ok, NewAcc} to
+%% continue with the remaining siblings, or {stop, NewAcc} to skip them.
+%% Note that {stop, _} only ends the current sibling list: the enclosing
+%% levels still see the returned accumulator and carry on.
+tree_fold_simple(_Fun, _Pos, [], Acc) ->
+    Acc;
+tree_fold_simple(Fun, Pos, [{Key, Value, SubTree} | Siblings], Acc0) ->
+    {Kind, Acc1} =
+        case SubTree of
+            [] -> {leaf, Acc0};
+            _ -> {branch, tree_fold_simple(Fun, Pos + 1, SubTree, Acc0)}
+        end,
+    case Fun({Pos, Key, Value}, Kind, Acc1) of
+        {ok, Acc2} -> tree_fold_simple(Fun, Pos, Siblings, Acc2);
+        {stop, Acc2} -> Acc2
+    end.
foldl(_Fun, Acc, []) ->
Acc;