summaryrefslogtreecommitdiff
path: root/apps/couch/src/couch_key_tree.erl
diff options
context:
space:
mode:
authorAdam Kocoloski <adam@cloudant.com>2010-08-11 15:22:33 -0400
committerAdam Kocoloski <adam@cloudant.com>2010-08-11 17:39:37 -0400
commit81bdbed444df2cbcf3cdb32f7d4a74019de06454 (patch)
treeeade7d0d9bb4cac01b55fd8642adfe0f7da35161 /apps/couch/src/couch_key_tree.erl
parentcc1910f73fbd20c5ffc94bd61e7701d7f5e4c92a (diff)
reorganize couch .erl and driver code into rebar layout
Diffstat (limited to 'apps/couch/src/couch_key_tree.erl')
-rw-r--r--apps/couch/src/couch_key_tree.erl329
1 files changed, 329 insertions, 0 deletions
diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl
new file mode 100644
index 00000000..4fe09bf3
--- /dev/null
+++ b/apps/couch/src/couch_key_tree.erl
@@ -0,0 +1,329 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_key_tree).
+
+-export([merge/2, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]).
+-export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2,
+ get_all_leafs_full/1,stem/2,map_leafs/2]).
+
+% a key tree looks like this:
+% Tree -> [] or [{Key, Value, ChildTree} | SiblingTree]
+% ChildTree -> Tree
+% SiblingTree -> [] or [{SiblingKey, Value, Tree} | Tree]
+% And each Key < SiblingKey
+
+
+% partial trees arranged by how much they are cut off.
+
+merge(A, B) ->
+ {Merged, HasConflicts} =
+ lists:foldl(
+ fun(InsertTree, {AccTrees, AccConflicts}) ->
+ {ok, Merged, Conflicts} = merge_one(AccTrees, InsertTree, [], false),
+ {Merged, Conflicts or AccConflicts}
+ end,
+ {A, false}, B),
+ if HasConflicts or
+ ((length(Merged) =/= length(A)) and (length(Merged) =/= length(B))) ->
+ Conflicts = conflicts;
+ true ->
+ Conflicts = no_conflicts
+ end,
+ {lists:sort(Merged), Conflicts}.
+
+merge_one([], Insert, OutAcc, ConflictsAcc) ->
+ {ok, [Insert | OutAcc], ConflictsAcc};
+merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, OutAcc, ConflictsAcc) ->
+ if Start =< StartInsert ->
+ StartA = Start,
+ StartB = StartInsert,
+ TreeA = Tree,
+ TreeB = TreeInsert;
+ true ->
+ StartB = Start,
+ StartA = StartInsert,
+ TreeB = Tree,
+ TreeA = TreeInsert
+ end,
+ case merge_at([TreeA], StartB - StartA, TreeB) of
+ {ok, [CombinedTrees], Conflicts} ->
+ merge_one(Rest, {StartA, CombinedTrees}, OutAcc, Conflicts or ConflictsAcc);
+ no ->
+ merge_one(Rest, {StartB, TreeB}, [{StartA, TreeA} | OutAcc], ConflictsAcc)
+ end.
+
+merge_at([], _Place, _Insert) ->
+ no;
+merge_at([{Key, Value, SubTree}|Sibs], 0, {InsertKey, InsertValue, InsertSubTree}) ->
+ if Key == InsertKey ->
+ {Merge, Conflicts} = merge_simple(SubTree, InsertSubTree),
+ {ok, [{Key, Value, Merge} | Sibs], Conflicts};
+ true ->
+ case merge_at(Sibs, 0, {InsertKey, InsertValue, InsertSubTree}) of
+ {ok, Merged, Conflicts} ->
+ {ok, [{Key, Value, SubTree} | Merged], Conflicts};
+ no ->
+ no
+ end
+ end;
+merge_at([{Key, Value, SubTree}|Sibs], Place, Insert) ->
+ case merge_at(SubTree, Place - 1,Insert) of
+ {ok, Merged, Conflicts} ->
+ {ok, [{Key, Value, Merged} | Sibs], Conflicts};
+ no ->
+ case merge_at(Sibs, Place, Insert) of
+ {ok, Merged, Conflicts} ->
+ {ok, [{Key, Value, SubTree} | Merged], Conflicts};
+ no ->
+ no
+ end
+ end.
+
+% key tree functions
+merge_simple([], B) ->
+ {B, false};
+merge_simple(A, []) ->
+ {A, false};
+merge_simple([ATree | ANextTree], [BTree | BNextTree]) ->
+ {AKey, AValue, ASubTree} = ATree,
+ {BKey, _BValue, BSubTree} = BTree,
+ if
+ AKey == BKey ->
+ %same key
+ {MergedSubTree, Conflict1} = merge_simple(ASubTree, BSubTree),
+ {MergedNextTree, Conflict2} = merge_simple(ANextTree, BNextTree),
+ {[{AKey, AValue, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2};
+ AKey < BKey ->
+ {MTree, _} = merge_simple(ANextTree, [BTree | BNextTree]),
+ {[ATree | MTree], true};
+ true ->
+ {MTree, _} = merge_simple([ATree | ANextTree], BNextTree),
+ {[BTree | MTree], true}
+ end.
+
+find_missing(_Tree, []) ->
+ [];
+find_missing([], SeachKeys) ->
+ SeachKeys;
+find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) ->
+ PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start],
+ ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start],
+ Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys),
+ find_missing(RestTree, ImpossibleKeys ++ Missing).
+
+find_missing_simple(_Pos, _Tree, []) ->
+ [];
+find_missing_simple(_Pos, [], SeachKeys) ->
+ SeachKeys;
+find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) ->
+ PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos],
+ ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos],
+
+ SrcKeys2 = PossibleKeys -- [{Pos, Key}],
+ SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2),
+ ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3).
+
+
+filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) ->
+ {FilteredAcc, RemovedKeysAcc};
+filter_leafs([{Pos, [{LeafKey, _}|_]} = Path |Rest], Keys, FilteredAcc, RemovedKeysAcc) ->
+ FilteredKeys = lists:delete({Pos, LeafKey}, Keys),
+ if FilteredKeys == Keys ->
+ % this leaf is not a key we are looking to remove
+ filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc);
+ true ->
+ % this did match a key, remove both the node and the input key
+ filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc])
+ end.
+
+% Removes any branches from the tree whose leaf node(s) are in the Keys
+remove_leafs(Trees, Keys) ->
+ % flatten each branch in a tree into a tree path
+ Paths = get_all_leafs_full(Trees),
+
+ % filter out any that are in the keys list.
+ {FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []),
+
+ % convert paths back to trees
+ NewTree = lists:foldl(
+ fun({PathPos, Path},TreeAcc) ->
+ [SingleTree] = lists:foldl(
+ fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path),
+ {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]),
+ NewTrees
+ end, [], FilteredPaths),
+ {NewTree, RemovedKeys}.
+
+
+% get the leafs in the tree matching the keys. The matching key nodes can be
+% leafs or an inner nodes. If an inner node, then the leafs for that node
+% are returned.
+get_key_leafs(Tree, Keys) ->
+ get_key_leafs(Tree, Keys, []).
+
+get_key_leafs(_, [], Acc) ->
+ {Acc, []};
+get_key_leafs([], Keys, Acc) ->
+ {Acc, Keys};
+get_key_leafs([{Pos, Tree}|Rest], Keys, Acc) ->
+ {Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []),
+ get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc).
+
+get_key_leafs_simple(_Pos, _Tree, [], _KeyPathAcc) ->
+ {[], []};
+get_key_leafs_simple(_Pos, [], KeysToGet, _KeyPathAcc) ->
+ {[], KeysToGet};
+get_key_leafs_simple(Pos, [{Key, _Value, SubTree}=Tree | RestTree], KeysToGet, KeyPathAcc) ->
+ case lists:delete({Pos, Key}, KeysToGet) of
+ KeysToGet -> % same list, key not found
+ {LeafsFound, KeysToGet2} = get_key_leafs_simple(Pos + 1, SubTree, KeysToGet, [Key | KeyPathAcc]),
+ {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc),
+ {LeafsFound ++ RestLeafsFound, KeysRemaining};
+ KeysToGet2 ->
+ LeafsFound = get_all_leafs_simple(Pos, [Tree], KeyPathAcc),
+ LeafKeysFound = [LeafKeyFound || {LeafKeyFound, _} <- LeafsFound],
+ KeysToGet2 = KeysToGet2 -- LeafKeysFound,
+ {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc),
+ {LeafsFound ++ RestLeafsFound, KeysRemaining}
+ end.
+
+get(Tree, KeysToGet) ->
+ {KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet),
+ FixedResults = [ {Value, {Pos, [Key0 || {Key0, _} <- Path]}} || {Pos, [{_Key, Value}|_]=Path} <- KeyPaths],
+ {FixedResults, KeysNotFound}.
+
+get_full_key_paths(Tree, Keys) ->
+ get_full_key_paths(Tree, Keys, []).
+
+get_full_key_paths(_, [], Acc) ->
+ {Acc, []};
+get_full_key_paths([], Keys, Acc) ->
+ {Acc, Keys};
+get_full_key_paths([{Pos, Tree}|Rest], Keys, Acc) ->
+ {Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []),
+ get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc).
+
+
+get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) ->
+ {[], []};
+get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) ->
+ {[], KeysToGet};
+get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) ->
+ KeysToGet2 = KeysToGet -- [{Pos, KeyId}],
+ CurrentNodeResult =
+ case length(KeysToGet2) =:= length(KeysToGet) of
+ true -> % not in the key list.
+ [];
+ false -> % this node is the key list. return it
+ [{Pos, [{KeyId, Value} | KeyPathAcc]}]
+ end,
+ {KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [{KeyId, Value} | KeyPathAcc]),
+ {KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc),
+ {CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}.
+
+get_all_leafs_full(Tree) ->
+ get_all_leafs_full(Tree, []).
+
+get_all_leafs_full([], Acc) ->
+ Acc;
+get_all_leafs_full([{Pos, Tree} | Rest], Acc) ->
+ get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc).
+
+get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) ->
+ [];
+get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
+ [{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)];
+get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) ->
+ get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++ get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc).
+
+get_all_leafs(Trees) ->
+ get_all_leafs(Trees, []).
+
+get_all_leafs([], Acc) ->
+ Acc;
+get_all_leafs([{Pos, Tree}|Rest], Acc) ->
+ get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc).
+
+get_all_leafs_simple(_Pos, [], _KeyPathAcc) ->
+ [];
+get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
+ [{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)];
+get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) ->
+ get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++ get_all_leafs_simple(Pos, RestTree, KeyPathAcc).
+
+
+count_leafs([]) ->
+ 0;
+count_leafs([{_Pos,Tree}|Rest]) ->
+ count_leafs_simple([Tree]) + count_leafs(Rest).
+
+count_leafs_simple([]) ->
+ 0;
+count_leafs_simple([{_Key, _Value, []} | RestTree]) ->
+ 1 + count_leafs_simple(RestTree);
+count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) ->
+ count_leafs_simple(SubTree) + count_leafs_simple(RestTree).
+
+
+map(_Fun, []) ->
+ [];
+map(Fun, [{Pos, Tree}|Rest]) ->
+ case erlang:fun_info(Fun, arity) of
+ {arity, 2} ->
+ [NewTree] = map_simple(fun(A,B,_C) -> Fun(A,B) end, Pos, [Tree]),
+ [{Pos, NewTree} | map(Fun, Rest)];
+ {arity, 3} ->
+ [NewTree] = map_simple(Fun, Pos, [Tree]),
+ [{Pos, NewTree} | map(Fun, Rest)]
+ end.
+
+map_simple(_Fun, _Pos, []) ->
+ [];
+map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
+ Value2 = Fun({Pos, Key}, Value,
+ if SubTree == [] -> leaf; true -> branch end),
+ [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)].
+
+
+map_leafs(_Fun, []) ->
+ [];
+map_leafs(Fun, [{Pos, Tree}|Rest]) ->
+ [NewTree] = map_leafs_simple(Fun, Pos, [Tree]),
+ [{Pos, NewTree} | map_leafs(Fun, Rest)].
+
+map_leafs_simple(_Fun, _Pos, []) ->
+ [];
+map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) ->
+ Value2 = Fun({Pos, Key}, Value),
+ [{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)];
+map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
+ [{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)].
+
+
+stem(Trees, Limit) ->
+ % flatten each branch in a tree into a tree path
+ Paths = get_all_leafs_full(Trees),
+
+ Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths],
+
+ % convert paths back to trees
+ lists:foldl(
+ fun({PathPos, Path},TreeAcc) ->
+ [SingleTree] = lists:foldl(
+ fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path),
+ {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]),
+ NewTrees
+ end, [], Paths2).
+
+% Tests moved to test/etap/06?-*.t
+