summaryrefslogtreecommitdiff
path: root/apps/couch/src/couch_util.erl
diff options
context:
space:
mode:
Diffstat (limited to 'apps/couch/src/couch_util.erl')
-rw-r--r--apps/couch/src/couch_util.erl478
1 files changed, 478 insertions, 0 deletions
diff --git a/apps/couch/src/couch_util.erl b/apps/couch/src/couch_util.erl
new file mode 100644
index 00000000..839f5956
--- /dev/null
+++ b/apps/couch/src/couch_util.erl
@@ -0,0 +1,478 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_util).
+
+-export([priv_dir/0, start_driver/1, normpath/1]).
+-export([should_flush/0, should_flush/1, to_existing_atom/1]).
+-export([rand32/0, implode/2, collate/2, collate/3]).
+-export([abs_pathname/1,abs_pathname/2, trim/1]).
+-export([encodeBase64Url/1, decodeBase64Url/1]).
+-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
+-export([get_nested_json_value/2, json_user_ctx/1]).
+-export([proplist_apply_field/2, json_apply_field/2]).
+-export([to_binary/1, to_integer/1, to_list/1, url_encode/1]).
+-export([json_encode/1, json_decode/1]).
+-export([verify/2,simple_call/2,shutdown_sync/1]).
+-export([compressible_att_type/1]).
+-export([get_value/2, get_value/3]).
+-export([md5/1, md5_init/0, md5_update/2, md5_final/1]).
+-export([reorder_results/2]).
+-export([url_strip_password/1]).
+-export([encode_doc_id/1]).
+
+-include("couch_db.hrl").
+
+% arbitrarily chosen amount of memory to use before flushing to disk
+-define(FLUSH_MAX_MEM, 10000000).
+
+priv_dir() ->
+ case code:priv_dir(couch) of
+ {error, bad_name} ->
+ % small hack, in dev mode "app" is couchdb. Fixing requires
+ % renaming src/couch to src/couch. Not really worth the hassle.
+ % -Damien
+ code:priv_dir(couchdb);
+ Dir -> Dir
+ end.
+
+start_driver(LibDir) ->
+ case erl_ddll:load_driver(LibDir, "couch_icu_driver") of
+ ok ->
+ ok;
+ {error, already_loaded} ->
+ ok = erl_ddll:reload_driver(LibDir, "couch_icu_driver");
+ {error, Error} ->
+ exit(erl_ddll:format_error(Error))
+ end.
+
+% Normalize a pathname by removing .. and . components.
+normpath(Path) ->
+ normparts(filename:split(Path), []).
+
+normparts([], Acc) ->
+ filename:join(lists:reverse(Acc));
+normparts([".." | RestParts], [_Drop | RestAcc]) ->
+ normparts(RestParts, RestAcc);
+normparts(["." | RestParts], Acc) ->
+ normparts(RestParts, Acc);
+normparts([Part | RestParts], Acc) ->
+ normparts(RestParts, [Part | Acc]).
+
+% works like list_to_existing_atom, except can be list or binary and it
+% gives you the original value instead of an error if no existing atom.
+to_existing_atom(V) when is_list(V) ->
+ try list_to_existing_atom(V) catch _:_ -> V end;
+to_existing_atom(V) when is_binary(V) ->
+ try list_to_existing_atom(?b2l(V)) catch _:_ -> V end;
+to_existing_atom(V) when is_atom(V) ->
+ V.
+
+shutdown_sync(Pid) when not is_pid(Pid)->
+ ok;
+shutdown_sync(Pid) ->
+ MRef = erlang:monitor(process, Pid),
+ try
+ catch unlink(Pid),
+ catch exit(Pid, shutdown),
+ receive
+ {'DOWN', MRef, _, _, _} ->
+ ok
+ end
+ after
+ erlang:demonitor(MRef, [flush])
+ end.
+
+
+simple_call(Pid, Message) ->
+ MRef = erlang:monitor(process, Pid),
+ try
+ Pid ! {self(), Message},
+ receive
+ {Pid, Result} ->
+ Result;
+ {'DOWN', MRef, _, _, Reason} ->
+ exit(Reason)
+ end
+ after
+ erlang:demonitor(MRef, [flush])
+ end.
+
+validate_utf8(Data) when is_list(Data) ->
+ validate_utf8(?l2b(Data));
+validate_utf8(Bin) when is_binary(Bin) ->
+ validate_utf8_fast(Bin, 0).
+
+validate_utf8_fast(B, O) ->
+ case B of
+ <<_:O/binary>> ->
+ true;
+ <<_:O/binary, C1, _/binary>> when
+ C1 < 128 ->
+ validate_utf8_fast(B, 1 + O);
+ <<_:O/binary, C1, C2, _/binary>> when
+ C1 >= 194, C1 =< 223,
+ C2 >= 128, C2 =< 191 ->
+ validate_utf8_fast(B, 2 + O);
+ <<_:O/binary, C1, C2, C3, _/binary>> when
+ C1 >= 224, C1 =< 239,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191 ->
+ validate_utf8_fast(B, 3 + O);
+ <<_:O/binary, C1, C2, C3, C4, _/binary>> when
+ C1 >= 240, C1 =< 244,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191,
+ C4 >= 128, C4 =< 191 ->
+ validate_utf8_fast(B, 4 + O);
+ _ ->
+ false
+ end.
+
+to_hex([]) ->
+ [];
+to_hex(Bin) when is_binary(Bin) ->
+ to_hex(binary_to_list(Bin));
+to_hex([H|T]) ->
+ [to_digit(H div 16), to_digit(H rem 16) | to_hex(T)].
+
+to_digit(N) when N < 10 -> $0 + N;
+to_digit(N) -> $a + N-10.
+
+
+parse_term(Bin) when is_binary(Bin) ->
+ parse_term(binary_to_list(Bin));
+parse_term(List) ->
+ {ok, Tokens, _} = erl_scan:string(List ++ "."),
+ erl_parse:parse_term(Tokens).
+
+get_value(Key, List) ->
+ get_value(Key, List, undefined).
+
+get_value(Key, List, Default) ->
+ case lists:keysearch(Key, 1, List) of
+ {value, {Key,Value}} ->
+ Value;
+ false ->
+ Default
+ end.
+
+get_nested_json_value({Props}, [Key|Keys]) ->
+ case couch_util:get_value(Key, Props, nil) of
+ nil -> throw({not_found, <<"missing json key: ", Key/binary>>});
+ Value -> get_nested_json_value(Value, Keys)
+ end;
+get_nested_json_value(Value, []) ->
+ Value;
+get_nested_json_value(_NotJSONObj, _) ->
+ throw({not_found, json_mismatch}).
+
+proplist_apply_field(H, L) ->
+ {R} = json_apply_field(H, {L}),
+ R.
+
+json_apply_field(H, {L}) ->
+ json_apply_field(H, L, []).
+json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) ->
+ json_apply_field({Key, NewValue}, Headers, Acc);
+json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) ->
+ json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]);
+json_apply_field({Key, NewValue}, [], Acc) ->
+ {[{Key, NewValue}|Acc]}.
+
+json_user_ctx(#db{name=ShardName, user_ctx=Ctx}) ->
+ {[{<<"db">>, mem3:dbname(ShardName)},
+ {<<"name">>,Ctx#user_ctx.name},
+ {<<"roles">>,Ctx#user_ctx.roles}]}.
+
+
+% returns a random integer
+rand32() ->
+ crypto:rand_uniform(0, 16#100000000).
+
+% given a pathname "../foo/bar/" it gives back the fully qualified
+% absolute pathname.
+abs_pathname(" " ++ Filename) ->
+ % strip leading whitspace
+ abs_pathname(Filename);
+abs_pathname([$/ |_]=Filename) ->
+ Filename;
+abs_pathname(Filename) ->
+ {ok, Cwd} = file:get_cwd(),
+ {Filename2, Args} = separate_cmd_args(Filename, ""),
+ abs_pathname(Filename2, Cwd) ++ Args.
+
+abs_pathname(Filename, Dir) ->
+ Name = filename:absname(Filename, Dir ++ "/"),
+ OutFilename = filename:join(fix_path_list(filename:split(Name), [])),
+ % If the filename is a dir (last char slash, put back end slash
+ case string:right(Filename,1) of
+ "/" ->
+ OutFilename ++ "/";
+ "\\" ->
+ OutFilename ++ "/";
+ _Else->
+ OutFilename
+ end.
+
+% if this as an executable with arguments, seperate out the arguments
+% ""./foo\ bar.sh -baz=blah" -> {"./foo\ bar.sh", " -baz=blah"}
+separate_cmd_args("", CmdAcc) ->
+ {lists:reverse(CmdAcc), ""};
+separate_cmd_args("\\ " ++ Rest, CmdAcc) -> % handle skipped value
+ separate_cmd_args(Rest, " \\" ++ CmdAcc);
+separate_cmd_args(" " ++ Rest, CmdAcc) ->
+ {lists:reverse(CmdAcc), " " ++ Rest};
+separate_cmd_args([Char|Rest], CmdAcc) ->
+ separate_cmd_args(Rest, [Char | CmdAcc]).
+
+% Is a character whitespace?
+is_whitespace($\s) -> true;
+is_whitespace($\t) -> true;
+is_whitespace($\n) -> true;
+is_whitespace($\r) -> true;
+is_whitespace(_Else) -> false.
+
+
+% removes leading and trailing whitespace from a string
+trim(String) ->
+ String2 = lists:dropwhile(fun is_whitespace/1, String),
+ lists:reverse(lists:dropwhile(fun is_whitespace/1, lists:reverse(String2))).
+
+% takes a heirarchical list of dirs and removes the dots ".", double dots
+% ".." and the corresponding parent dirs.
+fix_path_list([], Acc) ->
+ lists:reverse(Acc);
+fix_path_list([".."|Rest], [_PrevAcc|RestAcc]) ->
+ fix_path_list(Rest, RestAcc);
+fix_path_list(["."|Rest], Acc) ->
+ fix_path_list(Rest, Acc);
+fix_path_list([Dir | Rest], Acc) ->
+ fix_path_list(Rest, [Dir | Acc]).
+
+
+implode(List, Sep) ->
+ implode(List, Sep, []).
+
+implode([], _Sep, Acc) ->
+ lists:flatten(lists:reverse(Acc));
+implode([H], Sep, Acc) ->
+ implode([], Sep, [H|Acc]);
+implode([H|T], Sep, Acc) ->
+ implode(T, Sep, [Sep,H|Acc]).
+
+
+drv_port() ->
+ case get(couch_drv_port) of
+ undefined ->
+ Port = open_port({spawn, "couch_icu_driver"}, []),
+ put(couch_drv_port, Port),
+ Port;
+ Port ->
+ Port
+ end.
+
+collate(A, B) ->
+ collate(A, B, []).
+
+collate(A, B, Options) when is_binary(A), is_binary(B) ->
+ Operation =
+ case lists:member(nocase, Options) of
+ true -> 1; % Case insensitive
+ false -> 0 % Case sensitive
+ end,
+ SizeA = byte_size(A),
+ SizeB = byte_size(B),
+ Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>,
+ [Result] = erlang:port_control(drv_port(), Operation, Bin),
+ % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the
+ % expected typical -1, 0, 1
+ Result - 1.
+
+should_flush() ->
+ should_flush(?FLUSH_MAX_MEM).
+
+should_flush(MemThreshHold) ->
+ {memory, ProcMem} = process_info(self(), memory),
+ BinMem = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
+ 0, element(2,process_info(self(), binary))),
+ if ProcMem+BinMem > 2*MemThreshHold ->
+ garbage_collect(),
+ {memory, ProcMem2} = process_info(self(), memory),
+ BinMem2 = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
+ 0, element(2,process_info(self(), binary))),
+ ProcMem2+BinMem2 > MemThreshHold;
+ true -> false end.
+
+encodeBase64Url(Url) ->
+ Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
+ Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
+ iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
+
+decodeBase64Url(Url64) ->
+ Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]),
+ Url2 = iolist_to_binary(
+ re:replace(iolist_to_binary(Url1), "_", "/", [global])
+ ),
+ Padding = ?l2b(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)),
+ base64:decode(<<Url2/binary, Padding/binary>>).
+
+dict_find(Key, Dict, DefaultValue) ->
+ case dict:find(Key, Dict) of
+ {ok, Value} ->
+ Value;
+ error ->
+ DefaultValue
+ end.
+
+to_binary(V) when is_binary(V) ->
+ V;
+to_binary(V) when is_list(V) ->
+ try
+ list_to_binary(V)
+ catch
+ _:_ ->
+ list_to_binary(io_lib:format("~p", [V]))
+ end;
+to_binary(V) when is_atom(V) ->
+ list_to_binary(atom_to_list(V));
+to_binary(V) ->
+ list_to_binary(io_lib:format("~p", [V])).
+
+to_integer(V) when is_integer(V) ->
+ V;
+to_integer(V) when is_list(V) ->
+ erlang:list_to_integer(V);
+to_integer(V) when is_binary(V) ->
+ erlang:list_to_integer(binary_to_list(V)).
+
+to_list(V) when is_list(V) ->
+ V;
+to_list(V) when is_binary(V) ->
+ binary_to_list(V);
+to_list(V) when is_atom(V) ->
+ atom_to_list(V);
+to_list(V) ->
+ lists:flatten(io_lib:format("~p", [V])).
+
+url_encode(Bin) when is_binary(Bin) ->
+ url_encode(binary_to_list(Bin));
+url_encode([H|T]) ->
+ if
+ H >= $a, $z >= H ->
+ [H|url_encode(T)];
+ H >= $A, $Z >= H ->
+ [H|url_encode(T)];
+ H >= $0, $9 >= H ->
+ [H|url_encode(T)];
+ H == $_; H == $.; H == $-; H == $: ->
+ [H|url_encode(T)];
+ true ->
+ case lists:flatten(io_lib:format("~.16.0B", [H])) of
+ [X, Y] ->
+ [$%, X, Y | url_encode(T)];
+ [X] ->
+ [$%, $0, X | url_encode(T)]
+ end
+ end;
+url_encode([]) ->
+ [].
+
+json_encode(V) ->
+ Handler =
+ fun({L}) when is_list(L) ->
+ {struct,L};
+ (Bad) ->
+ exit({json_encode, {bad_term, Bad}})
+ end,
+ (mochijson2:encoder([{handler, Handler}]))(V).
+
+json_decode(V) ->
+ try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V)
+ catch
+ _Type:_Error ->
+ throw({invalid_json,V})
+ end.
+
+verify([X|RestX], [Y|RestY], Result) ->
+ verify(RestX, RestY, (X bxor Y) bor Result);
+verify([], [], Result) ->
+ Result == 0.
+
+verify(<<X/binary>>, <<Y/binary>>) ->
+ verify(?b2l(X), ?b2l(Y));
+verify(X, Y) when is_list(X) and is_list(Y) ->
+ case length(X) == length(Y) of
+ true ->
+ verify(X, Y, 0);
+ false ->
+ false
+ end;
+verify(_X, _Y) -> false.
+
+compressible_att_type(MimeType) when is_binary(MimeType) ->
+ compressible_att_type(?b2l(MimeType));
+compressible_att_type(MimeType) ->
+ TypeExpList = re:split(
+ couch_config:get("attachments", "compressible_types", ""),
+ "\\s*,\\s*",
+ [{return, list}]
+ ),
+ lists:any(
+ fun(TypeExp) ->
+ Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"),
+ "(?:\\s*;.*?)?\\s*", $$],
+ re:run(MimeType, Regexp, [caseless]) =/= nomatch
+ end,
+ [T || T <- TypeExpList, T /= []]
+ ).
+
+-spec md5(Data::(iolist() | binary())) -> Digest::binary().
+md5(Data) ->
+ try crypto:md5(Data) catch error:_ -> erlang:md5(Data) end.
+
+-spec md5_init() -> Context::binary().
+md5_init() ->
+ try crypto:md5_init() catch error:_ -> erlang:md5_init() end.
+
+-spec md5_update(Context::binary(), Data::(iolist() | binary())) ->
+ NewContext::binary().
+md5_update(Ctx, D) ->
+ try crypto:md5_update(Ctx,D) catch error:_ -> erlang:md5_update(Ctx,D) end.
+
+-spec md5_final(Context::binary()) -> Digest::binary().
+md5_final(Ctx) ->
+ try crypto:md5_final(Ctx) catch error:_ -> erlang:md5_final(Ctx) end.
+
+% linear search is faster for small lists, length() is 0.5 ms for 100k list
+reorder_results(Keys, SortedResults) when length(Keys) < 100 ->
+ [couch_util:get_value(Key, SortedResults) || Key <- Keys];
+reorder_results(Keys, SortedResults) ->
+ KeyDict = dict:from_list(SortedResults),
+ [dict:fetch(Key, KeyDict) || Key <- Keys].
+
+url_strip_password(Url) ->
+ re:replace(Url,
+ "http(s)?://([^:]+):[^@]+@(.*)$",
+ "http\\1://\\2:*****@\\3",
+ [{return, list}]).
+
+encode_doc_id(#doc{id = Id}) ->
+ encode_doc_id(Id);
+encode_doc_id(Id) when is_list(Id) ->
+ encode_doc_id(?l2b(Id));
+encode_doc_id(<<"_design/", Rest/binary>>) ->
+ "_design/" ++ url_encode(Rest);
+encode_doc_id(<<"_local/", Rest/binary>>) ->
+ "_local/" ++ url_encode(Rest);
+encode_doc_id(Id) ->
+ url_encode(Id).