diff options
Diffstat (limited to 'apps/couch/src/couch_httpd_rewrite.erl')
-rw-r--r-- | apps/couch/src/couch_httpd_rewrite.erl | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/apps/couch/src/couch_httpd_rewrite.erl b/apps/couch/src/couch_httpd_rewrite.erl new file mode 100644 index 00000000..8480c1e9 --- /dev/null +++ b/apps/couch/src/couch_httpd_rewrite.erl @@ -0,0 +1,463 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +% +% bind_path is based on bind method from Webmachine + + +%% @doc Module for URL rewriting by pattern matching. + +-module(couch_httpd_rewrite). +-export([handle_rewrite_req/3]). +-include("couch_db.hrl"). + +-define(SEPARATOR, $\/). +-define(MATCH_ALL, {bind, <<"*">>}). + + +%% doc The http rewrite handler. All rewriting is done from +%% /dbname/_design/ddocname/_rewrite by default. +%% +%% each rules should be in rewrites member of the design doc. +%% Ex of a complete rule : +%% +%% { +%% .... +%% "rewrites": [ +%% { +%% "from": "", +%% "to": "index.html", +%% "method": "GET", +%% "query": {} +%% } +%% ] +%% } +%% +%% from: is the path rule used to bind current uri to the rule. It +%% use pattern matching for that. +%% +%% to: rule to rewrite an url. It can contain variables depending on binding +%% variables discovered during pattern matching and query args (url args and from +%% the query member.) +%% +%% method: method to bind the request method to the rule. by default "*" +%% query: query args you want to define they can contain dynamic variable +%% by binding the key to the bindings +%% +%% +%% to and from are path with patterns. pattern can be string starting with ":" or +%% "*". ex: +%% /somepath/:var/* +%% +%% This path is converted in erlang list by splitting "/". Each var are +%% converted in atom. "*" is converted to '*' atom. The pattern matching is done +%% by splitting "/" in request url in a list of token. A string pattern will +%% match equal token. The star atom ('*' in single quotes) will match any number +%% of tokens, but may only be present as the last pathtern in a pathspec. If all +%% tokens are matched and all pathterms are used, then the pathspec matches. It works +%% like webmachine. Each identified token will be reused in to rule and in query +%% +%% The pattern matching is done by first matching the request method to a rule. by +%% default all methods match a rule. (method is equal to "*" by default). Then +%% It will try to match the path to one rule. If no rule match, then a 404 error +%% is displayed. +%% +%% Once a rule is found we rewrite the request url using the "to" and +%% "query" members. The identified token are matched to the rule and +%% will replace var. if '*' is found in the rule it will contain the remaining +%% part if it exists. +%% +%% Examples: +%% +%% Dispatch rule URL TO Tokens +%% +%% {"from": "/a/b", /a/b?k=v /some/b?k=v var =:= b +%% "to": "/some/"} k = v +%% +%% {"from": "/a/b", /a/b /some/b?var=b var =:= b +%% "to": "/some/:var"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/*", /a/b/c /some/b/c +%% "to": "/some/*"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/:foo/*", /a/b/c /some/b/c?foo=b foo =:= b +%% "to": "/some/:foo/*"} +%% +%% {"from": "/a/:foo", /a/b /some/?k=b&foo=b foo =:= b +%% "to": "/some", +%% "query": { +%% "k": ":foo" +%% }} +%% +%% {"from": "/a", /a?foo=b /some/b foo =:= b +%% "to": "/some/:foo", +%% }} + + + +handle_rewrite_req(#httpd{ + path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts], + method=Method, + mochi_req=MochiReq}=Req, _Db, DDoc) -> + + % we are in a design handler + DesignId = <<"_design/", DesignName/binary>>, + Prefix = <<"/", DbName/binary, "/", DesignId/binary>>, + QueryList = lists:map(fun decode_query_value/1, couch_httpd:qs(Req)), + + #doc{body={Props}} = DDoc, + + % get rules from ddoc + case couch_util:get_value(<<"rewrites">>, Props) of + undefined -> + couch_httpd:send_error(Req, 404, <<"rewrite_error">>, + <<"Invalid path.">>); + Bin when is_binary(Bin) -> + couch_httpd:send_error(Req, 400, <<"rewrite_error">>, + <<"Rewrite rules are a String. They must be a JSON Array.">>); + Rules -> + % create dispatch list from rules + DispatchList = [make_rule(Rule) || {Rule} <- Rules], + Method1 = couch_util:to_binary(Method), + + %% get raw path by matching url to a rule. + RawPath = case try_bind_path(DispatchList, Method1, + PathParts, QueryList) of + no_dispatch_path -> + throw(not_found); + {NewPathParts, Bindings} -> + Parts = [quote_plus(X) || X <- NewPathParts], + + % build new path, reencode query args, eventually convert + % them to json + Bindings1 = maybe_encode_bindings(Bindings), + Path = binary_to_list( + iolist_to_binary([ + string:join(Parts, [?SEPARATOR]), + [["?", mochiweb_util:urlencode(Bindings1)] + || Bindings1 =/= [] ] + ])), + + % if path is relative detect it and rewrite path + case mochiweb_util:safe_relative_path(Path) of + undefined -> + ?b2l(Prefix) ++ "/" ++ Path; + P1 -> + ?b2l(Prefix) ++ "/" ++ P1 + end + + end, + + % normalize final path (fix levels "." and "..") + RawPath1 = ?b2l(iolist_to_binary(normalize_path(RawPath))), + + ?LOG_DEBUG("rewrite to ~p ~n", [RawPath1]), + + % build a new mochiweb request + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + RawPath1, + MochiReq:get(version), + MochiReq:get(headers)), + + % cleanup, It force mochiweb to reparse raw uri. + MochiReq1:cleanup(), + + #httpd{ + db_url_handlers = DbUrlHandlers, + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers + } = Req, + couch_httpd:handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) + end. + +quote_plus({bind, X}) -> + mochiweb_util:quote_plus(X); +quote_plus(X) -> + mochiweb_util:quote_plus(X). + +%% @doc Try to find a rule matching current url. If none is found +%% 404 error not_found is raised +try_bind_path([], _Method, _PathParts, _QueryList) -> + no_dispatch_path; +try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) -> + [{PathParts1, Method1}, RedirectPath, QueryArgs, Formats] = Dispatch, + case bind_method(Method1, Method) of + true -> + case bind_path(PathParts1, PathParts, []) of + {ok, Remaining, Bindings} -> + Bindings1 = Bindings ++ QueryList, + % we parse query args from the rule and fill + % it eventually with bindings vars + QueryArgs1 = make_query_list(QueryArgs, Bindings1, + Formats, []), + % remove params in QueryLists1 that are already in + % QueryArgs1 + Bindings2 = lists:foldl(fun({K, V}, Acc) -> + K1 = to_binding(K), + KV = case couch_util:get_value(K1, QueryArgs1) of + undefined -> [{K1, V}]; + _V1 -> [] + end, + Acc ++ KV + end, [], Bindings1), + + FinalBindings = Bindings2 ++ QueryArgs1, + NewPathParts = make_new_path(RedirectPath, FinalBindings, + Remaining, []), + {NewPathParts, FinalBindings}; + fail -> + try_bind_path(Rest, Method, PathParts, QueryList) + end; + false -> + try_bind_path(Rest, Method, PathParts, QueryList) + end. + +%% rewriting dynamically the quey list given as query member in +%% rewrites. Each value is replaced by one binding or an argument +%% passed in url. +make_query_list([], _Bindings, _Formats, Acc) -> + Acc; +make_query_list([{Key, {Value}}|Rest], Bindings, Formats, Acc) -> + Value1 = {Value}, + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) when is_binary(Value) -> + Value1 = replace_var(Value, Bindings, Formats), + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) when is_list(Value) -> + Value1 = replace_var(Value, Bindings, Formats), + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) -> + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value}|Acc]). + +replace_var(<<"*">>=Value, Bindings, Formats) -> + get_var(Value, Bindings, Value, Formats); +replace_var(<<":", Var/binary>> = Value, Bindings, Formats) -> + get_var(Var, Bindings, Value, Formats); +replace_var(Value, _Bindings, _Formats) when is_binary(Value) -> + Value; +replace_var(Value, Bindings, Formats) when is_list(Value) -> + lists:reverse(lists:foldl(fun + (<<":", Var/binary>>=Value1, Acc) -> + [get_var(Var, Bindings, Value1, Formats)|Acc]; + (Value1, Acc) -> + [Value1|Acc] + end, [], Value)); +replace_var(Value, _Bindings, _Formats) -> + Value. + +maybe_json(Key, Value) -> + case lists:member(Key, [<<"key">>, <<"startkey">>, <<"start_key">>, + <<"endkey">>, <<"end_key">>, <<"keys">>]) of + true -> + ?JSON_ENCODE(Value); + false -> + Value + end. + +get_var(VarName, Props, Default, Formats) -> + VarName1 = to_binding(VarName), + Val = couch_util:get_value(VarName1, Props, Default), + maybe_format(VarName, Val, Formats). + +maybe_format(VarName, Value, Formats) -> + case couch_util:get_value(VarName, Formats) of + undefined -> + Value; + Format -> + format(Format, Value) + end. + +format(<<"int">>, Value) when is_integer(Value) -> + Value; +format(<<"int">>, Value) when is_binary(Value) -> + format(<<"int">>, ?b2l(Value)); +format(<<"int">>, Value) when is_list(Value) -> + case (catch list_to_integer(Value)) of + IntVal when is_integer(IntVal) -> + IntVal; + _ -> + Value + end; +format(<<"bool">>, Value) when is_binary(Value) -> + format(<<"bool">>, ?b2l(Value)); +format(<<"bool">>, Value) when is_list(Value) -> + case string:to_lower(Value) of + "true" -> true; + "false" -> false; + _ -> Value + end; +format(_Format, Value) -> + Value. + +%% doc: build new patch from bindings. bindings are query args +%% (+ dynamic query rewritten if needed) and bindings found in +%% bind_path step. +make_new_path([], _Bindings, _Remaining, Acc) -> + lists:reverse(Acc); +make_new_path([?MATCH_ALL], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([{bind, P}|Rest], Bindings, Remaining, Acc) -> + P2 = case couch_util:get_value({bind, P}, Bindings) of + undefined -> << "undefined">>; + P1 -> + iolist_to_binary(P1) + end, + make_new_path(Rest, Bindings, Remaining, [P2|Acc]); +make_new_path([P|Rest], Bindings, Remaining, Acc) -> + make_new_path(Rest, Bindings, Remaining, [P|Acc]). + + +%% @doc If method of the query fith the rule method. If the +%% method rule is '*', which is the default, all +%% request method will bind. It allows us to make rules +%% depending on HTTP method. +bind_method(?MATCH_ALL, _Method ) -> + true; +bind_method({bind, Method}, Method) -> + true; +bind_method(_, _) -> + false. + + +%% @doc bind path. Using the rule from we try to bind variables given +%% to the current url by pattern matching +bind_path([], [], Bindings) -> + {ok, [], Bindings}; +bind_path([?MATCH_ALL], [Match|_RestMatch]=Rest, Bindings) -> + {ok, Rest, [{?MATCH_ALL, Match}|Bindings]}; +bind_path(_, [], _) -> + fail; +bind_path([{bind, Token}|RestToken],[Match|RestMatch],Bindings) -> + bind_path(RestToken, RestMatch, [{{bind, Token}, Match}|Bindings]); +bind_path([Token|RestToken], [Token|RestMatch], Bindings) -> + bind_path(RestToken, RestMatch, Bindings); +bind_path(_, _, _) -> + fail. + + +%% normalize path. +normalize_path(Path) -> + "/" ++ string:join(normalize_path1(string:tokens(Path, + "/"), []), [?SEPARATOR]). + + +normalize_path1([], Acc) -> + lists:reverse(Acc); +normalize_path1([".."|Rest], Acc) -> + Acc1 = case Acc of + [] -> [".."|Acc]; + [T|_] when T =:= ".." -> [".."|Acc]; + [_|R] -> R + end, + normalize_path1(Rest, Acc1); +normalize_path1(["."|Rest], Acc) -> + normalize_path1(Rest, Acc); +normalize_path1([Path|Rest], Acc) -> + normalize_path1(Rest, [Path|Acc]). + + +%% @doc transform json rule in erlang for pattern matching +make_rule(Rule) -> + Method = case couch_util:get_value(<<"method">>, Rule) of + undefined -> ?MATCH_ALL; + M -> to_binding(M) + end, + QueryArgs = case couch_util:get_value(<<"query">>, Rule) of + undefined -> []; + {Args} -> Args + end, + FromParts = case couch_util:get_value(<<"from">>, Rule) of + undefined -> [?MATCH_ALL]; + From -> + parse_path(From) + end, + ToParts = case couch_util:get_value(<<"to">>, Rule) of + undefined -> + throw({error, invalid_rewrite_target}); + To -> + parse_path(To) + end, + Formats = case couch_util:get_value(<<"formats">>, Rule) of + undefined -> []; + {Fmts} -> Fmts + end, + [{FromParts, Method}, ToParts, QueryArgs, Formats]. + +parse_path(Path) -> + {ok, SlashRE} = re:compile(<<"\\/">>), + path_to_list(re:split(Path, SlashRE), [], 0). + +%% @doc convert a path rule (from or to) to an erlang list +%% * and path variable starting by ":" are converted +%% in erlang atom. +path_to_list([], Acc, _DotDotCount) -> + lists:reverse(Acc); +path_to_list([<<>>|R], Acc, DotDotCount) -> + path_to_list(R, Acc, DotDotCount); +path_to_list([<<"*">>|R], Acc, DotDotCount) -> + path_to_list(R, [?MATCH_ALL|Acc], DotDotCount); +path_to_list([<<"..">>|R], Acc, DotDotCount) when DotDotCount == 2 -> + case couch_config:get("httpd", "secure_rewrites", "true") of + "false" -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); + _Else -> + ?LOG_INFO("insecure_rewrite_rule ~p blocked", [lists:reverse(Acc) ++ [<<"..">>] ++ R]), + throw({insecure_rewrite_rule, "too many ../.. segments"}) + end; +path_to_list([<<"..">>|R], Acc, DotDotCount) -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); +path_to_list([P|R], Acc, DotDotCount) -> + P1 = case P of + <<":", Var/binary>> -> + to_binding(Var); + _ -> P + end, + path_to_list(R, [P1|Acc], DotDotCount). + +maybe_encode_bindings([]) -> + []; +maybe_encode_bindings(Props) -> + lists:foldl(fun + ({{bind, <<"*">>}, _V}, Acc) -> + Acc; + ({{bind, K}, V}, Acc) -> + V1 = iolist_to_binary(maybe_json(K, V)), + [{K, V1}|Acc] + end, [], Props). + +decode_query_value({K,V}) -> + case lists:member(K, ["key", "startkey", "start_key", + "endkey", "end_key", "keys"]) of + true -> + {to_binding(K), ?JSON_DECODE(V)}; + false -> + {to_binding(K), ?l2b(V)} + end. + +to_binding({bind, V}) -> + {bind, V}; +to_binding(V) when is_list(V) -> + to_binding(?l2b(V)); +to_binding(V) -> + {bind, V}. |