From 39de3072bcf9fdeec6d3faeb125924c401242205 Mon Sep 17 00:00:00 2001
From: Christopher Lenz <cmlenz@apache.org>
Date: Tue, 15 Apr 2008 12:21:00 +0000
Subject: Merged mochiweb branch back into trunk.

git-svn-id: https://svn.apache.org/repos/asf/incubator/couchdb/trunk@648222 13f79535-47bb-0310-9956-ffa450edef68
---
 src/mochiweb/mochiweb_util.erl | 486 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 486 insertions(+)
 create mode 100644 src/mochiweb/mochiweb_util.erl

(limited to 'src/mochiweb/mochiweb_util.erl')
diff --git a/src/mochiweb/mochiweb_util.erl b/src/mochiweb/mochiweb_util.erl
new file mode 100644
index 00000000..a2b6b2fb
--- /dev/null
+++ b/src/mochiweb/mochiweb_util.erl
@@ -0,0 +1,486 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for parsing and quoting.
+
+-module(mochiweb_util).
+-author('bob@mochimedia.com').
+-export([join/2, quote_plus/1, urlencode/1, parse_qs/1, unquote/1]).
+-export([path_split/1]).
+-export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]).
+-export([guess_mime/1, parse_header/1]).
+-export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]).
+-export([record_to_proplist/2, record_to_proplist/3]).
+-export([test/0]).
+
+-define(PERCENT, 37).  % $\%
+-define(FULLSTOP, 46). % $\.
+-define(IS_HEX(C), ((C >= $0 andalso C =< $9) orelse  % true for 0-9, a-f, A-F
+		    (C >= $a andalso C =< $f) orelse
+		    (C >= $A andalso C =< $F))).
+-define(QS_SAFE(C), ((C >= $a andalso C =< $z) orelse  % true for a-z, A-Z, 0-9 and . - ~ _
+		     (C >= $A andalso C =< $Z) orelse
+		     (C >= $0 andalso C =< $9) orelse
+		     (C =:= ?FULLSTOP orelse C =:= $- orelse C =:= $~ orelse
+		      C =:= $_))).
+
+hexdigit(N) when N < 10 -> N + $0;        % 0..9   -> $0..$9
+hexdigit(N) when N < 16 -> N - 10 + $A.   % 10..15 -> $A..$F
+
+unhexdigit(D) when $0 =< D, D =< $9 -> D - $0;          % decimal digit
+unhexdigit(D) when $a =< D, D =< $f -> 10 + (D - $a);   % lower-case hex digit
+unhexdigit(D) when $A =< D, D =< $F -> 10 + (D - $A).   % upper-case hex digit
+
+%% @spec shell_quote(string()) -> string()
+%% @doc Quote a string according to UNIX shell quoting rules: returns the string
+%%      surrounded by double quotes, with ", `, \ and $ backslash-escaped.
+shell_quote(L) ->
+    shell_quote(L, [$\"]).
+
+%% @spec cmd_port([string()], Options) -> port()
+%% @doc Spawn cmd_string(Argv) as a port; Options are handed to open_port/2 unchanged.
+cmd_port(Argv, Options) ->
+    open_port({spawn, cmd_string(Argv)}, Options).
+
+%% @spec cmd([string()]) -> string()
+%% @doc Run the shell-quoted command line cmd_string(Argv) with os:cmd/1 and return its result.
+cmd(Argv) ->
+    os:cmd(cmd_string(Argv)).
+
+%% @spec cmd_string([string()]) -> string()
+%% @doc Shell-quote each argument and join the results with single spaces.
+cmd_string(Argv) ->
+    join([shell_quote(Arg) || Arg <- Argv], [$\s]).
+
+%% @spec join([string()], Separator) -> string()
+%% @doc Concatenate Strings into one flat string, placing Separator (a string
+%%      or a single character) between consecutive elements. Elements may be
+%%      nested lists; an empty input yields the empty string.
+join([], _Sep) -> [];
+join([Only], _Sep) -> lists:flatten(Only);
+join(Strings, Sep) ->
+    Reversed = lists:reverse(Strings),
+    lists:flatten(revjoin(Reversed, Sep, [])).
+
+%% Prepend the elements of a reversed list onto Acc, which restores their
+%% original order, and put Sep between neighbours; the result is a deep list.
+%% An empty Acc is special-cased so that no separator trails the last element.
+revjoin([], _Sep, Acc) -> Acc;
+revjoin([Last | Rest], Sep, []) -> revjoin(Rest, Sep, [Last]);
+revjoin([S | Rest], Sep, Acc) -> revjoin(Rest, Sep, [S, Sep | Acc]).
+
+%% @spec quote_plus(atom() | integer() | binary() | string()) -> string()
+%% @doc URL safe encoding of the given term. Atoms, integers and binaries are
+%%      turned into strings first (binaries byte by byte, matching what
+%%      parse_qs/1 and unquote/1 accept); the string is then escaped.
+quote_plus(Atom) when is_atom(Atom) -> quote_plus(atom_to_list(Atom));
+quote_plus(Int) when is_integer(Int) -> quote_plus(integer_to_list(Int));
+quote_plus(Bin) when is_binary(Bin) -> quote_plus(binary_to_list(Bin));
+quote_plus(String) -> quote_plus(String, []).
+
+%% Encode one character per step onto a reversed accumulator: QS_SAFE
+%% characters are copied, a space becomes "+", anything else becomes %XX.
+quote_plus([], Out) -> lists:reverse(Out);
+quote_plus([Safe | Tail], Out) when ?QS_SAFE(Safe) -> quote_plus(Tail, [Safe | Out]);
+quote_plus([$\s | Tail], Out) -> quote_plus(Tail, [$+ | Out]);
+quote_plus([Byte | Tail], Out) ->
+    <<High:4, Low:4>> = <<Byte>>,   % split into the two nibbles
+    %% Out is reversed, so the low digit is pushed in front of the high one.
+    quote_plus(Tail, [hexdigit(Low), hexdigit(High), ?PERCENT | Out]).
+
+%% @spec urlencode([{Key, Value}]) -> string()
+%% @doc Render the property list as "k1=v1&k2=v2...", passing every key and
+%%      value through quote_plus/1 and keeping the pairs in their given order.
+urlencode(Props) ->
+    Encode = fun ({K, V}) -> [quote_plus(K), $=, quote_plus(V)] end,
+    Reversed = lists:reverse(lists:map(Encode, Props)),
+    lists:flatten(revjoin(Reversed, $&, [])).
+
+%% @spec parse_qs(string() | binary()) -> [{Key, Value}]
+%% @doc Decode a query string (or an application/x-www-form-urlencoded body)
+%%      into {Key, Value} string pairs, in order of appearance. Pairs are
+%%      separated by "&" or ";" and both halves are URL-unquoted.
+parse_qs(Binary) when is_binary(Binary) -> parse_qs(binary_to_list(Binary));
+parse_qs(String) -> parse_qs(String, []).
+
+%% Consume one key and one value per iteration until the input is exhausted.
+parse_qs([], Pairs) -> lists:reverse(Pairs);
+parse_qs(Input, Pairs) ->
+    {Key, AfterKey} = parse_qs_key(Input),
+    {Value, AfterValue} = parse_qs_value(AfterKey),
+    parse_qs(AfterValue, [{Key, Value} | Pairs]).
+
+%% Read a key up to "=", "&", ";" or end of input and return {Key, Rest}.
+%% Only "=" is consumed; "&" and ";" stay in Rest so that parse_qs_value/1
+%% sees them and yields an empty value for the key.
+parse_qs_key(String) ->
+    parse_qs_key(String, []).
+
+%% RevKey collects the raw key in reverse, which is what qs_revdecode/1 takes.
+parse_qs_key([], RevKey) -> {qs_revdecode(RevKey), ""};
+parse_qs_key([$= | Rest], RevKey) -> {qs_revdecode(RevKey), Rest};
+parse_qs_key([Sep | _] = Rest, RevKey) when Sep =:= $; orelse Sep =:= $& ->
+    {qs_revdecode(RevKey), Rest};
+parse_qs_key([C | Rest], RevKey) ->
+    parse_qs_key(Rest, [C | RevKey]).
+
+%% Read a value up to the next "&" or ";" (which is consumed) or the end of
+%% the input, and return {Value, Rest} with Value URL-unquoted.
+parse_qs_value(String) ->
+    parse_qs_value(String, []).
+
+%% RevValue holds the raw characters in reverse, ready for qs_revdecode/1.
+parse_qs_value([], RevValue) -> {qs_revdecode(RevValue), ""};
+parse_qs_value([Sep | Rest], RevValue) when Sep =:= $; orelse Sep =:= $& ->
+    {qs_revdecode(RevValue), Rest};
+parse_qs_value([C | Rest], RevValue) ->
+    parse_qs_value(Rest, [C | RevValue]).
+
+%% @spec unquote(string() | binary()) -> string()
+%% @doc Decode a URL encoded string: "+" becomes a space and every "%XY" with
+%%      two hex digits becomes the character with that code.
+unquote(Binary) when is_binary(Binary) -> unquote(binary_to_list(Binary));
+unquote(String) ->
+    qs_revdecode(lists:reverse(String)).   % the decoder works on reversed input
+
+%% URL-decode a string that is given in reverse; the result comes out in
+%% forward order because decoded characters are consed onto the accumulator.
+qs_revdecode(Reversed) ->
+    qs_revdecode(Reversed, []).
+
+qs_revdecode([], Out) -> Out;
+qs_revdecode([$+ | Tail], Out) -> qs_revdecode(Tail, [$\s | Out]);
+%% A forward "%HL" escape shows up here as L, H, "%".
+qs_revdecode([Lo, Hi, ?PERCENT | Tail], Out) when ?IS_HEX(Lo), ?IS_HEX(Hi) ->
+    qs_revdecode(Tail, [(unhexdigit(Hi) bsl 4) bor unhexdigit(Lo) | Out]);
+qs_revdecode([C | Tail], Out) -> qs_revdecode(Tail, [C | Out]).
+
+%% @spec urlsplit(Url) -> {Scheme, Netloc, Path, Query, Fragment}
+%% @doc Split an HTTP style URL into its five components, peeling them off
+%%      from left to right. Percent escapes are left exactly as written.
+urlsplit(Url) ->
+    {Scheme, AfterScheme} = urlsplit_scheme(Url),
+    {Netloc, AfterNetloc} = urlsplit_netloc(AfterScheme),
+    {Path, Query, Fragment} = urlsplit_path(AfterNetloc),
+    {Scheme, Netloc, Path, Query, Fragment}.
+
+%% Split off a leading "scheme:" and return {LowerCasedScheme, Rest}. A scheme is a
+%% non-empty run of [A-Za-z0-9+.-]; a ":" anywhere else (path, query) does not start one.
+urlsplit_scheme(Url) -> urlsplit_scheme(Url, Url, []).
+urlsplit_scheme(_Url, [$: | Rest], [_ | _] = Acc) ->
+    {string:to_lower(lists:reverse(Acc)), Rest};
+urlsplit_scheme(Url, [C | Rest], Acc)
+  when C >= $a, C =< $z; C >= $A, C =< $Z; C >= $0, C =< $9; C =:= $+; C =:= $-; C =:= $. ->
+    urlsplit_scheme(Url, Rest, [C | Acc]);
+urlsplit_scheme(Url, _Rest, _Acc) -> {"", Url}.
+
+%% Split "//netloc" off the front: {Netloc, Rest}, Rest empty or starting at "/", "?" or "#".
+urlsplit_netloc("//" ++ Rest) -> urlsplit_netloc(Rest, []);
+urlsplit_netloc(Path) -> {"", Path}.
+
+%% End of input ends the netloc too, so "//host" alone no longer raises function_clause.
+urlsplit_netloc("", Acc) -> {lists:reverse(Acc), ""};
+urlsplit_netloc([C | _] = Rest, Acc) when C =:= $/; C =:= $?; C =:= $# ->
+    {lists:reverse(Acc), Rest};
+urlsplit_netloc([C | Rest], Acc) -> urlsplit_netloc(Rest, [C | Acc]).
+
+
+%% @spec path_split(string()) -> {Part, Rest}
+%% @doc Split a path at its first "/", as in URL traversal. The slash is
+%%      dropped; without a slash the whole path is Part and Rest is "".
+%%      path_split("foo/bar") = {"foo", "bar"},
+%%      path_split("/foo/bar") = {"", "foo/bar"}.
+path_split(S) ->
+    NotSlash = fun (C) -> C =/= $/ end,
+    case lists:splitwith(NotSlash, S) of
+        {Part, [$/ | Rest]} ->
+            {Part, Rest};
+        {Part, []} ->
+            {Part, ""}
+    end.
+
+
+%% @spec urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> string()
+%% @doc Assemble a URL from the 5-tuple; "Scheme://" is omitted for an empty Scheme. Path must be absolute.
+urlunsplit({Scheme, Netloc, Path, Query, Fragment}) ->
+    Prefix = case Scheme of "" -> ""; _ -> [Scheme, "://"] end,
+    Tail = urlunsplit_path({Path, Query, Fragment}),
+    lists:flatten([Prefix, Netloc, Tail]).
+
+%% @spec urlunsplit_path({Path, Query, Fragment}) -> string()
+%% @doc Assemble Path, "?Query" and "#Fragment", leaving out an empty Query or Fragment.
+urlunsplit_path({Path, Query, Fragment}) ->
+    QueryPart = case Query of "" -> ""; _ -> [$? | Query] end,
+    FragmentPart = case Fragment of "" -> ""; _ -> [$# | Fragment] end,
+    lists:flatten([Path, QueryPart, FragmentPart]).
+
+%% @spec urlsplit_path(Url) -> {Path, Query, Fragment}
+%% @doc Split an HTTP style path at its first "?" or "#". If "#" comes first,
+%%      everything after it is the fragment, "?" included. Percent escapes
+%%      are not expanded.
+urlsplit_path(Path) ->
+    urlsplit_path(Path, []).
+
+%% RevPath holds the path characters consumed so far, in reverse.
+urlsplit_path([], RevPath) -> {lists:reverse(RevPath), "", ""};
+urlsplit_path([$? | Rest], RevPath) ->
+    {Query, Fragment} = urlsplit_query(Rest),
+    {lists:reverse(RevPath), Query, Fragment};
+urlsplit_path([$# | Rest], RevPath) -> {lists:reverse(RevPath), "", Rest};
+urlsplit_path([C | Rest], RevPath) ->
+    urlsplit_path(Rest, [C | RevPath]).
+
+%% Split the text after "?" at its first "#" and return {Query, Fragment}.
+%% The "#" itself is dropped; Fragment is "" when there is no "#".
+%% Any further "#" characters stay inside Fragment.
+urlsplit_query(Query) ->
+    NotHash = fun (C) -> C =/= $# end,
+    case lists:splitwith(NotHash, Query) of
+        {Q, [$# | Fragment]} -> {Q, Fragment};
+        {Q, []} -> {Q, ""}
+    end.
+
+%% @spec guess_mime(string()) -> string()
+%% @doc  Guess the mime type from the filename's extension; "text/plain" if it is unknown.
+guess_mime(File) ->
+    case string:to_lower(filename:extension(File)) of  % so ".JPG" is treated like ".jpg"
+        ".html" ->
+            "text/html";
+        ".xhtml" ->
+            "application/xhtml+xml";
+        ".xml" ->
+            "application/xml";
+        ".css" ->
+            "text/css";
+        ".js" ->
+            "application/x-javascript";
+        ".jpg" ->
+            "image/jpeg";
+        ".gif" ->
+            "image/gif";
+        ".png" ->
+            "image/png";
+        ".swf" ->
+            "application/x-shockwave-flash";
+        ".zip" ->
+            "application/zip";
+        ".bz2" ->
+            "application/x-bzip2";
+        ".gz" ->
+            "application/x-gzip";
+        ".tar" ->
+            "application/x-tar";
+        ".tgz" ->
+            "application/x-gzip";
+        ".txt" ->
+            "text/plain";
+        ".doc" ->
+            "application/msword";
+        ".pdf" ->
+            "application/pdf";
+        ".xls" ->
+            "application/vnd.ms-excel";
+        ".rtf" ->
+            "application/rtf";
+        ".mov" ->
+            "video/quicktime";
+        ".mp3" ->
+            "audio/mpeg";
+        ".z" ->
+            "application/x-compress";
+        ".wav" ->
+            "audio/x-wav";
+        ".ico" ->
+            "image/x-icon";
+        ".bmp" ->
+            "image/bmp";
+        ".m4a" ->
+            "audio/mpeg";
+        ".m3u" ->
+            "audio/x-mpegurl";
+        ".exe" ->
+            "application/octet-stream";
+        ".csv" ->
+            "text/csv";
+        _ ->
+            "text/plain"
+    end.
+
+%% @spec parse_header(string()) -> {Type, [{K, V}]}
+%% @doc  Split a Content-Type like header on ";" and return the lower-cased
+%%       main value together with a property list of its "name=value"
+%%       options. Option names are lower-cased, quoted values are unquoted,
+%%       and parts with an empty name or without "=" are dropped.
+parse_header(String) ->
+    %% TODO: This is exactly as broken as Python's cgi module.
+    %%       Should parse properly like mochiweb_cookies.
+    [Type | Parts] = [string:strip(S) || S <- string:tokens(String, ";")],
+    NotEquals = fun (C) -> C =/= $= end,
+    AddParam = fun (Part, Params) ->
+        case lists:splitwith(NotEquals, Part) of
+            {"", _} ->
+                Params;                 % nothing before "=": no name
+            {_, ""} ->
+                Params;                 % no "=" at all: no value
+            {Name, [$= | Value]} ->
+                Key = string:to_lower(string:strip(Name)),
+                [{Key, unquote_header(string:strip(Value))} | Params]
+        end
+    end,
+    {string:to_lower(Type), lists:foldr(AddParam, [], Parts)}.
+
+%% Strip the double quotes from a quoted header value and resolve backslash
+%% escapes; a value that does not start with a quote is returned unchanged.
+unquote_header([$\" | Rest]) ->
+    unquote_header(Rest, []);
+unquote_header(S) ->
+    S.
+
+%% Only a quote at the very end of the input is taken as the closing quote.
+unquote_header([], Out) -> lists:reverse(Out);
+unquote_header([$\"], Out) -> lists:reverse(Out);
+unquote_header([$\\, Escaped | Rest], Out) ->
+    unquote_header(Rest, [Escaped | Out]);   % keep the escaped character itself
+unquote_header([C | Rest], Out) -> unquote_header(Rest, [C | Out]).
+
+%% @spec record_to_proplist(Record, Fields) -> proplist()
+%% @doc Calls record_to_proplist/3 with the default TypeKey '__record'.
+record_to_proplist(Record, Fields) ->
+    record_to_proplist(Record, Fields, '__record').
+
+%% @spec record_to_proplist(Record, Fields, TypeKey) -> proplist()
+%% @doc Convert Record to a proplist. The first pair is {TypeKey, RecordName};
+%%      each following pair maps a name from Fields to the value stored at
+%%      the same position in Record. Fields should come from
+%%      record_info(fields, record_type) for the record type of Record; the
+%%      call fails with function_clause unless Record has exactly one
+%%      element more than Fields.
+record_to_proplist(Record, Fields, TypeKey)
+  when is_tuple(Record), is_list(Fields), tuple_size(Record) =:= length(Fields) + 1 ->
+    Keys = [TypeKey | Fields],
+    lists:zip(Keys, tuple_to_list(Record)).
+
+
+%% Copy characters onto the reversed accumulator, putting a backslash before
+%% each of ", `, \ and $; when the input ends, append the closing quote.
+shell_quote([], Acc) -> lists:reverse([$\" | Acc]);
+shell_quote([C | Rest], Acc) when C =:= $\"; C =:= $\`; C =:= $\\; C =:= $\$ ->
+    %% Acc is reversed: C pushed ahead of the backslash reads as \C afterwards.
+    shell_quote(Rest, [C, $\\ | Acc]);
+shell_quote([C | Rest], Acc) -> shell_quote(Rest, [C | Acc]).
+
+test() ->  % run every self-test in order; a failed match crashes the caller
+    Tests = [fun test_join/0,
+             fun test_quote_plus/0,
+             fun test_unquote/0,
+             fun test_urlencode/0,
+             fun test_parse_qs/0,
+             fun test_urlsplit_path/0,
+             fun test_urlunsplit_path/0,
+             fun test_urlsplit/0,
+             fun test_urlunsplit/0,
+             fun test_path_split/0,
+             fun test_guess_mime/0,
+             fun test_parse_header/0,
+             fun test_shell_quote/0,
+             fun test_cmd/0,
+             fun test_cmd_string/0],
+    lists:foreach(fun (Test) -> Test() end, Tests).
+
+test_shell_quote() ->  % $, " and ` gain a backslash; the single quote is left alone
+    "\"foo \\$bar\\\"\\`' baz\"" = shell_quote("foo $bar\"`' baz"),
+    ok.
+
+test_cmd() ->  % runs "echo" via os:cmd/1; $ and ` must come back unexpanded
+    "$bling$ `word`!\n" = cmd(["echo", "$bling$ `word`!"]),
+    ok.
+
+test_cmd_string() ->  % every argument is quoted on its own and joined with one space
+    "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = cmd_string(["echo", "$bling$ `word`!"]),
+    ok.
+
+test_parse_header() ->  % main type followed by one unquoted option
+    {"multipart/form-data", [{"boundary", "AaB03x"}]} =
+	parse_header("multipart/form-data; boundary=AaB03x"),
+    ok.
+
+test_guess_mime() ->  % an empty name and the unlisted ".text" fall back to text/plain
+    "text/plain" = guess_mime(""),
+    "text/plain" = guess_mime(".text"),
+    "application/zip" = guess_mime(".zip"),
+    "application/zip" = guess_mime("x.zip"),
+    "text/html" = guess_mime("x.html"),
+    "application/xhtml+xml" = guess_mime("x.xhtml"),
+    ok.
+
+test_path_split() ->  % the first "/" splits; a path without one has an empty Rest
+    {"", "foo/bar"} = path_split("/foo/bar"),
+    {"foo", "bar"} = path_split("foo/bar"),
+    {"bar", ""} = path_split("bar"),
+    ok.
+
+test_urlsplit() ->  % a "#" ahead of "?" puts the "?" into the fragment
+    {"", "", "/foo", "", "bar?baz"} = urlsplit("/foo#bar?baz"),
+    {"http", "host:port", "/foo", "", "bar?baz"} =
+	urlsplit("http://host:port/foo#bar?baz"),
+    ok.
+
+test_urlsplit_path() ->  % only the first "?" and the first "#" act as separators
+    {"/foo/bar", "", ""} = urlsplit_path("/foo/bar"),
+    {"/foo", "baz", ""} = urlsplit_path("/foo?baz"),
+    {"/foo", "", "bar?baz"} = urlsplit_path("/foo#bar?baz"),
+    {"/foo", "", "bar?baz#wibble"} = urlsplit_path("/foo#bar?baz#wibble"),
+    {"/foo", "bar", "baz"} = urlsplit_path("/foo?bar#baz"),
+    {"/foo", "bar?baz", "baz"} = urlsplit_path("/foo?bar?baz#baz"),
+    ok.
+
+test_urlunsplit() ->  % the same two URLs as test_urlsplit/0, assembled instead of split
+    "/foo#bar?baz" = urlunsplit({"", "", "/foo", "", "bar?baz"}),
+    "http://host:port/foo#bar?baz" =
+	urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}),
+    ok.
+
+test_urlunsplit_path() ->  % the same six paths as test_urlsplit_path/0, assembled instead of split
+    "/foo/bar" = urlunsplit_path({"/foo/bar", "", ""}),
+    "/foo?baz" = urlunsplit_path({"/foo", "baz", ""}),
+    "/foo#bar?baz" = urlunsplit_path({"/foo", "", "bar?baz"}),
+    "/foo#bar?baz#wibble" = urlunsplit_path({"/foo", "", "bar?baz#wibble"}),
+    "/foo?bar#baz" = urlunsplit_path({"/foo", "bar", "baz"}),
+    "/foo?bar?baz#baz" = urlunsplit_path({"/foo", "bar?baz", "baz"}),
+    ok.
+
+test_join() ->  % the separator may be a char or a string; nested elements are flattened
+    "foo,bar,baz" = join(["foo", "bar", "baz"], $,),
+    "foo,bar,baz" = join(["foo", "bar", "baz"], ","),
+    "foo bar" = join([["foo", " bar"]], ","),
+    "foo bar,baz" = join([["foo", " bar"], "baz"], ","),
+    "foo" = join(["foo"], ","),
+    "foobarbaz" = join(["foo", "bar", "baz"], ""),
+    ok.
+
+test_quote_plus() ->  % atoms and integers are turned into strings before encoding
+    "foo" = quote_plus(foo),
+    "1" = quote_plus(1),
+    "foo" = quote_plus("foo"),
+    "foo+bar" = quote_plus("foo bar"),
+    "foo%0A" = quote_plus("foo\n"),
+    "foo%2Bbar" = quote_plus("foo+bar"),  % a literal "+" must not decode back as a space
+    "foo%3B%26%3D" = quote_plus("foo;&="),
+    ok.
+
+test_unquote() ->  % "+" and "%20" both decode to a space
+    "foo bar" = unquote("foo+bar"),
+    "foo bar" = unquote("foo%20bar"),
+    "foo\r\n" = unquote("foo%0D%0A"),
+    ok.
+
+test_urlencode() ->  % atom keys and an integer value are accepted; pair order is kept
+    "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode([{foo, "bar"},
+						 {"baz", "wibble \r\n"},
+						 {z, 1}]),
+    ok.
+
+test_parse_qs() ->  % parses the query string that test_urlencode/0 expects as output
+    [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] =
+	parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"),
+    ok.
-- 
cgit v1.2.3