summaryrefslogtreecommitdiff
path: root/apps/mochiweb
diff options
context:
space:
mode:
authorAdam Kocoloski <adam@cloudant.com>2010-08-11 15:52:06 -0400
committerAdam Kocoloski <adam@cloudant.com>2010-08-11 17:39:38 -0400
commit498d5c85681ddfea577377b8e10051fb445ac0c2 (patch)
tree10c4d32773d703374d000f7dc0ae3988f6926366 /apps/mochiweb
parent7e67565d2241783b5f24a37400ddae83b00c7a81 (diff)
move mochiweb to rebar layout
Diffstat (limited to 'apps/mochiweb')
-rw-r--r--apps/mochiweb/src/mochifmt.erl426
-rw-r--r--apps/mochiweb/src/mochifmt_records.erl30
-rw-r--r--apps/mochiweb/src/mochifmt_std.erl23
-rw-r--r--apps/mochiweb/src/mochihex.erl75
-rw-r--r--apps/mochiweb/src/mochijson.erl528
-rw-r--r--apps/mochiweb/src/mochijson2.erl660
-rw-r--r--apps/mochiweb/src/mochinum.erl289
-rw-r--r--apps/mochiweb/src/mochiweb.app.src32
-rw-r--r--apps/mochiweb/src/mochiweb.erl110
-rw-r--r--apps/mochiweb/src/mochiweb_app.erl20
-rw-r--r--apps/mochiweb/src/mochiweb_charref.erl295
-rw-r--r--apps/mochiweb/src/mochiweb_cookies.erl257
-rw-r--r--apps/mochiweb/src/mochiweb_echo.erl31
-rw-r--r--apps/mochiweb/src/mochiweb_headers.erl251
-rw-r--r--apps/mochiweb/src/mochiweb_html.erl893
-rw-r--r--apps/mochiweb/src/mochiweb_http.erl152
-rw-r--r--apps/mochiweb/src/mochiweb_multipart.erl530
-rw-r--r--apps/mochiweb/src/mochiweb_request.erl867
-rw-r--r--apps/mochiweb/src/mochiweb_response.erl56
-rw-r--r--apps/mochiweb/src/mochiweb_skel.erl73
-rw-r--r--apps/mochiweb/src/mochiweb_socket_server.erl248
-rw-r--r--apps/mochiweb/src/mochiweb_sup.erl34
-rw-r--r--apps/mochiweb/src/mochiweb_util.erl859
-rw-r--r--apps/mochiweb/src/reloader.erl123
24 files changed, 6862 insertions, 0 deletions
diff --git a/apps/mochiweb/src/mochifmt.erl b/apps/mochiweb/src/mochifmt.erl
new file mode 100644
index 00000000..da0a133a
--- /dev/null
+++ b/apps/mochiweb/src/mochifmt.erl
@@ -0,0 +1,426 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2008 Mochi Media, Inc.
+
+%% @doc String Formatting for Erlang, inspired by Python 2.6
+%% (<a href="http://www.python.org/dev/peps/pep-3101/">PEP 3101</a>).
+%%
+-module(mochifmt).
+-author('bob@mochimedia.com').
+-export([format/2, format_field/2, convert_field/2, get_value/2, get_field/2]).
+-export([tokenize/1, format/3, get_field/3, format_field/3]).
+-export([bformat/2, bformat/3]).
+-export([f/2, f/3]).
+-export([test/0]).
+
+-record(conversion, {length, precision, ctype, align, fill_char, sign}).
+
+%% @spec tokenize(S::string()) -> tokens()
+%% @doc Tokenize a format string into mochifmt's internal format.
+tokenize(S) ->
+ {?MODULE, tokenize(S, "", [])}.
+
+%% @spec convert_field(Arg, Conversion::conversion()) -> term()
+%% @doc Process Arg according to the given explicit conversion specifier.
+convert_field(Arg, "") ->
+ Arg;
+convert_field(Arg, "r") ->
+ repr(Arg);
+convert_field(Arg, "s") ->
+ str(Arg).
+
+%% @spec get_value(Key::string(), Args::args()) -> term()
+%% @doc Get the Key from Args. If Args is a tuple then convert Key to
+%% an integer and get element(1 + Key, Args). If Args is a list and Key
+%% can be parsed as an integer then use lists:nth(1 + Key, Args),
+%% otherwise try and look for Key in Args as a proplist, converting
+%% Key to an atom or binary if necessary.
+get_value(Key, Args) when is_tuple(Args) ->
+ element(1 + list_to_integer(Key), Args);
+get_value(Key, Args) when is_list(Args) ->
+ try lists:nth(1 + list_to_integer(Key), Args)
+ catch error:_ ->
+ {_K, V} = proplist_lookup(Key, Args),
+ V
+ end.
+
+%% @spec get_field(Key::string(), Args) -> term()
+%% @doc Consecutively call get_value/2 on parts of Key delimited by ".",
+%% replacing Args with the result of the previous get_value. This
+%% is used to implement formats such as {0.0}.
+get_field(Key, Args) ->
+ get_field(Key, Args, ?MODULE).
+
+%% @spec get_field(Key::string(), Args, Module) -> term()
+%% @doc Consecutively call Module:get_value/2 on parts of Key delimited by ".",
+%% replacing Args with the result of the previous get_value. This
+%% is used to implement formats such as {0.0}.
+get_field(Key, Args, Module) ->
+ {Name, Next} = lists:splitwith(fun (C) -> C =/= $. end, Key),
+ Res = try Module:get_value(Name, Args)
+ catch error:undef -> get_value(Name, Args) end,
+ case Next of
+ "" ->
+ Res;
+ "." ++ S1 ->
+ get_field(S1, Res, Module)
+ end.
+
+%% @spec format(Format::string(), Args) -> iolist()
+%% @doc Format Args with Format.
+format(Format, Args) ->
+ format(Format, Args, ?MODULE).
+
+%% @spec format(Format::string(), Args, Module) -> iolist()
+%% @doc Format Args with Format using Module.
+format({?MODULE, Parts}, Args, Module) ->
+ format2(Parts, Args, Module, []);
+format(S, Args, Module) ->
+ format(tokenize(S), Args, Module).
+
+%% @spec format_field(Arg, Format) -> iolist()
+%% @doc Format Arg with Format.
+format_field(Arg, Format) ->
+ format_field(Arg, Format, ?MODULE).
+
+%% @spec format_field(Arg, Format, _Module) -> iolist()
+%% @doc Format Arg with Format.
+format_field(Arg, Format, _Module) ->
+ F = default_ctype(Arg, parse_std_conversion(Format)),
+ fix_padding(fix_sign(convert2(Arg, F), F), F).
+
+%% @spec f(Format::string(), Args) -> string()
+%% @doc Format Args with Format and return a string().
+f(Format, Args) ->
+ f(Format, Args, ?MODULE).
+
+%% @spec f(Format::string(), Args, Module) -> string()
+%% @doc Format Args with Format using Module and return a string().
+f(Format, Args, Module) ->
+ case lists:member(${, Format) of
+ true ->
+ binary_to_list(bformat(Format, Args, Module));
+ false ->
+ Format
+ end.
+
+%% @spec bformat(Format::string(), Args) -> binary()
+%% @doc Format Args with Format and return a binary().
+bformat(Format, Args) ->
+ iolist_to_binary(format(Format, Args)).
+
+%% @spec bformat(Format::string(), Args, Module) -> binary()
+%% @doc Format Args with Format using Module and return a binary().
+bformat(Format, Args, Module) ->
+ iolist_to_binary(format(Format, Args, Module)).
+
+%% @spec test() -> ok
+%% @doc Run tests.
+test() ->
+ ok = test_tokenize(),
+ ok = test_format(),
+ ok = test_std(),
+ ok = test_records(),
+ ok.
+
+%% Internal API
+
+add_raw("", Acc) ->
+ Acc;
+add_raw(S, Acc) ->
+ [{raw, lists:reverse(S)} | Acc].
+
+tokenize([], S, Acc) ->
+ lists:reverse(add_raw(S, Acc));
+tokenize("{{" ++ Rest, S, Acc) ->
+ tokenize(Rest, "{" ++ S, Acc);
+tokenize("{" ++ Rest, S, Acc) ->
+ {Format, Rest1} = tokenize_format(Rest),
+ tokenize(Rest1, "", [{format, make_format(Format)} | add_raw(S, Acc)]);
+tokenize("}}" ++ Rest, S, Acc) ->
+ tokenize(Rest, "}" ++ S, Acc);
+tokenize([C | Rest], S, Acc) ->
+ tokenize(Rest, [C | S], Acc).
+
+tokenize_format(S) ->
+ tokenize_format(S, 1, []).
+
+tokenize_format("}" ++ Rest, 1, Acc) ->
+ {lists:reverse(Acc), Rest};
+tokenize_format("}" ++ Rest, N, Acc) ->
+ tokenize_format(Rest, N - 1, "}" ++ Acc);
+tokenize_format("{" ++ Rest, N, Acc) ->
+ tokenize_format(Rest, 1 + N, "{" ++ Acc);
+tokenize_format([C | Rest], N, Acc) ->
+ tokenize_format(Rest, N, [C | Acc]).
+
+make_format(S) ->
+ {Name0, Spec} = case lists:splitwith(fun (C) -> C =/= $: end, S) of
+ {_, ""} ->
+ {S, ""};
+ {SN, ":" ++ SS} ->
+ {SN, SS}
+ end,
+ {Name, Transform} = case lists:splitwith(fun (C) -> C =/= $! end, Name0) of
+ {_, ""} ->
+ {Name0, ""};
+ {TN, "!" ++ TT} ->
+ {TN, TT}
+ end,
+ {Name, Transform, Spec}.
+
+proplist_lookup(S, P) ->
+ A = try list_to_existing_atom(S)
+ catch error:_ -> make_ref() end,
+ B = try list_to_binary(S)
+ catch error:_ -> make_ref() end,
+ proplist_lookup2({S, A, B}, P).
+
+proplist_lookup2({KS, KA, KB}, [{K, V} | _])
+ when KS =:= K orelse KA =:= K orelse KB =:= K ->
+ {K, V};
+proplist_lookup2(Keys, [_ | Rest]) ->
+ proplist_lookup2(Keys, Rest).
+
+format2([], _Args, _Module, Acc) ->
+ lists:reverse(Acc);
+format2([{raw, S} | Rest], Args, Module, Acc) ->
+ format2(Rest, Args, Module, [S | Acc]);
+format2([{format, {Key, Convert, Format0}} | Rest], Args, Module, Acc) ->
+ Format = f(Format0, Args, Module),
+ V = case Module of
+ ?MODULE ->
+ V0 = get_field(Key, Args),
+ V1 = convert_field(V0, Convert),
+ format_field(V1, Format);
+ _ ->
+ V0 = try Module:get_field(Key, Args)
+ catch error:undef -> get_field(Key, Args, Module) end,
+ V1 = try Module:convert_field(V0, Convert)
+ catch error:undef -> convert_field(V0, Convert) end,
+ try Module:format_field(V1, Format)
+ catch error:undef -> format_field(V1, Format, Module) end
+ end,
+ format2(Rest, Args, Module, [V | Acc]).
+
+default_ctype(_Arg, C=#conversion{ctype=N}) when N =/= undefined ->
+ C;
+default_ctype(Arg, C) when is_integer(Arg) ->
+ C#conversion{ctype=decimal};
+default_ctype(Arg, C) when is_float(Arg) ->
+ C#conversion{ctype=general};
+default_ctype(_Arg, C) ->
+ C#conversion{ctype=string}.
+
+fix_padding(Arg, #conversion{length=undefined}) ->
+ Arg;
+fix_padding(Arg, F=#conversion{length=Length, fill_char=Fill0, align=Align0,
+ ctype=Type}) ->
+ Padding = Length - iolist_size(Arg),
+ Fill = case Fill0 of
+ undefined ->
+ $\s;
+ _ ->
+ Fill0
+ end,
+ Align = case Align0 of
+ undefined ->
+ case Type of
+ string ->
+ left;
+ _ ->
+ right
+ end;
+ _ ->
+ Align0
+ end,
+ case Padding > 0 of
+ true ->
+ do_padding(Arg, Padding, Fill, Align, F);
+ false ->
+ Arg
+ end.
+
+do_padding(Arg, Padding, Fill, right, _F) ->
+ [lists:duplicate(Padding, Fill), Arg];
+do_padding(Arg, Padding, Fill, center, _F) ->
+ LPadding = lists:duplicate(Padding div 2, Fill),
+ RPadding = case Padding band 1 of
+ 1 ->
+ [Fill | LPadding];
+ _ ->
+ LPadding
+ end,
+ [LPadding, Arg, RPadding];
+do_padding([$- | Arg], Padding, Fill, sign_right, _F) ->
+ [[$- | lists:duplicate(Padding, Fill)], Arg];
+do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=$-}) ->
+ [lists:duplicate(Padding, Fill), Arg];
+do_padding([S | Arg], Padding, Fill, sign_right, #conversion{sign=S}) ->
+ [[S | lists:duplicate(Padding, Fill)], Arg];
+do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=undefined}) ->
+ [lists:duplicate(Padding, Fill), Arg];
+do_padding(Arg, Padding, Fill, left, _F) ->
+ [Arg | lists:duplicate(Padding, Fill)].
+
+fix_sign(Arg, #conversion{sign=$+}) when Arg >= 0 ->
+ [$+, Arg];
+fix_sign(Arg, #conversion{sign=$\s}) when Arg >= 0 ->
+ [$\s, Arg];
+fix_sign(Arg, _F) ->
+ Arg.
+
+ctype($\%) -> percent;
+ctype($s) -> string;
+ctype($b) -> bin;
+ctype($o) -> oct;
+ctype($X) -> upper_hex;
+ctype($x) -> hex;
+ctype($c) -> char;
+ctype($d) -> decimal;
+ctype($g) -> general;
+ctype($f) -> fixed;
+ctype($e) -> exp.
+
+align($<) -> left;
+align($>) -> right;
+align($^) -> center;
+align($=) -> sign_right.
+
+convert2(Arg, F=#conversion{ctype=percent}) ->
+ [convert2(100.0 * Arg, F#conversion{ctype=fixed}), $\%];
+convert2(Arg, #conversion{ctype=string}) ->
+ str(Arg);
+convert2(Arg, #conversion{ctype=bin}) ->
+ erlang:integer_to_list(Arg, 2);
+convert2(Arg, #conversion{ctype=oct}) ->
+ erlang:integer_to_list(Arg, 8);
+convert2(Arg, #conversion{ctype=upper_hex}) ->
+ erlang:integer_to_list(Arg, 16);
+convert2(Arg, #conversion{ctype=hex}) ->
+ string:to_lower(erlang:integer_to_list(Arg, 16));
+convert2(Arg, #conversion{ctype=char}) when Arg < 16#80 ->
+ [Arg];
+convert2(Arg, #conversion{ctype=char}) ->
+ xmerl_ucs:to_utf8(Arg);
+convert2(Arg, #conversion{ctype=decimal}) ->
+ integer_to_list(Arg);
+convert2(Arg, #conversion{ctype=general, precision=undefined}) ->
+ try mochinum:digits(Arg)
+ catch error:undef -> io_lib:format("~g", [Arg]) end;
+convert2(Arg, #conversion{ctype=fixed, precision=undefined}) ->
+ io_lib:format("~f", [Arg]);
+convert2(Arg, #conversion{ctype=exp, precision=undefined}) ->
+ io_lib:format("~e", [Arg]);
+convert2(Arg, #conversion{ctype=general, precision=P}) ->
+ io_lib:format("~." ++ integer_to_list(P) ++ "g", [Arg]);
+convert2(Arg, #conversion{ctype=fixed, precision=P}) ->
+ io_lib:format("~." ++ integer_to_list(P) ++ "f", [Arg]);
+convert2(Arg, #conversion{ctype=exp, precision=P}) ->
+ io_lib:format("~." ++ integer_to_list(P) ++ "e", [Arg]).
+
+str(A) when is_atom(A) ->
+ atom_to_list(A);
+str(I) when is_integer(I) ->
+ integer_to_list(I);
+str(F) when is_float(F) ->
+ try mochinum:digits(F)
+ catch error:undef -> io_lib:format("~g", [F]) end;
+str(L) when is_list(L) ->
+ L;
+str(B) when is_binary(B) ->
+ B;
+str(P) ->
+ repr(P).
+
+repr(P) when is_float(P) ->
+ try mochinum:digits(P)
+ catch error:undef -> float_to_list(P) end;
+repr(P) ->
+ io_lib:format("~p", [P]).
+
+parse_std_conversion(S) ->
+ parse_std_conversion(S, #conversion{}).
+
+parse_std_conversion("", Acc) ->
+ Acc;
+parse_std_conversion([Fill, Align | Spec], Acc)
+ when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ ->
+ parse_std_conversion(Spec, Acc#conversion{fill_char=Fill,
+ align=align(Align)});
+parse_std_conversion([Align | Spec], Acc)
+ when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ ->
+ parse_std_conversion(Spec, Acc#conversion{align=align(Align)});
+parse_std_conversion([Sign | Spec], Acc)
+ when Sign =:= $+ orelse Sign =:= $- orelse Sign =:= $\s ->
+ parse_std_conversion(Spec, Acc#conversion{sign=Sign});
+parse_std_conversion("0" ++ Spec, Acc) ->
+ Align = case Acc#conversion.align of
+ undefined ->
+ sign_right;
+ A ->
+ A
+ end,
+ parse_std_conversion(Spec, Acc#conversion{fill_char=$0, align=Align});
+parse_std_conversion(Spec=[D|_], Acc) when D >= $0 andalso D =< $9 ->
+ {W, Spec1} = lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec),
+ parse_std_conversion(Spec1, Acc#conversion{length=list_to_integer(W)});
+parse_std_conversion([$. | Spec], Acc) ->
+ case lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec) of
+ {"", Spec1} ->
+ parse_std_conversion(Spec1, Acc);
+ {P, Spec1} ->
+ parse_std_conversion(Spec1,
+ Acc#conversion{precision=list_to_integer(P)})
+ end;
+parse_std_conversion([Type], Acc) ->
+ parse_std_conversion("", Acc#conversion{ctype=ctype(Type)}).
+
+test_tokenize() ->
+ {?MODULE, [{raw, "ABC"}]} = tokenize("ABC"),
+ {?MODULE, [{format, {"0", "", ""}}]} = tokenize("{0}"),
+ {?MODULE, [{raw, "ABC"}, {format, {"1", "", ""}}, {raw, "DEF"}]} =
+ tokenize("ABC{1}DEF"),
+ ok.
+
+test_format() ->
+ <<" -4">> = bformat("{0:4}", [-4]),
+ <<" 4">> = bformat("{0:4}", [4]),
+ <<" 4">> = bformat("{0:{0}}", [4]),
+ <<"4 ">> = bformat("{0:4}", ["4"]),
+ <<"4 ">> = bformat("{0:{0}}", ["4"]),
+ <<"1.2yoDEF">> = bformat("{2}{0}{1}{3}", {yo, "DE", 1.2, <<"F">>}),
+ <<"cafebabe">> = bformat("{0:x}", {16#cafebabe}),
+ <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}),
+ <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}),
+ <<"755">> = bformat("{0:o}", {8#755}),
+ <<"a">> = bformat("{0:c}", {97}),
+ %% Horizontal ellipsis
+ <<226, 128, 166>> = bformat("{0:c}", {16#2026}),
+ <<"11">> = bformat("{0:b}", {3}),
+ <<"11">> = bformat("{0:b}", [3]),
+ <<"11">> = bformat("{three:b}", [{three, 3}]),
+ <<"11">> = bformat("{three:b}", [{"three", 3}]),
+ <<"11">> = bformat("{three:b}", [{<<"three">>, 3}]),
+ <<"\"foo\"">> = bformat("{0!r}", {"foo"}),
+ <<"2008-5-4">> = bformat("{0.0}-{0.1}-{0.2}", {{2008,5,4}}),
+ <<"2008-05-04">> = bformat("{0.0:04}-{0.1:02}-{0.2:02}", {{2008,5,4}}),
+ <<"foo6bar-6">> = bformat("foo{1}{0}-{1}", {bar, 6}),
+ <<"-'atom test'-">> = bformat("-{arg!r}-", [{arg, 'atom test'}]),
+ <<"2008-05-04">> = bformat("{0.0:0{1.0}}-{0.1:0{1.1}}-{0.2:0{1.2}}",
+ {{2008,5,4}, {4, 2, 2}}),
+ ok.
+
+test_std() ->
+ M = mochifmt_std:new(),
+ <<"01">> = bformat("{0}{1}", [0, 1], M),
+ ok.
+
+test_records() ->
+ M = mochifmt_records:new([{conversion, record_info(fields, conversion)}]),
+ R = #conversion{length=long, precision=hard, sign=peace},
+ long = M:get_value("length", R),
+ hard = M:get_value("precision", R),
+ peace = M:get_value("sign", R),
+ <<"long hard">> = bformat("{length} {precision}", R, M),
+ <<"long hard">> = bformat("{0.length} {0.precision}", [R], M),
+ ok.
diff --git a/apps/mochiweb/src/mochifmt_records.erl b/apps/mochiweb/src/mochifmt_records.erl
new file mode 100644
index 00000000..94c77978
--- /dev/null
+++ b/apps/mochiweb/src/mochifmt_records.erl
@@ -0,0 +1,30 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2008 Mochi Media, Inc.
+
+%% @doc Formatter that understands records.
+%%
+%% Usage:
+%%
+%% 1> M = mochifmt_records:new([{rec, record_info(fields, rec)}]),
+%% M:format("{0.bar}", [#rec{bar=foo}]).
+%% foo
+
+-module(mochifmt_records, [Recs]).
+-author('bob@mochimedia.com').
+-export([get_value/2]).
+
+get_value(Key, Rec) when is_tuple(Rec) and is_atom(element(1, Rec)) ->
+ try begin
+ Atom = list_to_existing_atom(Key),
+ {_, Fields} = proplists:lookup(element(1, Rec), Recs),
+ element(get_rec_index(Atom, Fields, 2), Rec)
+ end
+ catch error:_ -> mochifmt:get_value(Key, Rec)
+ end;
+get_value(Key, Args) ->
+ mochifmt:get_value(Key, Args).
+
+get_rec_index(Atom, [Atom | _], Index) ->
+ Index;
+get_rec_index(Atom, [_ | Rest], Index) ->
+ get_rec_index(Atom, Rest, 1 + Index).
diff --git a/apps/mochiweb/src/mochifmt_std.erl b/apps/mochiweb/src/mochifmt_std.erl
new file mode 100644
index 00000000..9442016a
--- /dev/null
+++ b/apps/mochiweb/src/mochifmt_std.erl
@@ -0,0 +1,23 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2008 Mochi Media, Inc.
+
+%% @doc Template module for a mochifmt formatter.
+
+-module(mochifmt_std, []).
+-author('bob@mochimedia.com').
+-export([format/2, get_value/2, format_field/2, get_field/2, convert_field/2]).
+
+format(Format, Args) ->
+ mochifmt:format(Format, Args, THIS).
+
+get_field(Key, Args) ->
+ mochifmt:get_field(Key, Args, THIS).
+
+convert_field(Key, Args) ->
+ mochifmt:convert_field(Key, Args).
+
+get_value(Key, Args) ->
+ mochifmt:get_value(Key, Args).
+
+format_field(Arg, Format) ->
+ mochifmt:format_field(Arg, Format, THIS).
diff --git a/apps/mochiweb/src/mochihex.erl b/apps/mochiweb/src/mochihex.erl
new file mode 100644
index 00000000..7fe6899e
--- /dev/null
+++ b/apps/mochiweb/src/mochihex.erl
@@ -0,0 +1,75 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2006 Mochi Media, Inc.
+
+%% @doc Utilities for working with hexadecimal strings.
+
+-module(mochihex).
+-author('bob@mochimedia.com').
+
+-export([test/0, to_hex/1, to_bin/1, to_int/1, dehex/1, hexdigit/1]).
+
+%% @type iolist() = [char() | binary() | iolist()]
+%% @type iodata() = iolist() | binary()
+
+%% @spec to_hex(integer | iolist()) -> string()
+%% @doc Convert an iolist to a hexadecimal string.
+to_hex(0) ->
+ "0";
+to_hex(I) when is_integer(I), I > 0 ->
+ to_hex_int(I, []);
+to_hex(B) ->
+ to_hex(iolist_to_binary(B), []).
+
+%% @spec to_bin(string()) -> binary()
+%% @doc Convert a hexadecimal string to a binary.
+to_bin(L) ->
+ to_bin(L, []).
+
+%% @spec to_int(string()) -> integer()
+%% @doc Convert a hexadecimal string to an integer.
+to_int(L) ->
+ erlang:list_to_integer(L, 16).
+
+%% @spec dehex(char()) -> integer()
+%% @doc Convert a hex digit to its integer value.
+dehex(C) when C >= $0, C =< $9 ->
+ C - $0;
+dehex(C) when C >= $a, C =< $f ->
+ C - $a + 10;
+dehex(C) when C >= $A, C =< $F ->
+ C - $A + 10.
+
+%% @spec hexdigit(integer()) -> char()
+%% @doc Convert an integer less than 16 to a hex digit.
+hexdigit(C) when C >= 0, C =< 9 ->
+ C + $0;
+hexdigit(C) when C =< 15 ->
+ C + $a - 10.
+
+%% @spec test() -> ok
+%% @doc Test this module.
+test() ->
+ "ff000ff1" = to_hex([255, 0, 15, 241]),
+ <<255, 0, 15, 241>> = to_bin("ff000ff1"),
+ 16#ff000ff1 = to_int("ff000ff1"),
+ "ff000ff1" = to_hex(16#ff000ff1),
+ ok.
+
+
+%% Internal API
+
+to_hex(<<>>, Acc) ->
+ lists:reverse(Acc);
+to_hex(<<C1:4, C2:4, Rest/binary>>, Acc) ->
+ to_hex(Rest, [hexdigit(C2), hexdigit(C1) | Acc]).
+
+to_hex_int(0, Acc) ->
+ Acc;
+to_hex_int(I, Acc) ->
+ to_hex_int(I bsr 4, [hexdigit(I band 15) | Acc]).
+
+to_bin([], Acc) ->
+ iolist_to_binary(lists:reverse(Acc));
+to_bin([C1, C2 | Rest], Acc) ->
+ to_bin(Rest, [(dehex(C1) bsl 4) bor dehex(C2) | Acc]).
+
diff --git a/apps/mochiweb/src/mochijson.erl b/apps/mochiweb/src/mochijson.erl
new file mode 100644
index 00000000..74695a75
--- /dev/null
+++ b/apps/mochiweb/src/mochijson.erl
@@ -0,0 +1,528 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2006 Mochi Media, Inc.
+
+%% @doc Yet another JSON (RFC 4627) library for Erlang.
+-module(mochijson).
+-author('bob@mochimedia.com').
+-export([encoder/1, encode/1]).
+-export([decoder/1, decode/1]).
+-export([binary_encoder/1, binary_encode/1]).
+-export([binary_decoder/1, binary_decode/1]).
+-export([test/0]).
+
+% This is a macro to placate syntax highlighters..
+-define(Q, $\").
+-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}).
+-define(INC_COL(S), S#decoder{column=1+S#decoder.column}).
+-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}).
+
+%% @type iolist() = [char() | binary() | iolist()]
+%% @type iodata() = iolist() | binary()
+%% @type json_string() = atom | string() | binary()
+%% @type json_number() = integer() | float()
+%% @type json_array() = {array, [json_term()]}
+%% @type json_object() = {struct, [{json_string(), json_term()}]}
+%% @type json_term() = json_string() | json_number() | json_array() |
+%% json_object()
+%% @type encoding() = utf8 | unicode
+%% @type encoder_option() = {input_encoding, encoding()} |
+%% {handler, function()}
+%% @type decoder_option() = {input_encoding, encoding()} |
+%% {object_hook, function()}
+%% @type bjson_string() = binary()
+%% @type bjson_number() = integer() | float()
+%% @type bjson_array() = [bjson_term()]
+%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]}
+%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() |
+%% bjson_object()
+%% @type binary_encoder_option() = {handler, function()}
+%% @type binary_decoder_option() = {object_hook, function()}
+
+-record(encoder, {input_encoding=unicode,
+ handler=null}).
+
+-record(decoder, {input_encoding=utf8,
+ object_hook=null,
+ line=1,
+ column=1,
+ state=null}).
+
+%% @spec encoder([encoder_option()]) -> function()
+%% @doc Create an encoder/1 with the given options.
+encoder(Options) ->
+ State = parse_encoder_options(Options, #encoder{}),
+ fun (O) -> json_encode(O, State) end.
+
+%% @spec encode(json_term()) -> iolist()
+%% @doc Encode the given as JSON to an iolist.
+encode(Any) ->
+ json_encode(Any, #encoder{}).
+
+%% @spec decoder([decoder_option()]) -> function()
+%% @doc Create a decoder/1 with the given options.
+decoder(Options) ->
+ State = parse_decoder_options(Options, #decoder{}),
+ fun (O) -> json_decode(O, State) end.
+
+%% @spec decode(iolist()) -> json_term()
+%% @doc Decode the given iolist to Erlang terms.
+decode(S) ->
+ json_decode(S, #decoder{}).
+
+%% @spec binary_decoder([binary_decoder_option()]) -> function()
+%% @doc Create a binary_decoder/1 with the given options.
+binary_decoder(Options) ->
+ mochijson2:decoder(Options).
+
+%% @spec binary_encoder([binary_encoder_option()]) -> function()
+%% @doc Create a binary_encoder/1 with the given options.
+binary_encoder(Options) ->
+ mochijson2:encoder(Options).
+
+%% @spec binary_encode(bjson_term()) -> iolist()
+%% @doc Encode the given as JSON to an iolist, using lists for arrays and
+%% binaries for strings.
+binary_encode(Any) ->
+ mochijson2:encode(Any).
+
+%% @spec binary_decode(iolist()) -> bjson_term()
+%% @doc Decode the given iolist to Erlang terms, using lists for arrays and
+%% binaries for strings.
+binary_decode(S) ->
+ mochijson2:decode(S).
+
+test() ->
+ test_all(),
+ mochijson2:test().
+
+%% Internal API
+
+parse_encoder_options([], State) ->
+ State;
+parse_encoder_options([{input_encoding, Encoding} | Rest], State) ->
+ parse_encoder_options(Rest, State#encoder{input_encoding=Encoding});
+parse_encoder_options([{handler, Handler} | Rest], State) ->
+ parse_encoder_options(Rest, State#encoder{handler=Handler}).
+
+parse_decoder_options([], State) ->
+ State;
+parse_decoder_options([{input_encoding, Encoding} | Rest], State) ->
+ parse_decoder_options(Rest, State#decoder{input_encoding=Encoding});
+parse_decoder_options([{object_hook, Hook} | Rest], State) ->
+ parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
+
+json_encode(true, _State) ->
+ "true";
+json_encode(false, _State) ->
+ "false";
+json_encode(null, _State) ->
+ "null";
+json_encode(I, _State) when is_integer(I) ->
+ integer_to_list(I);
+json_encode(F, _State) when is_float(F) ->
+ mochinum:digits(F);
+json_encode(L, State) when is_list(L); is_binary(L); is_atom(L) ->
+ json_encode_string(L, State);
+json_encode({array, Props}, State) when is_list(Props) ->
+ json_encode_array(Props, State);
+json_encode({struct, Props}, State) when is_list(Props) ->
+ json_encode_proplist(Props, State);
+json_encode(Bad, #encoder{handler=null}) ->
+ exit({json_encode, {bad_term, Bad}});
+json_encode(Bad, State=#encoder{handler=Handler}) ->
+ json_encode(Handler(Bad), State).
+
+json_encode_array([], _State) ->
+ "[]";
+json_encode_array(L, State) ->
+ F = fun (O, Acc) ->
+ [$,, json_encode(O, State) | Acc]
+ end,
+ [$, | Acc1] = lists:foldl(F, "[", L),
+ lists:reverse([$\] | Acc1]).
+
+json_encode_proplist([], _State) ->
+ "{}";
+json_encode_proplist(Props, State) ->
+ F = fun ({K, V}, Acc) ->
+ KS = case K of
+ K when is_atom(K) ->
+ json_encode_string_utf8(atom_to_list(K));
+ K when is_integer(K) ->
+ json_encode_string(integer_to_list(K), State);
+ K when is_list(K); is_binary(K) ->
+ json_encode_string(K, State)
+ end,
+ VS = json_encode(V, State),
+ [$,, VS, $:, KS | Acc]
+ end,
+ [$, | Acc1] = lists:foldl(F, "{", Props),
+ lists:reverse([$\} | Acc1]).
+
+json_encode_string(A, _State) when is_atom(A) ->
+ json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A)));
+json_encode_string(B, _State) when is_binary(B) ->
+ json_encode_string_unicode(xmerl_ucs:from_utf8(B));
+json_encode_string(S, #encoder{input_encoding=utf8}) ->
+ json_encode_string_utf8(S);
+json_encode_string(S, #encoder{input_encoding=unicode}) ->
+ json_encode_string_unicode(S).
+
+json_encode_string_utf8(S) ->
+ [?Q | json_encode_string_utf8_1(S)].
+
+json_encode_string_utf8_1([C | Cs]) when C >= 0, C =< 16#7f ->
+ NewC = case C of
+ $\\ -> "\\\\";
+ ?Q -> "\\\"";
+ _ when C >= $\s, C < 16#7f -> C;
+ $\t -> "\\t";
+ $\n -> "\\n";
+ $\r -> "\\r";
+ $\f -> "\\f";
+ $\b -> "\\b";
+ _ when C >= 0, C =< 16#7f -> unihex(C);
+ _ -> exit({json_encode, {bad_char, C}})
+ end,
+ [NewC | json_encode_string_utf8_1(Cs)];
+json_encode_string_utf8_1(All=[C | _]) when C >= 16#80, C =< 16#10FFFF ->
+ [?Q | Rest] = json_encode_string_unicode(xmerl_ucs:from_utf8(All)),
+ Rest;
+json_encode_string_utf8_1([]) ->
+ "\"".
+
+json_encode_string_unicode(S) ->
+ [?Q | json_encode_string_unicode_1(S)].
+
+json_encode_string_unicode_1([C | Cs]) ->
+ NewC = case C of
+ $\\ -> "\\\\";
+ ?Q -> "\\\"";
+ _ when C >= $\s, C < 16#7f -> C;
+ $\t -> "\\t";
+ $\n -> "\\n";
+ $\r -> "\\r";
+ $\f -> "\\f";
+ $\b -> "\\b";
+ _ when C >= 0, C =< 16#10FFFF -> unihex(C);
+ _ -> exit({json_encode, {bad_char, C}})
+ end,
+ [NewC | json_encode_string_unicode_1(Cs)];
+json_encode_string_unicode_1([]) ->
+ "\"".
+
+dehex(C) when C >= $0, C =< $9 ->
+ C - $0;
+dehex(C) when C >= $a, C =< $f ->
+ C - $a + 10;
+dehex(C) when C >= $A, C =< $F ->
+ C - $A + 10.
+
+hexdigit(C) when C >= 0, C =< 9 ->
+ C + $0;
+hexdigit(C) when C =< 15 ->
+ C + $a - 10.
+
+unihex(C) when C < 16#10000 ->
+ <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
+ Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
+ [$\\, $u | Digits];
+unihex(C) when C =< 16#10FFFF ->
+ N = C - 16#10000,
+ S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
+ S2 = 16#dc00 bor (N band 16#3ff),
+ [unihex(S1), unihex(S2)].
+
+json_decode(B, S) when is_binary(B) ->
+ json_decode(binary_to_list(B), S);
+json_decode(L, S) ->
+ {Res, L1, S1} = decode1(L, S),
+ {eof, [], _} = tokenize(L1, S1#decoder{state=trim}),
+ Res.
+
+decode1(L, S=#decoder{state=null}) ->
+ case tokenize(L, S#decoder{state=any}) of
+ {{const, C}, L1, S1} ->
+ {C, L1, S1};
+ {start_array, L1, S1} ->
+ decode_array(L1, S1#decoder{state=any}, []);
+ {start_object, L1, S1} ->
+ decode_object(L1, S1#decoder{state=key}, [])
+ end.
+
+make_object(V, #decoder{object_hook=null}) ->
+ V;
+make_object(V, #decoder{object_hook=Hook}) ->
+ Hook(V).
+
+decode_object(L, S=#decoder{state=key}, Acc) ->
+ case tokenize(L, S) of
+ {end_object, Rest, S1} ->
+ V = make_object({struct, lists:reverse(Acc)}, S1),
+ {V, Rest, S1#decoder{state=null}};
+ {{const, K}, Rest, S1} when is_list(K) ->
+ {colon, L2, S2} = tokenize(Rest, S1),
+ {V, L3, S3} = decode1(L2, S2#decoder{state=null}),
+ decode_object(L3, S3#decoder{state=comma}, [{K, V} | Acc])
+ end;
+decode_object(L, S=#decoder{state=comma}, Acc) ->
+ case tokenize(L, S) of
+ {end_object, Rest, S1} ->
+ V = make_object({struct, lists:reverse(Acc)}, S1),
+ {V, Rest, S1#decoder{state=null}};
+ {comma, Rest, S1} ->
+ decode_object(Rest, S1#decoder{state=key}, Acc)
+ end.
+
+decode_array(L, S=#decoder{state=any}, Acc) ->
+ case tokenize(L, S) of
+ {end_array, Rest, S1} ->
+ {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
+ {start_array, Rest, S1} ->
+ {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []),
+ decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
+ {start_object, Rest, S1} ->
+ {Array, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []),
+ decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
+ {{const, Const}, Rest, S1} ->
+ decode_array(Rest, S1#decoder{state=comma}, [Const | Acc])
+ end;
+decode_array(L, S=#decoder{state=comma}, Acc) ->
+ case tokenize(L, S) of
+ {end_array, Rest, S1} ->
+ {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
+ {comma, Rest, S1} ->
+ decode_array(Rest, S1#decoder{state=any}, Acc)
+ end.
+
+tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
+ when is_list(C); is_binary(C); C >= 16#7f ->
+ List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
+ tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
+tokenize_string("\"" ++ Rest, S, Acc) ->
+ {lists:reverse(Acc), Rest, ?INC_COL(S)};
+tokenize_string("\\\"" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
+tokenize_string("\\\\" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
+tokenize_string("\\/" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
+tokenize_string("\\b" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
+tokenize_string("\\f" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
+tokenize_string("\\n" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
+tokenize_string("\\r" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
+tokenize_string("\\t" ++ Rest, S, Acc) ->
+ tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
+tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
+ % coalesce UTF-16 surrogate pair?
+ C = dehex(C0) bor
+ (dehex(C1) bsl 4) bor
+ (dehex(C2) bsl 8) bor
+ (dehex(C3) bsl 12),
+ tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]);
+tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
+ tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).
+
+tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc)
+ when is_list(C); is_binary(C); C >= 16#7f ->
+ List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
+ tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc);
+tokenize_number([$- | Rest], sign, S, []) ->
+ tokenize_number(Rest, int, ?INC_COL(S), [$-]);
+tokenize_number(Rest, sign, S, []) ->
+ tokenize_number(Rest, int, S, []);
+tokenize_number([$0 | Rest], int, S, Acc) ->
+ tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]);
+tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 ->
+ tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
+tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 ->
+ tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
+tokenize_number(Rest, int1, S, Acc) ->
+ tokenize_number(Rest, frac, S, Acc);
+tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 ->
+ tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
+tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E ->
+ tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
+tokenize_number(Rest, frac, S, Acc) ->
+ {{int, lists:reverse(Acc)}, Rest, S};
+tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 ->
+ tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]);
+tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E ->
+ tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]);
+tokenize_number(Rest, frac1, S, Acc) ->
+ {{float, lists:reverse(Acc)}, Rest, S};
+tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ ->
+ tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]);
+tokenize_number(Rest, esign, S, Acc) ->
+ tokenize_number(Rest, eint, S, Acc);
+tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 ->
+ tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
+tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 ->
+ tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
+tokenize_number(Rest, eint1, S, Acc) ->
+ {{float, lists:reverse(Acc)}, Rest, S}.
+
+tokenize([], S=#decoder{state=trim}) ->
+ {eof, [], S};
+tokenize([L | Rest], S) when is_list(L) ->
+ tokenize(L ++ Rest, S);
+tokenize([B | Rest], S) when is_binary(B) ->
+ tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
+tokenize("\r\n" ++ Rest, S) ->
+ tokenize(Rest, ?INC_LINE(S));
+tokenize("\n" ++ Rest, S) ->
+ tokenize(Rest, ?INC_LINE(S));
+tokenize([C | Rest], S) when C == $\s; C == $\t ->
+ tokenize(Rest, ?INC_COL(S));
+tokenize("{" ++ Rest, S) ->
+ {start_object, Rest, ?INC_COL(S)};
+tokenize("}" ++ Rest, S) ->
+ {end_object, Rest, ?INC_COL(S)};
+tokenize("[" ++ Rest, S) ->
+ {start_array, Rest, ?INC_COL(S)};
+tokenize("]" ++ Rest, S) ->
+ {end_array, Rest, ?INC_COL(S)};
+tokenize("," ++ Rest, S) ->
+ {comma, Rest, ?INC_COL(S)};
+tokenize(":" ++ Rest, S) ->
+ {colon, Rest, ?INC_COL(S)};
+tokenize("null" ++ Rest, S) ->
+ {{const, null}, Rest, ?ADV_COL(S, 4)};
+tokenize("true" ++ Rest, S) ->
+ {{const, true}, Rest, ?ADV_COL(S, 4)};
+tokenize("false" ++ Rest, S) ->
+ {{const, false}, Rest, ?ADV_COL(S, 5)};
+tokenize("\"" ++ Rest, S) ->
+ {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
+ {{const, String}, Rest1, S1};
+tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
+ case tokenize_number(L, sign, S, []) of
+ {{int, Int}, Rest, S1} ->
+ {{const, list_to_integer(Int)}, Rest, S1};
+ {{float, Float}, Rest, S1} ->
+ {{const, list_to_float(Float)}, Rest, S1}
+ end.
+
+%% testing constructs borrowed from the Yaws JSON implementation.
+
+%% Create an object from a list of Key/Value pairs.
+
+obj_new() ->
+ {struct, []}.
+
+is_obj({struct, Props}) ->
+ F = fun ({K, _}) when is_list(K) ->
+ true;
+ (_) ->
+ false
+ end,
+ lists:all(F, Props).
+
+obj_from_list(Props) ->
+ Obj = {struct, Props},
+ case is_obj(Obj) of
+ true -> Obj;
+ false -> exit(json_bad_object)
+ end.
+
+%% Test for equivalence of Erlang terms.
+%% Due to arbitrary order of construction, equivalent objects might
+%% compare unequal as erlang terms, so we need to carefully recurse
+%% through aggregates (tuples and objects).
+
+equiv({struct, Props1}, {struct, Props2}) ->
+ equiv_object(Props1, Props2);
+equiv({array, L1}, {array, L2}) ->
+ equiv_list(L1, L2);
+equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
+equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
+equiv(true, true) -> true;
+equiv(false, false) -> true;
+equiv(null, null) -> true.
+
+%% Object representation and traversal order is unknown.
+%% Use the sledgehammer and sort property lists.
+
+equiv_object(Props1, Props2) ->
+ L1 = lists:keysort(1, Props1),
+ L2 = lists:keysort(1, Props2),
+ Pairs = lists:zip(L1, L2),
+ true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
+ equiv(K1, K2) and equiv(V1, V2)
+ end, Pairs).
+
+%% Recursively compare tuple elements for equivalence.
+
+equiv_list([], []) ->
+ true;
+equiv_list([V1 | L1], [V2 | L2]) ->
+ equiv(V1, V2) andalso equiv_list(L1, L2).
+
+test_all() ->
+ test_issue33(),
+ test_one(e2j_test_vec(utf8), 1).
+
+test_issue33() ->
+ %% http://code.google.com/p/mochiweb/issues/detail?id=33
+ Js = {struct, [{"key", [194, 163]}]},
+ Encoder = encoder([{input_encoding, utf8}]),
+ "{\"key\":\"\\u00a3\"}" = lists:flatten(Encoder(Js)).
+
+test_one([], _N) ->
+ %% io:format("~p tests passed~n", [N-1]),
+ ok;
+test_one([{E, J} | Rest], N) ->
+ %% io:format("[~p] ~p ~p~n", [N, E, J]),
+ true = equiv(E, decode(J)),
+ true = equiv(E, decode(encode(E))),
+ test_one(Rest, 1+N).
+
+e2j_test_vec(utf8) ->
+ [
+ {1, "1"},
+ {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
+ {-1, "-1"},
+ {-3.1416, "-3.14160"},
+ {12.0e10, "1.20000e+11"},
+ {1.234E+10, "1.23400e+10"},
+ {-1.234E-10, "-1.23400e-10"},
+ {10.0, "1.0e+01"},
+ {123.456, "1.23456E+2"},
+ {10.0, "1e1"},
+ {"foo", "\"foo\""},
+ {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
+ {"", "\"\""},
+ {"\"", "\"\\\"\""},
+ {"\n\n\n", "\"\\n\\n\\n\""},
+ {"\\", "\"\\\\\""},
+ {"\" \b\f\r\n\t\"", "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
+ {obj_new(), "{}"},
+ {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
+ {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
+ "{\"foo\":\"bar\",\"baz\":123}"},
+ {{array, []}, "[]"},
+ {{array, [{array, []}]}, "[[]]"},
+ {{array, [1, "foo"]}, "[1,\"foo\"]"},
+
+ % json array in a json object
+ {obj_from_list([{"foo", {array, [123]}}]),
+ "{\"foo\":[123]}"},
+
+ % json object in a json object
+ {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
+ "{\"foo\":{\"bar\":true}}"},
+
+ % fold evaluation order
+ {obj_from_list([{"foo", {array, []}},
+ {"bar", obj_from_list([{"baz", true}])},
+ {"alice", "bob"}]),
+ "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
+
+ % json object in a json array
+ {{array, [-123, "foo", obj_from_list([{"bar", {array, []}}]), null]},
+ "[-123,\"foo\",{\"bar\":[]},null]"}
+ ].
diff --git a/apps/mochiweb/src/mochijson2.erl b/apps/mochiweb/src/mochijson2.erl
new file mode 100644
index 00000000..111c37bd
--- /dev/null
+++ b/apps/mochiweb/src/mochijson2.erl
@@ -0,0 +1,660 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
+%% with binaries as strings, arrays as lists (without an {array, _})
+%% wrapper and it only knows how to decode UTF-8 (and ASCII).
+
+-module(mochijson2).
+-author('bob@mochimedia.com').
+-export([encoder/1, encode/1]).
+-export([decoder/1, decode/1]).
+-export([test/0]).
+
+% This is a macro to placate syntax highlighters..
+-define(Q, $\").
+-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
+ column=N+S#decoder.column}).
+-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
+ column=1+S#decoder.column}).
+-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
+ column=1,
+ line=1+S#decoder.line}).
+-define(INC_CHAR(S, C),
+ case C of
+ $\n ->
+ S#decoder{column=1,
+ line=1+S#decoder.line,
+ offset=1+S#decoder.offset};
+ _ ->
+ S#decoder{column=1+S#decoder.column,
+ offset=1+S#decoder.offset}
+ end).
+-define(IS_WHITESPACE(C),
+ (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
+
+%% @type iolist() = [char() | binary() | iolist()]
+%% @type iodata() = iolist() | binary()
+%% @type json_string() = atom | binary()
+%% @type json_number() = integer() | float()
+%% @type json_array() = [json_term()]
+%% @type json_object() = {struct, [{json_string(), json_term()}]}
+%% @type json_term() = json_string() | json_number() | json_array() |
+%% json_object()
+
+-record(encoder, {handler=null,
+ utf8=false}).
+
+-record(decoder, {object_hook=null,
+ offset=0,
+ line=1,
+ column=1,
+ state=null}).
+
+%% @spec encoder([encoder_option()]) -> function()
+%% @doc Create an encoder/1 with the given options.
+%% @type encoder_option() = handler_option() | utf8_option()
+%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
+encoder(Options) ->
+ State = parse_encoder_options(Options, #encoder{}),
+ fun (O) -> json_encode(O, State) end.
+
+%% @spec encode(json_term()) -> iolist()
+%% @doc Encode the given as JSON to an iolist.
+encode(Any) ->
+ json_encode(Any, #encoder{}).
+
+%% @spec decoder([decoder_option()]) -> function()
+%% @doc Create a decoder/1 with the given options.
+decoder(Options) ->
+ State = parse_decoder_options(Options, #decoder{}),
+ fun (O) -> json_decode(O, State) end.
+
+%% @spec decode(iolist()) -> json_term()
+%% @doc Decode the given iolist to Erlang terms.
+decode(S) ->
+ json_decode(S, #decoder{}).
+
+test() ->
+ test_all().
+
+%% Internal API
+
+parse_encoder_options([], State) ->
+ State;
+parse_encoder_options([{handler, Handler} | Rest], State) ->
+ parse_encoder_options(Rest, State#encoder{handler=Handler});
+parse_encoder_options([{utf8, Switch} | Rest], State) ->
+ parse_encoder_options(Rest, State#encoder{utf8=Switch}).
+
+parse_decoder_options([], State) ->
+ State;
+parse_decoder_options([{object_hook, Hook} | Rest], State) ->
+ parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
+
+json_encode(true, _State) ->
+ <<"true">>;
+json_encode(false, _State) ->
+ <<"false">>;
+json_encode(null, _State) ->
+ <<"null">>;
+json_encode(I, _State) when is_integer(I) ->
+ integer_to_list(I);
+json_encode(F, _State) when is_float(F) ->
+ mochinum:digits(F);
+json_encode(S, State) when is_binary(S); is_atom(S) ->
+ json_encode_string(S, State);
+json_encode(Array, State) when is_list(Array) ->
+ json_encode_array(Array, State);
+json_encode({struct, Props}, State) when is_list(Props) ->
+ json_encode_proplist(Props, State);
+json_encode(Bad, #encoder{handler=null}) ->
+ exit({json_encode, {bad_term, Bad}});
+json_encode(Bad, State=#encoder{handler=Handler}) ->
+ json_encode(Handler(Bad), State).
+
+json_encode_array([], _State) ->
+ <<"[]">>;
+json_encode_array(L, State) ->
+ F = fun (O, Acc) ->
+ [$,, json_encode(O, State) | Acc]
+ end,
+ [$, | Acc1] = lists:foldl(F, "[", L),
+ lists:reverse([$\] | Acc1]).
+
+json_encode_proplist([], _State) ->
+ <<"{}">>;
+json_encode_proplist(Props, State) ->
+ F = fun ({K, V}, Acc) ->
+ KS = json_encode_string(K, State),
+ VS = json_encode(V, State),
+ [$,, VS, $:, KS | Acc]
+ end,
+ [$, | Acc1] = lists:foldl(F, "{", Props),
+ lists:reverse([$\} | Acc1]).
+
+json_encode_string(A, State) when is_atom(A) ->
+ L = atom_to_list(A),
+ case json_string_is_safe(L) of
+ true ->
+ [?Q, L, ?Q];
+ false ->
+ json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
+ end;
+json_encode_string(B, State) when is_binary(B) ->
+ case json_bin_is_safe(B) of
+ true ->
+ [?Q, B, ?Q];
+ false ->
+ json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
+ end;
+json_encode_string(I, _State) when is_integer(I) ->
+ [?Q, integer_to_list(I), ?Q];
+json_encode_string(L, State) when is_list(L) ->
+ case json_string_is_safe(L) of
+ true ->
+ [?Q, L, ?Q];
+ false ->
+ json_encode_string_unicode(L, State, [?Q])
+ end.
+
+json_string_is_safe([]) ->
+ true;
+json_string_is_safe([C | Rest]) ->
+ case C of
+ ?Q ->
+ false;
+ $\\ ->
+ false;
+ $\b ->
+ false;
+ $\f ->
+ false;
+ $\n ->
+ false;
+ $\r ->
+ false;
+ $\t ->
+ false;
+ C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
+ false;
+ C when C < 16#7f ->
+ json_string_is_safe(Rest);
+ _ ->
+ false
+ end.
+
+json_bin_is_safe(<<>>) ->
+ true;
+json_bin_is_safe(<<C, Rest/binary>>) ->
+ case C of
+ ?Q ->
+ false;
+ $\\ ->
+ false;
+ $\b ->
+ false;
+ $\f ->
+ false;
+ $\n ->
+ false;
+ $\r ->
+ false;
+ $\t ->
+ false;
+ C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
+ false;
+ C when C < 16#7f ->
+ json_bin_is_safe(Rest);
+ _ ->
+ false
+ end.
+
+json_encode_string_unicode([], _State, Acc) ->
+ lists:reverse([$\" | Acc]);
+json_encode_string_unicode([C | Cs], State, Acc) ->
+ Acc1 = case C of
+ ?Q ->
+ [?Q, $\\ | Acc];
+ %% Escaping solidus is only useful when trying to protect
+ %% against "</script>" injection attacks which are only
+ %% possible when JSON is inserted into a HTML document
+ %% in-line. mochijson2 does not protect you from this, so
+ %% if you do insert directly into HTML then you need to
+ %% uncomment the following case or escape the output of encode.
+ %%
+ %% $/ ->
+ %% [$/, $\\ | Acc];
+ %%
+ $\\ ->
+ [$\\, $\\ | Acc];
+ $\b ->
+ [$b, $\\ | Acc];
+ $\f ->
+ [$f, $\\ | Acc];
+ $\n ->
+ [$n, $\\ | Acc];
+ $\r ->
+ [$r, $\\ | Acc];
+ $\t ->
+ [$t, $\\ | Acc];
+ C when C >= 0, C < $\s ->
+ [unihex(C) | Acc];
+ C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
+ [xmerl_ucs:to_utf8(C) | Acc];
+ C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
+ [unihex(C) | Acc];
+ C when C < 16#7f ->
+ [C | Acc];
+ _ ->
+ exit({json_encode, {bad_char, C}})
+ end,
+ json_encode_string_unicode(Cs, State, Acc1).
+
+hexdigit(C) when C >= 0, C =< 9 ->
+ C + $0;
+hexdigit(C) when C =< 15 ->
+ C + $a - 10.
+
+unihex(C) when C < 16#10000 ->
+ <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
+ Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
+ [$\\, $u | Digits];
+unihex(C) when C =< 16#10FFFF ->
+ N = C - 16#10000,
+ S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
+ S2 = 16#dc00 bor (N band 16#3ff),
+ [unihex(S1), unihex(S2)].
+
+json_decode(L, S) when is_list(L) ->
+ json_decode(iolist_to_binary(L), S);
+json_decode(B, S) ->
+ {Res, S1} = decode1(B, S),
+ {eof, _} = tokenize(B, S1#decoder{state=trim}),
+ Res.
+
+decode1(B, S=#decoder{state=null}) ->
+ case tokenize(B, S#decoder{state=any}) of
+ {{const, C}, S1} ->
+ {C, S1};
+ {start_array, S1} ->
+ decode_array(B, S1);
+ {start_object, S1} ->
+ decode_object(B, S1)
+ end.
+
+make_object(V, #decoder{object_hook=null}) ->
+ V;
+make_object(V, #decoder{object_hook=Hook}) ->
+ Hook(V).
+
+decode_object(B, S) ->
+ decode_object(B, S#decoder{state=key}, []).
+
+decode_object(B, S=#decoder{state=key}, Acc) ->
+ case tokenize(B, S) of
+ {end_object, S1} ->
+ V = make_object({struct, lists:reverse(Acc)}, S1),
+ {V, S1#decoder{state=null}};
+ {{const, K}, S1} ->
+ {colon, S2} = tokenize(B, S1),
+ {V, S3} = decode1(B, S2#decoder{state=null}),
+ decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
+ end;
+decode_object(B, S=#decoder{state=comma}, Acc) ->
+ case tokenize(B, S) of
+ {end_object, S1} ->
+ V = make_object({struct, lists:reverse(Acc)}, S1),
+ {V, S1#decoder{state=null}};
+ {comma, S1} ->
+ decode_object(B, S1#decoder{state=key}, Acc)
+ end.
+
+decode_array(B, S) ->
+ decode_array(B, S#decoder{state=any}, []).
+
+decode_array(B, S=#decoder{state=any}, Acc) ->
+ case tokenize(B, S) of
+ {end_array, S1} ->
+ {lists:reverse(Acc), S1#decoder{state=null}};
+ {start_array, S1} ->
+ {Array, S2} = decode_array(B, S1),
+ decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
+ {start_object, S1} ->
+ {Array, S2} = decode_object(B, S1),
+ decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
+ {{const, Const}, S1} ->
+ decode_array(B, S1#decoder{state=comma}, [Const | Acc])
+ end;
+decode_array(B, S=#decoder{state=comma}, Acc) ->
+ case tokenize(B, S) of
+ {end_array, S1} ->
+ {lists:reverse(Acc), S1#decoder{state=null}};
+ {comma, S1} ->
+ decode_array(B, S1#decoder{state=any}, Acc)
+ end.
+
+tokenize_string(B, S=#decoder{offset=O}) ->
+ case tokenize_string_fast(B, O) of
+ {escape, O1} ->
+ Length = O1 - O,
+ S1 = ?ADV_COL(S, Length),
+ <<_:O/binary, Head:Length/binary, _/binary>> = B,
+ tokenize_string(B, S1, lists:reverse(binary_to_list(Head)));
+ O1 ->
+ Length = O1 - O,
+ <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B,
+ {{const, String}, ?ADV_COL(S, Length + 1)}
+ end.
+
+tokenize_string_fast(B, O) ->
+ case B of
+ <<_:O/binary, ?Q, _/binary>> ->
+ O;
+ <<_:O/binary, $\\, _/binary>> ->
+ {escape, O};
+ <<_:O/binary, C1, _/binary>> when C1 < 128 ->
+ tokenize_string_fast(B, 1 + O);
+ <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+ C2 >= 128, C2 =< 191 ->
+ tokenize_string_fast(B, 2 + O);
+ <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191 ->
+ tokenize_string_fast(B, 3 + O);
+ <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191,
+ C4 >= 128, C4 =< 191 ->
+ tokenize_string_fast(B, 4 + O);
+ _ ->
+ throw(invalid_utf8)
+ end.
+
+tokenize_string(B, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, ?Q, _/binary>> ->
+ {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
+ <<_:O/binary, "\\\"", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
+ <<_:O/binary, "\\\\", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
+ <<_:O/binary, "\\/", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
+ <<_:O/binary, "\\b", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
+ <<_:O/binary, "\\f", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
+ <<_:O/binary, "\\n", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
+ <<_:O/binary, "\\r", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
+ <<_:O/binary, "\\t", _/binary>> ->
+ tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
+ <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
+ C = erlang:list_to_integer([C3, C2, C1, C0], 16),
+ if C > 16#D7FF, C < 16#DC00 ->
+ %% coalesce UTF-16 surrogate pair
+ <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
+ D = erlang:list_to_integer([D3,D2,D1,D0], 16),
+ [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
+ D:16/big-unsigned-integer>>),
+ Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
+ tokenize_string(B, ?ADV_COL(S, 12), Acc1);
+ true ->
+ Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
+ tokenize_string(B, ?ADV_COL(S, 6), Acc1)
+ end;
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
+ end.
+
+tokenize_number(B, S) ->
+ case tokenize_number(B, sign, S, []) of
+ {{int, Int}, S1} ->
+ {{const, list_to_integer(Int)}, S1};
+ {{float, Float}, S1} ->
+ {{const, list_to_float(Float)}, S1}
+ end.
+
+tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
+ case B of
+ <<_:O/binary, $-, _/binary>> ->
+ tokenize_number(B, int, ?INC_COL(S), [$-]);
+ _ ->
+ tokenize_number(B, int, S, [])
+ end;
+tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, $0, _/binary>> ->
+ tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
+ <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
+ tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
+ end;
+tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
+ tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
+ _ ->
+ tokenize_number(B, frac, S, Acc)
+ end;
+tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
+ tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
+ <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
+ tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
+ _ ->
+ {{int, lists:reverse(Acc)}, S}
+ end;
+tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
+ tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
+ <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
+ tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
+ _ ->
+ {{float, lists:reverse(Acc)}, S}
+ end;
+tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
+ tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
+ _ ->
+ tokenize_number(B, eint, S, Acc)
+ end;
+tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
+ tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
+ end;
+tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
+ tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
+ _ ->
+ {{float, lists:reverse(Acc)}, S}
+ end.
+
+tokenize(B, S=#decoder{offset=O}) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+ tokenize(B, ?INC_CHAR(S, C));
+ <<_:O/binary, "{", _/binary>> ->
+ {start_object, ?INC_COL(S)};
+ <<_:O/binary, "}", _/binary>> ->
+ {end_object, ?INC_COL(S)};
+ <<_:O/binary, "[", _/binary>> ->
+ {start_array, ?INC_COL(S)};
+ <<_:O/binary, "]", _/binary>> ->
+ {end_array, ?INC_COL(S)};
+ <<_:O/binary, ",", _/binary>> ->
+ {comma, ?INC_COL(S)};
+ <<_:O/binary, ":", _/binary>> ->
+ {colon, ?INC_COL(S)};
+ <<_:O/binary, "null", _/binary>> ->
+ {{const, null}, ?ADV_COL(S, 4)};
+ <<_:O/binary, "true", _/binary>> ->
+ {{const, true}, ?ADV_COL(S, 4)};
+ <<_:O/binary, "false", _/binary>> ->
+ {{const, false}, ?ADV_COL(S, 5)};
+ <<_:O/binary, "\"", _/binary>> ->
+ tokenize_string(B, ?INC_COL(S));
+ <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
+ orelse C =:= $- ->
+ tokenize_number(B, S);
+ <<_:O/binary>> ->
+ trim = S#decoder.state,
+ {eof, S}
+ end.
+
+%% testing constructs borrowed from the Yaws JSON implementation.
+
+%% Create an object from a list of Key/Value pairs.
+
+obj_new() ->
+ {struct, []}.
+
+is_obj({struct, Props}) ->
+ F = fun ({K, _}) when is_binary(K) ->
+ true;
+ (_) ->
+ false
+ end,
+ lists:all(F, Props).
+
+obj_from_list(Props) ->
+ Obj = {struct, Props},
+ case is_obj(Obj) of
+ true -> Obj;
+ false -> exit({json_bad_object, Obj})
+ end.
+
+%% Test for equivalence of Erlang terms.
+%% Due to arbitrary order of construction, equivalent objects might
+%% compare unequal as erlang terms, so we need to carefully recurse
+%% through aggregates (tuples and objects).
+
+equiv({struct, Props1}, {struct, Props2}) ->
+ equiv_object(Props1, Props2);
+equiv(L1, L2) when is_list(L1), is_list(L2) ->
+ equiv_list(L1, L2);
+equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
+equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
+equiv(true, true) -> true;
+equiv(false, false) -> true;
+equiv(null, null) -> true.
+
+%% Object representation and traversal order is unknown.
+%% Use the sledgehammer and sort property lists.
+
+equiv_object(Props1, Props2) ->
+ L1 = lists:keysort(1, Props1),
+ L2 = lists:keysort(1, Props2),
+ Pairs = lists:zip(L1, L2),
+ true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
+ equiv(K1, K2) and equiv(V1, V2)
+ end, Pairs).
+
+%% Recursively compare tuple elements for equivalence.
+
+equiv_list([], []) ->
+ true;
+equiv_list([V1 | L1], [V2 | L2]) ->
+ equiv(V1, V2) andalso equiv_list(L1, L2).
+
+test_all() ->
+ [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
+ <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]),
+ test_encoder_utf8(),
+ test_input_validation(),
+ test_one(e2j_test_vec(utf8), 1).
+
+test_one([], _N) ->
+ %% io:format("~p tests passed~n", [N-1]),
+ ok;
+test_one([{E, J} | Rest], N) ->
+ %% io:format("[~p] ~p ~p~n", [N, E, J]),
+ true = equiv(E, decode(J)),
+ true = equiv(E, decode(encode(E))),
+ test_one(Rest, 1+N).
+
+e2j_test_vec(utf8) ->
+ [
+ {1, "1"},
+ {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
+ {-1, "-1"},
+ {-3.1416, "-3.14160"},
+ {12.0e10, "1.20000e+11"},
+ {1.234E+10, "1.23400e+10"},
+ {-1.234E-10, "-1.23400e-10"},
+ {10.0, "1.0e+01"},
+ {123.456, "1.23456E+2"},
+ {10.0, "1e1"},
+ {<<"foo">>, "\"foo\""},
+ {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
+ {<<"">>, "\"\""},
+ {<<"\n\n\n">>, "\"\\n\\n\\n\""},
+ {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
+ {obj_new(), "{}"},
+ {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
+ {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
+ "{\"foo\":\"bar\",\"baz\":123}"},
+ {[], "[]"},
+ {[[]], "[[]]"},
+ {[1, <<"foo">>], "[1,\"foo\"]"},
+
+ %% json array in a json object
+ {obj_from_list([{<<"foo">>, [123]}]),
+ "{\"foo\":[123]}"},
+
+ %% json object in a json object
+ {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
+ "{\"foo\":{\"bar\":true}}"},
+
+ %% fold evaluation order
+ {obj_from_list([{<<"foo">>, []},
+ {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
+ {<<"alice">>, <<"bob">>}]),
+ "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
+
+ %% json object in a json array
+ {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
+ "[-123,\"foo\",{\"bar\":[]},null]"}
+ ].
+
+%% test utf8 encoding
+test_encoder_utf8() ->
+ %% safe conversion case (default)
+ [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
+ encode(<<1,"\321\202\320\265\321\201\321\202">>),
+
+ %% raw utf8 output (optional)
+ Enc = mochijson2:encoder([{utf8, true}]),
+ [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
+ Enc(<<1,"\321\202\320\265\321\201\321\202">>).
+
+test_input_validation() ->
+ Good = [
+ {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound
+ {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro
+ {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius
+ ],
+ lists:foreach(fun({CodePoint, UTF8}) ->
+ Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
+ Expect = decode(UTF8)
+ end, Good),
+
+ Bad = [
+ % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
+ <<?Q, 16#80, ?Q>>,
+ % missing continuations, last byte in each should be 80-BF
+ <<?Q, 16#C2, 16#7F, ?Q>>,
+ <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
+ <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
+ % we don't support code points > 10FFFF per RFC 3629
+ <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>
+ ],
+ lists:foreach(fun(X) ->
+ ok = try decode(X) catch invalid_utf8 -> ok end
+ end, Bad).
diff --git a/apps/mochiweb/src/mochinum.erl b/apps/mochiweb/src/mochinum.erl
new file mode 100644
index 00000000..6a866042
--- /dev/null
+++ b/apps/mochiweb/src/mochinum.erl
@@ -0,0 +1,289 @@
+%% @copyright 2007 Mochi Media, Inc.
+%% @author Bob Ippolito <bob@mochimedia.com>
+
+%% @doc Useful numeric algorithms for floats that cover some deficiencies
+%% in the math module. More interesting is digits/1, which implements
+%% the algorithm from:
+%% http://www.cs.indiana.edu/~burger/fp/index.html
+%% See also "Printing Floating-Point Numbers Quickly and Accurately"
+%% in Proceedings of the SIGPLAN '96 Conference on Programming Language
+%% Design and Implementation.
+
+-module(mochinum).
+-author("Bob Ippolito <bob@mochimedia.com>").
+-export([digits/1, frexp/1, int_pow/2, int_ceil/1, test/0]).
+
+%% IEEE 754 Float exponent bias
+-define(FLOAT_BIAS, 1022).
+-define(MIN_EXP, -1074).
+-define(BIG_POW, 4503599627370496).
+
+%% External API
+
+%% @spec digits(number()) -> string()
+%% @doc Returns a string that accurately represents the given integer or float
+%% using a conservative amount of digits. Great for generating
+%% human-readable output, or compact ASCII serializations for floats.
+digits(N) when is_integer(N) ->
+ integer_to_list(N);
+digits(0.0) ->
+ "0.0";
+digits(Float) ->
+ {Frac, Exp} = frexp(Float),
+ Exp1 = Exp - 53,
+ Frac1 = trunc(abs(Frac) * (1 bsl 53)),
+ [Place | Digits] = digits1(Float, Exp1, Frac1),
+ R = insert_decimal(Place, [$0 + D || D <- Digits]),
+ case Float < 0 of
+ true ->
+ [$- | R];
+ _ ->
+ R
+ end.
+
+%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()}
+%% @doc Return the fractional and exponent part of an IEEE 754 double,
+%% equivalent to the libc function of the same name.
+%% F = Frac * pow(2, Exp).
+frexp(F) ->
+ frexp1(unpack(F)).
+
+%% @spec int_pow(X::integer(), N::integer()) -> Y::integer()
+%% @doc Moderately efficient way to exponentiate integers.
+%% int_pow(10, 2) = 100.
+int_pow(_X, 0) ->
+ 1;
+int_pow(X, N) when N > 0 ->
+ int_pow(X, N, 1).
+
+%% @spec int_ceil(F::float()) -> integer()
+%% @doc Return the ceiling of F as an integer. The ceiling is defined as
+%% F when F == trunc(F);
+%% trunc(F) when F &lt; 0;
+%% trunc(F) + 1 when F &gt; 0.
+int_ceil(X) ->
+ T = trunc(X),
+ case (X - T) of
+ Neg when Neg < 0 -> T;
+ Pos when Pos > 0 -> T + 1;
+ _ -> T
+ end.
+
+
+%% Internal API
+
+int_pow(X, N, R) when N < 2 ->
+ R * X;
+int_pow(X, N, R) ->
+ int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end).
+
+insert_decimal(0, S) ->
+ "0." ++ S;
+insert_decimal(Place, S) when Place > 0 ->
+ L = length(S),
+ case Place - L of
+ 0 ->
+ S ++ ".0";
+ N when N < 0 ->
+ {S0, S1} = lists:split(L + N, S),
+ S0 ++ "." ++ S1;
+ N when N < 6 ->
+ %% More places than digits
+ S ++ lists:duplicate(N, $0) ++ ".0";
+ _ ->
+ insert_decimal_exp(Place, S)
+ end;
+insert_decimal(Place, S) when Place > -6 ->
+ "0." ++ lists:duplicate(abs(Place), $0) ++ S;
+insert_decimal(Place, S) ->
+ insert_decimal_exp(Place, S).
+
+insert_decimal_exp(Place, S) ->
+ [C | S0] = S,
+ S1 = case S0 of
+ [] ->
+ "0";
+ _ ->
+ S0
+ end,
+ Exp = case Place < 0 of
+ true ->
+ "e-";
+ false ->
+ "e+"
+ end,
+ [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)).
+
+
+digits1(Float, Exp, Frac) ->
+ Round = ((Frac band 1) =:= 0),
+ case Exp >= 0 of
+ true ->
+ BExp = 1 bsl Exp,
+ case (Frac /= ?BIG_POW) of
+ true ->
+ scale((Frac * BExp * 2), 2, BExp, BExp,
+ Round, Round, Float);
+ false ->
+ scale((Frac * BExp * 4), 4, (BExp * 2), BExp,
+ Round, Round, Float)
+ end;
+ false ->
+ case (Exp == ?MIN_EXP) orelse (Frac /= ?BIG_POW) of
+ true ->
+ scale((Frac * 2), 1 bsl (1 - Exp), 1, 1,
+ Round, Round, Float);
+ false ->
+ scale((Frac * 4), 1 bsl (2 - Exp), 2, 1,
+ Round, Round, Float)
+ end
+ end.
+
+scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) ->
+ Est = int_ceil(math:log10(abs(Float)) - 1.0e-10),
+ %% Note that the scheme implementation uses a 326 element look-up table
+ %% for int_pow(10, N) where we do not.
+ case Est >= 0 of
+ true ->
+ fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est,
+ LowOk, HighOk);
+ false ->
+ Scale = int_pow(10, -Est),
+ fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est,
+ LowOk, HighOk)
+ end.
+
+fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) ->
+ TooLow = case HighOk of
+ true ->
+ (R + MPlus) >= S;
+ false ->
+ (R + MPlus) > S
+ end,
+ case TooLow of
+ true ->
+ [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)];
+ false ->
+ [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)]
+ end.
+
+generate(R0, S, MPlus, MMinus, LowOk, HighOk) ->
+ D = R0 div S,
+ R = R0 rem S,
+ TC1 = case LowOk of
+ true ->
+ R =< MMinus;
+ false ->
+ R < MMinus
+ end,
+ TC2 = case HighOk of
+ true ->
+ (R + MPlus) >= S;
+ false ->
+ (R + MPlus) > S
+ end,
+ case TC1 of
+ false ->
+ case TC2 of
+ false ->
+ [D | generate(R * 10, S, MPlus * 10, MMinus * 10,
+ LowOk, HighOk)];
+ true ->
+ [D + 1]
+ end;
+ true ->
+ case TC2 of
+ false ->
+ [D];
+ true ->
+ case R * 2 < S of
+ true ->
+ [D];
+ false ->
+ [D + 1]
+ end
+ end
+ end.
+
+unpack(Float) ->
+ <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>,
+ {Sign, Exp, Frac}.
+
+frexp1({_Sign, 0, 0}) ->
+ {0.0, 0};
+frexp1({Sign, 0, Frac}) ->
+ Exp = log2floor(Frac),
+ <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>,
+ {Frac1, -(?FLOAT_BIAS) - 52 + Exp};
+frexp1({Sign, Exp, Frac}) ->
+ <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>,
+ {Frac1, Exp - ?FLOAT_BIAS}.
+
+log2floor(Int) ->
+ log2floor(Int, 0).
+
+log2floor(0, N) ->
+ N;
+log2floor(Int, N) ->
+ log2floor(Int bsr 1, 1 + N).
+
+
+test() ->
+ ok = test_frexp(),
+ ok = test_int_ceil(),
+ ok = test_int_pow(),
+ ok = test_digits(),
+ ok.
+
+test_int_ceil() ->
+ 1 = int_ceil(0.0001),
+ 0 = int_ceil(0.0),
+ 1 = int_ceil(0.99),
+ 1 = int_ceil(1.0),
+ -1 = int_ceil(-1.5),
+ -2 = int_ceil(-2.0),
+ ok.
+
+test_int_pow() ->
+ 1 = int_pow(1, 1),
+ 1 = int_pow(1, 0),
+ 1 = int_pow(10, 0),
+ 10 = int_pow(10, 1),
+ 100 = int_pow(10, 2),
+ 1000 = int_pow(10, 3),
+ ok.
+
+test_digits() ->
+ "0" = digits(0),
+ "0.0" = digits(0.0),
+ "1.0" = digits(1.0),
+ "-1.0" = digits(-1.0),
+ "0.1" = digits(0.1),
+ "0.01" = digits(0.01),
+ "0.001" = digits(0.001),
+ ok.
+
+test_frexp() ->
+ %% zero
+ {0.0, 0} = frexp(0.0),
+ %% one
+ {0.5, 1} = frexp(1.0),
+ %% negative one
+ {-0.5, 1} = frexp(-1.0),
+ %% small denormalized number
+ %% 4.94065645841246544177e-324
+ <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
+ {0.5, -1073} = frexp(SmallDenorm),
+ %% large denormalized number
+ %% 2.22507385850720088902e-308
+ <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
+ {0.99999999999999978, -1022} = frexp(BigDenorm),
+ %% small normalized number
+ %% 2.22507385850720138309e-308
+ <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
+ {0.5, -1021} = frexp(SmallNorm),
+ %% large normalized number
+ %% 1.79769313486231570815e+308
+ <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
+ {0.99999999999999989, 1024} = frexp(LargeNorm),
+ ok.
diff --git a/apps/mochiweb/src/mochiweb.app.src b/apps/mochiweb/src/mochiweb.app.src
new file mode 100644
index 00000000..b0f90144
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb.app.src
@@ -0,0 +1,32 @@
+{application, mochiweb,
+ [{description, "MochiMedia Web Server"},
+ {vsn, "113"},
+ {modules, [
+ mochihex,
+ mochijson,
+ mochijson2,
+ mochinum,
+ mochiweb,
+ mochiweb_app,
+ mochiweb_charref,
+ mochiweb_cookies,
+ mochiweb_echo,
+ mochiweb_headers,
+ mochiweb_html,
+ mochiweb_http,
+ mochiweb_multipart,
+ mochiweb_request,
+ mochiweb_response,
+ mochiweb_skel,
+ mochiweb_socket_server,
+ mochiweb_sup,
+ mochiweb_util,
+ reloader,
+ mochifmt,
+ mochifmt_std,
+ mochifmt_records
+ ]},
+ {registered, []},
+ {mod, {mochiweb_app, []}},
+ {env, []},
+ {applications, [kernel, stdlib]}]}.
diff --git a/apps/mochiweb/src/mochiweb.erl b/apps/mochiweb/src/mochiweb.erl
new file mode 100644
index 00000000..0f4d52a6
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb.erl
@@ -0,0 +1,110 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Start and stop the MochiWeb server.
+
+-module(mochiweb).
+-author('bob@mochimedia.com').
+
+-export([start/0, stop/0]).
+-export([new_request/1, new_response/1]).
+-export([all_loaded/0, all_loaded/1, reload/0]).
+-export([test/0]).
+
+%% @spec start() -> ok
+%% @doc Start the MochiWeb server.
+start() ->
+ ensure_started(crypto),
+ application:start(mochiweb).
+
+%% @spec stop() -> ok
+%% @doc Stop the MochiWeb server.
+stop() ->
+ Res = application:stop(mochiweb),
+ application:stop(crypto),
+ Res.
+
+%% @spec test() -> ok
+%% @doc Run all of the tests for MochiWeb.
+test() ->
+ mochiweb_util:test(),
+ mochiweb_headers:test(),
+ mochiweb_cookies:test(),
+ mochihex:test(),
+ mochinum:test(),
+ mochijson:test(),
+ mochiweb_charref:test(),
+ mochiweb_html:test(),
+ mochifmt:test(),
+ test_request(),
+ ok.
+
+reload() ->
+ [c:l(Module) || Module <- all_loaded()].
+
+all_loaded() ->
+ all_loaded(filename:dirname(code:which(?MODULE))).
+
+all_loaded(Base) when is_atom(Base) ->
+ [];
+all_loaded(Base) ->
+ FullBase = Base ++ "/",
+ F = fun ({_Module, Loaded}, Acc) when is_atom(Loaded) ->
+ Acc;
+ ({Module, Loaded}, Acc) ->
+ case lists:prefix(FullBase, Loaded) of
+ true ->
+ [Module | Acc];
+ false ->
+ Acc
+ end
+ end,
+ lists:foldl(F, [], code:all_loaded()).
+
+
+%% @spec new_request({Socket, Request, Headers}) -> MochiWebRequest
+%% @doc Return a mochiweb_request data structure.
+new_request({Socket, {Method, {abs_path, Uri}, Version}, Headers}) ->
+ mochiweb_request:new(Socket,
+ Method,
+ Uri,
+ Version,
+ mochiweb_headers:make(Headers));
+% this case probably doesn't "exist".
+new_request({Socket, {Method, {absoluteURI, _Protocol, _Host, _Port, Uri},
+ Version}, Headers}) ->
+ mochiweb_request:new(Socket,
+ Method,
+ Uri,
+ Version,
+ mochiweb_headers:make(Headers));
+%% Request-URI is "*"
+%% From http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
+new_request({Socket, {Method, '*'=Uri, Version}, Headers}) ->
+ mochiweb_request:new(Socket,
+ Method,
+ Uri,
+ Version,
+ mochiweb_headers:make(Headers)).
+
+%% @spec new_response({Request, integer(), Headers}) -> MochiWebResponse
+%% @doc Return a mochiweb_response data structure.
+new_response({Request, Code, Headers}) ->
+ mochiweb_response:new(Request,
+ Code,
+ mochiweb_headers:make(Headers)).
+
+%% Internal API
+
+test_request() ->
+ R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []),
+ "/foo/bar/baz wibble quux" = R:get(path),
+ ok.
+
+ensure_started(App) ->
+ case application:start(App) of
+ ok ->
+ ok;
+ {error, {already_started, App}} ->
+ ok
+ end.
diff --git a/apps/mochiweb/src/mochiweb_app.erl b/apps/mochiweb/src/mochiweb_app.erl
new file mode 100644
index 00000000..2b437f6c
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_app.erl
@@ -0,0 +1,20 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Callbacks for the mochiweb application.
+
+-module(mochiweb_app).
+-author('bob@mochimedia.com').
+
+-behaviour(application).
+-export([start/2,stop/1]).
+
+%% @spec start(_Type, _StartArgs) -> ServerRet
+%% @doc application start callback for mochiweb.
+start(_Type, _StartArgs) ->
+ mochiweb_sup:start_link().
+
+%% @spec stop(_State) -> ServerRet
+%% @doc application stop callback for mochiweb.
+stop(_State) ->
+ ok.
diff --git a/apps/mochiweb/src/mochiweb_charref.erl b/apps/mochiweb/src/mochiweb_charref.erl
new file mode 100644
index 00000000..d037d2f8
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_charref.erl
@@ -0,0 +1,295 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Converts HTML 4 charrefs and entities to codepoints.
+-module(mochiweb_charref).
+-export([charref/1, test/0]).
+
+%% External API.
+
+%% @spec charref(S) -> integer() | undefined
+%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
+%% codepoint, or return undefined on failure.
+%% The input should not include an ampersand or semicolon.
+%% charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38.
+charref(B) when is_binary(B) ->
+ charref(binary_to_list(B));
+charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
+ try erlang:list_to_integer(L, 16)
+ catch
+ error:badarg -> undefined
+ end;
+charref([$# | L]) ->
+ try list_to_integer(L)
+ catch
+ error:badarg -> undefined
+ end;
+charref(L) ->
+ entity(L).
+
+%% @spec test() -> ok
+%% @doc Run tests for mochiweb_charref.
+test() ->
+ 1234 = charref("#1234"),
+ 255 = charref("#xfF"),
+ 255 = charref("#XFf"),
+ 38 = charref("amp"),
+ undefined = charref("not_an_entity"),
+ ok.
+
+%% Internal API.
+
+entity("nbsp") -> 160;
+entity("iexcl") -> 161;
+entity("cent") -> 162;
+entity("pound") -> 163;
+entity("curren") -> 164;
+entity("yen") -> 165;
+entity("brvbar") -> 166;
+entity("sect") -> 167;
+entity("uml") -> 168;
+entity("copy") -> 169;
+entity("ordf") -> 170;
+entity("laquo") -> 171;
+entity("not") -> 172;
+entity("shy") -> 173;
+entity("reg") -> 174;
+entity("macr") -> 175;
+entity("deg") -> 176;
+entity("plusmn") -> 177;
+entity("sup2") -> 178;
+entity("sup3") -> 179;
+entity("acute") -> 180;
+entity("micro") -> 181;
+entity("para") -> 182;
+entity("middot") -> 183;
+entity("cedil") -> 184;
+entity("sup1") -> 185;
+entity("ordm") -> 186;
+entity("raquo") -> 187;
+entity("frac14") -> 188;
+entity("frac12") -> 189;
+entity("frac34") -> 190;
+entity("iquest") -> 191;
+entity("Agrave") -> 192;
+entity("Aacute") -> 193;
+entity("Acirc") -> 194;
+entity("Atilde") -> 195;
+entity("Auml") -> 196;
+entity("Aring") -> 197;
+entity("AElig") -> 198;
+entity("Ccedil") -> 199;
+entity("Egrave") -> 200;
+entity("Eacute") -> 201;
+entity("Ecirc") -> 202;
+entity("Euml") -> 203;
+entity("Igrave") -> 204;
+entity("Iacute") -> 205;
+entity("Icirc") -> 206;
+entity("Iuml") -> 207;
+entity("ETH") -> 208;
+entity("Ntilde") -> 209;
+entity("Ograve") -> 210;
+entity("Oacute") -> 211;
+entity("Ocirc") -> 212;
+entity("Otilde") -> 213;
+entity("Ouml") -> 214;
+entity("times") -> 215;
+entity("Oslash") -> 216;
+entity("Ugrave") -> 217;
+entity("Uacute") -> 218;
+entity("Ucirc") -> 219;
+entity("Uuml") -> 220;
+entity("Yacute") -> 221;
+entity("THORN") -> 222;
+entity("szlig") -> 223;
+entity("agrave") -> 224;
+entity("aacute") -> 225;
+entity("acirc") -> 226;
+entity("atilde") -> 227;
+entity("auml") -> 228;
+entity("aring") -> 229;
+entity("aelig") -> 230;
+entity("ccedil") -> 231;
+entity("egrave") -> 232;
+entity("eacute") -> 233;
+entity("ecirc") -> 234;
+entity("euml") -> 235;
+entity("igrave") -> 236;
+entity("iacute") -> 237;
+entity("icirc") -> 238;
+entity("iuml") -> 239;
+entity("eth") -> 240;
+entity("ntilde") -> 241;
+entity("ograve") -> 242;
+entity("oacute") -> 243;
+entity("ocirc") -> 244;
+entity("otilde") -> 245;
+entity("ouml") -> 246;
+entity("divide") -> 247;
+entity("oslash") -> 248;
+entity("ugrave") -> 249;
+entity("uacute") -> 250;
+entity("ucirc") -> 251;
+entity("uuml") -> 252;
+entity("yacute") -> 253;
+entity("thorn") -> 254;
+entity("yuml") -> 255;
+entity("fnof") -> 402;
+entity("Alpha") -> 913;
+entity("Beta") -> 914;
+entity("Gamma") -> 915;
+entity("Delta") -> 916;
+entity("Epsilon") -> 917;
+entity("Zeta") -> 918;
+entity("Eta") -> 919;
+entity("Theta") -> 920;
+entity("Iota") -> 921;
+entity("Kappa") -> 922;
+entity("Lambda") -> 923;
+entity("Mu") -> 924;
+entity("Nu") -> 925;
+entity("Xi") -> 926;
+entity("Omicron") -> 927;
+entity("Pi") -> 928;
+entity("Rho") -> 929;
+entity("Sigma") -> 931;
+entity("Tau") -> 932;
+entity("Upsilon") -> 933;
+entity("Phi") -> 934;
+entity("Chi") -> 935;
+entity("Psi") -> 936;
+entity("Omega") -> 937;
+entity("alpha") -> 945;
+entity("beta") -> 946;
+entity("gamma") -> 947;
+entity("delta") -> 948;
+entity("epsilon") -> 949;
+entity("zeta") -> 950;
+entity("eta") -> 951;
+entity("theta") -> 952;
+entity("iota") -> 953;
+entity("kappa") -> 954;
+entity("lambda") -> 955;
+entity("mu") -> 956;
+entity("nu") -> 957;
+entity("xi") -> 958;
+entity("omicron") -> 959;
+entity("pi") -> 960;
+entity("rho") -> 961;
+entity("sigmaf") -> 962;
+entity("sigma") -> 963;
+entity("tau") -> 964;
+entity("upsilon") -> 965;
+entity("phi") -> 966;
+entity("chi") -> 967;
+entity("psi") -> 968;
+entity("omega") -> 969;
+entity("thetasym") -> 977;
+entity("upsih") -> 978;
+entity("piv") -> 982;
+entity("bull") -> 8226;
+entity("hellip") -> 8230;
+entity("prime") -> 8242;
+entity("Prime") -> 8243;
+entity("oline") -> 8254;
+entity("frasl") -> 8260;
+entity("weierp") -> 8472;
+entity("image") -> 8465;
+entity("real") -> 8476;
+entity("trade") -> 8482;
+entity("alefsym") -> 8501;
+entity("larr") -> 8592;
+entity("uarr") -> 8593;
+entity("rarr") -> 8594;
+entity("darr") -> 8595;
+entity("harr") -> 8596;
+entity("crarr") -> 8629;
+entity("lArr") -> 8656;
+entity("uArr") -> 8657;
+entity("rArr") -> 8658;
+entity("dArr") -> 8659;
+entity("hArr") -> 8660;
+entity("forall") -> 8704;
+entity("part") -> 8706;
+entity("exist") -> 8707;
+entity("empty") -> 8709;
+entity("nabla") -> 8711;
+entity("isin") -> 8712;
+entity("notin") -> 8713;
+entity("ni") -> 8715;
+entity("prod") -> 8719;
+entity("sum") -> 8721;
+entity("minus") -> 8722;
+entity("lowast") -> 8727;
+entity("radic") -> 8730;
+entity("prop") -> 8733;
+entity("infin") -> 8734;
+entity("ang") -> 8736;
+entity("and") -> 8743;
+entity("or") -> 8744;
+entity("cap") -> 8745;
+entity("cup") -> 8746;
+entity("int") -> 8747;
+entity("there4") -> 8756;
+entity("sim") -> 8764;
+entity("cong") -> 8773;
+entity("asymp") -> 8776;
+entity("ne") -> 8800;
+entity("equiv") -> 8801;
+entity("le") -> 8804;
+entity("ge") -> 8805;
+entity("sub") -> 8834;
+entity("sup") -> 8835;
+entity("nsub") -> 8836;
+entity("sube") -> 8838;
+entity("supe") -> 8839;
+entity("oplus") -> 8853;
+entity("otimes") -> 8855;
+entity("perp") -> 8869;
+entity("sdot") -> 8901;
+entity("lceil") -> 8968;
+entity("rceil") -> 8969;
+entity("lfloor") -> 8970;
+entity("rfloor") -> 8971;
+entity("lang") -> 9001;
+entity("rang") -> 9002;
+entity("loz") -> 9674;
+entity("spades") -> 9824;
+entity("clubs") -> 9827;
+entity("hearts") -> 9829;
+entity("diams") -> 9830;
+entity("quot") -> 34;
+entity("amp") -> 38;
+entity("lt") -> 60;
+entity("gt") -> 62;
+entity("OElig") -> 338;
+entity("oelig") -> 339;
+entity("Scaron") -> 352;
+entity("scaron") -> 353;
+entity("Yuml") -> 376;
+entity("circ") -> 710;
+entity("tilde") -> 732;
+entity("ensp") -> 8194;
+entity("emsp") -> 8195;
+entity("thinsp") -> 8201;
+entity("zwnj") -> 8204;
+entity("zwj") -> 8205;
+entity("lrm") -> 8206;
+entity("rlm") -> 8207;
+entity("ndash") -> 8211;
+entity("mdash") -> 8212;
+entity("lsquo") -> 8216;
+entity("rsquo") -> 8217;
+entity("sbquo") -> 8218;
+entity("ldquo") -> 8220;
+entity("rdquo") -> 8221;
+entity("bdquo") -> 8222;
+entity("dagger") -> 8224;
+entity("Dagger") -> 8225;
+entity("permil") -> 8240;
+entity("lsaquo") -> 8249;
+entity("rsaquo") -> 8250;
+entity("euro") -> 8364;
+entity(_) -> undefined.
+
diff --git a/apps/mochiweb/src/mochiweb_cookies.erl b/apps/mochiweb/src/mochiweb_cookies.erl
new file mode 100644
index 00000000..61711ff0
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_cookies.erl
@@ -0,0 +1,257 @@
+%% @author Emad El-Haraty <emad@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc HTTP Cookie parsing and generating (RFC 2109, RFC 2965).
+
+-module(mochiweb_cookies).
+-export([parse_cookie/1, cookie/3, cookie/2, test/0]).
+
+-define(QUOTE, $\").
+
+-define(IS_WHITESPACE(C),
+ (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
+
+%% RFC 2616 separators (called tspecials in RFC 2068)
+-define(IS_SEPARATOR(C),
+ (C < 32 orelse
+ C =:= $\s orelse C =:= $\t orelse
+ C =:= $( orelse C =:= $) orelse C =:= $< orelse C =:= $> orelse
+ C =:= $@ orelse C =:= $, orelse C =:= $; orelse C =:= $: orelse
+ C =:= $\\ orelse C =:= $\" orelse C =:= $/ orelse
+ C =:= $[ orelse C =:= $] orelse C =:= $? orelse C =:= $= orelse
+ C =:= ${ orelse C =:= $})).
+
+%% @type proplist() = [{Key::string(), Value::string()}].
+%% @type header() = {Name::string(), Value::string()}.
+
+%% @spec cookie(Key::string(), Value::string()) -> header()
+%% @doc Short-hand for <code>cookie(Key, Value, [])</code>.
+cookie(Key, Value) ->
+ cookie(Key, Value, []).
+
+%% @spec cookie(Key::string(), Value::string(), Options::[Option]) -> header()
+%% where Option = {max_age, integer()} | {local_time, {date(), time()}}
+%% | {domain, string()} | {path, string()}
+%% | {secure, true | false} | {http_only, true | false}
+%%
+%% @doc Generate a Set-Cookie header field tuple.
+cookie(Key, Value, Options) ->
+ Cookie = [any_to_list(Key), "=", quote(Value), "; Version=1"],
+ %% Set-Cookie:
+ %% Comment, Domain, Max-Age, Path, Secure, Version
+ %% Set-Cookie2:
+ %% Comment, CommentURL, Discard, Domain, Max-Age, Path, Port, Secure,
+ %% Version
+ ExpiresPart =
+ case proplists:get_value(max_age, Options) of
+ undefined ->
+ "";
+ RawAge ->
+ When = case proplists:get_value(local_time, Options) of
+ undefined ->
+ calendar:local_time();
+ LocalTime ->
+ LocalTime
+ end,
+ Age = case RawAge < 0 of
+ true ->
+ 0;
+ false ->
+ RawAge
+ end,
+ ["; Expires=", age_to_cookie_date(Age, When),
+ "; Max-Age=", quote(Age)]
+ end,
+ SecurePart =
+ case proplists:get_value(secure, Options) of
+ true ->
+ "; Secure";
+ _ ->
+ ""
+ end,
+ DomainPart =
+ case proplists:get_value(domain, Options) of
+ undefined ->
+ "";
+ Domain ->
+ ["; Domain=", quote(Domain)]
+ end,
+ PathPart =
+ case proplists:get_value(path, Options) of
+ undefined ->
+ "";
+ Path ->
+ ["; Path=", quote(Path)]
+ end,
+ HttpOnlyPart =
+ case proplists:get_value(http_only, Options) of
+ true ->
+ "; HttpOnly";
+ _ ->
+ ""
+ end,
+ CookieParts = [Cookie, ExpiresPart, SecurePart, DomainPart, PathPart, HttpOnlyPart],
+ {"Set-Cookie", lists:flatten(CookieParts)}.
+
+
+%% Every major browser incorrectly handles quoted strings in a
+%% different and (worse) incompatible manner. Instead of wasting time
+%% writing redundant code for each browser, we restrict cookies to
+%% only contain characters that browsers handle compatibly.
+%%
+%% By replacing the definition of quote with this, we generate
+%% RFC-compliant cookies:
+%%
+%% quote(V) ->
+%% Fun = fun(?QUOTE, Acc) -> [$\\, ?QUOTE | Acc];
+%% (Ch, Acc) -> [Ch | Acc]
+%% end,
+%% [?QUOTE | lists:foldr(Fun, [?QUOTE], V)].
+
+%% Convert to a string and raise an error if quoting is required.
+quote(V0) ->
+ V = any_to_list(V0),
+ lists:all(fun(Ch) -> Ch =:= $/ orelse not ?IS_SEPARATOR(Ch) end, V)
+ orelse erlang:error({cookie_quoting_required, V}),
+ V.
+
+add_seconds(Secs, LocalTime) ->
+ Greg = calendar:datetime_to_gregorian_seconds(LocalTime),
+ calendar:gregorian_seconds_to_datetime(Greg + Secs).
+
+age_to_cookie_date(Age, LocalTime) ->
+ httpd_util:rfc1123_date(add_seconds(Age, LocalTime)).
+
+%% @spec parse_cookie(string()) -> [{K::string(), V::string()}]
+%% @doc Parse the contents of a Cookie header field, ignoring cookie
+%% attributes, and return a simple property list.
+parse_cookie("") ->
+ [];
+parse_cookie(Cookie) ->
+ parse_cookie(Cookie, []).
+
+%% @spec test() -> ok
+%% @doc Run tests for mochiweb_cookies.
+test() ->
+ parse_cookie_test(),
+ cookie_test(),
+ ok.
+
+%% Internal API
+
+parse_cookie([], Acc) ->
+ lists:reverse(Acc);
+parse_cookie(String, Acc) ->
+ {{Token, Value}, Rest} = read_pair(String),
+ Acc1 = case Token of
+ "" ->
+ Acc;
+ "$" ++ _ ->
+ Acc;
+ _ ->
+ [{Token, Value} | Acc]
+ end,
+ parse_cookie(Rest, Acc1).
+
+read_pair(String) ->
+ {Token, Rest} = read_token(skip_whitespace(String)),
+ {Value, Rest1} = read_value(skip_whitespace(Rest)),
+ {{Token, Value}, skip_past_separator(Rest1)}.
+
+read_value([$= | Value]) ->
+ Value1 = skip_whitespace(Value),
+ case Value1 of
+ [?QUOTE | _] ->
+ read_quoted(Value1);
+ _ ->
+ read_token(Value1)
+ end;
+read_value(String) ->
+ {"", String}.
+
+read_quoted([?QUOTE | String]) ->
+ read_quoted(String, []).
+
+read_quoted([], Acc) ->
+ {lists:reverse(Acc), []};
+read_quoted([?QUOTE | Rest], Acc) ->
+ {lists:reverse(Acc), Rest};
+read_quoted([$\\, Any | Rest], Acc) ->
+ read_quoted(Rest, [Any | Acc]);
+read_quoted([C | Rest], Acc) ->
+ read_quoted(Rest, [C | Acc]).
+
+skip_whitespace(String) ->
+ F = fun (C) -> ?IS_WHITESPACE(C) end,
+ lists:dropwhile(F, String).
+
+read_token(String) ->
+ F = fun (C) -> not ?IS_SEPARATOR(C) end,
+ lists:splitwith(F, String).
+
+skip_past_separator([]) ->
+ [];
+skip_past_separator([$; | Rest]) ->
+ Rest;
+skip_past_separator([$, | Rest]) ->
+ Rest;
+skip_past_separator([_ | Rest]) ->
+ skip_past_separator(Rest).
+
+parse_cookie_test() ->
+ %% RFC example
+ C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";
+ Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";
+ Shipping=\"FedEx\"; $Path=\"/acme\"",
+ [
+ {"Customer","WILE_E_COYOTE"},
+ {"Part_Number","Rocket_Launcher_0001"},
+ {"Shipping","FedEx"}
+ ] = parse_cookie(C1),
+ %% Potential edge cases
+ [{"foo", "x"}] = parse_cookie("foo=\"\\x\""),
+ [] = parse_cookie("="),
+ [{"foo", ""}, {"bar", ""}] = parse_cookie(" foo ; bar "),
+ [{"foo", ""}, {"bar", ""}] = parse_cookie("foo=;bar="),
+ [{"foo", "\";"}, {"bar", ""}] = parse_cookie("foo = \"\\\";\";bar "),
+ [{"foo", "\";bar"}] = parse_cookie("foo=\"\\\";bar").
+
+any_to_list(V) when is_list(V) ->
+ V;
+any_to_list(V) when is_atom(V) ->
+ atom_to_list(V);
+any_to_list(V) when is_binary(V) ->
+ binary_to_list(V);
+any_to_list(V) when is_integer(V) ->
+ integer_to_list(V).
+
+
+cookie_test() ->
+ C1 = {"Set-Cookie",
+ "Customer=WILE_E_COYOTE; "
+ "Version=1; "
+ "Path=/acme"},
+ C1 = cookie("Customer", "WILE_E_COYOTE", [{path, "/acme"}]),
+ C1 = cookie("Customer", "WILE_E_COYOTE",
+ [{path, "/acme"}, {badoption, "negatory"}]),
+ C1 = cookie('Customer', 'WILE_E_COYOTE', [{path, '/acme'}]),
+ C1 = cookie(<<"Customer">>, <<"WILE_E_COYOTE">>, [{path, <<"/acme">>}]),
+
+ {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey", []),
+
+ LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}),
+ C2 = {"Set-Cookie",
+ "Customer=WILE_E_COYOTE; "
+ "Version=1; "
+ "Expires=Tue, 15 May 2007 13:45:33 GMT; "
+ "Max-Age=0"},
+ C2 = cookie("Customer", "WILE_E_COYOTE",
+ [{max_age, -111}, {local_time, LocalTime}]),
+ C3 = {"Set-Cookie",
+ "Customer=WILE_E_COYOTE; "
+ "Version=1; "
+ "Expires=Wed, 16 May 2007 13:45:50 GMT; "
+ "Max-Age=86417"},
+ C3 = cookie("Customer", "WILE_E_COYOTE",
+ [{max_age, 86417}, {local_time, LocalTime}]),
+ ok.
diff --git a/apps/mochiweb/src/mochiweb_echo.erl b/apps/mochiweb/src/mochiweb_echo.erl
new file mode 100644
index 00000000..f32d6803
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_echo.erl
@@ -0,0 +1,31 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Simple and stupid echo server to demo mochiweb_socket_server.
+
+-module(mochiweb_echo).
+-author('bob@mochimedia.com').
+-export([start/0, stop/0, loop/1]).
+
+stop() ->
+ mochiweb_socket_server:stop(?MODULE).
+
+start() ->
+ mochiweb_socket_server:start([{name, ?MODULE},
+ {port, 6789},
+ {ip, "127.0.0.1"},
+ {max, 1},
+ {loop, {?MODULE, loop}}]).
+
+loop(Socket) ->
+ case gen_tcp:recv(Socket, 0, 30000) of
+ {ok, Data} ->
+ case gen_tcp:send(Socket, Data) of
+ ok ->
+ loop(Socket);
+ _ ->
+ exit(normal)
+ end;
+ _Other ->
+ exit(normal)
+ end.
diff --git a/apps/mochiweb/src/mochiweb_headers.erl b/apps/mochiweb/src/mochiweb_headers.erl
new file mode 100644
index 00000000..d90fd679
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_headers.erl
@@ -0,0 +1,251 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Case preserving (but case insensitive) HTTP Header dictionary.
+
+-module(mochiweb_headers).
+-author('bob@mochimedia.com').
+-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]).
+-export([delete_any/2, get_primary_value/2]).
+-export([default/3, enter_from_list/2, default_from_list/2]).
+-export([to_list/1, make/1]).
+-export([from_binary/1]).
+-export([test/0]).
+
+%% @type headers().
+%% @type key() = atom() | binary() | string().
+%% @type value() = atom() | binary() | string() | integer().
+
+%% @spec test() -> ok
+%% @doc Run tests for this module.
+test() ->
+ H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),
+ [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H),
+ H1 = ?MODULE:insert(taco, grande, H),
+ [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1),
+ H2 = ?MODULE:make([{"Set-Cookie", "foo"}]),
+ [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2),
+ H3 = ?MODULE:insert("Set-Cookie", "bar", H2),
+ [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3),
+ "foo, bar" = ?MODULE:get_value("set-cookie", H3),
+ {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3),
+ undefined = ?MODULE:get_value("shibby", H3),
+ none = ?MODULE:lookup("shibby", H3),
+ H4 = ?MODULE:insert("content-type",
+ "application/x-www-form-urlencoded; charset=utf8",
+ H3),
+ "application/x-www-form-urlencoded" = ?MODULE:get_primary_value(
+ "content-type", H4),
+ H4 = ?MODULE:delete_any("nonexistent-header", H4),
+ H3 = ?MODULE:delete_any("content-type", H4),
+ HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
+ H_HB = ?MODULE:from_binary(HB),
+ H_HB = ?MODULE:from_binary(binary_to_list(HB)),
+ "47" = ?MODULE:get_value("Content-Length", H_HB),
+ "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
+ L_H_HB = ?MODULE:to_list(H_HB),
+ 2 = length(L_H_HB),
+ true = lists:member({'Content-Length', "47"}, L_H_HB),
+ true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
+ HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
+ HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
+ HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
+ H_HL = ?MODULE:from_binary(HL),
+ H_HL = ?MODULE:from_binary(HL2),
+ H_HL = ?MODULE:from_binary(HL3),
+ "47" = ?MODULE:get_value("Content-Length", H_HL),
+ "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
+ L_H_HL = ?MODULE:to_list(H_HL),
+ 2 = length(L_H_HL),
+ true = lists:member({'Content-Length', "47"}, L_H_HL),
+ true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
+ [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
+ [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
+ [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
+ [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
+ [] = ?MODULE:to_list(?MODULE:from_binary("")),
+ [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
+ [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
+ [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
+ [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
+ ok.
+
+%% @spec empty() -> headers()
+%% @doc Create an empty headers structure.
+empty() ->
+ gb_trees:empty().
+
+%% @spec make(headers() | [{key(), value()}]) -> headers()
+%% @doc Construct a headers() from the given list.
+make(L) when is_list(L) ->
+ from_list(L);
+%% assume a tuple is already mochiweb_headers.
+make(T) when is_tuple(T) ->
+ T.
+
+%% @spec from_binary(RawHttpHeader()) -> headers()
+%% @type RawHttpHeader() -> string() | binary() | [ string() | binary() ]
+%%
+%% @doc Transforms a raw HTTP header into a mochiweb headers structure.
+%%
+%% The given raw HTTP header can be one of the following:
+%%
+%% 1) A string or a binary representing a full HTTP header ending with
+%% double CRLF.
+%% Examples:
+%% "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
+%% <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>
+%%
+%% 2) A list of binaries or strings where each element represents a raw
+%% HTTP header line ending with a single CRLF.
+%% Examples:
+%% [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ]
+%% [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ]
+%% [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ]
+%%
+from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
+ from_binary(RawHttpHeader, []);
+
+from_binary(RawHttpHeaderList) ->
+ from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])).
+
+from_binary(RawHttpHeader, Acc) ->
+ case erlang:decode_packet(httph, RawHttpHeader, []) of
+ { ok, {http_header, _, H, _, V}, Rest } ->
+ from_binary(Rest, [{H, V} | Acc]);
+ _ ->
+ make(Acc)
+ end.
+
+%% @spec from_list([{key(), value()}]) -> headers()
+%% @doc Construct a headers() from the given list.
+from_list(List) ->
+ lists:foldl(fun ({K, V}, T) -> insert(K, V, T) end, empty(), List).
+
+%% @spec enter_from_list([{key(), value()}], headers()) -> headers()
+%% @doc Insert pairs into the headers, replace any values for existing keys.
+enter_from_list(List, T) ->
+ lists:foldl(fun ({K, V}, T1) -> enter(K, V, T1) end, T, List).
+
+%% @spec default_from_list([{key(), value()}], headers()) -> headers()
+%% @doc Insert pairs into the headers for keys that do not already exist.
+default_from_list(List, T) ->
+ lists:foldl(fun ({K, V}, T1) -> default(K, V, T1) end, T, List).
+
+%% @spec to_list(headers()) -> [{key(), string()}]
+%% @doc Return the contents of the headers. The keys will be the exact key
+%% that was first inserted (e.g. may be an atom or binary, case is
+%% preserved).
+to_list(T) ->
+ F = fun ({K, {array, L}}, Acc) ->
+ L1 = lists:reverse(L),
+ lists:foldl(fun (V, Acc1) -> [{K, V} | Acc1] end, Acc, L1);
+ (Pair, Acc) ->
+ [Pair | Acc]
+ end,
+ lists:reverse(lists:foldl(F, [], gb_trees:values(T))).
+
+%% @spec get_value(key(), headers()) -> string() | undefined
+%% @doc Return the value of the given header using a case insensitive search.
+%% undefined will be returned for keys that are not present.
+get_value(K, T) ->
+ case lookup(K, T) of
+ {value, {_, V}} ->
+ expand(V);
+ none ->
+ undefined
+ end.
+
+%% @spec get_primary_value(key(), headers()) -> string() | undefined
+%% @doc Return the value of the given header up to the first semicolon using
+%% a case insensitive search. undefined will be returned for keys
+%% that are not present.
+get_primary_value(K, T) ->
+ case get_value(K, T) of
+ undefined ->
+ undefined;
+ V ->
+ lists:takewhile(fun (C) -> C =/= $; end, V)
+ end.
+
+%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none
+%% @doc Return the case preserved key and value for the given header using
+%% a case insensitive search. none will be returned for keys that are
+%% not present.
+lookup(K, T) ->
+ case gb_trees:lookup(normalize(K), T) of
+ {value, {K0, V}} ->
+ {value, {K0, expand(V)}};
+ none ->
+ none
+ end.
+
+%% @spec default(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers if it does not already exist.
+default(K, V, T) ->
+ K1 = normalize(K),
+ V1 = any_to_list(V),
+ try gb_trees:insert(K1, {K, V1}, T)
+ catch
+ error:{key_exists, _} ->
+ T
+ end.
+
+%% @spec enter(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers, replacing any pre-existing key.
+enter(K, V, T) ->
+ K1 = normalize(K),
+ V1 = any_to_list(V),
+ gb_trees:enter(K1, {K, V1}, T).
+
+%% @spec insert(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers, merging with any pre-existing key.
+%% A merge is done with Value = V0 ++ ", " ++ V1.
+insert(K, V, T) ->
+ K1 = normalize(K),
+ V1 = any_to_list(V),
+ try gb_trees:insert(K1, {K, V1}, T)
+ catch
+ error:{key_exists, _} ->
+ {K0, V0} = gb_trees:get(K1, T),
+ V2 = merge(K1, V1, V0),
+ gb_trees:update(K1, {K0, V2}, T)
+ end.
+
+%% @spec delete_any(key(), headers()) -> headers()
+%% @doc Delete the header corresponding to key if it is present.
+delete_any(K, T) ->
+ K1 = normalize(K),
+ gb_trees:delete_any(K1, T).
+
+%% Internal API
+
+expand({array, L}) ->
+ mochiweb_util:join(lists:reverse(L), ", ");
+expand(V) ->
+ V.
+
+merge("set-cookie", V1, {array, L}) ->
+ {array, [V1 | L]};
+merge("set-cookie", V1, V0) ->
+ {array, [V1, V0]};
+merge(_, V1, V0) ->
+ V0 ++ ", " ++ V1.
+
+normalize(K) when is_list(K) ->
+ string:to_lower(K);
+normalize(K) when is_atom(K) ->
+ normalize(atom_to_list(K));
+normalize(K) when is_binary(K) ->
+ normalize(binary_to_list(K)).
+
+any_to_list(V) when is_list(V) ->
+ V;
+any_to_list(V) when is_atom(V) ->
+ atom_to_list(V);
+any_to_list(V) when is_binary(V) ->
+ binary_to_list(V);
+any_to_list(V) when is_integer(V) ->
+ integer_to_list(V).
+
+
diff --git a/apps/mochiweb/src/mochiweb_html.erl b/apps/mochiweb/src/mochiweb_html.erl
new file mode 100644
index 00000000..77100d50
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_html.erl
@@ -0,0 +1,893 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Loosely tokenizes and generates parse trees for HTML 4.
+-module(mochiweb_html).
+-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
+ escape_attr/1, to_html/1, test/0]).
+
+% This is a macro to placate syntax highlighters..
+-define(QUOTE, $\").
+-define(SQUOTE, $\').
+-define(ADV_COL(S, N),
+ S#decoder{column=N+S#decoder.column,
+ offset=N+S#decoder.offset}).
+-define(INC_COL(S),
+ S#decoder{column=1+S#decoder.column,
+ offset=1+S#decoder.offset}).
+-define(INC_LINE(S),
+ S#decoder{column=1,
+ line=1+S#decoder.line,
+ offset=1+S#decoder.offset}).
+-define(INC_CHAR(S, C),
+ case C of
+ $\n ->
+ S#decoder{column=1,
+ line=1+S#decoder.line,
+ offset=1+S#decoder.offset};
+ _ ->
+ S#decoder{column=1+S#decoder.column,
+ offset=1+S#decoder.offset}
+ end).
+
+-define(IS_WHITESPACE(C),
+ (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
+-define(IS_LITERAL_SAFE(C),
+ ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
+ orelse (C >= $0 andalso C =< $9))).
+
+-record(decoder, {line=1,
+ column=1,
+ offset=0}).
+
+%% @type html_node() = {string(), [html_attr()], [html_node() | string()]}
+%% @type html_attr() = {string(), string()}
+%% @type html_token() = html_data() | start_tag() | end_tag() | inline_html() | html_comment() | html_doctype()
+%% @type html_data() = {data, string(), Whitespace::boolean()}
+%% @type start_tag() = {start_tag, Name, [html_attr()], Singleton::boolean()}
+%% @type end_tag() = {end_tag, Name}
+%% @type html_comment() = {comment, Comment}
+%% @type html_doctype() = {doctype, [Doctype]}
+%% @type inline_html() = {'=', iolist()}
+
+%% External API.
+
+%% @spec parse(string() | binary()) -> html_node()
+%% @doc tokenize and then transform the token stream into a HTML tree.
+parse(Input) ->
+ parse_tokens(tokens(Input)).
+
+%% @spec parse_tokens([html_token()]) -> html_node()
+%% @doc Transform the output of tokens(Doc) into a HTML tree.
+parse_tokens(Tokens) when is_list(Tokens) ->
+ %% Skip over doctype, processing instructions
+ F = fun (X) ->
+ case X of
+ {start_tag, _, _, false} ->
+ false;
+ _ ->
+ true
+ end
+ end,
+ [{start_tag, Tag, Attrs, false} | Rest] = lists:dropwhile(F, Tokens),
+ {Tree, _} = tree(Rest, [norm({Tag, Attrs})]),
+ Tree.
+
+%% @spec tokens(StringOrBinary) -> [html_token()]
+%% @doc Transform the input UTF-8 HTML into a token stream.
+tokens(Input) ->
+ tokens(iolist_to_binary(Input), #decoder{}, []).
+
+%% @spec to_tokens(html_node()) -> [html_token()]
+%% @doc Convert a html_node() tree to a list of tokens.
+to_tokens({Tag0}) ->
+ to_tokens({Tag0, [], []});
+to_tokens(T={'=', _}) ->
+ [T];
+to_tokens(T={doctype, _}) ->
+ [T];
+to_tokens(T={comment, _}) ->
+ [T];
+to_tokens({Tag0, Acc}) ->
+ to_tokens({Tag0, [], Acc});
+to_tokens({Tag0, Attrs, Acc}) ->
+ Tag = to_tag(Tag0),
+ to_tokens([{Tag, Acc}], [{start_tag, Tag, Attrs, is_singleton(Tag)}]).
+
+%% @spec to_html([html_token()] | html_node()) -> iolist()
+%% @doc Convert a list of html_token() to a HTML document.
+to_html(Node) when is_tuple(Node) ->
+ to_html(to_tokens(Node));
+to_html(Tokens) when is_list(Tokens) ->
+ to_html(Tokens, []).
+
+%% @spec escape(string() | atom() | binary()) -> binary()
+%% @doc Escape a string such that it's safe for HTML (amp; lt; gt;).
+escape(B) when is_binary(B) ->
+ escape(binary_to_list(B), []);
+escape(A) when is_atom(A) ->
+ escape(atom_to_list(A), []);
+escape(S) when is_list(S) ->
+ escape(S, []).
+
+%% @spec escape_attr(string() | binary() | atom() | integer() | float()) -> binary()
+%% @doc Escape a string such that it's safe for HTML attrs
+%% (amp; lt; gt; quot;).
+escape_attr(B) when is_binary(B) ->
+ escape_attr(binary_to_list(B), []);
+escape_attr(A) when is_atom(A) ->
+ escape_attr(atom_to_list(A), []);
+escape_attr(S) when is_list(S) ->
+ escape_attr(S, []);
+escape_attr(I) when is_integer(I) ->
+ escape_attr(integer_to_list(I), []);
+escape_attr(F) when is_float(F) ->
+ escape_attr(mochinum:digits(F), []).
+
+%% @spec test() -> ok
+%% @doc Run tests for mochiweb_html.
+test() ->
+ test_destack(),
+ test_tokens(),
+ test_tokens2(),
+ test_parse(),
+ test_parse2(),
+ test_parse_tokens(),
+ test_escape(),
+ test_escape_attr(),
+ test_to_html(),
+ ok.
+
+
+%% Internal API
+
+test_to_html() ->
+ Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
+ Expect = iolist_to_binary(
+ to_html({html, [],
+ [{<<"head">>, [],
+ [{title, <<"hey!">>}]},
+ {body, [],
+ [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+ {'div', <<"sucka">>},
+ {comment, <<" comment! ">>}]}]})),
+ Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+ Expect1 = iolist_to_binary(
+ to_html({doctype,
+ [<<"html">>, <<"PUBLIC">>,
+ <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+ <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
+ ok.
+to_html([], Acc) ->
+ lists:reverse(Acc);
+to_html([{'=', Content} | Rest], Acc) ->
+ to_html(Rest, [Content | Acc]);
+to_html([{pi, Tag, Attrs} | Rest], Acc) ->
+ Open = [<<"<?">>,
+ Tag,
+ attrs_to_html(Attrs, []),
+ <<"?>">>],
+ to_html(Rest, [Open | Acc]);
+to_html([{comment, Comment} | Rest], Acc) ->
+ to_html(Rest, [[<<"<!--">>, Comment, <<"-->">>] | Acc]);
+to_html([{doctype, Parts} | Rest], Acc) ->
+ Inside = doctype_to_html(Parts, Acc),
+ to_html(Rest, [[<<"<!DOCTYPE">>, Inside, <<">">>] | Acc]);
+to_html([{data, Data, _Whitespace} | Rest], Acc) ->
+ to_html(Rest, [escape(Data) | Acc]);
+to_html([{start_tag, Tag, Attrs, Singleton} | Rest], Acc) ->
+ Open = [<<"<">>,
+ Tag,
+ attrs_to_html(Attrs, []),
+ case Singleton of
+ true -> <<" />">>;
+ false -> <<">">>
+ end],
+ to_html(Rest, [Open | Acc]);
+to_html([{end_tag, Tag} | Rest], Acc) ->
+ to_html(Rest, [[<<"</">>, Tag, <<">">>] | Acc]).
+
+doctype_to_html([], Acc) ->
+ lists:reverse(Acc);
+doctype_to_html([Word | Rest], Acc) ->
+ case lists:all(fun (C) -> ?IS_LITERAL_SAFE(C) end,
+ binary_to_list(iolist_to_binary(Word))) of
+ true ->
+ doctype_to_html(Rest, [[<<" ">>, Word] | Acc]);
+ false ->
+ doctype_to_html(Rest, [[<<" \"">>, escape_attr(Word), ?QUOTE] | Acc])
+ end.
+
+attrs_to_html([], Acc) ->
+ lists:reverse(Acc);
+attrs_to_html([{K, V} | Rest], Acc) ->
+ attrs_to_html(Rest,
+ [[<<" ">>, escape(K), <<"=\"">>,
+ escape_attr(V), <<"\"">>] | Acc]).
+
+test_escape() ->
+ <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
+ escape(<<"&quot;\"word <<up!&quot;">>),
+ ok.
+
+test_escape_attr() ->
+ <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
+ escape_attr(<<"&quot;\"word <<up!&quot;">>),
+ ok.
+
+escape([], Acc) ->
+ list_to_binary(lists:reverse(Acc));
+escape("<" ++ Rest, Acc) ->
+ escape(Rest, lists:reverse("&lt;", Acc));
+escape(">" ++ Rest, Acc) ->
+ escape(Rest, lists:reverse("&gt;", Acc));
+escape("&" ++ Rest, Acc) ->
+ escape(Rest, lists:reverse("&amp;", Acc));
+escape([C | Rest], Acc) ->
+ escape(Rest, [C | Acc]).
+
+escape_attr([], Acc) ->
+ list_to_binary(lists:reverse(Acc));
+escape_attr("<" ++ Rest, Acc) ->
+ escape_attr(Rest, lists:reverse("&lt;", Acc));
+escape_attr(">" ++ Rest, Acc) ->
+ escape_attr(Rest, lists:reverse("&gt;", Acc));
+escape_attr("&" ++ Rest, Acc) ->
+ escape_attr(Rest, lists:reverse("&amp;", Acc));
+escape_attr([?QUOTE | Rest], Acc) ->
+ escape_attr(Rest, lists:reverse("&quot;", Acc));
+escape_attr([C | Rest], Acc) ->
+ escape_attr(Rest, [C | Acc]).
+
+to_tag(A) when is_atom(A) ->
+ norm(atom_to_list(A));
+to_tag(L) ->
+ norm(L).
+
+to_tokens([], Acc) ->
+ lists:reverse(Acc);
+to_tokens([{Tag, []} | Rest], Acc) ->
+ to_tokens(Rest, [{end_tag, to_tag(Tag)} | Acc]);
+to_tokens([{Tag0, [{T0} | R1]} | Rest], Acc) ->
+ %% Allow {br}
+ to_tokens([{Tag0, [{T0, [], []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
+ %% Allow {'=', iolist()}
+ to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
+ %% Allow {comment, iolist()}
+ to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
+ %% Allow {p, [{"class", "foo"}]}
+ to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, C0} | R1]} | Rest], Acc) ->
+ %% Allow {p, "content"} and {p, <<"content">>}
+ to_tokens([{Tag0, [{T0, [], C0} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0} | R1]} | Rest], Acc) when is_binary(C0) ->
+ %% Allow {"p", [{"class", "foo"}], <<"content">>}
+ to_tokens([{Tag0, [{T0, A1, binary_to_list(C0)} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0=[C | _]} | R1]} | Rest], Acc)
+ when is_integer(C) ->
+ %% Allow {"p", [{"class", "foo"}], "content"}
+ to_tokens([{Tag0, [{T0, A1, [C0]} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C1} | R1]} | Rest], Acc) ->
+ %% Native {"p", [{"class", "foo"}], ["content"]}
+ Tag = to_tag(Tag0),
+ T1 = to_tag(T0),
+ case is_singleton(norm(T1)) of
+ true ->
+ to_tokens([{Tag, R1} | Rest], [{start_tag, T1, A1, true} | Acc]);
+ false ->
+ to_tokens([{T1, C1}, {Tag, R1} | Rest],
+ [{start_tag, T1, A1, false} | Acc])
+ end;
+to_tokens([{Tag0, [L | R1]} | Rest], Acc) when is_list(L) ->
+ %% List text
+ Tag = to_tag(Tag0),
+ to_tokens([{Tag, R1} | Rest], [{data, iolist_to_binary(L), false} | Acc]);
+to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
+ %% Binary text
+ Tag = to_tag(Tag0),
+ to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
+
+test_tokens() ->
+ [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+ {<<"wibble">>, <<"wibble">>},
+ {<<"alice">>, <<"bob">>}], true}] =
+ tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>),
+ [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+ {<<"wibble">>, <<"wibble">>},
+ {<<"alice">>, <<"bob">>}], true}] =
+ tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>),
+ [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] =
+ tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>),
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}] =
+ tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>),
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}] =
+ tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>),
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}] =
+ tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>),
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}] =
+ tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>),
+ [{start_tag, <<"textarea">>, [], false},
+ {data, <<"<html></body>">>, false},
+ {end_tag, <<"textarea">>}] =
+ tokens(<<"<textarea><html></body></textarea>">>),
+ ok.
+
+tokens(B, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary>> ->
+ lists:reverse(Acc);
+ _ ->
+ {Tag, S1} = tokenize(B, S),
+ case parse_flag(Tag) of
+ script ->
+ {Tag2, S2} = tokenize_script(B, S1),
+ tokens(B, S2, [Tag2, Tag | Acc]);
+ textarea ->
+ {Tag2, S2} = tokenize_textarea(B, S1),
+ tokens(B, S2, [Tag2, Tag | Acc]);
+ none ->
+ tokens(B, S1, [Tag | Acc])
+ end
+ end.
+
+parse_flag({start_tag, B, _, false}) ->
+ case string:to_lower(binary_to_list(B)) of
+ "script" ->
+ script;
+ "textarea" ->
+ textarea;
+ _ ->
+ none
+ end;
+parse_flag(_) ->
+ none.
+
+tokenize(B, S=#decoder{offset=O}) ->
+ case B of
+ <<_:O/binary, "<!--", _/binary>> ->
+ tokenize_comment(B, ?ADV_COL(S, 4));
+ <<_:O/binary, "<!DOCTYPE", _/binary>> ->
+ tokenize_doctype(B, ?ADV_COL(S, 10));
+ <<_:O/binary, "<![CDATA[", _/binary>> ->
+ tokenize_cdata(B, ?ADV_COL(S, 9));
+ <<_:O/binary, "<?", _/binary>> ->
+ {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)),
+ {Attrs, S2} = tokenize_attributes(B, S1),
+ S3 = find_qgt(B, S2),
+ {{pi, Tag, Attrs}, S3};
+ <<_:O/binary, "&", _/binary>> ->
+ tokenize_charref(B, ?INC_COL(S));
+ <<_:O/binary, "</", _/binary>> ->
+ {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)),
+ {S2, _} = find_gt(B, S1),
+ {{end_tag, Tag}, S2};
+ <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) ->
+ %% This isn't really strict HTML
+ tokenize_data(B, ?INC_COL(S));
+ <<_:O/binary, "<", _/binary>> ->
+ {Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
+ {Attrs, S2} = tokenize_attributes(B, S1),
+ {S3, HasSlash} = find_gt(B, S2),
+ Singleton = HasSlash orelse is_singleton(norm(binary_to_list(Tag))),
+ {{start_tag, Tag, Attrs, Singleton}, S3};
+ _ ->
+ tokenize_data(B, S)
+ end.
+
+test_parse() ->
+ D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
+<html>
+ <head>
+ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
+ <title>Foo</title>
+ <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
+ <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
+ <!--[if lt IE 7]>
+ <style type=\"text/css\">
+ .no_ie { display: none; }
+ </style>
+ <![endif]-->
+ <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ </head>
+ <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
+</html>">>,
+ Expect = {<<"html">>, [],
+ [{<<"head">>, [],
+ [{<<"meta">>,
+ [{<<"http-equiv">>,<<"Content-Type">>},
+ {<<"content">>,<<"text/html; charset=UTF-8">>}],
+ []},
+ {<<"title">>,[],[<<"Foo">>]},
+ {<<"link">>,
+ [{<<"rel">>,<<"stylesheet">>},
+ {<<"type">>,<<"text/css">>},
+ {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
+ {<<"media">>,<<"screen">>}],
+ []},
+ {<<"link">>,
+ [{<<"rel">>,<<"stylesheet">>},
+ {<<"type">>,<<"text/css">>},
+ {<<"href">>,<<"/static/foo.css">>},
+ {<<"media">>,<<"screen">>}],
+ []},
+ {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>},
+ {<<"link">>,
+ [{<<"rel">>,<<"icon">>},
+ {<<"href">>,<<"/static/images/favicon.ico">>},
+ {<<"type">>,<<"image/x-icon">>}],
+ []},
+ {<<"link">>,
+ [{<<"rel">>,<<"shortcut icon">>},
+ {<<"href">>,<<"/static/images/favicon.ico">>},
+ {<<"type">>,<<"image/x-icon">>}],
+ []}]},
+ {<<"body">>,
+ [{<<"id">>,<<"home">>},
+ {<<"class">>,<<"tundra">>}],
+ [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
+ Expect = parse(D0),
+ ok.
+
+test_tokens2() ->
+ D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
+ Expect = [{start_tag,<<"channel">>,[],false},
+ {start_tag,<<"title">>,[],false},
+ {data,<<"from __future__ import *">>,false},
+ {end_tag,<<"title">>},
+ {start_tag,<<"link">>,[],true},
+ {data,<<"http://bob.pythonmac.org">>,false},
+ {end_tag,<<"link">>},
+ {start_tag,<<"description">>,[],false},
+ {data,<<"Bob's Rants">>,false},
+ {end_tag,<<"description">>},
+ {end_tag,<<"channel">>}],
+ Expect = tokens(D0),
+ ok.
+
+test_parse2() ->
+ D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
+ Expect = {<<"channel">>,[],
+ [{<<"title">>,[],[<<"from __future__ import *">>]},
+ {<<"link">>,[],[
+ <<"http://bob.pythonmac.org">>,
+ {<<"br">>,[],[]},
+ <<"foo">>]},
+ {<<"description">>,[],[<<"Bob's Rants">>]}]},
+ Expect = parse(D0),
+ ok.
+
+test_parse_tokens() ->
+ D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
+ {data,<<"\n">>,true},
+ {start_tag,<<"html">>,[],false}],
+ {<<"html">>, [], []} = parse_tokens(D0),
+ D1 = D0 ++ [{end_tag, <<"html">>}],
+ {<<"html">>, [], []} = parse_tokens(D1),
+ D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
+ {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2),
+ D3 = D0 ++ [{start_tag, <<"head">>, [], false},
+ {end_tag, <<"head">>},
+ {start_tag, <<"body">>, [], false}],
+ {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3),
+ D4 = D3 ++ [{data,<<"\n">>,true},
+ {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
+ {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
+ {end_tag,<<"a">>},
+ {end_tag,<<"div">>},
+ {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
+ {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
+ {end_tag,<<"div">>},
+ {end_tag,<<"div">>}],
+ {<<"html">>, [],
+ [{<<"head">>, [], []},
+ {<<"body">>, [],
+ [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
+ {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
+ ]}]} = parse_tokens(D4),
+ D5 = [{start_tag,<<"html">>,[],false},
+ {data,<<"\n">>,true},
+ {data,<<"boo">>,false},
+ {data,<<"hoo">>,false},
+ {data,<<"\n">>,true},
+ {end_tag,<<"html">>}],
+ {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5),
+ D6 = [{start_tag,<<"html">>,[],false},
+ {data,<<"\n">>,true},
+ {data,<<"\n">>,true},
+ {end_tag,<<"html">>}],
+ {<<"html">>, [], []} = parse_tokens(D6),
+ D7 = [{start_tag,<<"html">>,[],false},
+ {start_tag,<<"ul">>,[],false},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"word">>,false},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"up">>,false},
+ {end_tag,<<"li">>},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"fdsa">>,false},
+ {start_tag,<<"br">>,[],true},
+ {data,<<"asdf">>,false},
+ {end_tag,<<"ul">>},
+ {end_tag,<<"html">>}],
+ {<<"html">>, [],
+ [{<<"ul">>, [],
+ [{<<"li">>, [], [<<"word">>]},
+ {<<"li">>, [], [<<"up">>]},
+ {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7),
+ ok.
+
+tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) ->
+ tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]);
+tree_data(Rest, AllWhitespace, Acc) ->
+ {iolist_to_binary(lists:reverse(Acc)), AllWhitespace, Rest}.
+
+tree([], Stack) ->
+ {destack(Stack), []};
+tree([{end_tag, Tag} | Rest], Stack) ->
+ case destack(norm(Tag), Stack) of
+ S when is_list(S) ->
+ tree(Rest, S);
+ Result ->
+ {Result, []}
+ end;
+tree([{start_tag, Tag, Attrs, true} | Rest], S) ->
+ tree(Rest, append_stack_child(norm({Tag, Attrs}), S));
+tree([{start_tag, Tag, Attrs, false} | Rest], S) ->
+ tree(Rest, stack(norm({Tag, Attrs}), S));
+tree([T={pi, _Tag, _Attrs} | Rest], S) ->
+ tree(Rest, append_stack_child(T, S));
+tree([T={comment, _Comment} | Rest], S) ->
+ tree(Rest, append_stack_child(T, S));
+tree(L=[{data, _Data, _Whitespace} | _], S) ->
+ case tree_data(L, true, []) of
+ {_, true, Rest} ->
+ tree(Rest, S);
+ {Data, false, Rest} ->
+ tree(Rest, append_stack_child(Data, S))
+ end.
+
+norm({Tag, Attrs}) ->
+ {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []};
+norm(Tag) when is_binary(Tag) ->
+ Tag;
+norm(Tag) ->
+ list_to_binary(string:to_lower(Tag)).
+
+test_destack() ->
+ {<<"a">>, [], []} =
+ destack([{<<"a">>, [], []}]),
+ {<<"a">>, [], [{<<"b">>, [], []}]} =
+ destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
+ {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
+ destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+ [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
+ destack(<<"b">>,
+ [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+ [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
+ destack(<<"c">>,
+ [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
+ ok.
+
+stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest])
+ when TN =:= <<"li">> orelse TN =:= <<"option">> ->
+ [T1 | destack(TN, Stack)];
+stack(T1={TN0, _, _}, Stack=[{TN1, _, _} | _Rest])
+ when (TN0 =:= <<"dd">> orelse TN0 =:= <<"dt">>) andalso
+ (TN1 =:= <<"dd">> orelse TN1 =:= <<"dt">>) ->
+ [T1 | destack(TN1, Stack)];
+stack(T1, Stack) ->
+ [T1 | Stack].
+
+append_stack_child(StartTag, [{Name, Attrs, Acc} | Stack]) ->
+ [{Name, Attrs, [StartTag | Acc]} | Stack].
+
+destack(TagName, Stack) when is_list(Stack) ->
+ F = fun (X) ->
+ case X of
+ {TagName, _, _} ->
+ false;
+ _ ->
+ true
+ end
+ end,
+ case lists:splitwith(F, Stack) of
+ {_, []} ->
+ %% If we're parsing something like XML we might find
+ %% a <link>tag</link> that is normally a singleton
+ %% in HTML but isn't here
+ case {is_singleton(TagName), Stack} of
+ {true, [{T0, A0, Acc0} | Post0]} ->
+ case lists:splitwith(F, Acc0) of
+ {_, []} ->
+ %% Actually was a singleton
+ Stack;
+ {Pre, [{T1, A1, []} | Post1]} ->
+ [{T0, A0, [{T1, A1, lists:reverse(Pre)} | Post1]}
+ | Post0]
+ end;
+ _ ->
+ %% No match, no state change
+ Stack
+ end;
+ {_Pre, [_T]} ->
+ %% Unfurl the whole stack, we're done
+ destack(Stack);
+ {Pre, [T, {T0, A0, Acc0} | Post]} ->
+ %% Unfurl up to the tag, then accumulate it
+ [{T0, A0, [destack(Pre ++ [T]) | Acc0]} | Post]
+ end.
+
+destack([{Tag, Attrs, Acc}]) ->
+ {Tag, Attrs, lists:reverse(Acc)};
+destack([{T1, A1, Acc1}, {T0, A0, Acc0} | Rest]) ->
+ destack([{T0, A0, [{T1, A1, lists:reverse(Acc1)} | Acc0]} | Rest]).
+
+is_singleton(<<"br">>) -> true;
+is_singleton(<<"hr">>) -> true;
+is_singleton(<<"img">>) -> true;
+is_singleton(<<"input">>) -> true;
+is_singleton(<<"base">>) -> true;
+is_singleton(<<"meta">>) -> true;
+is_singleton(<<"link">>) -> true;
+is_singleton(<<"area">>) -> true;
+is_singleton(<<"param">>) -> true;
+is_singleton(<<"col">>) -> true;
+is_singleton(_) -> false.
+
+tokenize_data(B, S=#decoder{offset=O}) ->
+ tokenize_data(B, S, O, true).
+
+tokenize_data(B, S=#decoder{offset=O}, Start, Whitespace) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when (C =/= $< andalso C =/= $&) ->
+ tokenize_data(B, ?INC_CHAR(S, C), Start,
+ (Whitespace andalso ?IS_WHITESPACE(C)));
+ _ ->
+ Len = O - Start,
+ <<_:Start/binary, Data:Len/binary, _/binary>> = B,
+ {{data, Data, Whitespace}, S}
+ end.
+
+tokenize_attributes(B, S) ->
+ tokenize_attributes(B, S, []).
+
+tokenize_attributes(B, S=#decoder{offset=O}, Acc) ->
+ case B of
+ <<_:O/binary>> ->
+ {lists:reverse(Acc), S};
+ <<_:O/binary, C, _/binary>> when (C =:= $> orelse C =:= $/) ->
+ {lists:reverse(Acc), S};
+ <<_:O/binary, "?>", _/binary>> ->
+ {lists:reverse(Acc), S};
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+ tokenize_attributes(B, ?INC_CHAR(S, C), Acc);
+ _ ->
+ {Attr, S1} = tokenize_literal(B, S),
+ {Value, S2} = tokenize_attr_value(Attr, B, S1),
+ tokenize_attributes(B, S2, [{Attr, Value} | Acc])
+ end.
+
+tokenize_attr_value(Attr, B, S) ->
+ S1 = skip_whitespace(B, S),
+ O = S1#decoder.offset,
+ case B of
+ <<_:O/binary, "=", _/binary>> ->
+ S2 = skip_whitespace(B, ?INC_COL(S1)),
+ tokenize_word_or_literal(B, S2);
+ _ ->
+ {Attr, S1}
+ end.
+
+skip_whitespace(B, S=#decoder{offset=O}) ->
+ case B of
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+ skip_whitespace(B, ?INC_CHAR(S, C));
+ _ ->
+ S
+ end.
+
+tokenize_literal(Bin, S) ->
+ tokenize_literal(Bin, S, []).
+
+tokenize_literal(Bin, S=#decoder{offset=O}, Acc) ->
+ case Bin of
+ <<_:O/binary, $&, _/binary>> ->
+ {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)),
+ tokenize_literal(Bin, S1, [Data | Acc]);
+ <<_:O/binary, C, _/binary>> when not (?IS_WHITESPACE(C)
+ orelse C =:= $>
+ orelse C =:= $/
+ orelse C =:= $=) ->
+ tokenize_literal(Bin, ?INC_COL(S), [C | Acc]);
+ _ ->
+ {iolist_to_binary(lists:reverse(Acc)), S}
+ end.
+
+find_qgt(Bin, S=#decoder{offset=O}) ->
+ case Bin of
+ <<_:O/binary, "?>", _/binary>> ->
+ ?ADV_COL(S, 2);
+ <<_:O/binary, C, _/binary>> ->
+ find_qgt(Bin, ?INC_CHAR(S, C));
+ _ ->
+ S
+ end.
+
+find_gt(Bin, S) ->
+ find_gt(Bin, S, false).
+
+find_gt(Bin, S=#decoder{offset=O}, HasSlash) ->
+ case Bin of
+ <<_:O/binary, $/, _/binary>> ->
+ find_gt(Bin, ?INC_COL(S), true);
+ <<_:O/binary, $>, _/binary>> ->
+ {?INC_COL(S), HasSlash};
+ <<_:O/binary, C, _/binary>> ->
+ find_gt(Bin, ?INC_CHAR(S, C), HasSlash);
+ _ ->
+ {S, HasSlash}
+ end.
+
+tokenize_charref(Bin, S=#decoder{offset=O}) ->
+ tokenize_charref(Bin, S, O).
+
+tokenize_charref(Bin, S=#decoder{offset=O}, Start) ->
+ case Bin of
+ <<_:O/binary>> ->
+ <<_:Start/binary, Raw/binary>> = Bin,
+ {{data, Raw, false}, S};
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C)
+ orelse C =:= ?SQUOTE
+ orelse C =:= ?QUOTE
+ orelse C =:= $/
+ orelse C =:= $> ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ {{data, Raw, false}, S};
+ <<_:O/binary, $;, _/binary>> ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ Data = case mochiweb_charref:charref(Raw) of
+ undefined ->
+ Start1 = Start - 1,
+ Len1 = Len + 2,
+ <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
+ R;
+ Unichar ->
+ list_to_binary(xmerl_ucs:to_utf8(Unichar))
+ end,
+ {{data, Data, false}, ?INC_COL(S)};
+ _ ->
+ tokenize_charref(Bin, ?INC_COL(S), Start)
+ end.
+
+tokenize_doctype(Bin, S) ->
+ tokenize_doctype(Bin, S, []).
+
+tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) ->
+ case Bin of
+ <<_:O/binary>> ->
+ {{doctype, lists:reverse(Acc)}, S};
+ <<_:O/binary, $>, _/binary>> ->
+ {{doctype, lists:reverse(Acc)}, ?INC_COL(S)};
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+ tokenize_doctype(Bin, ?INC_CHAR(S, C), Acc);
+ _ ->
+ {Word, S1} = tokenize_word_or_literal(Bin, S),
+ tokenize_doctype(Bin, S1, [Word | Acc])
+ end.
+
+tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
+ case Bin of
+ <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+ {error, {whitespace, [C], S}};
+ <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
+ tokenize_word(Bin, ?INC_COL(S), C);
+ _ ->
+ tokenize_literal(Bin, S, [])
+ end.
+
+tokenize_word(Bin, S, Quote) ->
+ tokenize_word(Bin, S, Quote, []).
+
+tokenize_word(Bin, S=#decoder{offset=O}, Quote, Acc) ->
+ case Bin of
+ <<_:O/binary>> ->
+ {iolist_to_binary(lists:reverse(Acc)), S};
+ <<_:O/binary, Quote, _/binary>> ->
+ {iolist_to_binary(lists:reverse(Acc)), ?INC_COL(S)};
+ <<_:O/binary, $&, _/binary>> ->
+ {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)),
+ tokenize_word(Bin, S1, Quote, [Data | Acc]);
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_word(Bin, ?INC_CHAR(S, C), Quote, [C | Acc])
+ end.
+
+tokenize_cdata(Bin, S=#decoder{offset=O}) ->
+ tokenize_cdata(Bin, S, O).
+
+tokenize_cdata(Bin, S=#decoder{offset=O}, Start) ->
+ case Bin of
+ <<_:O/binary, "]]>", _/binary>> ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ {{data, Raw, false}, ?ADV_COL(S, 3)};
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_cdata(Bin, ?INC_CHAR(S, C), Start);
+ _ ->
+ <<_:O/binary, Raw/binary>> = Bin,
+ {{data, Raw, false}, S}
+ end.
+
+tokenize_comment(Bin, S=#decoder{offset=O}) ->
+ tokenize_comment(Bin, S, O).
+
+tokenize_comment(Bin, S=#decoder{offset=O}, Start) ->
+ case Bin of
+ <<_:O/binary, "-->", _/binary>> ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ {{comment, Raw}, ?ADV_COL(S, 3)};
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_comment(Bin, ?INC_CHAR(S, C), Start);
+ <<_:Start/binary, Raw/binary>> ->
+ {{comment, Raw}, S}
+ end.
+
+tokenize_script(Bin, S=#decoder{offset=O}) ->
+ tokenize_script(Bin, S, O).
+
+tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
+ case Bin of
+ %% Just a look-ahead, we want the end_tag separately
+ <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>>
+ when (SS =:= $s orelse SS =:= $S) andalso
+ (CC =:= $c orelse CC =:= $C) andalso
+ (RR =:= $r orelse RR =:= $R) andalso
+ (II =:= $i orelse II =:= $I) andalso
+ (PP =:= $p orelse PP =:= $P) andalso
+ (TT=:= $t orelse TT =:= $T) ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ {{data, Raw, false}, S};
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_script(Bin, ?INC_CHAR(S, C), Start);
+ <<_:Start/binary, Raw/binary>> ->
+ {{data, Raw, false}, S}
+ end.
+
+tokenize_textarea(Bin, S=#decoder{offset=O}) ->
+ tokenize_textarea(Bin, S, O).
+
+tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
+ case Bin of
+ %% Just a look-ahead, we want the end_tag separately
+ <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>>
+ when (TT =:= $t orelse TT =:= $T) andalso
+ (EE =:= $e orelse EE =:= $E) andalso
+ (XX =:= $x orelse XX =:= $X) andalso
+ (TT2 =:= $t orelse TT2 =:= $T) andalso
+ (AA =:= $a orelse AA =:= $A) andalso
+ (RR =:= $r orelse RR =:= $R) andalso
+ (EE2 =:= $e orelse EE2 =:= $E) andalso
+ (AA2 =:= $a orelse AA2 =:= $A) ->
+ Len = O - Start,
+ <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+ {{data, Raw, false}, S};
+ <<_:O/binary, C, _/binary>> ->
+ tokenize_textarea(Bin, ?INC_CHAR(S, C), Start);
+ <<_:Start/binary, Raw/binary>> ->
+ {{data, Raw, false}, S}
+ end.
diff --git a/apps/mochiweb/src/mochiweb_http.erl b/apps/mochiweb/src/mochiweb_http.erl
new file mode 100644
index 00000000..f1821f40
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_http.erl
@@ -0,0 +1,152 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc HTTP server.
+
+-module(mochiweb_http).
+-author('bob@mochimedia.com').
+-export([start/0, start/1, stop/0, stop/1]).
+-export([loop/2, default_body/1]).
+-export([after_response/2, reentry/1]).
+
+-define(IDLE_TIMEOUT, 30000).
+
+-define(MAX_HEADERS, 1000).
+-define(DEFAULTS, [{name, ?MODULE},
+ {port, 8888}]).
+
+set_default({Prop, Value}, PropList) ->
+ case proplists:is_defined(Prop, PropList) of
+ true ->
+ PropList;
+ false ->
+ [{Prop, Value} | PropList]
+ end.
+
+set_defaults(Defaults, PropList) ->
+ lists:foldl(fun set_default/2, PropList, Defaults).
+
+parse_options(Options) ->
+ {loop, HttpLoop} = proplists:lookup(loop, Options),
+ Loop = fun (S) ->
+ ?MODULE:loop(S, HttpLoop)
+ end,
+ Options1 = [{loop, Loop} | proplists:delete(loop, Options)],
+ set_defaults(?DEFAULTS, Options1).
+
+stop() ->
+ mochiweb_socket_server:stop(?MODULE).
+
+stop(Name) ->
+ mochiweb_socket_server:stop(Name).
+
+start() ->
+ start([{ip, "127.0.0.1"},
+ {loop, {?MODULE, default_body}}]).
+
+start(Options) ->
+ mochiweb_socket_server:start(parse_options(Options)).
+
+frm(Body) ->
+ ["<html><head></head><body>"
+ "<form method=\"POST\">"
+ "<input type=\"hidden\" value=\"message\" name=\"hidden\"/>"
+ "<input type=\"submit\" value=\"regular POST\">"
+ "</form>"
+ "<br />"
+ "<form method=\"POST\" enctype=\"multipart/form-data\""
+ " action=\"/multipart\">"
+ "<input type=\"hidden\" value=\"multipart message\" name=\"hidden\"/>"
+ "<input type=\"file\" name=\"file\"/>"
+ "<input type=\"submit\" value=\"multipart POST\" />"
+ "</form>"
+ "<pre>", Body, "</pre>"
+ "</body></html>"].
+
+default_body(Req, M, "/chunked") when M =:= 'GET'; M =:= 'HEAD' ->
+ Res = Req:ok({"text/plain", [], chunked}),
+ Res:write_chunk("First chunk\r\n"),
+ timer:sleep(5000),
+ Res:write_chunk("Last chunk\r\n"),
+ Res:write_chunk("");
+default_body(Req, M, _Path) when M =:= 'GET'; M =:= 'HEAD' ->
+ Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()},
+ {parse_cookie, Req:parse_cookie()},
+ Req:dump()]]),
+ Req:ok({"text/html",
+ [mochiweb_cookies:cookie("mochiweb_http", "test_cookie")],
+ frm(Body)});
+default_body(Req, 'POST', "/multipart") ->
+ Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()},
+ {parse_cookie, Req:parse_cookie()},
+ {body, Req:recv_body()},
+ Req:dump()]]),
+ Req:ok({"text/html", [], frm(Body)});
+default_body(Req, 'POST', _Path) ->
+ Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()},
+ {parse_cookie, Req:parse_cookie()},
+ {parse_post, Req:parse_post()},
+ Req:dump()]]),
+ Req:ok({"text/html", [], frm(Body)});
+default_body(Req, _Method, _Path) ->
+ Req:respond({501, [], []}).
+
+default_body(Req) ->
+ default_body(Req, Req:get(method), Req:get(path)).
+
+loop(Socket, Body) ->
+ inet:setopts(Socket, [{packet, http}]),
+ request(Socket, Body).
+
+request(Socket, Body) ->
+ case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+ {ok, {http_request, Method, Path, Version}} ->
+ headers(Socket, {Method, Path, Version}, [], Body, 0);
+ {error, {http_error, "\r\n"}} ->
+ request(Socket, Body);
+ {error, {http_error, "\n"}} ->
+ request(Socket, Body);
+ _Other ->
+ gen_tcp:close(Socket),
+ exit(normal)
+ end.
+
+reentry(Body) ->
+ fun (Req) ->
+ ?MODULE:after_response(Body, Req)
+ end.
+
+headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) ->
+ %% Too many headers sent, bad request.
+ inet:setopts(Socket, [{packet, raw}]),
+ Req = mochiweb:new_request({Socket, Request,
+ lists:reverse(Headers)}),
+ Req:respond({400, [], []}),
+ gen_tcp:close(Socket),
+ exit(normal);
+headers(Socket, Request, Headers, Body, HeaderCount) ->
+ case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+ {ok, http_eoh} ->
+ inet:setopts(Socket, [{packet, raw}]),
+ Req = mochiweb:new_request({Socket, Request,
+ lists:reverse(Headers)}),
+ Body(Req),
+ ?MODULE:after_response(Body, Req);
+ {ok, {http_header, _, Name, _, Value}} ->
+ headers(Socket, Request, [{Name, Value} | Headers], Body,
+ 1 + HeaderCount);
+ _Other ->
+ gen_tcp:close(Socket),
+ exit(normal)
+ end.
+
+after_response(Body, Req) ->
+ Socket = Req:get(socket),
+ case Req:should_close() of
+ true ->
+ gen_tcp:close(Socket),
+ exit(normal);
+ false ->
+ Req:cleanup(),
+ ?MODULE:loop(Socket, Body)
+ end.
diff --git a/apps/mochiweb/src/mochiweb_multipart.erl b/apps/mochiweb/src/mochiweb_multipart.erl
new file mode 100644
index 00000000..0368a9a6
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_multipart.erl
@@ -0,0 +1,530 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for parsing multipart/form-data.
+
+-module(mochiweb_multipart).
+-author('bob@mochimedia.com').
+
+-export([parse_form/1, parse_form/2]).
+-export([parse_multipart_request/2]).
+-export([test/0]).
+
+-define(CHUNKSIZE, 4096).
+
+-record(mp, {state, boundary, length, buffer, callback, req}).
+
+%% TODO: DOCUMENT THIS MODULE.
+
+parse_form(Req) ->
+ parse_form(Req, fun default_file_handler/2).
+
+parse_form(Req, FileHandler) ->
+ Callback = fun (Next) -> parse_form_outer(Next, FileHandler, []) end,
+ {_, _, Res} = parse_multipart_request(Req, Callback),
+ Res.
+
+parse_form_outer(eof, _, Acc) ->
+ lists:reverse(Acc);
+parse_form_outer({headers, H}, FileHandler, State) ->
+ {"form-data", H1} = proplists:get_value("content-disposition", H),
+ Name = proplists:get_value("name", H1),
+ Filename = proplists:get_value("filename", H1),
+ case Filename of
+ undefined ->
+ fun (Next) ->
+ parse_form_value(Next, {Name, []}, FileHandler, State)
+ end;
+ _ ->
+ ContentType = proplists:get_value("content-type", H),
+ Handler = FileHandler(Filename, ContentType),
+ fun (Next) ->
+ parse_form_file(Next, {Name, Handler}, FileHandler, State)
+ end
+ end.
+
+parse_form_value(body_end, {Name, Acc}, FileHandler, State) ->
+ Value = binary_to_list(iolist_to_binary(lists:reverse(Acc))),
+ State1 = [{Name, Value} | State],
+ fun (Next) -> parse_form_outer(Next, FileHandler, State1) end;
+parse_form_value({body, Data}, {Name, Acc}, FileHandler, State) ->
+ Acc1 = [Data | Acc],
+ fun (Next) -> parse_form_value(Next, {Name, Acc1}, FileHandler, State) end.
+
+parse_form_file(body_end, {Name, Handler}, FileHandler, State) ->
+ Value = Handler(eof),
+ State1 = [{Name, Value} | State],
+ fun (Next) -> parse_form_outer(Next, FileHandler, State1) end;
+parse_form_file({body, Data}, {Name, Handler}, FileHandler, State) ->
+ H1 = Handler(Data),
+ fun (Next) -> parse_form_file(Next, {Name, H1}, FileHandler, State) end.
+
+default_file_handler(Filename, ContentType) ->
+ default_file_handler_1(Filename, ContentType, []).
+
+default_file_handler_1(Filename, ContentType, Acc) ->
+ fun(eof) ->
+ Value = iolist_to_binary(lists:reverse(Acc)),
+ {Filename, ContentType, Value};
+ (Next) ->
+ default_file_handler_1(Filename, ContentType, [Next | Acc])
+ end.
+
+parse_multipart_request(Req, Callback) ->
+ %% TODO: Support chunked?
+ Length = list_to_integer(Req:get_header_value("content-length")),
+ Boundary = iolist_to_binary(
+ get_boundary(Req:get_header_value("content-type"))),
+ Prefix = <<"\r\n--", Boundary/binary>>,
+ BS = byte_size(Boundary),
+ Chunk = read_chunk(Req, Length),
+ Length1 = Length - byte_size(Chunk),
+ <<"--", Boundary:BS/binary, "\r\n", Rest/binary>> = Chunk,
+ feed_mp(headers, flash_multipart_hack(#mp{boundary=Prefix,
+ length=Length1,
+ buffer=Rest,
+ callback=Callback,
+ req=Req})).
+
+parse_headers(<<>>) ->
+ [];
+parse_headers(Binary) ->
+ parse_headers(Binary, []).
+
+parse_headers(Binary, Acc) ->
+ case find_in_binary(<<"\r\n">>, Binary) of
+ {exact, N} ->
+ <<Line:N/binary, "\r\n", Rest/binary>> = Binary,
+ parse_headers(Rest, [split_header(Line) | Acc]);
+ not_found ->
+ lists:reverse([split_header(Binary) | Acc])
+ end.
+
+split_header(Line) ->
+ {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end,
+ binary_to_list(Line)),
+ {string:to_lower(string:strip(Name)),
+ mochiweb_util:parse_header(Value)}.
+
+read_chunk(Req, Length) when Length > 0 ->
+ case Length of
+ Length when Length < ?CHUNKSIZE ->
+ Req:recv(Length);
+ _ ->
+ Req:recv(?CHUNKSIZE)
+ end.
+
+read_more(State=#mp{length=Length, buffer=Buffer, req=Req}) ->
+ Data = read_chunk(Req, Length),
+ Buffer1 = <<Buffer/binary, Data/binary>>,
+ flash_multipart_hack(State#mp{length=Length - byte_size(Data),
+ buffer=Buffer1}).
+
+flash_multipart_hack(State=#mp{length=0, buffer=Buffer, boundary=Prefix}) ->
+ %% http://code.google.com/p/mochiweb/issues/detail?id=22
+ %% Flash doesn't terminate multipart with \r\n properly so we fix it up here
+ PrefixSize = size(Prefix),
+ case size(Buffer) - (2 + PrefixSize) of
+ Seek when Seek >= 0 ->
+ case Buffer of
+ <<_:Seek/binary, Prefix:PrefixSize/binary, "--">> ->
+ Buffer1 = <<Buffer/binary, "\r\n">>,
+ State#mp{buffer=Buffer1};
+ _ ->
+ State
+ end;
+ _ ->
+ State
+ end;
+flash_multipart_hack(State) ->
+ State.
+
+feed_mp(headers, State=#mp{buffer=Buffer, callback=Callback}) ->
+ {State1, P} = case find_in_binary(<<"\r\n\r\n">>, Buffer) of
+ {exact, N} ->
+ {State, N};
+ _ ->
+ S1 = read_more(State),
+ %% Assume headers must be less than ?CHUNKSIZE
+ {exact, N} = find_in_binary(<<"\r\n\r\n">>,
+ S1#mp.buffer),
+ {S1, N}
+ end,
+ <<Headers:P/binary, "\r\n\r\n", Rest/binary>> = State1#mp.buffer,
+ NextCallback = Callback({headers, parse_headers(Headers)}),
+ feed_mp(body, State1#mp{buffer=Rest,
+ callback=NextCallback});
+feed_mp(body, State=#mp{boundary=Prefix, buffer=Buffer, callback=Callback}) ->
+ Boundary = find_boundary(Prefix, Buffer),
+ case Boundary of
+ {end_boundary, Start, Skip} ->
+ <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
+ C1 = Callback({body, Data}),
+ C2 = C1(body_end),
+ {State#mp.length, Rest, C2(eof)};
+ {next_boundary, Start, Skip} ->
+ <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
+ C1 = Callback({body, Data}),
+ feed_mp(headers, State#mp{callback=C1(body_end),
+ buffer=Rest});
+ {maybe, Start} ->
+ <<Data:Start/binary, Rest/binary>> = Buffer,
+ feed_mp(body, read_more(State#mp{callback=Callback({body, Data}),
+ buffer=Rest}));
+ not_found ->
+ {Data, Rest} = {Buffer, <<>>},
+ feed_mp(body, read_more(State#mp{callback=Callback({body, Data}),
+ buffer=Rest}))
+ end.
+
+get_boundary(ContentType) ->
+ {"multipart/form-data", Opts} = mochiweb_util:parse_header(ContentType),
+ case proplists:get_value("boundary", Opts) of
+ S when is_list(S) ->
+ S
+ end.
+
+find_in_binary(B, Data) when size(B) > 0 ->
+ case size(Data) - size(B) of
+ Last when Last < 0 ->
+ partial_find(B, Data, 0, size(Data));
+ Last ->
+ find_in_binary(B, size(B), Data, 0, Last)
+ end.
+
+find_in_binary(B, BS, D, N, Last) when N =< Last->
+ case D of
+ <<_:N/binary, B:BS/binary, _/binary>> ->
+ {exact, N};
+ _ ->
+ find_in_binary(B, BS, D, 1 + N, Last)
+ end;
+find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last ->
+ partial_find(B, D, N, BS - 1).
+
+partial_find(_B, _D, _N, 0) ->
+ not_found;
+partial_find(B, D, N, K) ->
+ <<B1:K/binary, _/binary>> = B,
+ case D of
+ <<_Skip:N/binary, B1:K/binary>> ->
+ {partial, N, K};
+ _ ->
+ partial_find(B, D, 1 + N, K - 1)
+ end.
+
+find_boundary(Prefix, Data) ->
+ case find_in_binary(Prefix, Data) of
+ {exact, Skip} ->
+ PrefixSkip = Skip + size(Prefix),
+ case Data of
+ <<_:PrefixSkip/binary, "\r\n", _/binary>> ->
+ {next_boundary, Skip, size(Prefix) + 2};
+ <<_:PrefixSkip/binary, "--\r\n", _/binary>> ->
+ {end_boundary, Skip, size(Prefix) + 4};
+ _ when size(Data) < PrefixSkip + 4 ->
+ %% Underflow
+ {maybe, Skip};
+ _ ->
+ %% False positive
+ not_found
+ end;
+ {partial, Skip, Length} when (Skip + Length) =:= size(Data) ->
+ %% Underflow
+ {maybe, Skip};
+ _ ->
+ not_found
+ end.
+
+with_socket_server(ServerFun, ClientFun) ->
+ {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"},
+ {port, 0},
+ {loop, ServerFun}]),
+ Port = mochiweb_socket_server:get(Server, port),
+ {ok, Client} = gen_tcp:connect("127.0.0.1", Port,
+ [binary, {active, false}]),
+ Res = (catch ClientFun(Client)),
+ mochiweb_socket_server:stop(Server),
+ Res.
+
+fake_request(Socket, ContentType, Length) ->
+ mochiweb_request:new(Socket,
+ 'POST',
+ "/multipart",
+ {1,1},
+ mochiweb_headers:make(
+ [{"content-type", ContentType},
+ {"content-length", Length}])).
+
+test_callback(Expect, [Expect | Rest]) ->
+ case Rest of
+ [] ->
+ ok;
+ _ ->
+ fun (Next) -> test_callback(Next, Rest) end
+ end;
+test_callback({body, Got}, [{body, Expect} | Rest]) ->
+ GotSize = size(Got),
+ <<Got:GotSize/binary, Expect1/binary>> = Expect,
+ fun (Next) -> test_callback(Next, [{body, Expect1} | Rest]) end.
+
+test_parse3() ->
+ ContentType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882",
+ BinContent = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>,
+ Expect = [{headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "hidden"}]}}]},
+ {body, <<"multipart message">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "file"}, {"filename", "test_file.txt"}]}},
+ {"content-type", {"text/plain", []}}]},
+ {body, <<"Woo multiline text file\n\nLa la la">>},
+ body_end,
+ eof],
+ TestCallback = fun (Next) -> test_callback(Next, Expect) end,
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_multipart_request(Req, TestCallback),
+ {0, <<>>, ok} = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+
+test_parse2() ->
+ ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024",
+ BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>,
+ Expect = [{headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "hidden"}]}}]},
+ {body, <<"multipart message">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "file"}, {"filename", ""}]}},
+ {"content-type", {"application/octet-stream", []}}]},
+ {body, <<>>},
+ body_end,
+ eof],
+ TestCallback = fun (Next) -> test_callback(Next, Expect) end,
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_multipart_request(Req, TestCallback),
+ {0, <<>>, ok} = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+test_parse_form() ->
+ ContentType = "multipart/form-data; boundary=AaB03x",
+ "AaB03x" = get_boundary(ContentType),
+ Content = mochiweb_util:join(
+ ["--AaB03x",
+ "Content-Disposition: form-data; name=\"submit-name\"",
+ "",
+ "Larry",
+ "--AaB03x",
+ "Content-Disposition: form-data; name=\"files\";"
+ ++ "filename=\"file1.txt\"",
+ "Content-Type: text/plain",
+ "",
+ "... contents of file1.txt ...",
+ "--AaB03x--",
+ ""], "\r\n"),
+ BinContent = iolist_to_binary(Content),
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_form(Req),
+ [{"submit-name", "Larry"},
+ {"files", {"file1.txt", {"text/plain",[]},
+ <<"... contents of file1.txt ...">>}
+ }] = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+test_parse() ->
+ ContentType = "multipart/form-data; boundary=AaB03x",
+ "AaB03x" = get_boundary(ContentType),
+ Content = mochiweb_util:join(
+ ["--AaB03x",
+ "Content-Disposition: form-data; name=\"submit-name\"",
+ "",
+ "Larry",
+ "--AaB03x",
+ "Content-Disposition: form-data; name=\"files\";"
+ ++ "filename=\"file1.txt\"",
+ "Content-Type: text/plain",
+ "",
+ "... contents of file1.txt ...",
+ "--AaB03x--",
+ ""], "\r\n"),
+ BinContent = iolist_to_binary(Content),
+ Expect = [{headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "submit-name"}]}}]},
+ {body, <<"Larry">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}},
+ {"content-type", {"text/plain", []}}]},
+ {body, <<"... contents of file1.txt ...">>},
+ body_end,
+ eof],
+ TestCallback = fun (Next) -> test_callback(Next, Expect) end,
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_multipart_request(Req, TestCallback),
+ {0, <<>>, ok} = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+test_find_boundary() ->
+ B = <<"\r\n--X">>,
+ {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>),
+ {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>),
+ {end_boundary, 0, 9} = find_boundary(B, <<"\r\n--X--\r\nRest">>),
+ {end_boundary, 1, 9} = find_boundary(B, <<"!\r\n--X--\r\nRest">>),
+ not_found = find_boundary(B, <<"--X\r\nRest">>),
+ {maybe, 0} = find_boundary(B, <<"\r\n--X\r">>),
+ {maybe, 1} = find_boundary(B, <<"!\r\n--X\r">>),
+ P = <<"\r\n-----------------------------16037454351082272548568224146">>,
+ B0 = <<55,212,131,77,206,23,216,198,35,87,252,118,252,8,25,211,132,229,
+ 182,42,29,188,62,175,247,243,4,4,0,59, 13,10,45,45,45,45,45,45,45,
+ 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,
+ 49,54,48,51,55,52,53,52,51,53,49>>,
+ {maybe, 30} = find_boundary(P, B0),
+ ok.
+
+test_find_in_binary() ->
+ {exact, 0} = find_in_binary(<<"foo">>, <<"foobarbaz">>),
+ {exact, 1} = find_in_binary(<<"oo">>, <<"foobarbaz">>),
+ {exact, 8} = find_in_binary(<<"z">>, <<"foobarbaz">>),
+ not_found = find_in_binary(<<"q">>, <<"foobarbaz">>),
+ {partial, 7, 2} = find_in_binary(<<"azul">>, <<"foobarbaz">>),
+ {exact, 0} = find_in_binary(<<"foobarbaz">>, <<"foobarbaz">>),
+ {partial, 0, 3} = find_in_binary(<<"foobar">>, <<"foo">>),
+ {partial, 1, 3} = find_in_binary(<<"foobar">>, <<"afoo">>),
+ ok.
+
+test_flash_parse() ->
+ ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5",
+ "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType),
+ BinContent = <<"------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Filename\"\r\n\r\nhello.txt\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"success_action_status\"\r\n\r\n201\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\nContent-Type: application/octet-stream\r\n\r\nhello\n\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Upload\"\r\n\r\nSubmit Query\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5--">>,
+ Expect = [{headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "Filename"}]}}]},
+ {body, <<"hello.txt">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "success_action_status"}]}}]},
+ {body, <<"201">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "file"}, {"filename", "hello.txt"}]}},
+ {"content-type", {"application/octet-stream", []}}]},
+ {body, <<"hello\n">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "Upload"}]}}]},
+ {body, <<"Submit Query">>},
+ body_end,
+ eof],
+ TestCallback = fun (Next) -> test_callback(Next, Expect) end,
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_multipart_request(Req, TestCallback),
+ {0, <<>>, ok} = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+test_flash_parse2() ->
+ ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5",
+ "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType),
+ Chunk = iolist_to_binary(string:copies("%", 4096)),
+ BinContent = <<"------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Filename\"\r\n\r\nhello.txt\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"success_action_status\"\r\n\r\n201\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\nContent-Type: application/octet-stream\r\n\r\n", Chunk/binary, "\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Upload\"\r\n\r\nSubmit Query\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5--">>,
+ Expect = [{headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "Filename"}]}}]},
+ {body, <<"hello.txt">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "success_action_status"}]}}]},
+ {body, <<"201">>},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "file"}, {"filename", "hello.txt"}]}},
+ {"content-type", {"application/octet-stream", []}}]},
+ {body, Chunk},
+ body_end,
+ {headers,
+ [{"content-disposition",
+ {"form-data", [{"name", "Upload"}]}}]},
+ {body, <<"Submit Query">>},
+ body_end,
+ eof],
+ TestCallback = fun (Next) -> test_callback(Next, Expect) end,
+ ServerFun = fun (Socket) ->
+ ok = gen_tcp:send(Socket, BinContent),
+ exit(normal)
+ end,
+ ClientFun = fun (Socket) ->
+ Req = fake_request(Socket, ContentType,
+ byte_size(BinContent)),
+ Res = parse_multipart_request(Req, TestCallback),
+ {0, <<>>, ok} = Res,
+ ok
+ end,
+ ok = with_socket_server(ServerFun, ClientFun),
+ ok.
+
+test() ->
+ test_find_in_binary(),
+ test_find_boundary(),
+ test_parse(),
+ test_parse2(),
+ test_parse3(),
+ test_parse_form(),
+ test_flash_parse(),
+ test_flash_parse2(),
+ ok.
diff --git a/apps/mochiweb/src/mochiweb_request.erl b/apps/mochiweb/src/mochiweb_request.erl
new file mode 100644
index 00000000..5d7af26b
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_request.erl
@@ -0,0 +1,867 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc MochiWeb HTTP Request abstraction.
+
+-module(mochiweb_request, [Socket, Method, RawPath, Version, Headers]).
+-author('bob@mochimedia.com').
+
+-include_lib("kernel/include/file.hrl").
+
+-define(QUIP, "Any of you quaids got a smint?").
+-define(READ_SIZE, 8192).
+
+-export([get_header_value/1, get_primary_header_value/1, get/1, dump/0]).
+-export([send/1, recv/1, recv/2, recv_body/0, recv_body/1, stream_body/3]).
+-export([start_response/1, start_response_length/1, start_raw_response/1]).
+-export([respond/1, ok/1]).
+-export([not_found/0, not_found/1]).
+-export([parse_post/0, parse_qs/0]).
+-export([should_close/0, cleanup/0]).
+-export([parse_cookie/0, get_cookie_value/1]).
+-export([serve_file/2, serve_file/3]).
+-export([accepted_encodings/1]).
+-export([test/0]).
+
+-define(SAVE_QS, mochiweb_request_qs).
+-define(SAVE_PATH, mochiweb_request_path).
+-define(SAVE_RECV, mochiweb_request_recv).
+-define(SAVE_BODY, mochiweb_request_body).
+-define(SAVE_BODY_LENGTH, mochiweb_request_body_length).
+-define(SAVE_POST, mochiweb_request_post).
+-define(SAVE_COOKIE, mochiweb_request_cookie).
+-define(SAVE_FORCE_CLOSE, mochiweb_request_force_close).
+
+%% @type iolist() = [iolist() | binary() | char()].
+%% @type iodata() = binary() | iolist().
+%% @type key() = atom() | string() | binary()
+%% @type value() = atom() | string() | binary() | integer()
+%% @type headers(). A mochiweb_headers structure.
+%% @type response(). A mochiweb_response parameterized module instance.
+%% @type ioheaders() = headers() | [{key(), value()}].
+
+% 5 minute default idle timeout
+-define(IDLE_TIMEOUT, 300000).
+
+% Maximum recv_body() length of 1MB
+-define(MAX_RECV_BODY, (1024*1024)).
+
+%% @spec get_header_value(K) -> undefined | Value
+%% @doc Get the value of a given request header.
+get_header_value(K) ->
+ mochiweb_headers:get_value(K, Headers).
+
+get_primary_header_value(K) ->
+ mochiweb_headers:get_primary_value(K, Headers).
+
+%% @type field() = socket | method | raw_path | version | headers | peer | path | body_length | range
+
+%% @spec get(field()) -> term()
+%% @doc Return the internal representation of the given field.
+get(socket) ->
+ Socket;
+get(method) ->
+ Method;
+get(raw_path) ->
+ RawPath;
+get(version) ->
+ Version;
+get(headers) ->
+ Headers;
+get(peer) ->
+ case inet:peername(Socket) of
+ {ok, {Addr={10, _, _, _}, _Port}} ->
+ case get_header_value("x-forwarded-for") of
+ undefined ->
+ inet_parse:ntoa(Addr);
+ Hosts ->
+ string:strip(lists:last(string:tokens(Hosts, ",")))
+ end;
+ {ok, {{127, 0, 0, 1}, _Port}} ->
+ case get_header_value("x-forwarded-for") of
+ undefined ->
+ "127.0.0.1";
+ Hosts ->
+ string:strip(lists:last(string:tokens(Hosts, ",")))
+ end;
+ {ok, {Addr, _Port}} ->
+ inet_parse:ntoa(Addr)
+ end;
+get(path) ->
+ case erlang:get(?SAVE_PATH) of
+ undefined ->
+ {Path0, _, _} = mochiweb_util:urlsplit_path(RawPath),
+ Path = mochiweb_util:unquote(Path0),
+ put(?SAVE_PATH, Path),
+ Path;
+ Cached ->
+ Cached
+ end;
+get(body_length) ->
+ erlang:get(?SAVE_BODY_LENGTH);
+get(range) ->
+ case get_header_value(range) of
+ undefined ->
+ undefined;
+ RawRange ->
+ parse_range_request(RawRange)
+ end.
+
+%% @spec dump() -> {mochiweb_request, [{atom(), term()}]}
+%% @doc Dump the internal representation to a "human readable" set of terms
+%% for debugging/inspection purposes.
+dump() ->
+ {?MODULE, [{method, Method},
+ {version, Version},
+ {raw_path, RawPath},
+ {headers, mochiweb_headers:to_list(Headers)}]}.
+
+%% @spec send(iodata()) -> ok
+%% @doc Send data over the socket.
+send(Data) ->
+ case gen_tcp:send(Socket, Data) of
+ ok ->
+ ok;
+ _ ->
+ exit(normal)
+ end.
+
+%% @spec recv(integer()) -> binary()
+%% @doc Receive Length bytes from the client as a binary, with the default
+%% idle timeout.
+recv(Length) ->
+ recv(Length, ?IDLE_TIMEOUT).
+
+%% @spec recv(integer(), integer()) -> binary()
+%% @doc Receive Length bytes from the client as a binary, with the given
+%% Timeout in msec.
+recv(Length, Timeout) ->
+ case gen_tcp:recv(Socket, Length, Timeout) of
+ {ok, Data} ->
+ put(?SAVE_RECV, true),
+ Data;
+ _ ->
+ exit(normal)
+ end.
+
+%% @spec body_length() -> undefined | chunked | unknown_transfer_encoding | integer()
+%% @doc Infer body length from transfer-encoding and content-length headers.
+body_length() ->
+ case get_header_value("transfer-encoding") of
+ undefined ->
+ case get_header_value("content-length") of
+ undefined ->
+ undefined;
+ Length ->
+ list_to_integer(Length)
+ end;
+ "chunked" ->
+ chunked;
+ Unknown ->
+ {unknown_transfer_encoding, Unknown}
+ end.
+
+
+%% @spec recv_body() -> binary()
+%% @doc Receive the body of the HTTP request (defined by Content-Length).
+%% Will only receive up to the default max-body length of 1MB.
+recv_body() ->
+ recv_body(?MAX_RECV_BODY).
+
+%% @spec recv_body(integer()) -> binary()
+%% @doc Receive the body of the HTTP request (defined by Content-Length).
+%% Will receive up to MaxBody bytes.
+recv_body(MaxBody) ->
+ % we could use a sane constant for max chunk size
+ Body = stream_body(?MAX_RECV_BODY, fun
+ ({0, _ChunkedFooter}, {_LengthAcc, BinAcc}) ->
+ iolist_to_binary(lists:reverse(BinAcc));
+ ({Length, Bin}, {LengthAcc, BinAcc}) ->
+ NewLength = Length + LengthAcc,
+ if NewLength > MaxBody ->
+ exit({body_too_large, chunked});
+ true ->
+ {NewLength, [Bin | BinAcc]}
+ end
+ end, {0, []}, MaxBody),
+ put(?SAVE_BODY, Body),
+ Body.
+
+stream_body(MaxChunkSize, ChunkFun, FunState) ->
+ stream_body(MaxChunkSize, ChunkFun, FunState, undefined).
+
+stream_body(MaxChunkSize, ChunkFun, FunState, MaxBodyLength) ->
+ Expect = case get_header_value("expect") of
+ undefined ->
+ undefined;
+ Value when is_list(Value) ->
+ string:to_lower(Value)
+ end,
+ case Expect of
+ "100-continue" ->
+ start_raw_response({100, gb_trees:empty()});
+ _Else ->
+ ok
+ end,
+ case body_length() of
+ undefined ->
+ undefined;
+ {unknown_transfer_encoding, Unknown} ->
+ exit({unknown_transfer_encoding, Unknown});
+ chunked ->
+ % In this case the MaxBody is actually used to
+ % determine the maximum allowed size of a single
+ % chunk.
+ stream_chunked_body(MaxChunkSize, ChunkFun, FunState);
+ 0 ->
+ <<>>;
+ Length when is_integer(Length) ->
+ case MaxBodyLength of
+ MaxBodyLength when is_integer(MaxBodyLength), MaxBodyLength < Length ->
+ exit({body_too_large, content_length});
+ _ ->
+ stream_unchunked_body(Length, ChunkFun, FunState)
+ end;
+ Length ->
+ exit({length_not_integer, Length})
+ end.
+
+
+%% @spec start_response({integer(), ioheaders()}) -> response()
+%% @doc Start the HTTP response by sending the Code HTTP response and
+%% ResponseHeaders. The server will set header defaults such as Server
+%% and Date if not present in ResponseHeaders.
+start_response({Code, ResponseHeaders}) ->
+ HResponse = mochiweb_headers:make(ResponseHeaders),
+ HResponse1 = mochiweb_headers:default_from_list(server_headers(),
+ HResponse),
+ start_raw_response({Code, HResponse1}).
+
+%% @spec start_raw_response({integer(), headers()}) -> response()
+%% @doc Start the HTTP response by sending the Code HTTP response and
+%% ResponseHeaders.
+start_raw_response({Code, ResponseHeaders}) ->
+ F = fun ({K, V}, Acc) ->
+ [make_io(K), <<": ">>, V, <<"\r\n">> | Acc]
+ end,
+ End = lists:foldl(F, [<<"\r\n">>],
+ mochiweb_headers:to_list(ResponseHeaders)),
+ send([make_version(Version), make_code(Code), <<"\r\n">> | End]),
+ mochiweb:new_response({THIS, Code, ResponseHeaders}).
+
+
+%% @spec start_response_length({integer(), ioheaders(), integer()}) -> response()
+%% @doc Start the HTTP response by sending the Code HTTP response and
+%% ResponseHeaders including a Content-Length of Length. The server
+%% will set header defaults such as Server
+%% and Date if not present in ResponseHeaders.
+start_response_length({Code, ResponseHeaders, Length}) ->
+ HResponse = mochiweb_headers:make(ResponseHeaders),
+ HResponse1 = mochiweb_headers:enter("Content-Length", Length, HResponse),
+ start_response({Code, HResponse1}).
+
+%% @spec respond({integer(), ioheaders(), iodata() | chunked | {file, IoDevice}}) -> response()
+%% @doc Start the HTTP response with start_response, and send Body to the
+%% client (if the get(method) /= 'HEAD'). The Content-Length header
+%% will be set by the Body length, and the server will insert header
+%% defaults.
+respond({Code, ResponseHeaders, {file, IoDevice}}) ->
+ Length = iodevice_size(IoDevice),
+ Response = start_response_length({Code, ResponseHeaders, Length}),
+ case Method of
+ 'HEAD' ->
+ ok;
+ _ ->
+ iodevice_stream(IoDevice)
+ end,
+ Response;
+respond({Code, ResponseHeaders, chunked}) ->
+ HResponse = mochiweb_headers:make(ResponseHeaders),
+ HResponse1 = case Method of
+ 'HEAD' ->
+ %% This is what Google does, http://www.google.com/
+ %% is chunked but HEAD gets Content-Length: 0.
+ %% The RFC is ambiguous so emulating Google is smart.
+ mochiweb_headers:enter("Content-Length", "0",
+ HResponse);
+ _ when Version >= {1, 1} ->
+ %% Only use chunked encoding for HTTP/1.1
+ mochiweb_headers:enter("Transfer-Encoding", "chunked",
+ HResponse);
+ _ ->
+ %% For pre-1.1 clients we send the data as-is
+ %% without a Content-Length header and without
+ %% chunk delimiters. Since the end of the document
+ %% is now ambiguous we must force a close.
+ put(?SAVE_FORCE_CLOSE, true),
+ HResponse
+ end,
+ start_response({Code, HResponse1});
+respond({Code, ResponseHeaders, Body}) ->
+ Response = start_response_length({Code, ResponseHeaders, iolist_size(Body)}),
+ case Method of
+ 'HEAD' ->
+ ok;
+ _ ->
+ send(Body)
+ end,
+ Response.
+
+%% @spec not_found() -> response()
+%% @doc Alias for <code>not_found([])</code>.
+not_found() ->
+ not_found([]).
+
+%% @spec not_found(ExtraHeaders) -> response()
+%% @doc Alias for <code>respond({404, [{"Content-Type", "text/plain"}
+%% | ExtraHeaders], &lt;&lt;"Not found."&gt;&gt;})</code>.
+not_found(ExtraHeaders) ->
+ respond({404, [{"Content-Type", "text/plain"} | ExtraHeaders],
+ <<"Not found.">>}).
+
+%% @spec ok({value(), iodata()} | {value(), ioheaders(), iodata() | {file, IoDevice}}) ->
+%% response()
+%% @doc respond({200, [{"Content-Type", ContentType} | Headers], Body}).
+ok({ContentType, Body}) ->
+ ok({ContentType, [], Body});
+ok({ContentType, ResponseHeaders, Body}) ->
+ HResponse = mochiweb_headers:make(ResponseHeaders),
+ case THIS:get(range) of
+ X when X =:= undefined; X =:= fail ->
+ HResponse1 = mochiweb_headers:enter("Content-Type", ContentType, HResponse),
+ respond({200, HResponse1, Body});
+ Ranges ->
+ {PartList, Size} = range_parts(Body, Ranges),
+ case PartList of
+ [] -> %% no valid ranges
+ HResponse1 = mochiweb_headers:enter("Content-Type",
+ ContentType,
+ HResponse),
+ %% could be 416, for now we'll just return 200
+ respond({200, HResponse1, Body});
+ PartList ->
+ {RangeHeaders, RangeBody} =
+ parts_to_body(PartList, ContentType, Size),
+ HResponse1 = mochiweb_headers:enter_from_list(
+ [{"Accept-Ranges", "bytes"} |
+ RangeHeaders],
+ HResponse),
+ respond({206, HResponse1, RangeBody})
+ end
+ end.
+
+%% @spec should_close() -> bool()
+%% @doc Return true if the connection must be closed. If false, using
+%% Keep-Alive should be safe.
+should_close() ->
+ ForceClose = erlang:get(mochiweb_request_force_close) =/= undefined,
+ DidNotRecv = erlang:get(mochiweb_request_recv) =:= undefined,
+ ForceClose orelse Version < {1, 0}
+ %% Connection: close
+ orelse get_header_value("connection") =:= "close"
+ %% HTTP 1.0 requires Connection: Keep-Alive
+ orelse (Version =:= {1, 0}
+ andalso get_header_value("connection") =/= "Keep-Alive")
+ %% unread data left on the socket, can't safely continue
+ orelse (DidNotRecv
+ andalso get_header_value("content-length") =/= undefined
+ andalso list_to_integer(get_header_value("content-length")) > 0)
+ orelse (DidNotRecv
+ andalso get_header_value("transfer-encoding") =:= "chunked").
+
+%% @spec cleanup() -> ok
+%% @doc Clean up any junk in the process dictionary, required before continuing
+%% a Keep-Alive request.
+cleanup() ->
+ [erase(K) || K <- [?SAVE_QS,
+ ?SAVE_PATH,
+ ?SAVE_RECV,
+ ?SAVE_BODY,
+ ?SAVE_POST,
+ ?SAVE_COOKIE,
+ ?SAVE_FORCE_CLOSE]],
+ ok.
+
+%% @spec parse_qs() -> [{Key::string(), Value::string()}]
+%% @doc Parse the query string of the URL.
+parse_qs() ->
+ case erlang:get(?SAVE_QS) of
+ undefined ->
+ {_, QueryString, _} = mochiweb_util:urlsplit_path(RawPath),
+ Parsed = mochiweb_util:parse_qs(QueryString),
+ put(?SAVE_QS, Parsed),
+ Parsed;
+ Cached ->
+ Cached
+ end.
+
+%% @spec get_cookie_value(Key::string) -> string() | undefined
+%% @doc Get the value of the given cookie.
+get_cookie_value(Key) ->
+ proplists:get_value(Key, parse_cookie()).
+
+%% @spec parse_cookie() -> [{Key::string(), Value::string()}]
+%% @doc Parse the cookie header.
+parse_cookie() ->
+ case erlang:get(?SAVE_COOKIE) of
+ undefined ->
+ Cookies = case get_header_value("cookie") of
+ undefined ->
+ [];
+ Value ->
+ mochiweb_cookies:parse_cookie(Value)
+ end,
+ put(?SAVE_COOKIE, Cookies),
+ Cookies;
+ Cached ->
+ Cached
+ end.
+
+%% @spec parse_post() -> [{Key::string(), Value::string()}]
+%% @doc Parse an application/x-www-form-urlencoded form POST. This
+%% has the side-effect of calling recv_body().
+parse_post() ->
+ case erlang:get(?SAVE_POST) of
+ undefined ->
+ Parsed = case recv_body() of
+ undefined ->
+ [];
+ Binary ->
+ case get_primary_header_value("content-type") of
+ "application/x-www-form-urlencoded" ++ _ ->
+ mochiweb_util:parse_qs(Binary);
+ _ ->
+ []
+ end
+ end,
+ put(?SAVE_POST, Parsed),
+ Parsed;
+ Cached ->
+ Cached
+ end.
+
+%% @spec stream_chunked_body(integer(), fun(), term()) -> term()
+%% @doc The function is called for each chunk.
+%% Used internally by read_chunked_body.
+stream_chunked_body(MaxChunkSize, Fun, FunState) ->
+ case read_chunk_length() of
+ 0 ->
+ Fun({0, read_chunk(0)}, FunState);
+ Length when Length > MaxChunkSize ->
+ NewState = read_sub_chunks(Length, MaxChunkSize, Fun, FunState),
+ stream_chunked_body(MaxChunkSize, Fun, NewState);
+ Length ->
+ NewState = Fun({Length, read_chunk(Length)}, FunState),
+ stream_chunked_body(MaxChunkSize, Fun, NewState)
+ end.
+
+stream_unchunked_body(0, Fun, FunState) ->
+ Fun({0, <<>>}, FunState);
+stream_unchunked_body(Length, Fun, FunState) when Length > 0 ->
+ Bin = recv(0),
+ BinSize = byte_size(Bin),
+ if BinSize > Length ->
+ <<OurBody:Length/binary, Extra/binary>> = Bin,
+ gen_tcp:unrecv(Socket, Extra),
+ NewState = Fun({Length, OurBody}, FunState),
+ stream_unchunked_body(0, Fun, NewState);
+ true ->
+ NewState = Fun({BinSize, Bin}, FunState),
+ stream_unchunked_body(Length - BinSize, Fun, NewState)
+ end.
+
+
+%% @spec read_chunk_length() -> integer()
+%% @doc Read the length of the next HTTP chunk.
+read_chunk_length() ->
+ inet:setopts(Socket, [{packet, line}]),
+ case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+ {ok, Header} ->
+ inet:setopts(Socket, [{packet, raw}]),
+ Splitter = fun (C) ->
+ C =/= $\r andalso C =/= $\n andalso C =/= $
+ end,
+ {Hex, _Rest} = lists:splitwith(Splitter, binary_to_list(Header)),
+ mochihex:to_int(Hex);
+ _ ->
+ exit(normal)
+ end.
+
+%% @spec read_chunk(integer()) -> Chunk::binary() | [Footer::binary()]
+%% @doc Read in a HTTP chunk of the given length. If Length is 0, then read the
+%% HTTP footers (as a list of binaries, since they're nominal).
+read_chunk(0) ->
+ inet:setopts(Socket, [{packet, line}]),
+ F = fun (F1, Acc) ->
+ case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+ {ok, <<"\r\n">>} ->
+ Acc;
+ {ok, Footer} ->
+ F1(F1, [Footer | Acc]);
+ _ ->
+ exit(normal)
+ end
+ end,
+ Footers = F(F, []),
+ inet:setopts(Socket, [{packet, raw}]),
+ Footers;
+read_chunk(Length) ->
+ case gen_tcp:recv(Socket, 2 + Length, ?IDLE_TIMEOUT) of
+ {ok, <<Chunk:Length/binary, "\r\n">>} ->
+ Chunk;
+ _ ->
+ exit(normal)
+ end.
+
+read_sub_chunks(Length, MaxChunkSize, Fun, FunState) when Length > MaxChunkSize ->
+ Bin = recv(MaxChunkSize),
+ NewState = Fun({size(Bin), Bin}, FunState),
+ read_sub_chunks(Length - MaxChunkSize, MaxChunkSize, Fun, NewState);
+
+read_sub_chunks(Length, _MaxChunkSize, Fun, FunState) ->
+ Fun({Length, read_chunk(Length)}, FunState).
+
+%% @spec serve_file(Path, DocRoot) -> Response
+%% @doc Serve a file relative to DocRoot.
+serve_file(Path, DocRoot) ->
+ serve_file(Path, DocRoot, []).
+
+%% @spec serve_file(Path, DocRoot, ExtraHeaders) -> Response
+%% @doc Serve a file relative to DocRoot.
+serve_file(Path, DocRoot, ExtraHeaders) ->
+ case mochiweb_util:safe_relative_path(Path) of
+ undefined ->
+ not_found(ExtraHeaders);
+ RelPath ->
+ FullPath = filename:join([DocRoot, RelPath]),
+ case filelib:is_dir(FullPath) of
+ true ->
+ maybe_redirect(RelPath, FullPath, ExtraHeaders);
+ false ->
+ maybe_serve_file(FullPath, ExtraHeaders)
+ end
+ end.
+
+%% Internal API
+
+%% This has the same effect as the DirectoryIndex directive in httpd
+directory_index(FullPath) ->
+ filename:join([FullPath, "index.html"]).
+
+maybe_redirect([], FullPath, ExtraHeaders) ->
+ maybe_serve_file(directory_index(FullPath), ExtraHeaders);
+
+maybe_redirect(RelPath, FullPath, ExtraHeaders) ->
+ case string:right(RelPath, 1) of
+ "/" ->
+ maybe_serve_file(directory_index(FullPath), ExtraHeaders);
+ _ ->
+ Host = mochiweb_headers:get_value("host", Headers),
+ Location = "http://" ++ Host ++ "/" ++ RelPath ++ "/",
+ LocationBin = list_to_binary(Location),
+ MoreHeaders = [{"Location", Location},
+ {"Content-Type", "text/html"} | ExtraHeaders],
+ Top = <<"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">"
+ "<html><head>"
+ "<title>301 Moved Permanently</title>"
+ "</head><body>"
+ "<h1>Moved Permanently</h1>"
+ "<p>The document has moved <a href=\"">>,
+ Bottom = <<">here</a>.</p></body></html>\n">>,
+ Body = <<Top/binary, LocationBin/binary, Bottom/binary>>,
+ respond({301, MoreHeaders, Body})
+ end.
+
+maybe_serve_file(File, ExtraHeaders) ->
+ case file:read_file_info(File) of
+ {ok, FileInfo} ->
+ LastModified = httpd_util:rfc1123_date(FileInfo#file_info.mtime),
+ case get_header_value("if-modified-since") of
+ LastModified ->
+ respond({304, ExtraHeaders, ""});
+ _ ->
+ case file:open(File, [raw, binary]) of
+ {ok, IoDevice} ->
+ ContentType = mochiweb_util:guess_mime(File),
+ Res = ok({ContentType,
+ [{"last-modified", LastModified}
+ | ExtraHeaders],
+ {file, IoDevice}}),
+ file:close(IoDevice),
+ Res;
+ _ ->
+ not_found(ExtraHeaders)
+ end
+ end;
+ {error, _} ->
+ not_found(ExtraHeaders)
+ end.
+
+server_headers() ->
+ [{"Server", "MochiWeb/1.0 (" ++ ?QUIP ++ ")"},
+ {"Date", httpd_util:rfc1123_date()}].
+
+make_io(Atom) when is_atom(Atom) ->
+ atom_to_list(Atom);
+make_io(Integer) when is_integer(Integer) ->
+ integer_to_list(Integer);
+make_io(Io) when is_list(Io); is_binary(Io) ->
+ Io.
+
+make_code(X) when is_integer(X) ->
+ [integer_to_list(X), [" " | httpd_util:reason_phrase(X)]];
+make_code(Io) when is_list(Io); is_binary(Io) ->
+ Io.
+
+make_version({1, 0}) ->
+ <<"HTTP/1.0 ">>;
+make_version(_) ->
+ <<"HTTP/1.1 ">>.
+
+iodevice_stream(IoDevice) ->
+ case file:read(IoDevice, ?READ_SIZE) of
+ eof ->
+ ok;
+ {ok, Data} ->
+ ok = send(Data),
+ iodevice_stream(IoDevice)
+ end.
+
+
+parts_to_body([{Start, End, Body}], ContentType, Size) ->
+ %% return body for a range reponse with a single body
+ HeaderList = [{"Content-Type", ContentType},
+ {"Content-Range",
+ ["bytes ",
+ make_io(Start), "-", make_io(End),
+ "/", make_io(Size)]}],
+ {HeaderList, Body};
+parts_to_body(BodyList, ContentType, Size) when is_list(BodyList) ->
+ %% return
+ %% header Content-Type: multipart/byteranges; boundary=441934886133bdee4
+ %% and multipart body
+ Boundary = mochihex:to_hex(crypto:rand_bytes(8)),
+ HeaderList = [{"Content-Type",
+ ["multipart/byteranges; ",
+ "boundary=", Boundary]}],
+ MultiPartBody = multipart_body(BodyList, ContentType, Boundary, Size),
+
+ {HeaderList, MultiPartBody}.
+
+multipart_body([], _ContentType, Boundary, _Size) ->
+ ["--", Boundary, "--\r\n"];
+multipart_body([{Start, End, Body} | BodyList], ContentType, Boundary, Size) ->
+ ["--", Boundary, "\r\n",
+ "Content-Type: ", ContentType, "\r\n",
+ "Content-Range: ",
+ "bytes ", make_io(Start), "-", make_io(End),
+ "/", make_io(Size), "\r\n\r\n",
+ Body, "\r\n"
+ | multipart_body(BodyList, ContentType, Boundary, Size)].
+
+iodevice_size(IoDevice) ->
+ {ok, Size} = file:position(IoDevice, eof),
+ {ok, 0} = file:position(IoDevice, bof),
+ Size.
+
+range_parts({file, IoDevice}, Ranges) ->
+ Size = iodevice_size(IoDevice),
+ F = fun (Spec, Acc) ->
+ case range_skip_length(Spec, Size) of
+ invalid_range ->
+ Acc;
+ V ->
+ [V | Acc]
+ end
+ end,
+ LocNums = lists:foldr(F, [], Ranges),
+ {ok, Data} = file:pread(IoDevice, LocNums),
+ Bodies = lists:zipwith(fun ({Skip, Length}, PartialBody) ->
+ {Skip, Skip + Length - 1, PartialBody}
+ end,
+ LocNums, Data),
+ {Bodies, Size};
+range_parts(Body0, Ranges) ->
+ Body = iolist_to_binary(Body0),
+ Size = size(Body),
+ F = fun(Spec, Acc) ->
+ case range_skip_length(Spec, Size) of
+ invalid_range ->
+ Acc;
+ {Skip, Length} ->
+ <<_:Skip/binary, PartialBody:Length/binary, _/binary>> = Body,
+ [{Skip, Skip + Length - 1, PartialBody} | Acc]
+ end
+ end,
+ {lists:foldr(F, [], Ranges), Size}.
+
+range_skip_length(Spec, Size) ->
+ case Spec of
+ {none, R} when R =< Size, R >= 0 ->
+ {Size - R, R};
+ {none, _OutOfRange} ->
+ {0, Size};
+ {R, none} when R >= 0, R < Size ->
+ {R, Size - R};
+ {_OutOfRange, none} ->
+ invalid_range;
+ {Start, End} when 0 =< Start, Start =< End, End < Size ->
+ {Start, End - Start + 1};
+ {_OutOfRange, _End} ->
+ invalid_range
+ end.
+
+parse_range_request(RawRange) when is_list(RawRange) ->
+ try
+ "bytes=" ++ RangeString = RawRange,
+ Ranges = string:tokens(RangeString, ","),
+ lists:map(fun ("-" ++ V) ->
+ {none, list_to_integer(V)};
+ (R) ->
+ case string:tokens(R, "-") of
+ [S1, S2] ->
+ {list_to_integer(S1), list_to_integer(S2)};
+ [S] ->
+ {list_to_integer(S), none}
+ end
+ end,
+ Ranges)
+ catch
+ _:_ ->
+ fail
+ end.
+
+%% @spec accepted_encodings([encoding()]) -> [encoding()] | error()
+%% @type encoding() -> string()
+%% @type error() -> bad_accept_encoding_value
+%%
+%% @doc Returns a list of encodings accepted by a request. Encodings that are
+%% not supported by the server will not be included in the return list.
+%% This list is computed from the "Accept-Encoding" header and
+%% its elements are ordered, descendingly, according to their Q values.
+%%
+%% Section 14.3 of the RFC 2616 (HTTP 1.1) describes the "Accept-Encoding"
+%% header and the process of determining which server supported encodings
+%% can be used for encoding the body for the request's response.
+%%
+%% Examples
+%%
+%% 1) For a missing "Accept-Encoding" header:
+%% accepted_encodings(["gzip", "identity"]) -> ["identity"]
+%%
+%% 2) For an "Accept-Encoding" header with value "gzip, deflate":
+%% accepted_encodings(["gzip", "identity"]) -> ["gzip", "identity"]
+%%
+%% 3) For an "Accept-Encoding" header with value "gzip;q=0.5, deflate":
+%% accepted_encodings(["gzip", "deflate", "identity"]) ->
+%% ["deflate", "gzip", "identity"]
+%%
+accepted_encodings(SupportedEncodings) ->
+ AcceptEncodingHeader = case get_header_value("Accept-Encoding") of
+ undefined ->
+ "";
+ Value ->
+ Value
+ end,
+ case mochiweb_util:parse_qvalues(AcceptEncodingHeader) of
+ invalid_qvalue_string ->
+ bad_accept_encoding_value;
+ QList ->
+ mochiweb_util:pick_accepted_encodings(
+ QList, SupportedEncodings, "identity"
+ )
+ end.
+
+test() ->
+ ok = test_range(),
+ ok.
+
+test_range() ->
+ %% valid, single ranges
+ io:format("Testing parse_range_request with valid single ranges~n"),
+ io:format("1"),
+ [{20, 30}] = parse_range_request("bytes=20-30"),
+ io:format("2"),
+ [{20, none}] = parse_range_request("bytes=20-"),
+ io:format("3"),
+ [{none, 20}] = parse_range_request("bytes=-20"),
+ io:format(".. ok ~n"),
+
+ %% invalid, single ranges
+ io:format("Testing parse_range_request with invalid ranges~n"),
+ io:format("1"),
+ fail = parse_range_request(""),
+ io:format("2"),
+ fail = parse_range_request("garbage"),
+ io:format("3"),
+ fail = parse_range_request("bytes=-20-30"),
+ io:format(".. ok ~n"),
+
+ %% valid, multiple range
+ io:format("Testing parse_range_request with valid multiple ranges~n"),
+ io:format("1"),
+ [{20, 30}, {50, 100}, {110, 200}] =
+ parse_range_request("bytes=20-30,50-100,110-200"),
+ io:format("2"),
+ [{20, none}, {50, 100}, {none, 200}] =
+ parse_range_request("bytes=20-,50-100,-200"),
+ io:format(".. ok~n"),
+
+ %% no ranges
+ io:format("Testing out parse_range_request with no ranges~n"),
+ io:format("1"),
+ [] = parse_range_request("bytes="),
+ io:format(".. ok~n"),
+
+ Body = <<"012345678901234567890123456789012345678901234567890123456789">>,
+ BodySize = byte_size(Body), %% 60
+ BodySize = 60,
+
+ %% these values assume BodySize =:= 60
+ io:format("Testing out range_skip_length on valid ranges~n"),
+ io:format("1"),
+ {1,9} = range_skip_length({1,9}, BodySize), %% 1-9
+ io:format("2"),
+ {10,10} = range_skip_length({10,19}, BodySize), %% 10-19
+ io:format("3"),
+ {40, 20} = range_skip_length({none, 20}, BodySize), %% -20
+ io:format("4"),
+ {30, 30} = range_skip_length({30, none}, BodySize), %% 30-
+ io:format(".. ok ~n"),
+
+ %% valid edge cases for range_skip_length
+ io:format("Testing out range_skip_length on valid edge case ranges~n"),
+ io:format("1"),
+ {BodySize, 0} = range_skip_length({none, 0}, BodySize),
+ io:format("2"),
+ {0, BodySize} = range_skip_length({none, BodySize}, BodySize),
+ io:format("3"),
+ {0, BodySize} = range_skip_length({0, none}, BodySize),
+ BodySizeLess1 = BodySize - 1,
+ io:format("4"),
+ {BodySizeLess1, 1} = range_skip_length({BodySize - 1, none}, BodySize),
+
+ %% out of range, return whole thing
+ io:format("5"),
+ {0, BodySize} = range_skip_length({none, BodySize + 1}, BodySize),
+ io:format("6"),
+ {0, BodySize} = range_skip_length({none, -1}, BodySize),
+ io:format(".. ok ~n"),
+
+ %% invalid ranges
+ io:format("Testing out range_skip_length on invalid ranges~n"),
+ io:format("1"),
+ invalid_range = range_skip_length({-1, 30}, BodySize),
+ io:format("2"),
+ invalid_range = range_skip_length({0, BodySize + 1}, BodySize),
+ io:format("3"),
+ invalid_range = range_skip_length({-1, BodySize + 1}, BodySize),
+ io:format("4"),
+ invalid_range = range_skip_length({BodySize, 40}, BodySize),
+ io:format("5"),
+ invalid_range = range_skip_length({-1, none}, BodySize),
+ io:format("6"),
+ invalid_range = range_skip_length({BodySize, none}, BodySize),
+ io:format(".. ok ~n"),
+ ok.
+
diff --git a/apps/mochiweb/src/mochiweb_response.erl b/apps/mochiweb/src/mochiweb_response.erl
new file mode 100644
index 00000000..6285c4c4
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_response.erl
@@ -0,0 +1,56 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Response abstraction.
+
+-module(mochiweb_response, [Request, Code, Headers]).
+-author('bob@mochimedia.com').
+
+-define(QUIP, "Any of you quaids got a smint?").
+
+-export([get_header_value/1, get/1, dump/0]).
+-export([send/1, write_chunk/1]).
+
+%% @spec get_header_value(string() | atom() | binary()) -> string() | undefined
+%% @doc Get the value of the given response header.
+get_header_value(K) ->
+ mochiweb_headers:get_value(K, Headers).
+
+%% @spec get(request | code | headers) -> term()
+%% @doc Return the internal representation of the given field.
+get(request) ->
+ Request;
+get(code) ->
+ Code;
+get(headers) ->
+ Headers.
+
+%% @spec dump() -> {mochiweb_request, [{atom(), term()}]}
+%% @doc Dump the internal representation to a "human readable" set of terms
+%% for debugging/inspection purposes.
+dump() ->
+ [{request, Request:dump()},
+ {code, Code},
+ {headers, mochiweb_headers:to_list(Headers)}].
+
+%% @spec send(iodata()) -> ok
+%% @doc Send data over the socket if the method is not HEAD.
+send(Data) ->
+ case Request:get(method) of
+ 'HEAD' ->
+ ok;
+ _ ->
+ Request:send(Data)
+ end.
+
+%% @spec write_chunk(iodata()) -> ok
+%% @doc Write a chunk of a HTTP chunked response. If Data is zero length,
+%% then the chunked response will be finished.
+write_chunk(Data) ->
+ case Request:get(version) of
+ Version when Version >= {1, 1} ->
+ Length = iolist_size(Data),
+ send([io_lib:format("~.16b\r\n", [Length]), Data, <<"\r\n">>]);
+ _ ->
+ send(Data)
+ end.
diff --git a/apps/mochiweb/src/mochiweb_skel.erl b/apps/mochiweb/src/mochiweb_skel.erl
new file mode 100644
index 00000000..36b48be5
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_skel.erl
@@ -0,0 +1,73 @@
+-module(mochiweb_skel).
+-export([skelcopy/2]).
+
+-include_lib("kernel/include/file.hrl").
+
+%% External API
+
+skelcopy(DestDir, Name) ->
+ ok = ensuredir(DestDir),
+ LDst = case length(filename:dirname(DestDir)) of
+ 1 -> %% handle case when dirname returns "/"
+ 0;
+ N ->
+ N + 1
+ end,
+ skelcopy(src(), DestDir, Name, LDst),
+ ok = file:make_symlink(
+ filename:join(filename:dirname(code:which(?MODULE)), ".."),
+ filename:join([DestDir, Name, "deps", "mochiweb-src"])).
+
+
+%% Internal API
+
+src() ->
+ Dir = filename:dirname(code:which(?MODULE)),
+ filename:join(Dir, "../priv/skel").
+
+skel() ->
+ "skel".
+
+skelcopy(Src, DestDir, Name, LDst) ->
+ Dest = re:replace(filename:basename(Src), skel(), Name,
+ [global, {return, list}]),
+ case file:read_file_info(Src) of
+ {ok, #file_info{type=directory, mode=Mode}} ->
+ Dir = DestDir ++ "/" ++ Dest,
+ EDst = lists:nthtail(LDst, Dir),
+ ok = ensuredir(Dir),
+ ok = file:write_file_info(Dir, #file_info{mode=Mode}),
+ {ok, Files} = file:list_dir(Src),
+ io:format("~s/~n", [EDst]),
+ lists:foreach(fun ("." ++ _) -> ok;
+ (F) ->
+ skelcopy(filename:join(Src, F),
+ Dir,
+ Name,
+ LDst)
+ end,
+ Files),
+ ok;
+ {ok, #file_info{type=regular, mode=Mode}} ->
+ OutFile = filename:join(DestDir, Dest),
+ {ok, B} = file:read_file(Src),
+ S = re:replace(binary_to_list(B), skel(), Name,
+ [{return, list}, global]),
+ ok = file:write_file(OutFile, list_to_binary(S)),
+ ok = file:write_file_info(OutFile, #file_info{mode=Mode}),
+ io:format(" ~s~n", [filename:basename(Src)]),
+ ok;
+ {ok, _} ->
+ io:format("ignored source file: ~p~n", [Src]),
+ ok
+ end.
+
+ensuredir(Dir) ->
+ case file:make_dir(Dir) of
+ ok ->
+ ok;
+ {error, eexist} ->
+ ok;
+ E ->
+ E
+ end.
diff --git a/apps/mochiweb/src/mochiweb_socket_server.erl b/apps/mochiweb/src/mochiweb_socket_server.erl
new file mode 100644
index 00000000..7aafe290
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_socket_server.erl
@@ -0,0 +1,248 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc MochiWeb socket server.
+
+-module(mochiweb_socket_server).
+-author('bob@mochimedia.com').
+-behaviour(gen_server).
+
+-export([start/1, stop/1]).
+-export([init/1, handle_call/3, handle_cast/2, terminate/2, code_change/3,
+ handle_info/2]).
+-export([get/2]).
+
+-export([acceptor_loop/1]).
+
+-record(mochiweb_socket_server,
+ {port,
+ loop,
+ name=undefined,
+ max=2048,
+ ip=any,
+ listen=null,
+ acceptor=null,
+ backlog=128}).
+
+start(State=#mochiweb_socket_server{}) ->
+ start_server(State);
+start(Options) ->
+ start(parse_options(Options)).
+
+get(Name, Property) ->
+ gen_server:call(Name, {get, Property}).
+
+stop(Name) when is_atom(Name) ->
+ gen_server:cast(Name, stop);
+stop(Pid) when is_pid(Pid) ->
+ gen_server:cast(Pid, stop);
+stop({local, Name}) ->
+ stop(Name);
+stop({global, Name}) ->
+ stop(Name);
+stop(Options) ->
+ State = parse_options(Options),
+ stop(State#mochiweb_socket_server.name).
+
+%% Internal API
+
+parse_options(Options) ->
+ parse_options(Options, #mochiweb_socket_server{}).
+
+parse_options([], State) ->
+ State;
+parse_options([{name, L} | Rest], State) when is_list(L) ->
+ Name = {local, list_to_atom(L)},
+ parse_options(Rest, State#mochiweb_socket_server{name=Name});
+parse_options([{name, A} | Rest], State) when is_atom(A) ->
+ Name = {local, A},
+ parse_options(Rest, State#mochiweb_socket_server{name=Name});
+parse_options([{name, Name} | Rest], State) ->
+ parse_options(Rest, State#mochiweb_socket_server{name=Name});
+parse_options([{port, L} | Rest], State) when is_list(L) ->
+ Port = list_to_integer(L),
+ parse_options(Rest, State#mochiweb_socket_server{port=Port});
+parse_options([{port, Port} | Rest], State) ->
+ parse_options(Rest, State#mochiweb_socket_server{port=Port});
+parse_options([{ip, Ip} | Rest], State) ->
+ ParsedIp = case Ip of
+ any ->
+ any;
+ Ip when is_tuple(Ip) ->
+ Ip;
+ Ip when is_list(Ip) ->
+ {ok, IpTuple} = inet_parse:address(Ip),
+ IpTuple
+ end,
+ parse_options(Rest, State#mochiweb_socket_server{ip=ParsedIp});
+parse_options([{loop, Loop} | Rest], State) ->
+ parse_options(Rest, State#mochiweb_socket_server{loop=Loop});
+parse_options([{backlog, Backlog} | Rest], State) ->
+ parse_options(Rest, State#mochiweb_socket_server{backlog=Backlog});
+parse_options([{max, Max} | Rest], State) ->
+ MaxInt = case Max of
+ Max when is_list(Max) ->
+ list_to_integer(Max);
+ Max when is_integer(Max) ->
+ Max
+ end,
+ parse_options(Rest, State#mochiweb_socket_server{max=MaxInt}).
+
+start_server(State=#mochiweb_socket_server{name=Name}) ->
+ case Name of
+ undefined ->
+ gen_server:start_link(?MODULE, State, []);
+ _ ->
+ gen_server:start_link(Name, ?MODULE, State, [])
+ end.
+
+ipv6_supported() ->
+ case (catch inet:getaddr("localhost", inet6)) of
+ {ok, _Addr} ->
+ true;
+ {error, _} ->
+ false
+ end.
+
+init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) ->
+ process_flag(trap_exit, true),
+ BaseOpts = [binary,
+ {reuseaddr, true},
+ {packet, 0},
+ {backlog, Backlog},
+ {recbuf, 8192},
+ {active, false},
+ {nodelay, true}],
+ Opts = case Ip of
+ any ->
+ case ipv6_supported() of % IPv4, and IPv6 if supported
+ true -> [inet, inet6 | BaseOpts];
+ _ -> BaseOpts
+ end;
+ {_, _, _, _} -> % IPv4
+ [inet, {ip, Ip} | BaseOpts];
+ {_, _, _, _, _, _, _, _} -> % IPv6
+ [inet6, {ip, Ip} | BaseOpts]
+ end,
+ case gen_tcp_listen(Port, Opts, State) of
+ {stop, eacces} ->
+ case Port < 1024 of
+ true ->
+ case fdsrv:start() of
+ {ok, _} ->
+ case fdsrv:bind_socket(tcp, Port) of
+ {ok, Fd} ->
+ gen_tcp_listen(Port, [{fd, Fd} | Opts], State);
+ _ ->
+ {stop, fdsrv_bind_failed}
+ end;
+ _ ->
+ {stop, fdsrv_start_failed}
+ end;
+ false ->
+ {stop, eacces}
+ end;
+ Other ->
+ Other
+ end.
+
+gen_tcp_listen(Port, Opts, State) ->
+ case gen_tcp:listen(Port, Opts) of
+ {ok, Listen} ->
+ {ok, ListenPort} = inet:port(Listen),
+ {ok, new_acceptor(State#mochiweb_socket_server{listen=Listen,
+ port=ListenPort})};
+ {error, Reason} ->
+ {stop, Reason}
+ end.
+
+new_acceptor(State=#mochiweb_socket_server{max=0}) ->
+ io:format("Not accepting new connections~n"),
+ State#mochiweb_socket_server{acceptor=null};
+new_acceptor(State=#mochiweb_socket_server{listen=Listen,loop=Loop}) ->
+ Pid = proc_lib:spawn_link(?MODULE, acceptor_loop,
+ [{self(), Listen, Loop}]),
+ State#mochiweb_socket_server{acceptor=Pid}.
+
+call_loop({M, F}, Socket) ->
+ M:F(Socket);
+call_loop(Loop, Socket) ->
+ Loop(Socket).
+
+acceptor_loop({Server, Listen, Loop}) ->
+ case catch gen_tcp:accept(Listen) of
+ {ok, Socket} ->
+ gen_server:cast(Server, {accepted, self()}),
+ call_loop(Loop, Socket);
+ {error, closed} ->
+ exit({error, closed});
+ Other ->
+ error_logger:error_report(
+ [{application, mochiweb},
+ "Accept failed error",
+ lists:flatten(io_lib:format("~p", [Other]))]),
+ exit({error, accept_failed})
+ end.
+
+
+do_get(port, #mochiweb_socket_server{port=Port}) ->
+ Port.
+
+handle_call({get, Property}, _From, State) ->
+ Res = do_get(Property, State),
+ {reply, Res, State};
+handle_call(_Message, _From, State) ->
+ Res = error,
+ {reply, Res, State}.
+
+handle_cast({accepted, Pid},
+ State=#mochiweb_socket_server{acceptor=Pid, max=Max}) ->
+ % io:format("accepted ~p~n", [Pid]),
+ State1 = State#mochiweb_socket_server{max=Max - 1},
+ {noreply, new_acceptor(State1)};
+handle_cast(stop, State) ->
+ {stop, normal, State}.
+
+terminate(_Reason, #mochiweb_socket_server{listen=Listen, port=Port}) ->
+ gen_tcp:close(Listen),
+ case Port < 1024 of
+ true ->
+ catch fdsrv:stop(),
+ ok;
+ false ->
+ ok
+ end.
+
+code_change(_OldVsn, State, _Extra) ->
+ State.
+
+handle_info({'EXIT', Pid, normal},
+ State=#mochiweb_socket_server{acceptor=Pid}) ->
+ % io:format("normal acceptor down~n"),
+ {noreply, new_acceptor(State)};
+handle_info({'EXIT', Pid, Reason},
+ State=#mochiweb_socket_server{acceptor=Pid}) ->
+ error_logger:error_report({?MODULE, ?LINE,
+ {acceptor_error, Reason}}),
+ timer:sleep(100),
+ {noreply, new_acceptor(State)};
+handle_info({'EXIT', _LoopPid, Reason},
+ State=#mochiweb_socket_server{acceptor=Pid, max=Max}) ->
+ case Reason of
+ normal ->
+ ok;
+ _ ->
+ error_logger:error_report({?MODULE, ?LINE,
+ {child_error, Reason}})
+ end,
+ State1 = State#mochiweb_socket_server{max=Max + 1},
+ State2 = case Pid of
+ null ->
+ new_acceptor(State1);
+ _ ->
+ State1
+ end,
+ {noreply, State2};
+handle_info(Info, State) ->
+ error_logger:info_report([{'INFO', Info}, {'State', State}]),
+ {noreply, State}.
diff --git a/apps/mochiweb/src/mochiweb_sup.erl b/apps/mochiweb/src/mochiweb_sup.erl
new file mode 100644
index 00000000..5cb525b5
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_sup.erl
@@ -0,0 +1,34 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Supervisor for the mochiweb application.
+
+-module(mochiweb_sup).
+-author('bob@mochimedia.com').
+
+-behaviour(supervisor).
+
+%% External exports
+-export([start_link/0, upgrade/0]).
+
+%% supervisor callbacks
+-export([init/1]).
+
+%% @spec start_link() -> ServerRet
+%% @doc API for starting the supervisor.
+start_link() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+%% @spec upgrade() -> ok
+%% @doc Add processes if necessary.
+upgrade() ->
+ {ok, {_, Specs}} = init([]),
+ [supervisor:start_child(?MODULE, Spec) || Spec <- Specs],
+ ok.
+
+%% @spec init([]) -> SupervisorTree
+%% @doc supervisor callback, ensures yaws is in embedded mode and then
+%% returns the supervisor tree.
+init([]) ->
+ Processes = [],
+ {ok, {{one_for_one, 10, 10}, Processes}}.
diff --git a/apps/mochiweb/src/mochiweb_util.erl b/apps/mochiweb/src/mochiweb_util.erl
new file mode 100644
index 00000000..d8fc89d5
--- /dev/null
+++ b/apps/mochiweb/src/mochiweb_util.erl
@@ -0,0 +1,859 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for parsing and quoting.
+
+-module(mochiweb_util).
+-author('bob@mochimedia.com').
+-export([join/2, quote_plus/1, urlencode/1, parse_qs/1, unquote/1]).
+-export([path_split/1]).
+-export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]).
+-export([guess_mime/1, parse_header/1]).
+-export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]).
+-export([record_to_proplist/2, record_to_proplist/3]).
+-export([safe_relative_path/1, partition/2]).
+-export([parse_qvalues/1, pick_accepted_encodings/3]).
+-export([test/0]).
+
+-define(PERCENT, 37). % $\%
+-define(FULLSTOP, 46). % $\.
+-define(IS_HEX(C), ((C >= $0 andalso C =< $9) orelse
+ (C >= $a andalso C =< $f) orelse
+ (C >= $A andalso C =< $F))).
+-define(QS_SAFE(C), ((C >= $a andalso C =< $z) orelse
+ (C >= $A andalso C =< $Z) orelse
+ (C >= $0 andalso C =< $9) orelse
+ (C =:= ?FULLSTOP orelse C =:= $- orelse C =:= $~ orelse
+ C =:= $_))).
+
+hexdigit(C) when C < 10 -> $0 + C;
+hexdigit(C) when C < 16 -> $A + (C - 10).
+
+unhexdigit(C) when C >= $0, C =< $9 -> C - $0;
+unhexdigit(C) when C >= $a, C =< $f -> C - $a + 10;
+unhexdigit(C) when C >= $A, C =< $F -> C - $A + 10.
+
+%% @spec partition(String, Sep) -> {String, [], []} | {Prefix, Sep, Postfix}
+%% @doc Inspired by Python 2.5's str.partition:
+%% partition("foo/bar", "/") = {"foo", "/", "bar"},
+%% partition("foo", "/") = {"foo", "", ""}.
+partition(String, Sep) ->
+ case partition(String, Sep, []) of
+ undefined ->
+ {String, "", ""};
+ Result ->
+ Result
+ end.
+
+partition("", _Sep, _Acc) ->
+ undefined;
+partition(S, Sep, Acc) ->
+ case partition2(S, Sep) of
+ undefined ->
+ [C | Rest] = S,
+ partition(Rest, Sep, [C | Acc]);
+ Rest ->
+ {lists:reverse(Acc), Sep, Rest}
+ end.
+
+partition2(Rest, "") ->
+ Rest;
+partition2([C | R1], [C | R2]) ->
+ partition2(R1, R2);
+partition2(_S, _Sep) ->
+ undefined.
+
+
+
+%% @spec safe_relative_path(string()) -> string() | undefined
+%% @doc Return the reduced version of a relative path or undefined if it
+%% is not safe. safe relative paths can be joined with an absolute path
+%% and will result in a subdirectory of the absolute path.
+safe_relative_path("/" ++ _) ->
+ undefined;
+safe_relative_path(P) ->
+ safe_relative_path(P, []).
+
+safe_relative_path("", Acc) ->
+ case Acc of
+ [] ->
+ "";
+ _ ->
+ string:join(lists:reverse(Acc), "/")
+ end;
+safe_relative_path(P, Acc) ->
+ case partition(P, "/") of
+ {"", "/", _} ->
+ %% /foo or foo//bar
+ undefined;
+ {"..", _, _} when Acc =:= [] ->
+ undefined;
+ {"..", _, Rest} ->
+ safe_relative_path(Rest, tl(Acc));
+ {Part, "/", ""} ->
+ safe_relative_path("", ["", Part | Acc]);
+ {Part, _, Rest} ->
+ safe_relative_path(Rest, [Part | Acc])
+ end.
+
+%% @spec shell_quote(string()) -> string()
+%% @doc Quote a string according to UNIX shell quoting rules, returns a string
+%% surrounded by double quotes.
+shell_quote(L) ->
+ shell_quote(L, [$\"]).
+
+%% @spec cmd_port([string()], Options) -> port()
+%% @doc open_port({spawn, mochiweb_util:cmd_string(Argv)}, Options).
+cmd_port(Argv, Options) ->
+ open_port({spawn, cmd_string(Argv)}, Options).
+
+%% @spec cmd([string()]) -> string()
+%% @doc os:cmd(cmd_string(Argv)).
+cmd(Argv) ->
+ os:cmd(cmd_string(Argv)).
+
+%% @spec cmd_string([string()]) -> string()
+%% @doc Create a shell quoted command string from a list of arguments.
+cmd_string(Argv) ->
+ join([shell_quote(X) || X <- Argv], " ").
+
+%% @spec join([string()], Separator) -> string()
+%% @doc Join a list of strings together with the given separator
+%% string or char.
+join([], _Separator) ->
+ [];
+join([S], _Separator) ->
+ lists:flatten(S);
+join(Strings, Separator) ->
+ lists:flatten(revjoin(lists:reverse(Strings), Separator, [])).
+
+revjoin([], _Separator, Acc) ->
+ Acc;
+revjoin([S | Rest], Separator, []) ->
+ revjoin(Rest, Separator, [S]);
+revjoin([S | Rest], Separator, Acc) ->
+ revjoin(Rest, Separator, [S, Separator | Acc]).
+
+%% @spec quote_plus(atom() | integer() | float() | string() | binary()) -> string()
+%% @doc URL safe encoding of the given term.
+quote_plus(Atom) when is_atom(Atom) ->
+ quote_plus(atom_to_list(Atom));
+quote_plus(Int) when is_integer(Int) ->
+ quote_plus(integer_to_list(Int));
+quote_plus(Binary) when is_binary(Binary) ->
+ quote_plus(binary_to_list(Binary));
+quote_plus(Float) when is_float(Float) ->
+ quote_plus(mochinum:digits(Float));
+quote_plus(String) ->
+ quote_plus(String, []).
+
+quote_plus([], Acc) ->
+ lists:reverse(Acc);
+quote_plus([C | Rest], Acc) when ?QS_SAFE(C) ->
+ quote_plus(Rest, [C | Acc]);
+quote_plus([$\s | Rest], Acc) ->
+ quote_plus(Rest, [$+ | Acc]);
+quote_plus([C | Rest], Acc) ->
+ <<Hi:4, Lo:4>> = <<C>>,
+ quote_plus(Rest, [hexdigit(Lo), hexdigit(Hi), ?PERCENT | Acc]).
+
+%% @spec urlencode([{Key, Value}]) -> string()
+%% @doc URL encode the property list.
+urlencode(Props) ->
+ RevPairs = lists:foldl(fun ({K, V}, Acc) ->
+ [[quote_plus(K), $=, quote_plus(V)] | Acc]
+ end, [], Props),
+ lists:flatten(revjoin(RevPairs, $&, [])).
+
+%% @spec parse_qs(string() | binary()) -> [{Key, Value}]
+%% @doc Parse a query string or application/x-www-form-urlencoded.
+parse_qs(Binary) when is_binary(Binary) ->
+ parse_qs(binary_to_list(Binary));
+parse_qs(String) ->
+ parse_qs(String, []).
+
+parse_qs([], Acc) ->
+ lists:reverse(Acc);
+parse_qs(String, Acc) ->
+ {Key, Rest} = parse_qs_key(String),
+ {Value, Rest1} = parse_qs_value(Rest),
+ parse_qs(Rest1, [{Key, Value} | Acc]).
+
+parse_qs_key(String) ->
+ parse_qs_key(String, []).
+
+parse_qs_key([], Acc) ->
+ {qs_revdecode(Acc), ""};
+parse_qs_key([$= | Rest], Acc) ->
+ {qs_revdecode(Acc), Rest};
+parse_qs_key(Rest=[$; | _], Acc) ->
+ {qs_revdecode(Acc), Rest};
+parse_qs_key(Rest=[$& | _], Acc) ->
+ {qs_revdecode(Acc), Rest};
+parse_qs_key([C | Rest], Acc) ->
+ parse_qs_key(Rest, [C | Acc]).
+
+parse_qs_value(String) ->
+ parse_qs_value(String, []).
+
+parse_qs_value([], Acc) ->
+ {qs_revdecode(Acc), ""};
+parse_qs_value([$; | Rest], Acc) ->
+ {qs_revdecode(Acc), Rest};
+parse_qs_value([$& | Rest], Acc) ->
+ {qs_revdecode(Acc), Rest};
+parse_qs_value([C | Rest], Acc) ->
+ parse_qs_value(Rest, [C | Acc]).
+
+%% @spec unquote(string() | binary()) -> string()
+%% @doc Unquote a URL encoded string.
+unquote(Binary) when is_binary(Binary) ->
+ unquote(binary_to_list(Binary));
+unquote(String) ->
+ qs_revdecode(lists:reverse(String)).
+
+qs_revdecode(S) ->
+ qs_revdecode(S, []).
+
+qs_revdecode([], Acc) ->
+ Acc;
+qs_revdecode([$+ | Rest], Acc) ->
+ qs_revdecode(Rest, [$\s | Acc]);
+qs_revdecode([Lo, Hi, ?PERCENT | Rest], Acc) when ?IS_HEX(Lo), ?IS_HEX(Hi) ->
+ qs_revdecode(Rest, [(unhexdigit(Lo) bor (unhexdigit(Hi) bsl 4)) | Acc]);
+qs_revdecode([C | Rest], Acc) ->
+ qs_revdecode(Rest, [C | Acc]).
+
+%% @spec urlsplit(Url) -> {Scheme, Netloc, Path, Query, Fragment}
+%% @doc Return a 5-tuple, does not expand % escapes. Only supports HTTP style
+%% URLs.
+urlsplit(Url) ->
+ {Scheme, Url1} = urlsplit_scheme(Url),
+ {Netloc, Url2} = urlsplit_netloc(Url1),
+ {Path, Query, Fragment} = urlsplit_path(Url2),
+ {Scheme, Netloc, Path, Query, Fragment}.
+
+urlsplit_scheme(Url) ->
+ urlsplit_scheme(Url, []).
+
+urlsplit_scheme([], Acc) ->
+ {"", lists:reverse(Acc)};
+urlsplit_scheme(":" ++ Rest, Acc) ->
+ {string:to_lower(lists:reverse(Acc)), Rest};
+urlsplit_scheme([C | Rest], Acc) ->
+ urlsplit_scheme(Rest, [C | Acc]).
+
+urlsplit_netloc("//" ++ Rest) ->
+ urlsplit_netloc(Rest, []);
+urlsplit_netloc(Path) ->
+ {"", Path}.
+
+urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# ->
+ {lists:reverse(Acc), Rest};
+urlsplit_netloc([C | Rest], Acc) ->
+ urlsplit_netloc(Rest, [C | Acc]).
+
+
+%% @spec path_split(string()) -> {Part, Rest}
+%% @doc Split a path starting from the left, as in URL traversal.
+%% path_split("foo/bar") = {"foo", "bar"},
+%% path_split("/foo/bar") = {"", "foo/bar"}.
+path_split(S) ->
+ path_split(S, []).
+
+path_split("", Acc) ->
+ {lists:reverse(Acc), ""};
+path_split("/" ++ Rest, Acc) ->
+ {lists:reverse(Acc), Rest};
+path_split([C | Rest], Acc) ->
+ path_split(Rest, [C | Acc]).
+
+
+%% @spec urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> string()
+%% @doc Assemble a URL from the 5-tuple. Path must be absolute.
+urlunsplit({Scheme, Netloc, Path, Query, Fragment}) ->
+ lists:flatten([case Scheme of "" -> ""; _ -> [Scheme, "://"] end,
+ Netloc,
+ urlunsplit_path({Path, Query, Fragment})]).
+
+%% @spec urlunsplit_path({Path, Query, Fragment}) -> string()
+%% @doc Assemble a URL path from the 3-tuple.
+urlunsplit_path({Path, Query, Fragment}) ->
+ lists:flatten([Path,
+ case Query of "" -> ""; _ -> [$? | Query] end,
+ case Fragment of "" -> ""; _ -> [$# | Fragment] end]).
+
+%% @spec urlsplit_path(Url) -> {Path, Query, Fragment}
+%% @doc Return a 3-tuple, does not expand % escapes. Only supports HTTP style
+%% paths.
+urlsplit_path(Path) ->
+ urlsplit_path(Path, []).
+
+urlsplit_path("", Acc) ->
+ {lists:reverse(Acc), "", ""};
+urlsplit_path("?" ++ Rest, Acc) ->
+ {Query, Fragment} = urlsplit_query(Rest),
+ {lists:reverse(Acc), Query, Fragment};
+urlsplit_path("#" ++ Rest, Acc) ->
+ {lists:reverse(Acc), "", Rest};
+urlsplit_path([C | Rest], Acc) ->
+ urlsplit_path(Rest, [C | Acc]).
+
+urlsplit_query(Query) ->
+ urlsplit_query(Query, []).
+
+urlsplit_query("", Acc) ->
+ {lists:reverse(Acc), ""};
+urlsplit_query("#" ++ Rest, Acc) ->
+ {lists:reverse(Acc), Rest};
+urlsplit_query([C | Rest], Acc) ->
+ urlsplit_query(Rest, [C | Acc]).
+
+%% @spec guess_mime(string()) -> string()
+%% @doc Guess the mime type of a file by the extension of its filename.
+guess_mime(File) ->
+ case filename:extension(File) of
+ ".html" ->
+ "text/html";
+ ".xhtml" ->
+ "application/xhtml+xml";
+ ".xml" ->
+ "application/xml";
+ ".css" ->
+ "text/css";
+ ".js" ->
+ "application/x-javascript";
+ ".jpg" ->
+ "image/jpeg";
+ ".gif" ->
+ "image/gif";
+ ".png" ->
+ "image/png";
+ ".swf" ->
+ "application/x-shockwave-flash";
+ ".zip" ->
+ "application/zip";
+ ".bz2" ->
+ "application/x-bzip2";
+ ".gz" ->
+ "application/x-gzip";
+ ".tar" ->
+ "application/x-tar";
+ ".tgz" ->
+ "application/x-gzip";
+ ".txt" ->
+ "text/plain";
+ ".doc" ->
+ "application/msword";
+ ".pdf" ->
+ "application/pdf";
+ ".xls" ->
+ "application/vnd.ms-excel";
+ ".rtf" ->
+ "application/rtf";
+ ".mov" ->
+ "video/quicktime";
+ ".mp3" ->
+ "audio/mpeg";
+ ".z" ->
+ "application/x-compress";
+ ".wav" ->
+ "audio/x-wav";
+ ".ico" ->
+ "image/x-icon";
+ ".bmp" ->
+ "image/bmp";
+ ".m4a" ->
+ "audio/mpeg";
+ ".m3u" ->
+ "audio/x-mpegurl";
+ ".exe" ->
+ "application/octet-stream";
+ ".csv" ->
+ "text/csv";
+ _ ->
+ "text/plain"
+ end.
+
+%% @spec parse_header(string()) -> {Type, [{K, V}]}
+%% @doc Parse a Content-Type like header, return the main Content-Type
+%% and a property list of options.
+parse_header(String) ->
+ %% TODO: This is exactly as broken as Python's cgi module.
+ %% Should parse properly like mochiweb_cookies.
+ [Type | Parts] = [string:strip(S) || S <- string:tokens(String, ";")],
+ F = fun (S, Acc) ->
+ case lists:splitwith(fun (C) -> C =/= $= end, S) of
+ {"", _} ->
+ %% Skip anything with no name
+ Acc;
+ {_, ""} ->
+ %% Skip anything with no value
+ Acc;
+ {Name, [$\= | Value]} ->
+ [{string:to_lower(string:strip(Name)),
+ unquote_header(string:strip(Value))} | Acc]
+ end
+ end,
+ {string:to_lower(Type),
+ lists:foldr(F, [], Parts)}.
+
+unquote_header("\"" ++ Rest) ->
+ unquote_header(Rest, []);
+unquote_header(S) ->
+ S.
+
+unquote_header("", Acc) ->
+ lists:reverse(Acc);
+unquote_header("\"", Acc) ->
+ lists:reverse(Acc);
+unquote_header([$\\, C | Rest], Acc) ->
+ unquote_header(Rest, [C | Acc]);
+unquote_header([C | Rest], Acc) ->
+ unquote_header(Rest, [C | Acc]).
+
+%% @spec record_to_proplist(Record, Fields) -> proplist()
+%% @doc calls record_to_proplist/3 with a default TypeKey of '__record'
+record_to_proplist(Record, Fields) ->
+ record_to_proplist(Record, Fields, '__record').
+
+%% @spec record_to_proplist(Record, Fields, TypeKey) -> proplist()
+%% @doc Return a proplist of the given Record with each field in the
+%% Fields list set as a key with the corresponding value in the Record.
+%% TypeKey is the key that is used to store the record type
+%% Fields should be obtained by calling record_info(fields, record_type)
+%% where record_type is the record type of Record
+record_to_proplist(Record, Fields, TypeKey)
+ when tuple_size(Record) - 1 =:= length(Fields) ->
+ lists:zip([TypeKey | Fields], tuple_to_list(Record)).
+
+
+shell_quote([], Acc) ->
+ lists:reverse([$\" | Acc]);
+shell_quote([C | Rest], Acc) when C =:= $\" orelse C =:= $\` orelse
+ C =:= $\\ orelse C =:= $\$ ->
+ shell_quote(Rest, [C, $\\ | Acc]);
+shell_quote([C | Rest], Acc) ->
+ shell_quote(Rest, [C | Acc]).
+
+%% @spec parse_qvalues(string()) -> [qvalue()] | error()
+%% @type qvalue() -> {element(), q()}
+%% @type element() -> string()
+%% @type q() -> 0.0 .. 1.0
+%% @type error() -> invalid_qvalue_string
+%%
+%% @doc Parses a list (given as a string) of elements with Q values associated
+%% to them. Elements are separated by commas and each element is separated
+%% from its Q value by a semicolon. Q values are optional but when missing
+%% the value of an element is considered as 1.0. A Q value is always in the
+%% range [0.0, 1.0]. A Q value list is used for example as the value of the
+%% HTTP "Accept-Encoding" header.
+%%
+%% Q values are described in section 2.9 of the RFC 2616 (HTTP 1.1).
+%%
+%% Example:
+%%
+%% parse_qvalues("gzip; q=0.5, deflate, identity;q=0.0") ->
+%% [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}]
+%%
+parse_qvalues(QValuesStr) ->
+ try
+ {ok, Re} = re:compile("^\\s*q\\s*=\\s*((?:0|1)(?:\\.\\d{1,3})?)\\s*$"),
+ lists:map(
+ fun(Pair) ->
+ case string:tokens(Pair, ";") of
+ [Enc] ->
+ {string:strip(Enc), 1.0};
+ [Enc, QStr] ->
+ case re:run(QStr, Re, [{capture, [1], list}]) of
+ {match, [Q]} ->
+ QVal = case Q of
+ "0" ->
+ 0.0;
+ "1" ->
+ 1.0;
+ Else ->
+ list_to_float(Else)
+ end,
+ case QVal < 0.0 orelse QVal > 1.0 of
+ false ->
+ {string:strip(Enc), QVal}
+ end
+ end
+ end
+ end,
+ string:tokens(string:to_lower(QValuesStr), ",")
+ )
+ catch
+ _Type:_Error ->
+ invalid_qvalue_string
+ end.
+
+%% @spec pick_accepted_encodings(qvalues(), [encoding()], encoding()) ->
+%% [encoding()]
+%% @type qvalues() -> [ {encoding(), q()} ]
+%% @type encoding() -> string()
+%% @type q() -> 0.0 .. 1.0
+%%
+%% @doc Determines which encodings specified in the given Q values list are
+%% valid according to a list of supported encodings and a default encoding.
+%%
+%% The returned list of encodings is sorted, descendingly, according to the
+%% Q values of the given list. The last element of this list is the given
+%% default encoding unless this encoding is explicitily or implicitily
+%% marked with a Q value of 0.0 in the given Q values list.
+%% Note: encodings with the same Q value are kept in the same order as
+%% found in the input Q values list.
+%%
+%% This encoding picking process is described in section 14.3 of the
+%% RFC 2616 (HTTP 1.1).
+%%
+%% Example:
+%%
+%% pick_accepted_encodings(
+%% [{"gzip", 0.5}, {"deflate", 1.0}],
+%% ["gzip", "identity"],
+%% "identity"
+%% ) ->
+%% ["gzip", "identity"]
+%%
+pick_accepted_encodings(AcceptedEncs, SupportedEncs, DefaultEnc) ->
+ SortedQList = lists:reverse(
+ lists:sort(fun({_, Q1}, {_, Q2}) -> Q1 < Q2 end, AcceptedEncs)
+ ),
+ {Accepted, Refused} = lists:foldr(
+ fun({E, Q}, {A, R}) ->
+ case Q > 0.0 of
+ true ->
+ {[E | A], R};
+ false ->
+ {A, [E | R]}
+ end
+ end,
+ {[], []},
+ SortedQList
+ ),
+ Refused1 = lists:foldr(
+ fun(Enc, Acc) ->
+ case Enc of
+ "*" ->
+ lists:subtract(SupportedEncs, Accepted) ++ Acc;
+ _ ->
+ [Enc | Acc]
+ end
+ end,
+ [],
+ Refused
+ ),
+ Accepted1 = lists:foldr(
+ fun(Enc, Acc) ->
+ case Enc of
+ "*" ->
+ lists:subtract(SupportedEncs, Accepted ++ Refused1) ++ Acc;
+ _ ->
+ [Enc | Acc]
+ end
+ end,
+ [],
+ Accepted
+ ),
+ Accepted2 = case lists:member(DefaultEnc, Accepted1) of
+ true ->
+ Accepted1;
+ false ->
+ Accepted1 ++ [DefaultEnc]
+ end,
+ [E || E <- Accepted2, lists:member(E, SupportedEncs),
+ not lists:member(E, Refused1)].
+
+test() ->
+ test_join(),
+ test_quote_plus(),
+ test_unquote(),
+ test_urlencode(),
+ test_parse_qs(),
+ test_urlsplit_path(),
+ test_urlunsplit_path(),
+ test_urlsplit(),
+ test_urlunsplit(),
+ test_path_split(),
+ test_guess_mime(),
+ test_parse_header(),
+ test_shell_quote(),
+ test_cmd(),
+ test_cmd_string(),
+ test_partition(),
+ test_safe_relative_path(),
+ test_parse_qvalues(),
+ test_pick_accepted_encodings(),
+ ok.
+
+test_shell_quote() ->
+ "\"foo \\$bar\\\"\\`' baz\"" = shell_quote("foo $bar\"`' baz"),
+ ok.
+
+test_cmd() ->
+ "$bling$ `word`!\n" = cmd(["echo", "$bling$ `word`!"]),
+ ok.
+
+test_cmd_string() ->
+ "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = cmd_string(["echo", "$bling$ `word`!"]),
+ ok.
+
+test_parse_header() ->
+ {"multipart/form-data", [{"boundary", "AaB03x"}]} =
+ parse_header("multipart/form-data; boundary=AaB03x"),
+ ok.
+
+test_guess_mime() ->
+ "text/plain" = guess_mime(""),
+ "text/plain" = guess_mime(".text"),
+ "application/zip" = guess_mime(".zip"),
+ "application/zip" = guess_mime("x.zip"),
+ "text/html" = guess_mime("x.html"),
+ "application/xhtml+xml" = guess_mime("x.xhtml"),
+ ok.
+
+test_path_split() ->
+ {"", "foo/bar"} = path_split("/foo/bar"),
+ {"foo", "bar"} = path_split("foo/bar"),
+ {"bar", ""} = path_split("bar"),
+ ok.
+
+test_urlsplit() ->
+ {"", "", "/foo", "", "bar?baz"} = urlsplit("/foo#bar?baz"),
+ {"http", "host:port", "/foo", "", "bar?baz"} =
+ urlsplit("http://host:port/foo#bar?baz"),
+ ok.
+
+test_urlsplit_path() ->
+ {"/foo/bar", "", ""} = urlsplit_path("/foo/bar"),
+ {"/foo", "baz", ""} = urlsplit_path("/foo?baz"),
+ {"/foo", "", "bar?baz"} = urlsplit_path("/foo#bar?baz"),
+ {"/foo", "", "bar?baz#wibble"} = urlsplit_path("/foo#bar?baz#wibble"),
+ {"/foo", "bar", "baz"} = urlsplit_path("/foo?bar#baz"),
+ {"/foo", "bar?baz", "baz"} = urlsplit_path("/foo?bar?baz#baz"),
+ ok.
+
+test_urlunsplit() ->
+ "/foo#bar?baz" = urlunsplit({"", "", "/foo", "", "bar?baz"}),
+ "http://host:port/foo#bar?baz" =
+ urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}),
+ ok.
+
+test_urlunsplit_path() ->
+ "/foo/bar" = urlunsplit_path({"/foo/bar", "", ""}),
+ "/foo?baz" = urlunsplit_path({"/foo", "baz", ""}),
+ "/foo#bar?baz" = urlunsplit_path({"/foo", "", "bar?baz"}),
+ "/foo#bar?baz#wibble" = urlunsplit_path({"/foo", "", "bar?baz#wibble"}),
+ "/foo?bar#baz" = urlunsplit_path({"/foo", "bar", "baz"}),
+ "/foo?bar?baz#baz" = urlunsplit_path({"/foo", "bar?baz", "baz"}),
+ ok.
+
+test_join() ->
+ "foo,bar,baz" = join(["foo", "bar", "baz"], $,),
+ "foo,bar,baz" = join(["foo", "bar", "baz"], ","),
+ "foo bar" = join([["foo", " bar"]], ","),
+ "foo bar,baz" = join([["foo", " bar"], "baz"], ","),
+ "foo" = join(["foo"], ","),
+ "foobarbaz" = join(["foo", "bar", "baz"], ""),
+ ok.
+
+test_quote_plus() ->
+ "foo" = quote_plus(foo),
+ "1" = quote_plus(1),
+ "1.1" = quote_plus(1.1),
+ "foo" = quote_plus("foo"),
+ "foo+bar" = quote_plus("foo bar"),
+ "foo%0A" = quote_plus("foo\n"),
+ "foo%0A" = quote_plus("foo\n"),
+ "foo%3B%26%3D" = quote_plus("foo;&="),
+ ok.
+
+test_unquote() ->
+ "foo bar" = unquote("foo+bar"),
+ "foo bar" = unquote("foo%20bar"),
+ "foo\r\n" = unquote("foo%0D%0A"),
+ ok.
+
+test_urlencode() ->
+ "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode([{foo, "bar"},
+ {"baz", "wibble \r\n"},
+ {z, 1}]),
+ ok.
+
+test_parse_qs() ->
+ [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] =
+ parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"),
+ ok.
+
+test_partition() ->
+ {"foo", "", ""} = partition("foo", "/"),
+ {"foo", "/", "bar"} = partition("foo/bar", "/"),
+ {"foo", "/", ""} = partition("foo/", "/"),
+ {"", "/", "bar"} = partition("/bar", "/"),
+ {"f", "oo/ba", "r"} = partition("foo/bar", "oo/ba"),
+ ok.
+
+test_safe_relative_path() ->
+ "foo" = safe_relative_path("foo"),
+ "foo/" = safe_relative_path("foo/"),
+ "foo" = safe_relative_path("foo/bar/.."),
+ "bar" = safe_relative_path("foo/../bar"),
+ "bar/" = safe_relative_path("foo/../bar/"),
+ "" = safe_relative_path("foo/.."),
+ "" = safe_relative_path("foo/../"),
+ undefined = safe_relative_path("/foo"),
+ undefined = safe_relative_path("../foo"),
+ undefined = safe_relative_path("foo/../.."),
+ undefined = safe_relative_path("foo//"),
+ ok.
+
+test_parse_qvalues() ->
+ [] = parse_qvalues(""),
+ [{"identity", 0.0}] = parse_qvalues("identity;q=0"),
+ [{"identity", 0.0}] = parse_qvalues("identity ;q=0"),
+ [{"identity", 0.0}] = parse_qvalues(" identity; q =0 "),
+ [{"identity", 0.0}] = parse_qvalues("identity ; q = 0"),
+ [{"identity", 0.0}] = parse_qvalues("identity ; q= 0.0"),
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "gzip,deflate,identity;q=0.0"
+ ),
+ [{"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "deflate,gzip,identity;q=0.0"
+ ),
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] =
+ parse_qvalues("gzip,deflate,gzip,identity;q=0"),
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "gzip, deflate , identity; q=0.0"
+ ),
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "gzip; q=1, deflate;q=1.0, identity;q=0.0"
+ ),
+ [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "gzip; q=0.5, deflate;q=1.0, identity;q=0"
+ ),
+ [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues(
+ "gzip; q=0.5, deflate , identity;q=0.0"
+ ),
+ [{"gzip", 0.5}, {"deflate", 0.8}, {"identity", 0.0}] = parse_qvalues(
+ "gzip; q=0.5, deflate;q=0.8, identity;q=0.0"
+ ),
+ [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}] = parse_qvalues(
+ "gzip; q=0.5,deflate,identity"
+ ),
+ [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}, {"identity", 1.0}] =
+ parse_qvalues("gzip; q=0.5,deflate,identity, identity "),
+ invalid_qvalue_string = parse_qvalues("gzip; q=1.1, deflate"),
+ invalid_qvalue_string = parse_qvalues("gzip; q=0.5, deflate;q=2"),
+ invalid_qvalue_string = parse_qvalues("gzip, deflate;q=AB"),
+ invalid_qvalue_string = parse_qvalues("gzip; q=2.1, deflate"),
+ ok.
+
+test_pick_accepted_encodings() ->
+ ["identity"] = pick_accepted_encodings(
+ [],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 1.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["identity"] = pick_accepted_encodings(
+ [{"gzip", 0.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"deflate", 1.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 0.5}, {"deflate", 1.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["identity"] = pick_accepted_encodings(
+ [{"gzip", 0.0}, {"deflate", 0.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["gzip"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}],
+ ["gzip", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate", "identity"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"deflate", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["deflate", "gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 0.2}, {"deflate", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["deflate", "deflate", "gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 0.2}, {"deflate", 1.0}, {"deflate", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["deflate", "gzip", "gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 0.2}, {"deflate", 1.0}, {"gzip", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate", "gzip", "identity"] = pick_accepted_encodings(
+ [{"gzip", 0.2}, {"deflate", 0.9}, {"gzip", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ [] = pick_accepted_encodings(
+ [{"*", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate", "identity"] = pick_accepted_encodings(
+ [{"*", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate", "identity"] = pick_accepted_encodings(
+ [{"*", 0.6}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"*", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "deflate"] = pick_accepted_encodings(
+ [{"gzip", 1.0}, {"deflate", 0.6}, {"*", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["deflate", "gzip"] = pick_accepted_encodings(
+ [{"gzip", 0.5}, {"deflate", 1.0}, {"*", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "identity"] = pick_accepted_encodings(
+ [{"deflate", 0.0}, {"*", 1.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ["gzip", "identity"] = pick_accepted_encodings(
+ [{"*", 1.0}, {"deflate", 0.0}],
+ ["gzip", "deflate", "identity"],
+ "identity"
+ ),
+ ok.
diff --git a/apps/mochiweb/src/reloader.erl b/apps/mochiweb/src/reloader.erl
new file mode 100644
index 00000000..6835f8f9
--- /dev/null
+++ b/apps/mochiweb/src/reloader.erl
@@ -0,0 +1,123 @@
+%% @copyright 2007 Mochi Media, Inc.
+%% @author Matthew Dempsky <matthew@mochimedia.com>
+%%
+%% @doc Erlang module for automatically reloading modified modules
+%% during development.
+
+-module(reloader).
+-author("Matthew Dempsky <matthew@mochimedia.com>").
+
+-include_lib("kernel/include/file.hrl").
+
+-behaviour(gen_server).
+-export([start/0, start_link/0]).
+-export([stop/0]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
+
+-record(state, {last, tref}).
+
+%% External API
+
+%% @spec start() -> ServerRet
+%% @doc Start the reloader.
+start() ->
+ gen_server:start({local, ?MODULE}, ?MODULE, [], []).
+
+%% @spec start_link() -> ServerRet
+%% @doc Start the reloader.
+start_link() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%% @spec stop() -> ok
+%% @doc Stop the reloader.
+stop() ->
+ gen_server:call(?MODULE, stop).
+
+%% gen_server callbacks
+
+%% @spec init([]) -> {ok, State}
+%% @doc gen_server init, opens the server in an initial state.
+init([]) ->
+ {ok, TRef} = timer:send_interval(timer:seconds(1), doit),
+ {ok, #state{last = stamp(), tref = TRef}}.
+
+%% @spec handle_call(Args, From, State) -> tuple()
+%% @doc gen_server callback.
+handle_call(stop, _From, State) ->
+ {stop, shutdown, stopped, State};
+handle_call(_Req, _From, State) ->
+ {reply, {error, badrequest}, State}.
+
+%% @spec handle_cast(Cast, State) -> tuple()
+%% @doc gen_server callback.
+handle_cast(_Req, State) ->
+ {noreply, State}.
+
+%% @spec handle_info(Info, State) -> tuple()
+%% @doc gen_server callback.
+handle_info(doit, State) ->
+ Now = stamp(),
+ doit(State#state.last, Now),
+ {noreply, State#state{last = Now}};
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+%% @spec terminate(Reason, State) -> ok
+%% @doc gen_server termination callback.
+terminate(_Reason, State) ->
+ {ok, cancel} = timer:cancel(State#state.tref),
+ ok.
+
+
+%% @spec code_change(_OldVsn, State, _Extra) -> State
+%% @doc gen_server code_change callback (trivial).
+code_change(_Vsn, State, _Extra) ->
+ {ok, State}.
+
+%% Internal API
+
+doit(From, To) ->
+ [case file:read_file_info(Filename) of
+ {ok, #file_info{mtime = Mtime}} when Mtime >= From, Mtime < To ->
+ reload(Module);
+ {ok, _} ->
+ unmodified;
+ {error, enoent} ->
+ %% The Erlang compiler deletes existing .beam files if
+ %% recompiling fails. Maybe it's worth spitting out a
+ %% warning here, but I'd want to limit it to just once.
+ gone;
+ {error, Reason} ->
+ io:format("Error reading ~s's file info: ~p~n",
+ [Filename, Reason]),
+ error
+ end || {Module, Filename} <- code:all_loaded(), is_list(Filename)].
+
+reload(Module) ->
+ io:format("Reloading ~p ...", [Module]),
+ code:purge(Module),
+ case code:load_file(Module) of
+ {module, Module} ->
+ io:format(" ok.~n"),
+ case erlang:function_exported(Module, test, 0) of
+ true ->
+ io:format(" - Calling ~p:test() ...", [Module]),
+ case catch Module:test() of
+ ok ->
+ io:format(" ok.~n"),
+ reload;
+ Reason ->
+ io:format(" fail: ~p.~n", [Reason]),
+ reload_but_test_failed
+ end;
+ false ->
+ reload
+ end;
+ {error, Reason} ->
+ io:format(" fail: ~p.~n", [Reason]),
+ error
+ end.
+
+
+stamp() ->
+ erlang:localtime().