From e8e4b0d293021fe90326a85828f3cfb087bf18b7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 17 May 2011 11:15:14 +0000 Subject: tagging 1.1.0 git-svn-id: https://svn.apache.org/repos/asf/couchdb/tags/1.1.0@1104149 13f79535-47bb-0310-9956-ffa450edef68 --- 1.1.x/src/mochiweb/Makefile.am | 102 +++ 1.1.x/src/mochiweb/internal.hrl | 3 + 1.1.x/src/mochiweb/mochifmt.erl | 425 ++++++++++ 1.1.x/src/mochiweb/mochifmt_records.erl | 38 + 1.1.x/src/mochiweb/mochifmt_std.erl | 30 + 1.1.x/src/mochiweb/mochiglobal.erl | 107 +++ 1.1.x/src/mochiweb/mochihex.erl | 91 +++ 1.1.x/src/mochiweb/mochijson.erl | 531 +++++++++++++ 1.1.x/src/mochiweb/mochijson2.erl | 802 +++++++++++++++++++ 1.1.x/src/mochiweb/mochilists.erl | 104 +++ 1.1.x/src/mochiweb/mochilogfile2.erl | 140 ++++ 1.1.x/src/mochiweb/mochinum.erl | 331 ++++++++ 1.1.x/src/mochiweb/mochitemp.erl | 310 ++++++++ 1.1.x/src/mochiweb/mochiutf8.erl | 316 ++++++++ 1.1.x/src/mochiweb/mochiweb.app.in | 32 + 1.1.x/src/mochiweb/mochiweb.app.src | 9 + 1.1.x/src/mochiweb/mochiweb.erl | 289 +++++++ 1.1.x/src/mochiweb/mochiweb_acceptor.erl | 48 ++ 1.1.x/src/mochiweb/mochiweb_app.erl | 27 + 1.1.x/src/mochiweb/mochiweb_charref.erl | 308 +++++++ 1.1.x/src/mochiweb/mochiweb_cookies.erl | 309 +++++++ 1.1.x/src/mochiweb/mochiweb_cover.erl | 75 ++ 1.1.x/src/mochiweb/mochiweb_echo.erl | 38 + 1.1.x/src/mochiweb/mochiweb_headers.erl | 299 +++++++ 1.1.x/src/mochiweb/mochiweb_html.erl | 1061 +++++++++++++++++++++++++ 1.1.x/src/mochiweb/mochiweb_http.erl | 273 +++++++ 1.1.x/src/mochiweb/mochiweb_io.erl | 46 ++ 1.1.x/src/mochiweb/mochiweb_mime.erl | 94 +++ 1.1.x/src/mochiweb/mochiweb_multipart.erl | 824 +++++++++++++++++++ 1.1.x/src/mochiweb/mochiweb_request.erl | 768 ++++++++++++++++++ 1.1.x/src/mochiweb/mochiweb_response.erl | 64 ++ 1.1.x/src/mochiweb/mochiweb_skel.erl | 86 ++ 1.1.x/src/mochiweb/mochiweb_socket.erl | 84 ++ 1.1.x/src/mochiweb/mochiweb_socket_server.erl | 272 +++++++ 1.1.x/src/mochiweb/mochiweb_sup.erl | 41 + 
1.1.x/src/mochiweb/mochiweb_util.erl | 973 +++++++++++++++++++++++ 1.1.x/src/mochiweb/reloader.erl | 161 ++++ 37 files changed, 9511 insertions(+) create mode 100644 1.1.x/src/mochiweb/Makefile.am create mode 100644 1.1.x/src/mochiweb/internal.hrl create mode 100644 1.1.x/src/mochiweb/mochifmt.erl create mode 100644 1.1.x/src/mochiweb/mochifmt_records.erl create mode 100644 1.1.x/src/mochiweb/mochifmt_std.erl create mode 100644 1.1.x/src/mochiweb/mochiglobal.erl create mode 100644 1.1.x/src/mochiweb/mochihex.erl create mode 100644 1.1.x/src/mochiweb/mochijson.erl create mode 100644 1.1.x/src/mochiweb/mochijson2.erl create mode 100644 1.1.x/src/mochiweb/mochilists.erl create mode 100644 1.1.x/src/mochiweb/mochilogfile2.erl create mode 100644 1.1.x/src/mochiweb/mochinum.erl create mode 100644 1.1.x/src/mochiweb/mochitemp.erl create mode 100644 1.1.x/src/mochiweb/mochiutf8.erl create mode 100644 1.1.x/src/mochiweb/mochiweb.app.in create mode 100644 1.1.x/src/mochiweb/mochiweb.app.src create mode 100644 1.1.x/src/mochiweb/mochiweb.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_acceptor.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_app.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_charref.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_cookies.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_cover.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_echo.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_headers.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_html.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_http.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_io.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_mime.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_multipart.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_request.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_response.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_skel.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_socket.erl create mode 100644 
1.1.x/src/mochiweb/mochiweb_socket_server.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_sup.erl create mode 100644 1.1.x/src/mochiweb/mochiweb_util.erl create mode 100644 1.1.x/src/mochiweb/reloader.erl (limited to '1.1.x/src/mochiweb') diff --git a/1.1.x/src/mochiweb/Makefile.am b/1.1.x/src/mochiweb/Makefile.am new file mode 100644 index 00000000..752118df --- /dev/null +++ b/1.1.x/src/mochiweb/Makefile.am @@ -0,0 +1,102 @@ +## Licensed under the Apache License, Version 2.0 (the "License"); you may not +## use this file except in compliance with the License. You may obtain a copy of +## the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +## WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +## License for the specific language governing permissions and limitations under +## the License. + +mochiwebebindir = $(localerlanglibdir)/mochiweb-7c2bc2/ebin + +mochiweb_file_collection = \ + mochifmt.erl \ + mochifmt_records.erl \ + mochifmt_std.erl \ + mochiglobal.erl \ + mochihex.erl \ + mochijson.erl \ + mochijson2.erl \ + mochilists.erl \ + mochilogfile2.erl \ + mochinum.erl \ + mochitemp.erl \ + mochiutf8.erl \ + mochiweb.app.in \ + mochiweb.erl \ + mochiweb_acceptor.erl \ + mochiweb_app.erl \ + mochiweb_charref.erl \ + mochiweb_cookies.erl \ + mochiweb_cover.erl \ + mochiweb_echo.erl \ + mochiweb_headers.erl \ + mochiweb_html.erl \ + mochiweb_http.erl \ + mochiweb_io.erl \ + mochiweb_mime.erl \ + mochiweb_multipart.erl \ + mochiweb_request.erl \ + mochiweb_response.erl \ + mochiweb_skel.erl \ + mochiweb_socket.erl \ + mochiweb_socket_server.erl \ + mochiweb_sup.erl \ + mochiweb_util.erl \ + reloader.erl + +mochiwebebin_make_generated_file_list = \ + mochifmt.beam \ + mochifmt_records.beam \ + mochifmt_std.beam \ + mochiglobal.beam \ + mochihex.beam \ + mochijson.beam \ + 
mochijson2.beam \ + mochilists.beam \ + mochilogfile2.beam \ + mochinum.beam \ + mochitemp.beam \ + mochiutf8.beam \ + mochiweb.app \ + mochiweb.beam \ + mochiweb_acceptor.beam \ + mochiweb_app.beam \ + mochiweb_charref.beam \ + mochiweb_cookies.beam \ + mochiweb_cover.beam \ + mochiweb_echo.beam \ + mochiweb_headers.beam \ + mochiweb_html.beam \ + mochiweb_http.beam \ + mochiweb_io.beam \ + mochiweb_mime.beam \ + mochiweb_multipart.beam \ + mochiweb_request.beam \ + mochiweb_response.beam \ + mochiweb_skel.beam \ + mochiweb_socket.beam \ + mochiweb_socket_server.beam \ + mochiweb_sup.beam \ + mochiweb_util.beam \ + reloader.beam + +mochiwebebin_DATA = \ + $(mochiwebebin_make_generated_file_list) + +EXTRA_DIST = \ + $(mochiweb_file_collection) \ + internal.hrl + +CLEANFILES = \ + $(mochiwebebin_make_generated_file_list) + +%.app: %.app.in + cp $< $@ + +%.beam: %.erl + + $(ERLC) $(ERLC_FLAGS) $< diff --git a/1.1.x/src/mochiweb/internal.hrl b/1.1.x/src/mochiweb/internal.hrl new file mode 100644 index 00000000..6db899a0 --- /dev/null +++ b/1.1.x/src/mochiweb/internal.hrl @@ -0,0 +1,3 @@ + +-define(RECBUF_SIZE, 8192). + diff --git a/1.1.x/src/mochiweb/mochifmt.erl b/1.1.x/src/mochiweb/mochifmt.erl new file mode 100644 index 00000000..5bc6b9c4 --- /dev/null +++ b/1.1.x/src/mochiweb/mochifmt.erl @@ -0,0 +1,425 @@ +%% @author Bob Ippolito +%% @copyright 2008 Mochi Media, Inc. + +%% @doc String Formatting for Erlang, inspired by Python 2.6 +%% (PEP 3101). +%% +-module(mochifmt). +-author('bob@mochimedia.com'). +-export([format/2, format_field/2, convert_field/2, get_value/2, get_field/2]). +-export([tokenize/1, format/3, get_field/3, format_field/3]). +-export([bformat/2, bformat/3]). +-export([f/2, f/3]). + +-record(conversion, {length, precision, ctype, align, fill_char, sign}). + +%% @spec tokenize(S::string()) -> tokens() +%% @doc Tokenize a format string into mochifmt's internal format. +tokenize(S) -> + {?MODULE, tokenize(S, "", [])}. 
+ +%% @spec convert_field(Arg, Conversion::conversion()) -> term() +%% @doc Process Arg according to the given explicit conversion specifier. +convert_field(Arg, "") -> + Arg; +convert_field(Arg, "r") -> + repr(Arg); +convert_field(Arg, "s") -> + str(Arg). + +%% @spec get_value(Key::string(), Args::args()) -> term() +%% @doc Get the Key from Args. If Args is a tuple then convert Key to +%% an integer and get element(1 + Key, Args). If Args is a list and Key +%% can be parsed as an integer then use lists:nth(1 + Key, Args), +%% otherwise try and look for Key in Args as a proplist, converting +%% Key to an atom or binary if necessary. +get_value(Key, Args) when is_tuple(Args) -> + element(1 + list_to_integer(Key), Args); +get_value(Key, Args) when is_list(Args) -> + try lists:nth(1 + list_to_integer(Key), Args) + catch error:_ -> + {_K, V} = proplist_lookup(Key, Args), + V + end. + +%% @spec get_field(Key::string(), Args) -> term() +%% @doc Consecutively call get_value/2 on parts of Key delimited by ".", +%% replacing Args with the result of the previous get_value. This +%% is used to implement formats such as {0.0}. +get_field(Key, Args) -> + get_field(Key, Args, ?MODULE). + +%% @spec get_field(Key::string(), Args, Module) -> term() +%% @doc Consecutively call Module:get_value/2 on parts of Key delimited by ".", +%% replacing Args with the result of the previous get_value. This +%% is used to implement formats such as {0.0}. +get_field(Key, Args, Module) -> + {Name, Next} = lists:splitwith(fun (C) -> C =/= $. end, Key), + Res = try Module:get_value(Name, Args) + catch error:undef -> get_value(Name, Args) end, + case Next of + "" -> + Res; + "." ++ S1 -> + get_field(S1, Res, Module) + end. + +%% @spec format(Format::string(), Args) -> iolist() +%% @doc Format Args with Format. +format(Format, Args) -> + format(Format, Args, ?MODULE). + +%% @spec format(Format::string(), Args, Module) -> iolist() +%% @doc Format Args with Format using Module. 
+format({?MODULE, Parts}, Args, Module) -> + format2(Parts, Args, Module, []); +format(S, Args, Module) -> + format(tokenize(S), Args, Module). + +%% @spec format_field(Arg, Format) -> iolist() +%% @doc Format Arg with Format. +format_field(Arg, Format) -> + format_field(Arg, Format, ?MODULE). + +%% @spec format_field(Arg, Format, _Module) -> iolist() +%% @doc Format Arg with Format. +format_field(Arg, Format, _Module) -> + F = default_ctype(Arg, parse_std_conversion(Format)), + fix_padding(fix_sign(convert2(Arg, F), F), F). + +%% @spec f(Format::string(), Args) -> string() +%% @doc Format Args with Format and return a string(). +f(Format, Args) -> + f(Format, Args, ?MODULE). + +%% @spec f(Format::string(), Args, Module) -> string() +%% @doc Format Args with Format using Module and return a string(). +f(Format, Args, Module) -> + case lists:member(${, Format) of + true -> + binary_to_list(bformat(Format, Args, Module)); + false -> + Format + end. + +%% @spec bformat(Format::string(), Args) -> binary() +%% @doc Format Args with Format and return a binary(). +bformat(Format, Args) -> + iolist_to_binary(format(Format, Args)). + +%% @spec bformat(Format::string(), Args, Module) -> binary() +%% @doc Format Args with Format using Module and return a binary(). +bformat(Format, Args, Module) -> + iolist_to_binary(format(Format, Args, Module)). + +%% Internal API + +add_raw("", Acc) -> + Acc; +add_raw(S, Acc) -> + [{raw, lists:reverse(S)} | Acc]. + +tokenize([], S, Acc) -> + lists:reverse(add_raw(S, Acc)); +tokenize("{{" ++ Rest, S, Acc) -> + tokenize(Rest, "{" ++ S, Acc); +tokenize("{" ++ Rest, S, Acc) -> + {Format, Rest1} = tokenize_format(Rest), + tokenize(Rest1, "", [{format, make_format(Format)} | add_raw(S, Acc)]); +tokenize("}}" ++ Rest, S, Acc) -> + tokenize(Rest, "}" ++ S, Acc); +tokenize([C | Rest], S, Acc) -> + tokenize(Rest, [C | S], Acc). + +tokenize_format(S) -> + tokenize_format(S, 1, []). 
+ +tokenize_format("}" ++ Rest, 1, Acc) -> + {lists:reverse(Acc), Rest}; +tokenize_format("}" ++ Rest, N, Acc) -> + tokenize_format(Rest, N - 1, "}" ++ Acc); +tokenize_format("{" ++ Rest, N, Acc) -> + tokenize_format(Rest, 1 + N, "{" ++ Acc); +tokenize_format([C | Rest], N, Acc) -> + tokenize_format(Rest, N, [C | Acc]). + +make_format(S) -> + {Name0, Spec} = case lists:splitwith(fun (C) -> C =/= $: end, S) of + {_, ""} -> + {S, ""}; + {SN, ":" ++ SS} -> + {SN, SS} + end, + {Name, Transform} = case lists:splitwith(fun (C) -> C =/= $! end, Name0) of + {_, ""} -> + {Name0, ""}; + {TN, "!" ++ TT} -> + {TN, TT} + end, + {Name, Transform, Spec}. + +proplist_lookup(S, P) -> + A = try list_to_existing_atom(S) + catch error:_ -> make_ref() end, + B = try list_to_binary(S) + catch error:_ -> make_ref() end, + proplist_lookup2({S, A, B}, P). + +proplist_lookup2({KS, KA, KB}, [{K, V} | _]) + when KS =:= K orelse KA =:= K orelse KB =:= K -> + {K, V}; +proplist_lookup2(Keys, [_ | Rest]) -> + proplist_lookup2(Keys, Rest). + +format2([], _Args, _Module, Acc) -> + lists:reverse(Acc); +format2([{raw, S} | Rest], Args, Module, Acc) -> + format2(Rest, Args, Module, [S | Acc]); +format2([{format, {Key, Convert, Format0}} | Rest], Args, Module, Acc) -> + Format = f(Format0, Args, Module), + V = case Module of + ?MODULE -> + V0 = get_field(Key, Args), + V1 = convert_field(V0, Convert), + format_field(V1, Format); + _ -> + V0 = try Module:get_field(Key, Args) + catch error:undef -> get_field(Key, Args, Module) end, + V1 = try Module:convert_field(V0, Convert) + catch error:undef -> convert_field(V0, Convert) end, + try Module:format_field(V1, Format) + catch error:undef -> format_field(V1, Format, Module) end + end, + format2(Rest, Args, Module, [V | Acc]). 
+ +default_ctype(_Arg, C=#conversion{ctype=N}) when N =/= undefined -> + C; +default_ctype(Arg, C) when is_integer(Arg) -> + C#conversion{ctype=decimal}; +default_ctype(Arg, C) when is_float(Arg) -> + C#conversion{ctype=general}; +default_ctype(_Arg, C) -> + C#conversion{ctype=string}. + +fix_padding(Arg, #conversion{length=undefined}) -> + Arg; +fix_padding(Arg, F=#conversion{length=Length, fill_char=Fill0, align=Align0, + ctype=Type}) -> + Padding = Length - iolist_size(Arg), + Fill = case Fill0 of + undefined -> + $\s; + _ -> + Fill0 + end, + Align = case Align0 of + undefined -> + case Type of + string -> + left; + _ -> + right + end; + _ -> + Align0 + end, + case Padding > 0 of + true -> + do_padding(Arg, Padding, Fill, Align, F); + false -> + Arg + end. + +do_padding(Arg, Padding, Fill, right, _F) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding(Arg, Padding, Fill, center, _F) -> + LPadding = lists:duplicate(Padding div 2, Fill), + RPadding = case Padding band 1 of + 1 -> + [Fill | LPadding]; + _ -> + LPadding + end, + [LPadding, Arg, RPadding]; +do_padding([$- | Arg], Padding, Fill, sign_right, _F) -> + [[$- | lists:duplicate(Padding, Fill)], Arg]; +do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=$-}) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding([S | Arg], Padding, Fill, sign_right, #conversion{sign=S}) -> + [[S | lists:duplicate(Padding, Fill)], Arg]; +do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=undefined}) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding(Arg, Padding, Fill, left, _F) -> + [Arg | lists:duplicate(Padding, Fill)]. + +fix_sign(Arg, #conversion{sign=$+}) when Arg >= 0 -> + [$+, Arg]; +fix_sign(Arg, #conversion{sign=$\s}) when Arg >= 0 -> + [$\s, Arg]; +fix_sign(Arg, _F) -> + Arg. 
+ +ctype($\%) -> percent; +ctype($s) -> string; +ctype($b) -> bin; +ctype($o) -> oct; +ctype($X) -> upper_hex; +ctype($x) -> hex; +ctype($c) -> char; +ctype($d) -> decimal; +ctype($g) -> general; +ctype($f) -> fixed; +ctype($e) -> exp. + +align($<) -> left; +align($>) -> right; +align($^) -> center; +align($=) -> sign_right. + +convert2(Arg, F=#conversion{ctype=percent}) -> + [convert2(100.0 * Arg, F#conversion{ctype=fixed}), $\%]; +convert2(Arg, #conversion{ctype=string}) -> + str(Arg); +convert2(Arg, #conversion{ctype=bin}) -> + erlang:integer_to_list(Arg, 2); +convert2(Arg, #conversion{ctype=oct}) -> + erlang:integer_to_list(Arg, 8); +convert2(Arg, #conversion{ctype=upper_hex}) -> + erlang:integer_to_list(Arg, 16); +convert2(Arg, #conversion{ctype=hex}) -> + string:to_lower(erlang:integer_to_list(Arg, 16)); +convert2(Arg, #conversion{ctype=char}) when Arg < 16#80 -> + [Arg]; +convert2(Arg, #conversion{ctype=char}) -> + xmerl_ucs:to_utf8(Arg); +convert2(Arg, #conversion{ctype=decimal}) -> + integer_to_list(Arg); +convert2(Arg, #conversion{ctype=general, precision=undefined}) -> + try mochinum:digits(Arg) + catch error:undef -> io_lib:format("~g", [Arg]) end; +convert2(Arg, #conversion{ctype=fixed, precision=undefined}) -> + io_lib:format("~f", [Arg]); +convert2(Arg, #conversion{ctype=exp, precision=undefined}) -> + io_lib:format("~e", [Arg]); +convert2(Arg, #conversion{ctype=general, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "g", [Arg]); +convert2(Arg, #conversion{ctype=fixed, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "f", [Arg]); +convert2(Arg, #conversion{ctype=exp, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "e", [Arg]). 
+ +str(A) when is_atom(A) -> + atom_to_list(A); +str(I) when is_integer(I) -> + integer_to_list(I); +str(F) when is_float(F) -> + try mochinum:digits(F) + catch error:undef -> io_lib:format("~g", [F]) end; +str(L) when is_list(L) -> + L; +str(B) when is_binary(B) -> + B; +str(P) -> + repr(P). + +repr(P) when is_float(P) -> + try mochinum:digits(P) + catch error:undef -> float_to_list(P) end; +repr(P) -> + io_lib:format("~p", [P]). + +parse_std_conversion(S) -> + parse_std_conversion(S, #conversion{}). + +parse_std_conversion("", Acc) -> + Acc; +parse_std_conversion([Fill, Align | Spec], Acc) + when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ -> + parse_std_conversion(Spec, Acc#conversion{fill_char=Fill, + align=align(Align)}); +parse_std_conversion([Align | Spec], Acc) + when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ -> + parse_std_conversion(Spec, Acc#conversion{align=align(Align)}); +parse_std_conversion([Sign | Spec], Acc) + when Sign =:= $+ orelse Sign =:= $- orelse Sign =:= $\s -> + parse_std_conversion(Spec, Acc#conversion{sign=Sign}); +parse_std_conversion("0" ++ Spec, Acc) -> + Align = case Acc#conversion.align of + undefined -> + sign_right; + A -> + A + end, + parse_std_conversion(Spec, Acc#conversion{fill_char=$0, align=Align}); +parse_std_conversion(Spec=[D|_], Acc) when D >= $0 andalso D =< $9 -> + {W, Spec1} = lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec), + parse_std_conversion(Spec1, Acc#conversion{length=list_to_integer(W)}); +parse_std_conversion([$. | Spec], Acc) -> + case lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec) of + {"", Spec1} -> + parse_std_conversion(Spec1, Acc); + {P, Spec1} -> + parse_std_conversion(Spec1, + Acc#conversion{precision=list_to_integer(P)}) + end; +parse_std_conversion([Type], Acc) -> + parse_std_conversion("", Acc#conversion{ctype=ctype(Type)}). + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). 
+ +tokenize_test() -> + {?MODULE, [{raw, "ABC"}]} = tokenize("ABC"), + {?MODULE, [{format, {"0", "", ""}}]} = tokenize("{0}"), + {?MODULE, [{raw, "ABC"}, {format, {"1", "", ""}}, {raw, "DEF"}]} = + tokenize("ABC{1}DEF"), + ok. + +format_test() -> + <<" -4">> = bformat("{0:4}", [-4]), + <<" 4">> = bformat("{0:4}", [4]), + <<" 4">> = bformat("{0:{0}}", [4]), + <<"4 ">> = bformat("{0:4}", ["4"]), + <<"4 ">> = bformat("{0:{0}}", ["4"]), + <<"1.2yoDEF">> = bformat("{2}{0}{1}{3}", {yo, "DE", 1.2, <<"F">>}), + <<"cafebabe">> = bformat("{0:x}", {16#cafebabe}), + <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}), + <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}), + <<"755">> = bformat("{0:o}", {8#755}), + <<"a">> = bformat("{0:c}", {97}), + %% Horizontal ellipsis + <<226, 128, 166>> = bformat("{0:c}", {16#2026}), + <<"11">> = bformat("{0:b}", {3}), + <<"11">> = bformat("{0:b}", [3]), + <<"11">> = bformat("{three:b}", [{three, 3}]), + <<"11">> = bformat("{three:b}", [{"three", 3}]), + <<"11">> = bformat("{three:b}", [{<<"three">>, 3}]), + <<"\"foo\"">> = bformat("{0!r}", {"foo"}), + <<"2008-5-4">> = bformat("{0.0}-{0.1}-{0.2}", {{2008,5,4}}), + <<"2008-05-04">> = bformat("{0.0:04}-{0.1:02}-{0.2:02}", {{2008,5,4}}), + <<"foo6bar-6">> = bformat("foo{1}{0}-{1}", {bar, 6}), + <<"-'atom test'-">> = bformat("-{arg!r}-", [{arg, 'atom test'}]), + <<"2008-05-04">> = bformat("{0.0:0{1.0}}-{0.1:0{1.1}}-{0.2:0{1.2}}", + {{2008,5,4}, {4, 2, 2}}), + ok. + +std_test() -> + M = mochifmt_std:new(), + <<"01">> = bformat("{0}{1}", [0, 1], M), + ok. + +records_test() -> + M = mochifmt_records:new([{conversion, record_info(fields, conversion)}]), + R = #conversion{length=long, precision=hard, sign=peace}, + long = M:get_value("length", R), + hard = M:get_value("precision", R), + peace = M:get_value("sign", R), + <<"long hard">> = bformat("{length} {precision}", R, M), + <<"long hard">> = bformat("{0.length} {0.precision}", [R], M), + ok. + +-endif. 
diff --git a/1.1.x/src/mochiweb/mochifmt_records.erl b/1.1.x/src/mochiweb/mochifmt_records.erl new file mode 100644 index 00000000..2326d1dd --- /dev/null +++ b/1.1.x/src/mochiweb/mochifmt_records.erl @@ -0,0 +1,38 @@ +%% @author Bob Ippolito +%% @copyright 2008 Mochi Media, Inc. + +%% @doc Formatter that understands records. +%% +%% Usage: +%% +%% 1> M = mochifmt_records:new([{rec, record_info(fields, rec)}]), +%% M:format("{0.bar}", [#rec{bar=foo}]). +%% foo + +-module(mochifmt_records, [Recs]). +-author('bob@mochimedia.com'). +-export([get_value/2]). + +get_value(Key, Rec) when is_tuple(Rec) and is_atom(element(1, Rec)) -> + try begin + Atom = list_to_existing_atom(Key), + {_, Fields} = proplists:lookup(element(1, Rec), Recs), + element(get_rec_index(Atom, Fields, 2), Rec) + end + catch error:_ -> mochifmt:get_value(Key, Rec) + end; +get_value(Key, Args) -> + mochifmt:get_value(Key, Args). + +get_rec_index(Atom, [Atom | _], Index) -> + Index; +get_rec_index(Atom, [_ | Rest], Index) -> + get_rec_index(Atom, Rest, 1 + Index). + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/1.1.x/src/mochiweb/mochifmt_std.erl b/1.1.x/src/mochiweb/mochifmt_std.erl new file mode 100644 index 00000000..d4d74f6f --- /dev/null +++ b/1.1.x/src/mochiweb/mochifmt_std.erl @@ -0,0 +1,30 @@ +%% @author Bob Ippolito +%% @copyright 2008 Mochi Media, Inc. + +%% @doc Template module for a mochifmt formatter. + +-module(mochifmt_std, []). +-author('bob@mochimedia.com'). +-export([format/2, get_value/2, format_field/2, get_field/2, convert_field/2]). + +format(Format, Args) -> + mochifmt:format(Format, Args, THIS). + +get_field(Key, Args) -> + mochifmt:get_field(Key, Args, THIS). + +convert_field(Key, Args) -> + mochifmt:convert_field(Key, Args). + +get_value(Key, Args) -> + mochifmt:get_value(Key, Args). + +format_field(Arg, Format) -> + mochifmt:format_field(Arg, Format, THIS). + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). 
+-ifdef(TEST). +-endif. diff --git a/1.1.x/src/mochiweb/mochiglobal.erl b/1.1.x/src/mochiweb/mochiglobal.erl new file mode 100644 index 00000000..c740b878 --- /dev/null +++ b/1.1.x/src/mochiweb/mochiglobal.erl @@ -0,0 +1,107 @@ +%% @author Bob Ippolito +%% @copyright 2010 Mochi Media, Inc. +%% @doc Abuse module constant pools as a "read-only shared heap" (since erts 5.6) +%% [1]. +-module(mochiglobal). +-author("Bob Ippolito "). +-export([get/1, get/2, put/2, delete/1]). + +-spec get(atom()) -> any() | undefined. +%% @equiv get(K, undefined) +get(K) -> + get(K, undefined). + +-spec get(atom(), T) -> any() | T. +%% @doc Get the term for K or return Default. +get(K, Default) -> + get(K, Default, key_to_module(K)). + +get(_K, Default, Mod) -> + try Mod:term() + catch error:undef -> + Default + end. + +-spec put(atom(), any()) -> ok. +%% @doc Store term V at K, replaces an existing term if present. +put(K, V) -> + put(K, V, key_to_module(K)). + +put(_K, V, Mod) -> + Bin = compile(Mod, V), + code:purge(Mod), + code:load_binary(Mod, atom_to_list(Mod) ++ ".erl", Bin), + ok. + +-spec delete(atom()) -> boolean(). +%% @doc Delete term stored at K, no-op if non-existent. +delete(K) -> + delete(K, key_to_module(K)). + +delete(_K, Mod) -> + code:purge(Mod), + code:delete(Mod). + +-spec key_to_module(atom()) -> atom(). +key_to_module(K) -> + list_to_atom("mochiglobal:" ++ atom_to_list(K)). + +-spec compile(atom(), any()) -> binary(). +compile(Module, T) -> + {ok, Module, Bin} = compile:forms(forms(Module, T), + [verbose, report_errors]), + Bin. + +-spec forms(atom(), any()) -> [erl_syntax:syntaxTree()]. +forms(Module, T) -> + [erl_syntax:revert(X) || X <- term_to_abstract(Module, term, T)]. + +-spec term_to_abstract(atom(), atom(), any()) -> [erl_syntax:syntaxTree()]. +term_to_abstract(Module, Getter, T) -> + [%% -module(Module). + erl_syntax:attribute( + erl_syntax:atom(module), + [erl_syntax:atom(Module)]), + %% -export([Getter/0]). 
+ erl_syntax:attribute( + erl_syntax:atom(export), + [erl_syntax:list( + [erl_syntax:arity_qualifier( + erl_syntax:atom(Getter), + erl_syntax:integer(0))])]), + %% Getter() -> T. + erl_syntax:function( + erl_syntax:atom(Getter), + [erl_syntax:clause([], none, [erl_syntax:abstract(T)])])]. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +get_put_delete_test() -> + K = '$$test$$mochiglobal', + delete(K), + ?assertEqual( + bar, + get(K, bar)), + try + ?MODULE:put(K, baz), + ?assertEqual( + baz, + get(K, bar)), + ?MODULE:put(K, wibble), + ?assertEqual( + wibble, + ?MODULE:get(K)) + after + delete(K) + end, + ?assertEqual( + bar, + get(K, bar)), + ?assertEqual( + undefined, + ?MODULE:get(K)), + ok. +-endif. diff --git a/1.1.x/src/mochiweb/mochihex.erl b/1.1.x/src/mochiweb/mochihex.erl new file mode 100644 index 00000000..44a2aa7f --- /dev/null +++ b/1.1.x/src/mochiweb/mochihex.erl @@ -0,0 +1,91 @@ +%% @author Bob Ippolito +%% @copyright 2006 Mochi Media, Inc. + +%% @doc Utilities for working with hexadecimal strings. + +-module(mochihex). +-author('bob@mochimedia.com'). + +-export([to_hex/1, to_bin/1, to_int/1, dehex/1, hexdigit/1]). + +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() + +%% @spec to_hex(integer | iolist()) -> string() +%% @doc Convert an iolist to a hexadecimal string. +to_hex(0) -> + "0"; +to_hex(I) when is_integer(I), I > 0 -> + to_hex_int(I, []); +to_hex(B) -> + to_hex(iolist_to_binary(B), []). + +%% @spec to_bin(string()) -> binary() +%% @doc Convert a hexadecimal string to a binary. +to_bin(L) -> + to_bin(L, []). + +%% @spec to_int(string()) -> integer() +%% @doc Convert a hexadecimal string to an integer. +to_int(L) -> + erlang:list_to_integer(L, 16). + +%% @spec dehex(char()) -> integer() +%% @doc Convert a hex digit to its integer value. 
+dehex(C) when C >= $0, C =< $9 -> + C - $0; +dehex(C) when C >= $a, C =< $f -> + C - $a + 10; +dehex(C) when C >= $A, C =< $F -> + C - $A + 10. + +%% @spec hexdigit(integer()) -> char() +%% @doc Convert an integer less than 16 to a hex digit. +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +%% Internal API + +to_hex(<<>>, Acc) -> + lists:reverse(Acc); +to_hex(<>, Acc) -> + to_hex(Rest, [hexdigit(C2), hexdigit(C1) | Acc]). + +to_hex_int(0, Acc) -> + Acc; +to_hex_int(I, Acc) -> + to_hex_int(I bsr 4, [hexdigit(I band 15) | Acc]). + +to_bin([], Acc) -> + iolist_to_binary(lists:reverse(Acc)); +to_bin([C1, C2 | Rest], Acc) -> + to_bin(Rest, [(dehex(C1) bsl 4) bor dehex(C2) | Acc]). + + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +to_hex_test() -> + "ff000ff1" = to_hex([255, 0, 15, 241]), + "ff000ff1" = to_hex(16#ff000ff1), + "0" = to_hex(16#0), + ok. + +to_bin_test() -> + <<255, 0, 15, 241>> = to_bin("ff000ff1"), + <<255, 0, 10, 161>> = to_bin("Ff000aA1"), + ok. + +to_int_test() -> + 16#ff000ff1 = to_int("ff000ff1"), + 16#ff000aa1 = to_int("FF000Aa1"), + 16#0 = to_int("0"), + ok. + +-endif. diff --git a/1.1.x/src/mochiweb/mochijson.erl b/1.1.x/src/mochiweb/mochijson.erl new file mode 100644 index 00000000..2e3d1452 --- /dev/null +++ b/1.1.x/src/mochiweb/mochijson.erl @@ -0,0 +1,531 @@ +%% @author Bob Ippolito +%% @copyright 2006 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. +-module(mochijson). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1]). +-export([binary_encoder/1, binary_encode/1]). +-export([binary_decoder/1, binary_decode/1]). + +% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}). 
+ +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() +%% @type json_string() = atom | string() | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = {array, [json_term()]} +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() +%% @type encoding() = utf8 | unicode +%% @type encoder_option() = {input_encoding, encoding()} | +%% {handler, function()} +%% @type decoder_option() = {input_encoding, encoding()} | +%% {object_hook, function()} +%% @type bjson_string() = binary() +%% @type bjson_number() = integer() | float() +%% @type bjson_array() = [bjson_term()] +%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]} +%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() | +%% bjson_object() +%% @type binary_encoder_option() = {handler, function()} +%% @type binary_decoder_option() = {object_hook, function()} + +-record(encoder, {input_encoding=unicode, + handler=null}). + +-record(decoder, {input_encoding=utf8, + object_hook=null, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). 
+ +%% @spec binary_decoder([binary_decoder_option()]) -> function() +%% @doc Create a binary_decoder/1 with the given options. +binary_decoder(Options) -> + mochijson2:decoder(Options). + +%% @spec binary_encoder([binary_encoder_option()]) -> function() +%% @doc Create a binary_encoder/1 with the given options. +binary_encoder(Options) -> + mochijson2:encoder(Options). + +%% @spec binary_encode(bjson_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist, using lists for arrays and +%% binaries for strings. +binary_encode(Any) -> + mochijson2:encode(Any). + +%% @spec binary_decode(iolist()) -> bjson_term() +%% @doc Decode the given iolist to Erlang terms, using lists for arrays and +%% binaries for strings. +binary_decode(S) -> + mochijson2:decode(S). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{input_encoding, Encoding} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{input_encoding=Encoding}); +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{input_encoding, Encoding} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{input_encoding=Encoding}); +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}). 
%% Encode one term. Clause order matters: the JSON literals true, false
%% and null are matched before the generic atom-as-string clause.
%% Terms with no JSON form are passed through the configured handler,
%% or exit with {json_encode, {bad_term, Term}} when no handler is set.
json_encode(true, _Enc) ->
    "true";
json_encode(false, _Enc) ->
    "false";
json_encode(null, _Enc) ->
    "null";
json_encode(Int, _Enc) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _Enc) when is_float(Float) ->
    mochinum:digits(Float);
json_encode(Str, Enc) when is_list(Str); is_binary(Str); is_atom(Str) ->
    json_encode_string(Str, Enc);
json_encode({array, Items}, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
json_encode({struct, Members}, Enc) when is_list(Members) ->
    json_encode_proplist(Members, Enc);
json_encode(Unknown, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Unknown}});
json_encode(Unknown, Enc=#encoder{handler=Handler}) ->
    json_encode(Handler(Unknown), Enc).

%% Encode the elements of a JSON array into a flat iolist
%% "[" E1 "," E2 ... "]".
json_encode_array([], _Enc) ->
    "[]";
json_encode_array(Items, Enc) ->
    Step = fun (Item, Out) ->
                   [$,, json_encode(Item, Enc) | Out]
           end,
    %% The output is built in reverse, so the last separator pushed sits at
    %% the head; drop it and put the closing bracket in its place.
    [$, | Reversed] = lists:foldl(Step, "[", Items),
    lists:reverse([$\] | Reversed]).

%% Encode the {Key, Value} members of a JSON object into a flat iolist.
%% Keys may be atoms, integers, strings or binaries.
json_encode_proplist([], _Enc) ->
    "{}";
json_encode_proplist(Members, Enc) ->
    Step = fun ({Key, Value}, Out) ->
                   EncodedKey =
                       case Key of
                           Key when is_atom(Key) ->
                               json_encode_string_utf8(atom_to_list(Key));
                           Key when is_integer(Key) ->
                               json_encode_string(integer_to_list(Key), Enc);
                           Key when is_list(Key); is_binary(Key) ->
                               json_encode_string(Key, Enc)
                       end,
                   EncodedValue = json_encode(Value, Enc),
                   [$,, EncodedValue, $:, EncodedKey | Out]
           end,
    %% Same reverse-build trick as json_encode_array/2.
    [$, | Reversed] = lists:foldl(Step, "{", Members),
    lists:reverse([$\} | Reversed]).

%% Encode a string-like term. Atoms and binaries are always decoded from
%% UTF-8 first; plain lists follow the encoder's input_encoding setting.
json_encode_string(Atom, _Enc) when is_atom(Atom) ->
    json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(Atom)));
json_encode_string(Bin, _Enc) when is_binary(Bin) ->
    json_encode_string_unicode(xmerl_ucs:from_utf8(Bin));
json_encode_string(Chars, #encoder{input_encoding=utf8}) ->
    json_encode_string_utf8(Chars);
json_encode_string(Chars, #encoder{input_encoding=unicode}) ->
    json_encode_string_unicode(Chars).

%% Quote a list of UTF-8 bytes; the helper appends the closing quote.
json_encode_string_utf8(Bytes) ->
    [?Q | json_encode_string_utf8_1(Bytes)].
%% Encode the body of a UTF-8 byte list, ending with the closing quote.
%% ASCII bytes are escaped one at a time; at the first non-ASCII byte the
%% remaining input is decoded from UTF-8 and handed to the unicode encoder,
%% whose leading quote is dropped because one has already been emitted.
json_encode_string_utf8_1([C | Cs]) when C >= 0, C =< 16#7f ->
    Escaped = json_escape_char(C),
    [Escaped | json_encode_string_utf8_1(Cs)];
json_encode_string_utf8_1(All=[C | _]) when C >= 16#80, C =< 16#10FFFF ->
    [?Q | Rest] = json_encode_string_unicode(xmerl_ucs:from_utf8(All)),
    Rest;
json_encode_string_utf8_1([]) ->
    "\"".

%% Quote a list of Unicode code points; the helper appends the closing
%% quote.
json_encode_string_unicode(S) ->
    [?Q | json_encode_string_unicode_1(S)].

%% Encode the body of a code point list, ending with the closing quote.
%% Exits with {json_encode, {bad_char, C}} on an element outside
%% 0..16#10FFFF.
json_encode_string_unicode_1([C | Cs]) ->
    Escaped = json_escape_char(C),
    [Escaped | json_encode_string_unicode_1(Cs)];
json_encode_string_unicode_1([]) ->
    "\"".

%% Escape a single character for use inside a JSON string. Printable ASCII
%% is returned unchanged, the usual short escapes are used where they
%% exist, and everything else up to 16#10FFFF becomes a \uXXXX escape.
%% Shared by the utf8 and unicode paths, which used identical tables.
json_escape_char($\\) -> "\\\\";
json_escape_char(?Q) -> "\\\"";
json_escape_char(C) when C >= $\s, C < 16#7f -> C;
json_escape_char($\t) -> "\\t";
json_escape_char($\n) -> "\\n";
json_escape_char($\r) -> "\\r";
json_escape_char($\f) -> "\\f";
json_escape_char($\b) -> "\\b";
json_escape_char(C) when C >= 0, C =< 16#10FFFF -> unihex(C);
json_escape_char(C) -> exit({json_encode, {bad_char, C}}).

%% Value of one hexadecimal digit character (either case).
dehex(C) when C >= $0, C =< $9 ->
    C - $0;
dehex(C) when C >= $a, C =< $f ->
    C - $a + 10;
dehex(C) when C >= $A, C =< $F ->
    C - $A + 10.

%% Lower-case hexadecimal digit character for a nibble value 0..15.
hexdigit(C) when C >= 0, C =< 9 ->
    C + $0;
hexdigit(C) when C =< 15 ->
    C + $a - 10.

%% \uXXXX escape for a code point. Code points above 16#FFFF are written
%% as a UTF-16 surrogate pair, i.e. two consecutive escapes.
unihex(C) when C < 16#10000 ->
    %% Split the 16-bit value into its four nibbles, most significant first.
    <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
    Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
    [$\\, $u | Digits];
unihex(C) when C =< 16#10FFFF ->
    N = C - 16#10000,
    S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
    S2 = 16#dc00 bor (N band 16#3ff),
    [unihex(S1), unihex(S2)].

%% Decode a complete JSON document. After the top-level value only
%% trailing whitespace may remain; anything else fails the eof match.
json_decode(B, S) when is_binary(B) ->
    json_decode(binary_to_list(B), S);
json_decode(L, S) ->
    {Res, L1, S1} = decode1(L, S),
    {eof, [], _} = tokenize(L1, S1#decoder{state=trim}),
    Res.
%% Decode one JSON value from the head of Input.
%% Returns {Value, RemainingInput, DecoderState}.
decode1(Input, Dec=#decoder{state=null}) ->
    case tokenize(Input, Dec#decoder{state=any}) of
        {{const, Value}, Rest, Dec1} ->
            {Value, Rest, Dec1};
        {start_array, Rest, Dec1} ->
            decode_array(Rest, Dec1#decoder{state=any}, []);
        {start_object, Rest, Dec1} ->
            decode_object(Rest, Dec1#decoder{state=key}, [])
    end.

%% Pass a finished object through the object_hook, when one is set.
make_object(Obj, #decoder{object_hook=null}) ->
    Obj;
make_object(Obj, #decoder{object_hook=Hook}) ->
    Hook(Obj).

%% Decode the members of an object. The decoder state alternates between
%% `key' (expecting a key or the closing brace) and `comma' (expecting a
%% separator or the closing brace). Pairs is accumulated in reverse.
decode_object(Input, Dec=#decoder{state=key}, Pairs) ->
    case tokenize(Input, Dec) of
        {end_object, Rest, Dec1} ->
            finish_object(Pairs, Rest, Dec1);
        {{const, Key}, Rest, Dec1} when is_list(Key) ->
            {colon, AfterColon, Dec2} = tokenize(Rest, Dec1),
            {Value, AfterValue, Dec3} =
                decode1(AfterColon, Dec2#decoder{state=null}),
            decode_object(AfterValue, Dec3#decoder{state=comma},
                          [{Key, Value} | Pairs])
    end;
decode_object(Input, Dec=#decoder{state=comma}, Pairs) ->
    case tokenize(Input, Dec) of
        {end_object, Rest, Dec1} ->
            finish_object(Pairs, Rest, Dec1);
        {comma, Rest, Dec1} ->
            decode_object(Rest, Dec1#decoder{state=key}, Pairs)
    end.

%% Build the {struct, Members} term for a closed object and reset the
%% decoder state.
finish_object(Pairs, Rest, Dec) ->
    Obj = make_object({struct, lists:reverse(Pairs)}, Dec),
    {Obj, Rest, Dec#decoder{state=null}}.

%% Decode the elements of an array. The decoder state alternates between
%% `any' (expecting a value or the closing bracket) and `comma' (expecting
%% a separator or the closing bracket). Items is accumulated in reverse.
decode_array(Input, Dec=#decoder{state=any}, Items) ->
    case tokenize(Input, Dec) of
        {end_array, Rest, Dec1} ->
            finish_array(Items, Rest, Dec1);
        {start_array, Rest, Dec1} ->
            {Nested, Rest1, Dec2} =
                decode_array(Rest, Dec1#decoder{state=any}, []),
            decode_array(Rest1, Dec2#decoder{state=comma}, [Nested | Items]);
        {start_object, Rest, Dec1} ->
            {Nested, Rest1, Dec2} =
                decode_object(Rest, Dec1#decoder{state=key}, []),
            decode_array(Rest1, Dec2#decoder{state=comma}, [Nested | Items]);
        {{const, Value}, Rest, Dec1} ->
            decode_array(Rest, Dec1#decoder{state=comma}, [Value | Items])
    end;
decode_array(Input, Dec=#decoder{state=comma}, Items) ->
    case tokenize(Input, Dec) of
        {end_array, Rest, Dec1} ->
            finish_array(Items, Rest, Dec1);
        {comma, Rest, Dec1} ->
            decode_array(Rest, Dec1#decoder{state=any}, Items)
    end.

%% Build the {array, Elements} term for a closed array and reset the
%% decoder state.
finish_array(Items, Rest, Dec) ->
    {{array, lists:reverse(Items)}, Rest, Dec#decoder{state=null}}.
%% Tokenize the body of a JSON string; the opening quote has already been
%% consumed. Returns {Chars, RemainingInput, DecoderState}, where Chars is
%% the decoded string as a list of characters.
%%
%% When the decoder is in utf8 mode and the next element is a nested list,
%% a binary, or a non-ASCII byte, the remaining input is decoded from
%% UTF-8 in one go and processing continues in unicode mode.
tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
tokenize_string("\"" ++ Rest, S, Acc) ->
    {lists:reverse(Acc), Rest, ?INC_COL(S)};
tokenize_string("\\\"" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
tokenize_string("\\\\" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
tokenize_string("\\/" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
tokenize_string("\\b" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
tokenize_string("\\f" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
tokenize_string("\\n" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
tokenize_string("\\r" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
tokenize_string("\\t" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
    Unit = dehex4(C3, C2, C1, C0),
    %% The encoder writes code points above 16#FFFF as a UTF-16 surrogate
    %% pair (two \u escapes). Merge such a pair back into one code point so
    %% that encode/decode round-trips; a lone surrogate is kept as-is.
    case low_surrogate(Unit, Rest) of
        {Low, Rest1} ->
            CodePoint = 16#10000
                + ((Unit - 16#D800) bsl 10)
                + (Low - 16#DC00),
            tokenize_string(Rest1, ?ADV_COL(S, 12), [CodePoint | Acc]);
        none ->
            tokenize_string(Rest, ?ADV_COL(S, 6), [Unit | Acc])
    end;
%% NOTE: the guard below is a disjunction (`;'), so it holds for every
%% character, including raw control characters. It is kept unchanged to
%% stay compatible with input this decoder has always accepted.
tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
    tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).

%% Value of four hexadecimal digit characters, most significant first.
dehex4(C3, C2, C1, C0) ->
    dehex(C0) bor
        (dehex(C1) bsl 4) bor
        (dehex(C2) bsl 8) bor
        (dehex(C3) bsl 12).

%% If High is a UTF-16 high surrogate and the input continues with a \u
%% escape holding a low surrogate, return {Low, InputAfterEscape};
%% otherwise return none.
low_surrogate(High, [$\\, $u, D3, D2, D1, D0 | Rest])
  when High >= 16#D800, High =< 16#DBFF ->
    case dehex4(D3, D2, D1, D0) of
        Low when Low >= 16#DC00, Low =< 16#DFFF ->
            {Low, Rest};
        _ ->
            none
    end;
low_surrogate(_Unit, _Rest) ->
    none.
%% Tokenize a JSON number with a small state machine. Mode moves through
%% sign -> int -> int1 -> frac -> frac1 -> esign -> eint -> eint1, and the
%% accepted characters are accumulated in reverse in Acc.
%% Returns {{int, Digits} | {float, Digits}, RemainingInput, DecoderState}.
%%
%% As in tokenize_string/3, input in utf8 mode that starts with a nested
%% list, a binary or a non-ASCII byte is first decoded from UTF-8.
tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
    tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc);
tokenize_number([$- | Rest], sign, S, []) ->
    tokenize_number(Rest, int, ?INC_COL(S), [$-]);
tokenize_number(Rest, sign, S, []) ->
    tokenize_number(Rest, int, S, []);
tokenize_number([$0 | Rest], int, S, Acc) ->
    %% A leading zero cannot be followed by further integer digits.
    tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]);
tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, int1, S, Acc) ->
    tokenize_number(Rest, frac, S, Acc);
tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E ->
    %% An exponent without a fraction ("1e1"): insert ".0" so that the
    %% digits form a literal list_to_float/1 accepts.
    tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
tokenize_number(Rest, frac, S, Acc) ->
    {{int, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]);
tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E ->
    tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]);
tokenize_number(Rest, frac1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ ->
    tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, esign, S, Acc) ->
    tokenize_number(Rest, eint, S, Acc);
tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, eint1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S}.

%% Return the next token from the input as
%% {Token, RemainingInput, DecoderState}. Nested lists and binaries at the
%% head of the input are flattened as they are encountered. End of input
%% is only a valid token ({eof, [], S}) in the `trim' state.
tokenize([], S=#decoder{state=trim}) ->
    {eof, [], S};
tokenize([L | Rest], S) when is_list(L) ->
    tokenize(L ++ Rest, S);
tokenize([B | Rest], S) when is_binary(B) ->
    tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
tokenize("\r\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize("\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
%% A carriage return that is not part of CRLF is plain JSON whitespace;
%% CRLF itself is matched by the clause above.
tokenize([C | Rest], S) when C == $\s; C == $\t; C == $\r ->
    tokenize(Rest, ?INC_COL(S));
tokenize("{" ++ Rest, S) ->
    {start_object, Rest, ?INC_COL(S)};
tokenize("}" ++ Rest, S) ->
    {end_object, Rest, ?INC_COL(S)};
tokenize("[" ++ Rest, S) ->
    {start_array, Rest, ?INC_COL(S)};
tokenize("]" ++ Rest, S) ->
    {end_array, Rest, ?INC_COL(S)};
tokenize("," ++ Rest, S) ->
    {comma, Rest, ?INC_COL(S)};
tokenize(":" ++ Rest, S) ->
    {colon, Rest, ?INC_COL(S)};
tokenize("null" ++ Rest, S) ->
    {{const, null}, Rest, ?ADV_COL(S, 4)};
tokenize("true" ++ Rest, S) ->
    {{const, true}, Rest, ?ADV_COL(S, 4)};
tokenize("false" ++ Rest, S) ->
    {{const, false}, Rest, ?ADV_COL(S, 5)};
tokenize("\"" ++ Rest, S) ->
    {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
    {{const, String}, Rest1, S1};
tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
    case tokenize_number(L, sign, S, []) of
        {{int, Int}, Rest, S1} ->
            {{const, list_to_integer(Int)}, Rest, S1};
        {{float, Float}, Rest, S1} ->
            {{const, list_to_float(Float)}, Rest, S1}
    end.


%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).

%% testing constructs borrowed from the Yaws JSON implementation.

%% Create an object from a list of Key/Value pairs.

obj_new() ->
    {struct, []}.

%% True when every member of the object is a {Key, Value} pair whose key
%% is a list (string).
is_obj({struct, Props}) ->
    F = fun ({K, _}) when is_list(K) ->
                true;
            (_) ->
                false
        end,
    lists:all(F, Props).

%% Wrap a property list as an object, exiting with json_bad_object when a
%% member is malformed.
obj_from_list(Props) ->
    Obj = {struct, Props},
    case is_obj(Obj) of
        true -> Obj;
        false -> exit(json_bad_object)
    end.

%% Test for equivalence of Erlang terms.
%% Due to arbitrary order of construction, equivalent objects might
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).

equiv({struct, Props1}, {struct, Props2}) ->
    equiv_object(Props1, Props2);
equiv({array, L1}, {array, L2}) ->
    equiv_list(L1, L2);
equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
equiv(true, true) -> true;
equiv(false, false) -> true;
equiv(null, null) -> true.

%% Object representation and traversal order is unknown.
%% Use the sledgehammer and sort property lists.

equiv_object(Props1, Props2) ->
    L1 = lists:keysort(1, Props1),
    L2 = lists:keysort(1, Props2),
    Pairs = lists:zip(L1, L2),
    true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
                             equiv(K1, K2) and equiv(V1, V2)
                     end, Pairs).

%% Recursively compare tuple elements for equivalence.

equiv_list([], []) ->
    true;
equiv_list([V1 | L1], [V2 | L2]) ->
    equiv(V1, V2) andalso equiv_list(L1, L2).
%% Run every {ErlangTerm, JsonText} pair of the utf8 vector through the
%% decoder and through an encode/decode round trip.
e2j_vec_test() ->
    test_one(e2j_test_vec(utf8), 1).

issue33_test() ->
    %% http://code.google.com/p/mochiweb/issues/detail?id=33
    %% A UTF-8 encoded pound sign used as a value must be emitted as a
    %% single \u00a3 escape when the input encoding is utf8.
    Utf8Encode = encoder([{input_encoding, utf8}]),
    Input = {struct, [{"key", [194, 163]}]},
    "{\"key\":\"\\u00a3\"}" = lists:flatten(Utf8Encode(Input)).

%% Check each {Expected, Json} pair: decoding Json, and decoding the
%% encoding of Expected, must both be equivalent to Expected.
%% Returns ok once every pair has been checked.
test_one(Cases, _N) ->
    lists:foreach(
      fun ({Expected, Json}) ->
              true = equiv(Expected, decode(Json)),
              true = equiv(Expected, decode(encode(Expected)))
      end,
      Cases).

%% Pairs of an Erlang term and a JSON text that decodes to it.
e2j_test_vec(utf8) ->
    [
     {1, "1"},
     {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
     {-1, "-1"},
     {-3.1416, "-3.14160"},
     {12.0e10, "1.20000e+11"},
     {1.234E+10, "1.23400e+10"},
     {-1.234E-10, "-1.23400e-10"},
     {10.0, "1.0e+01"},
     {123.456, "1.23456E+2"},
     {10.0, "1e1"},
     {"foo", "\"foo\""},
     {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
     {"", "\"\""},
     {"\"", "\"\\\"\""},
     {"\n\n\n", "\"\\n\\n\\n\""},
     {"\\", "\"\\\\\""},
     {"\" \b\f\r\n\t\"", "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
     {obj_new(), "{}"},
     {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
     {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
      "{\"foo\":\"bar\",\"baz\":123}"},
     {{array, []}, "[]"},
     {{array, [{array, []}]}, "[[]]"},
     {{array, [1, "foo"]}, "[1,\"foo\"]"},

     % json array in a json object
     {obj_from_list([{"foo", {array, [123]}}]),
      "{\"foo\":[123]}"},

     % json object in a json object
     {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
      "{\"foo\":{\"bar\":true}}"},

     % fold evaluation order
     {obj_from_list([{"foo", {array, []}},
                     {"bar", obj_from_list([{"baz", true}])},
                     {"alice", "bob"}]),
      "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

     % json object in a json array
     {{array, [-123, "foo", obj_from_list([{"bar", {array, []}}]), null]},
      "[-123,\"foo\",{\"bar\":[]},null]"}
    ].

-endif.
diff --git a/1.1.x/src/mochiweb/mochijson2.erl b/1.1.x/src/mochiweb/mochijson2.erl new file mode 100644 index 00000000..64cabc86 --- /dev/null +++ b/1.1.x/src/mochiweb/mochijson2.erl @@ -0,0 +1,802 @@ +%% @author Bob Ippolito +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). + +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1]). + +% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() | json_iolist() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. 
%% @type encoder_option() = handler_option() | utf8_option()
%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
encoder(Options) ->
    Config = parse_encoder_options(Options, #encoder{}),
    fun (Term) -> json_encode(Term, Config) end.

%% @spec encode(json_term()) -> iolist()
%% @doc Encode a term as JSON using the default encoder settings.
encode(Term) ->
    json_encode(Term, #encoder{}).

%% @spec decoder([decoder_option()]) -> function()
%% @doc Build a one-argument decoding fun. The options are parsed once
%%      and captured by the returned closure.
decoder(Options) ->
    Config = parse_decoder_options(Options, #decoder{}),
    fun (Json) -> json_decode(Json, Config) end.

%% @spec decode(iolist()) -> json_term()
%% @doc Decode JSON text into Erlang terms using the default decoder
%%      settings.
decode(Json) ->
    json_decode(Json, #decoder{}).

%% Internal API

%% Fold the option list into an #encoder{} record, left to right, so a
%% later option overrides an earlier one. Unknown options crash with
%% function_clause.
parse_encoder_options(Options, State) ->
    lists:foldl(fun apply_encoder_option/2, State, Options).

%% Apply a single encoder option to the record.
apply_encoder_option({handler, Handler}, State) ->
    State#encoder{handler=Handler};
apply_encoder_option({utf8, Switch}, State) ->
    State#encoder{utf8=Switch}.

%% Fold the option list into a #decoder{} record; object_hook is the only
%% option recognised.
parse_decoder_options(Options, State) ->
    lists:foldl(fun apply_decoder_option/2, State, Options).

%% Apply a single decoder option to the record.
apply_decoder_option({object_hook, Hook}, State) ->
    State#decoder{object_hook=Hook}.
%% Encode one term. Clause order matters: the JSON literals true, false
%% and null are matched before the generic atom-as-string clause, and any
%% list is treated as a JSON array. {json, IoList} is emitted verbatim.
%% Terms with no JSON form are passed through the configured handler,
%% or exit with {json_encode, {bad_term, Term}} when no handler is set.
json_encode(true, _Enc) ->
    <<"true">>;
json_encode(false, _Enc) ->
    <<"false">>;
json_encode(null, _Enc) ->
    <<"null">>;
json_encode(Int, _Enc) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _Enc) when is_float(Float) ->
    mochinum:digits(Float);
json_encode(Str, Enc) when is_binary(Str); is_atom(Str) ->
    json_encode_string(Str, Enc);
json_encode(Items, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
json_encode({struct, Members}, Enc) when is_list(Members) ->
    json_encode_proplist(Members, Enc);
json_encode({json, Raw}, _Enc) ->
    Raw;
json_encode(Unknown, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Unknown}});
json_encode(Unknown, Enc=#encoder{handler=Handler}) ->
    json_encode(Handler(Unknown), Enc).

%% Encode the elements of a JSON array into a flat iolist
%% "[" E1 "," E2 ... "]".
json_encode_array([], _Enc) ->
    <<"[]">>;
json_encode_array(Items, Enc) ->
    Step = fun (Item, Out) ->
                   [$,, json_encode(Item, Enc) | Out]
           end,
    %% The output is built in reverse, so the last separator pushed sits at
    %% the head; drop it and put the closing bracket in its place.
    [$, | Reversed] = lists:foldl(Step, "[", Items),
    lists:reverse([$\] | Reversed]).

%% Encode the {Key, Value} members of a JSON object into a flat iolist.
json_encode_proplist([], _Enc) ->
    <<"{}">>;
json_encode_proplist(Members, Enc) ->
    Step = fun ({Key, Value}, Out) ->
                   EncodedKey = json_encode_string(Key, Enc),
                   EncodedValue = json_encode(Value, Enc),
                   [$,, EncodedValue, $:, EncodedKey | Out]
           end,
    %% Same reverse-build trick as json_encode_array/2.
    [$, | Reversed] = lists:foldl(Step, "{", Members),
    lists:reverse([$\} | Reversed]).

%% Encode a string-like term (atom, binary, integer or character list).
%% Input that needs no escaping is wrapped in quotes as-is; anything else
%% goes through the escaping encoder. Atoms and binaries are decoded from
%% UTF-8 before being escaped.
json_encode_string(Atom, Enc) when is_atom(Atom) ->
    Chars = atom_to_list(Atom),
    case json_string_is_safe(Chars) of
        true ->
            [?Q, Chars, ?Q];
        false ->
            json_encode_string_unicode(xmerl_ucs:from_utf8(Chars), Enc, [?Q])
    end;
json_encode_string(Bin, Enc) when is_binary(Bin) ->
    case json_bin_is_safe(Bin) of
        true ->
            [?Q, Bin, ?Q];
        false ->
            json_encode_string_unicode(xmerl_ucs:from_utf8(Bin), Enc, [?Q])
    end;
json_encode_string(Int, _Enc) when is_integer(Int) ->
    [?Q, integer_to_list(Int), ?Q];
json_encode_string(Chars, Enc) when is_list(Chars) ->
    case json_string_is_safe(Chars) of
        true ->
            [?Q, Chars, ?Q];
        false ->
            json_encode_string_unicode(Chars, Enc, [?Q])
    end.
%% True when every element of the list is a printable ASCII character
%% other than the double quote and the backslash, i.e. the list can be
%% placed between quotes without any escaping. Control characters, DEL,
%% non-ASCII code points and out-of-range values (including negative
%% integers) all make the string unsafe.
json_string_is_safe([]) ->
    true;
json_string_is_safe([C | Rest]) ->
    case C of
        ?Q ->
            false;
        $\\ ->
            false;
        C when C >= $\s, C < 16#7f ->
            json_string_is_safe(Rest);
        _ ->
            false
    end.

%% Same check for a binary: true when every byte is printable ASCII other
%% than the double quote and the backslash.
json_bin_is_safe(<<>>) ->
    true;
json_bin_is_safe(<<C, Rest/binary>>) ->
    case C of
        ?Q ->
            false;
        $\\ ->
            false;
        C when C >= $\s, C < 16#7f ->
            json_bin_is_safe(Rest);
        _ ->
            false
    end.

%% Escape a list of code points into a JSON string. Acc holds the output
%% in reverse and already contains the opening quote; the closing quote is
%% added here. Exits with {json_encode, {bad_char, C}} on an element that
%% is not a valid code point.
json_encode_string_unicode([], _State, Acc) ->
    lists:reverse([$\" | Acc]);
json_encode_string_unicode([C | Cs], State, Acc) ->
    Acc1 = case C of
               ?Q ->
                   [?Q, $\\ | Acc];
               %% Escaping solidus is only useful when trying to protect
               %% against "</script>" injection attacks which are only
               %% possible when JSON is inserted into a HTML document
               %% in-line. mochijson2 does not protect you from this, so
               %% if you do insert directly into HTML then you need to
               %% uncomment the following case or escape the output of encode.
               %%
               %% $/ ->
               %%    [$/, $\\ | Acc];
               %%
               $\\ ->
                   [$\\, $\\ | Acc];
               $\b ->
                   [$b, $\\ | Acc];
               $\f ->
                   [$f, $\\ | Acc];
               $\n ->
                   [$n, $\\ | Acc];
               $\r ->
                   [$r, $\\ | Acc];
               $\t ->
                   [$t, $\\ | Acc];
               C when C >= 0, C < $\s ->
                   [unihex(C) | Acc];
               C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
                   [xmerl_ucs:to_utf8(C) | Acc];
               C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
                   [unihex(C) | Acc];
               %% Printable ASCII only: a bare `C < 16#7f' would also let
               %% negative integers through into the output.
               C when C >= $\s, C < 16#7f ->
                   [C | Acc];
               _ ->
                   exit({json_encode, {bad_char, C}})
           end,
    json_encode_string_unicode(Cs, State, Acc1).

%% Lower-case hexadecimal digit character for a nibble value 0..15.
hexdigit(C) when C >= 0, C =< 9 ->
    C + $0;
hexdigit(C) when C =< 15 ->
    C + $a - 10.
%% \uXXXX escape for a code point. Code points above 16#FFFF are written
%% as a UTF-16 surrogate pair, i.e. two consecutive escapes.
unihex(C) when C < 16#10000 ->
    %% Split the 16-bit value into its four nibbles, most significant first.
    <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
    Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
    [$\\, $u | Digits];
unihex(C) when C =< 16#10FFFF ->
    N = C - 16#10000,
    S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
    S2 = 16#dc00 bor (N band 16#3ff),
    [unihex(S1), unihex(S2)].

%% Decode a complete JSON document. List input is flattened to a binary
%% first. After the top-level value only trailing whitespace may remain;
%% anything else fails the eof match.
json_decode(L, S) when is_list(L) ->
    json_decode(iolist_to_binary(L), S);
json_decode(B, S) ->
    {Res, S1} = decode1(B, S),
    {eof, _} = tokenize(B, S1#decoder{state=trim}),
    Res.

%% Decode one JSON value starting at the decoder's current offset.
%% The binary is never sliced; the position travels in the decoder state.
%% Returns {Value, DecoderState}.
decode1(B, S=#decoder{state=null}) ->
    case tokenize(B, S#decoder{state=any}) of
        {{const, C}, S1} ->
            {C, S1};
        {start_array, S1} ->
            decode_array(B, S1);
        {start_object, S1} ->
            decode_object(B, S1)
    end.

%% Pass a finished object through the object_hook, when one is set.
make_object(V, #decoder{object_hook=null}) ->
    V;
make_object(V, #decoder{object_hook=Hook}) ->
    Hook(V).

%% Decode an object whose opening brace has just been consumed.
decode_object(B, S) ->
    decode_object(B, S#decoder{state=key}, []).

%% The decoder state alternates between `key' (expecting a key or the
%% closing brace) and `comma' (expecting a separator or the closing
%% brace). Acc holds the members in reverse.
decode_object(B, S=#decoder{state=key}, Acc) ->
    case tokenize(B, S) of
        {end_object, S1} ->
            V = make_object({struct, lists:reverse(Acc)}, S1),
            {V, S1#decoder{state=null}};
        {{const, K}, S1} ->
            {colon, S2} = tokenize(B, S1),
            {V, S3} = decode1(B, S2#decoder{state=null}),
            decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
    end;
decode_object(B, S=#decoder{state=comma}, Acc) ->
    case tokenize(B, S) of
        {end_object, S1} ->
            V = make_object({struct, lists:reverse(Acc)}, S1),
            {V, S1#decoder{state=null}};
        {comma, S1} ->
            decode_object(B, S1#decoder{state=key}, Acc)
    end.

%% Decode an array whose opening bracket has just been consumed.
decode_array(B, S) ->
    decode_array(B, S#decoder{state=any}, []).
%% Decode the elements of an array. The decoder state alternates between
%% `any' (expecting a value or the closing bracket) and `comma' (expecting
%% a separator or the closing bracket). Items is accumulated in reverse.
decode_array(Bin, Dec=#decoder{state=any}, Items) ->
    case tokenize(Bin, Dec) of
        {end_array, Dec1} ->
            {lists:reverse(Items), Dec1#decoder{state=null}};
        {start_array, Dec1} ->
            {Nested, Dec2} = decode_array(Bin, Dec1),
            decode_array(Bin, Dec2#decoder{state=comma}, [Nested | Items]);
        {start_object, Dec1} ->
            {Nested, Dec2} = decode_object(Bin, Dec1),
            decode_array(Bin, Dec2#decoder{state=comma}, [Nested | Items]);
        {{const, Value}, Dec1} ->
            decode_array(Bin, Dec1#decoder{state=comma}, [Value | Items])
    end;
decode_array(Bin, Dec=#decoder{state=comma}, Items) ->
    case tokenize(Bin, Dec) of
        {end_array, Dec1} ->
            {lists:reverse(Items), Dec1#decoder{state=null}};
        {comma, Dec1} ->
            decode_array(Bin, Dec1#decoder{state=any}, Items)
    end.

%% Tokenize a string whose opening quote has just been consumed.
%% The fast scanner looks for the closing quote; when it reaches it
%% without meeting a backslash, the string is cut straight out of the
%% input binary. Otherwise the bytes scanned so far seed the accumulator
%% of the escape-aware tokenize_string/3.
tokenize_string(Bin, Dec=#decoder{offset=Start}) ->
    case tokenize_string_fast(Bin, Start) of
        {escape, EscapeAt} ->
            PrefixLen = EscapeAt - Start,
            Dec1 = ?ADV_COL(Dec, PrefixLen),
            <<_:Start/binary, Prefix:PrefixLen/binary, _/binary>> = Bin,
            tokenize_string(Bin, Dec1, lists:reverse(binary_to_list(Prefix)));
        QuoteAt ->
            StrLen = QuoteAt - Start,
            <<_:Start/binary, Str:StrLen/binary, ?Q, _/binary>> = Bin,
            %% The extra column accounts for the closing quote.
            {{const, Str}, ?ADV_COL(Dec, StrLen + 1)}
    end.

%% Scan forward from offset Pos, validating UTF-8 sequences on the way.
%% Returns the offset of the closing quote, or {escape, Offset} at the
%% first backslash. Throws invalid_utf8 when the bytes at the current
%% offset match none of the accepted sequences (including end of input).
tokenize_string_fast(Bin, Pos) ->
    case Bin of
        <<_:Pos/binary, ?Q, _/binary>> ->
            Pos;
        <<_:Pos/binary, $\\, _/binary>> ->
            {escape, Pos};
        <<_:Pos/binary, B1, _/binary>> when B1 < 128 ->
            tokenize_string_fast(Bin, 1 + Pos);
        <<_:Pos/binary, B1, B2, _/binary>> when B1 >= 194, B1 =< 223,
                                                B2 >= 128, B2 =< 191 ->
            tokenize_string_fast(Bin, 2 + Pos);
        <<_:Pos/binary, B1, B2, B3, _/binary>> when B1 >= 224, B1 =< 239,
                                                    B2 >= 128, B2 =< 191,
                                                    B3 >= 128, B3 =< 191 ->
            tokenize_string_fast(Bin, 3 + Pos);
        <<_:Pos/binary, B1, B2, B3, B4, _/binary>> when B1 >= 240, B1 =< 244,
                                                        B2 >= 128, B2 =< 191,
                                                        B3 >= 128, B3 =< 191,
                                                        B4 >= 128, B4 =< 191 ->
            tokenize_string_fast(Bin, 4 + Pos);
        _ ->
            throw(invalid_utf8)
    end.
%% Escape-aware string tokenizer. Acc holds the decoded bytes in reverse.
%% Escapes are translated, \uXXXX escapes are re-encoded as UTF-8, and raw
%% multi-byte sequences are validated and copied through.
%% Returns {{const, Binary}, DecoderState} at the closing quote and throws
%% invalid_utf8 when the bytes at the current offset match none of the
%% accepted sequences.
tokenize_string(B, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, ?Q, _/binary>> ->
            {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
        <<_:O/binary, "\\\"", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
        <<_:O/binary, "\\\\", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
        <<_:O/binary, "\\/", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
        <<_:O/binary, "\\b", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
        <<_:O/binary, "\\f", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
        <<_:O/binary, "\\n", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
        <<_:O/binary, "\\r", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
        <<_:O/binary, "\\t", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
        <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
            C = erlang:list_to_integer([C3, C2, C1, C0], 16),
            if C > 16#D7FF, C < 16#DC00 ->
                %% coalesce UTF-16 surrogate pair
                <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
                D = erlang:list_to_integer([D3,D2,D1,D0], 16),
                [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
                    D:16/big-unsigned-integer>>),
                Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
                %% Two six-character escapes were consumed.
                tokenize_string(B, ?ADV_COL(S, 12), Acc1);
            true ->
                Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
                tokenize_string(B, ?ADV_COL(S, 6), Acc1)
            end;
        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
            tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]);
        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
                C2 >= 128, C2 =< 191 ->
            tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]);
        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
                C2 >= 128, C2 =< 191,
                C3 >= 128, C3 =< 191 ->
            tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]);
        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
                C2 >= 128, C2 =< 191,
                C3 >= 128, C3 =< 191,
                C4 >= 128, C4 =< 191 ->
            tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]);
        _ ->
            throw(invalid_utf8)
    end.

%% Tokenize a number at the current offset and convert it to an integer
%% or a float. Returns {{const, Number}, DecoderState}.
tokenize_number(B, S) ->
    case tokenize_number(B, sign, S, []) of
        {{int, Int}, S1} ->
            {{const, list_to_integer(Int)}, S1};
        {{float, Float}, S1} ->
            {{const, list_to_float(Float)}, S1}
    end.

%% Number state machine. The mode moves through
%% sign -> int -> int1 -> frac -> frac1 -> esign -> eint -> eint1, and the
%% accepted characters are accumulated in reverse in Acc.
%% Returns {{int, Digits} | {float, Digits}, DecoderState}.
tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
    case B of
        <<_:O/binary, $-, _/binary>> ->
            tokenize_number(B, int, ?INC_COL(S), [$-]);
        _ ->
            tokenize_number(B, int, S, [])
    end;
tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, $0, _/binary>> ->
            %% A leading zero cannot be followed by further integer digits.
            tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
        <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
            tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
    end;
tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
            tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
        _ ->
            tokenize_number(B, frac, S, Acc)
    end;
tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
            tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
        <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
            %% An exponent without a fraction ("1e1"): insert ".0" so that
            %% the digits form a literal list_to_float/1 accepts.
            tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
        _ ->
            {{int, lists:reverse(Acc)}, S}
    end;
tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
            tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
        <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
            tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
        _ ->
            {{float, lists:reverse(Acc)}, S}
    end;
tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
            tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
        _ ->
            tokenize_number(B, eint, S, Acc)
    end;
tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
            tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
    end;
tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
            tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
        _ ->
            {{float, lists:reverse(Acc)}, S}
    end.
%% Return the next token found at the decoder's current offset as
%% {Token, DecoderState}, skipping leading whitespace. End of input is
%% only a valid token ({eof, State}) when the decoder is in the `trim'
%% state.
tokenize(Bin, Dec=#decoder{offset=Pos}) ->
    case Bin of
        <<_:Pos/binary, Ws, _/binary>> when ?IS_WHITESPACE(Ws) ->
            tokenize(Bin, ?INC_CHAR(Dec, Ws));
        <<_:Pos/binary, "{", _/binary>> ->
            {start_object, ?INC_COL(Dec)};
        <<_:Pos/binary, "}", _/binary>> ->
            {end_object, ?INC_COL(Dec)};
        <<_:Pos/binary, "[", _/binary>> ->
            {start_array, ?INC_COL(Dec)};
        <<_:Pos/binary, "]", _/binary>> ->
            {end_array, ?INC_COL(Dec)};
        <<_:Pos/binary, ",", _/binary>> ->
            {comma, ?INC_COL(Dec)};
        <<_:Pos/binary, ":", _/binary>> ->
            {colon, ?INC_COL(Dec)};
        <<_:Pos/binary, "null", _/binary>> ->
            {{const, null}, ?ADV_COL(Dec, 4)};
        <<_:Pos/binary, "true", _/binary>> ->
            {{const, true}, ?ADV_COL(Dec, 4)};
        <<_:Pos/binary, "false", _/binary>> ->
            {{const, false}, ?ADV_COL(Dec, 5)};
        <<_:Pos/binary, "\"", _/binary>> ->
            tokenize_string(Bin, ?INC_COL(Dec));
        <<_:Pos/binary, Ch, _/binary>> when (Ch >= $0 andalso Ch =< $9)
                                            orelse Ch =:= $- ->
            tokenize_number(Bin, Dec);
        <<_:Pos/binary>> ->
            %% Input exhausted: only acceptable while trimming trailing
            %% whitespace after the top-level value.
            #decoder{state=trim} = Dec,
            {eof, Dec}
    end.
%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).


%% testing constructs borrowed from the Yaws JSON implementation.

%% Create an object from a list of Key/Value pairs.

obj_new() ->
    {struct, []}.

%% True when every member is a {Key, Value} pair with a binary key; any
%% other member makes the fun crash with function_clause.
is_obj({struct, Members}) ->
    HasBinaryKey = fun ({Key, _}) when is_binary(Key) -> true end,
    lists:all(HasBinaryKey, Members).

%% Wrap a property list as an object, asserting that it is well formed.
obj_from_list(Members) ->
    Object = {struct, Members},
    ?assert(is_obj(Object)),
    Object.

%% Test for equivalence of Erlang terms.
%% Due to arbitrary order of construction, equivalent objects might
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).

equiv({struct, MembersA}, {struct, MembersB}) ->
    equiv_object(MembersA, MembersB);
equiv(ListA, ListB) when is_list(ListA), is_list(ListB) ->
    equiv_list(ListA, ListB);
equiv(NumA, NumB) when is_number(NumA), is_number(NumB) ->
    NumA == NumB;
equiv(BinA, BinB) when is_binary(BinA), is_binary(BinB) ->
    BinA == BinB;
equiv(Atom, Atom) when Atom =:= true orelse Atom =:= false orelse Atom =:= null ->
    true.
%% Object representation and traversal order is unknown.
%% Use the sledgehammer and sort property lists.

equiv_object(Props1, Props2) ->
    L1 = lists:keysort(1, Props1),
    L2 = lists:keysort(1, Props2),
    Pairs = lists:zip(L1, L2),
    true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
                             equiv(K1, K2) and equiv(V1, V2)
                     end, Pairs).

%% Recursively compare tuple elements for equivalence.

equiv_list([], []) ->
    true;
equiv_list([V1 | L1], [V2 | L2]) ->
    equiv(V1, V2) andalso equiv_list(L1, L2).

%% Floats and integers decode as such, and a \u surrogate pair is decoded
%% into the four-byte UTF-8 sequence of the code point it denotes.
decode_test() ->
    [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
    <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]).

e2j_vec_test() ->
    test_one(e2j_test_vec(utf8), 1).

%% Check each {Expected, Json} pair: decoding Json, and decoding the
%% encoding of Expected, must both be equivalent to Expected.
test_one([], _N) ->
    %% io:format("~p tests passed~n", [N-1]),
    ok;
test_one([{E, J} | Rest], N) ->
    %% io:format("[~p] ~p ~p~n", [N, E, J]),
    true = equiv(E, decode(J)),
    true = equiv(E, decode(encode(E))),
    test_one(Rest, 1+N).

%% Pairs of an Erlang term and a JSON text that decodes to it.
e2j_test_vec(utf8) ->
    [
     {1, "1"},
     {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
     {-1, "-1"},
     {-3.1416, "-3.14160"},
     {12.0e10, "1.20000e+11"},
     {1.234E+10, "1.23400e+10"},
     {-1.234E-10, "-1.23400e-10"},
     {10.0, "1.0e+01"},
     {123.456, "1.23456E+2"},
     {10.0, "1e1"},
     {<<"foo">>, "\"foo\""},
     {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
     {<<"">>, "\"\""},
     {<<"\n\n\n">>, "\"\\n\\n\\n\""},
     {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
     {obj_new(), "{}"},
     {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
     {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
      "{\"foo\":\"bar\",\"baz\":123}"},
     {[], "[]"},
     {[[]], "[[]]"},
     {[1, <<"foo">>], "[1,\"foo\"]"},

     %% json array in a json object
     {obj_from_list([{<<"foo">>, [123]}]),
      "{\"foo\":[123]}"},

     %% json object in a json object
     {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
      "{\"foo\":{\"bar\":true}}"},

     %% fold evaluation order
     {obj_from_list([{<<"foo">>, []},
                     {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                     {<<"alice">>, <<"bob">>}]),
      "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

     %% json object in a json array
     {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
      "[-123,\"foo\",{\"bar\":[]},null]"}
    ].

%% test utf8 encoding
encoder_utf8_test() ->
    %% safe conversion case (default)
    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
        encode(<<1,"\321\202\320\265\321\201\321\202">>),

    %% raw utf8 output (optional)
    Enc = mochijson2:encoder([{utf8, true}]),
    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
        Enc(<<1,"\321\202\320\265\321\201\321\202">>).

%% Well-formed UTF-8 strings decode to themselves; malformed ones make
%% the decoder throw invalid_utf8 and the encoder exit.
input_validation_test() ->
    Good = [
        {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
        {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
        {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
    ],
    lists:foreach(fun({CodePoint, UTF8}) ->
        Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
        Expect = decode(UTF8)
    end, Good),

    Bad = [
        %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
        <<?Q, 16#80, ?Q>>,
        %% missing continuations, last byte in each should be 80-BF
        <<?Q, 16#C2, 16#7F, ?Q>>,
        <<?Q, 16#E0, 16#80, 16#7F, ?Q>>,
        <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
        %% we don't support code points > 10FFFF per RFC 3629
        <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
        %% escape characters trigger a different code path
        <<?Q, $\\, $\n, 16#80, ?Q>>
    ],
    lists:foreach(
      fun(X) ->
              ok = try decode(X) catch invalid_utf8 -> ok end,
              %% could be {ucs,{bad_utf8_character_code}} or
              %% {json_encode,{bad_char,_}}
              {'EXIT', _} = (catch encode(X))
      end, Bad).

%% {json, IoList} terms are spliced into the output verbatim, both at the
%% top level and as object values.
inline_json_test() ->
    ?assertEqual(<<"\"iodata iodata\"">>,
                 iolist_to_binary(
                   encode({json, [<<"\"iodata">>, " iodata\""]}))),
    ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
                 decode(
                   encode({struct,
                           [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))),
    ok.

%% A code point above 16#FFFF is encoded as a surrogate pair and decodes
%% back to the original UTF-8 bytes.
big_unicode_test() ->
    UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
    ?assertEqual(
       <<"\"\\ud834\\udd20\"">>,
       iolist_to_binary(encode(UTF8Seq))),
    ?assertEqual(
       UTF8Seq,
       decode(iolist_to_binary(encode(UTF8Seq)))),
    ok.

%% With no options the decoder yields a {struct, Props} term; with an
%% object_hook the test expects the hook's return value in its place.
custom_decoder_test() ->
    ?assertEqual(
       {struct, [{<<"key">>, <<"value">>}]},
       (decoder([]))("{\"key\": \"value\"}")),
    F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
    ?assertEqual(
       win,
       (decoder([{object_hook, F}]))("{\"key\": \"value\"}")),
    ok.

%% true/false/null map to themselves in both directions; any other atom is
%% expected to be encoded as a JSON string.
atom_test() ->
    %% JSON native atoms
    [begin
         ?assertEqual(A, decode(atom_to_list(A))),
         ?assertEqual(iolist_to_binary(atom_to_list(A)),
                      iolist_to_binary(encode(A)))
     end || A <- [true, false, null]],
    %% Atom to string
    ?assertEqual(
       <<"\"foo\"">>,
       iolist_to_binary(encode(foo))),
    ?assertEqual(
       <<"\"\\ud834\\udd20\"">>,
       iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))),
    ok.

%% Object keys may be atoms, binaries, strings (code point lists) or
%% integers; each is expected to come out as a JSON string key.
key_encode_test() ->
    %% Some forms are accepted as keys that would not be strings in other
    %% cases
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{foo, 1}]}))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{"foo", 1}]}))),
    ?assertEqual(
       <<"{\"\\ud834\\udd20\":1}">>,
       iolist_to_binary(
         encode({struct, [{[16#0001d120], 1}]}))),
    ?assertEqual(
       <<"{\"1\":1}">>,
       iolist_to_binary(encode({struct, [{1, 1}]}))),
    ok.

%% Every character in Chars must be reported unsafe by both predicates and
%% must survive an encode/decode round trip as a one-byte binary.
%% (The <<C>> literals had been stripped to `<>` by extraction; restored.)
unsafe_chars_test() ->
    Chars = "\"\\\b\f\n\r\t",
    [begin
         ?assertEqual(false, json_string_is_safe([C])),
         ?assertEqual(false, json_bin_is_safe(<<C>>)),
         ?assertEqual(<<C>>, decode(encode(<<C>>)))
     end || C <- Chars],
    ?assertEqual(
       false,
       json_string_is_safe([16#0001d120])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))),
    ?assertEqual(
       [16#0001d120],
       xmerl_ucs:from_utf8(
         binary_to_list(
           decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))),
    ?assertEqual(
       false,
       json_string_is_safe([16#110000])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
    %% solidus can be escaped but isn't unsafe by default
    ?assertEqual(
       <<"/">>,
       decode(<<"\"\\/\"">>)),
    ok.

int_test() ->
    ?assertEqual(0, decode("0")),
    ?assertEqual(1, decode("1")),
    ?assertEqual(11, decode("11")),
    ok.

%% Integers far outside the 32-bit range are encoded digit for digit.
large_int_test() ->
    ?assertEqual(<<"-2147483649214748364921474836492147483649">>,
        iolist_to_binary(encode(-2147483649214748364921474836492147483649))),
    ?assertEqual(<<"2147483649214748364921474836492147483649">>,
        iolist_to_binary(encode(2147483649214748364921474836492147483649))),
    ok.

float_test() ->
    ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))),
    ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))),
    ok.

%% Without a handler an unsupported term exits with {json_encode, {bad_term, T}};
%% a {handler, Fun} option lets the caller substitute an encodable term.
handler_test() ->
    ?assertEqual(
       {'EXIT',{json_encode,{bad_term,{}}}},
       catch encode({})),
    F = fun ({}) -> [] end,
    ?assertEqual(
       <<"[]">>,
       iolist_to_binary((encoder([{handler, F}]))({}))),
    ok.

-endif.
diff --git a/1.1.x/src/mochiweb/mochilists.erl b/1.1.x/src/mochiweb/mochilists.erl
new file mode 100644
index 00000000..8981e7b6
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochilists.erl
@@ -0,0 +1,104 @@
%% @copyright Copyright (c) 2010 Mochi Media, Inc.
%% @author David Reid

%% @doc Utility functions for dealing with proplists.

-module(mochilists).
-author("David Reid ").
-export([get_value/2, get_value/3, is_defined/2, set_default/2, set_defaults/2]).

%% @spec set_default({Key::term(), Value::term()}, Proplist::list()) -> list()
%%
%% @doc Prepend {Key, Value} to Proplist unless Key is already defined
%%      there, in which case Proplist is returned untouched.
set_default({Key, _Value} = Default, Proplist) ->
    case is_defined(Key, Proplist) of
        false -> [Default | Proplist];
        true -> Proplist
    end.

%% @spec set_defaults([{Key::term(), Value::term()}], Proplist::list()) -> list()
%%
%% @doc Apply set_default/2 for each entry of DefaultProps, left to right.
set_defaults(DefaultProps, Proplist) ->
    lists:foldl(fun (Default, Acc) -> set_default(Default, Acc) end,
                Proplist,
                DefaultProps).


%% @spec is_defined(Key::term(), Proplist::list()) -> bool()
%%
%% @doc Returns true if Proplist contains at least one tuple whose first
%%      element is Key, otherwise false.
is_defined(Key, Proplist) ->
    lists:keymember(Key, 1, Proplist).


%% @spec get_value(Key::term(), Proplist::list()) -> term() | undefined
%%
%% @doc Return the value stored under Key, or undefined when Key is absent.
get_value(Key, Proplist) ->
    get_value(Key, Proplist, undefined).

%% @spec get_value(Key::term(), Proplist::list(), Default::term()) -> term()
%%
%% @doc Return the value of the first {Key, Value} pair, or Default when
%%      no tuple keyed by Key exists.
get_value(Key, Proplist, Default) ->
    Found = lists:keyfind(Key, 1, Proplist),
    case Found of
        {Key, Value} ->
            Value;
        false ->
            Default
    end.

%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).

set_defaults_test() ->
    ?assertEqual(
       [{k, v}],
       set_defaults([{k, v}], [])),
    ?assertEqual(
       [{k, v}],
       set_defaults([{k, vee}], [{k, v}])),
    ?assertEqual(
       lists:sort([{kay, vee}, {k, v}]),
       lists:sort(set_defaults([{k, vee}, {kay, vee}], [{k, v}]))),
    ok.

set_default_test() ->
    ?assertEqual(
       [{k, v}],
       set_default({k, v}, [])),
    ?assertEqual(
       [{k, v}],
       set_default({k, vee}, [{k, v}])),
    ok.

%% Covers get_value/2 and get_value/3 for a missing key, an empty list and
%% a present key.
get_value_test() ->
    ?assertEqual(
       undefined,
       get_value(foo, [])),
    ?assertEqual(
       undefined,
       get_value(foo, [{bar, baz}])),
    ?assertEqual(
       bar,
       get_value(foo, [{foo, bar}])),
    ?assertEqual(
       default,
       get_value(foo, [], default)),
    ?assertEqual(
       default,
       get_value(foo, [{bar, baz}], default)),
    ?assertEqual(
       bar,
       get_value(foo, [{foo, bar}], default)),
    ok.

-endif.

diff --git a/1.1.x/src/mochiweb/mochilogfile2.erl b/1.1.x/src/mochiweb/mochilogfile2.erl
new file mode 100644
index 00000000..c34ee73a
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochilogfile2.erl
@@ -0,0 +1,140 @@
%% @author Bob Ippolito
%% @copyright 2010 Mochi Media, Inc.

%% @doc Write newline delimited log files, ensuring that if a truncated
%%      entry is found on log open then it is fixed before writing. Uses
%%      delayed writes and raw files for performance.
-module(mochilogfile2).
-author('bob@mochimedia.com').

-export([open/1, write/2, close/1, name/1]).

%% @spec open(Name) -> Handle
%% @doc Open the log file Name, creating or appending as necessary. All data
%%      at the end of the file will be truncated until a newline is found, to
%%      ensure that all records are complete.
%%      The handle is the tuple {mochilogfile2, Name, FD}; a failed
%%      file:open/2 surfaces as a badmatch.
open(Name) ->
    {ok, FD} = file:open(Name, [raw, read, write, delayed_write, binary]),
    fix_log(FD),
    {?MODULE, Name, FD}.

%% @spec name(Handle) -> string()
%% @doc Return the path of the log file.
name({?MODULE, Name, _FD}) ->
    Name.

%% @spec write(Handle, IoData) -> ok
%% @doc Write IoData to the log file referenced by Handle.
%%      A newline is appended after IoData.
write({?MODULE, _Name, FD}, IoData) ->
    ok = file:write(FD, [IoData, $\n]),
    ok.

%% @spec close(Handle) -> ok
%% @doc Close the log file referenced by Handle.
%%      The file is synced before it is closed.
close({?MODULE, _Name, FD}) ->
    ok = file:sync(FD),
    ok = file:close(FD),
    ok.

%% Position the file just after the last newline and truncate there,
%% dropping any partially written trailing record.
fix_log(FD) ->
    {ok, Location} = file:position(FD, eof),
    Seek = find_last_newline(FD, Location),
    {ok, Seek} = file:position(FD, Seek),
    ok = file:truncate(FD),
    ok.

%% Seek backwards to the last valid log entry.
%% Returns the offset just past the last newline found before Location,
%% reading one byte at a time, or 0 once the scan reaches offset 1.
find_last_newline(_FD, N) when N =< 1 ->
    0;
find_last_newline(FD, Location) ->
    case file:pread(FD, Location - 1, 1) of
        {ok, <<$\n>>} ->
            Location;
        {ok, _} ->
            find_last_newline(FD, Location - 1)
    end.

%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).
name_test() ->
    D = mochitemp:mkdtemp(),
    FileName = filename:join(D, "open_close_test.log"),
    H = open(FileName),
    ?assertEqual(
       FileName,
       name(H)),
    close(H),
    file:delete(FileName),
    file:del_dir(D),
    ok.

open_close_test() ->
    D = mochitemp:mkdtemp(),
    FileName = filename:join(D, "open_close_test.log"),
    OpenClose = fun () ->
                        H = open(FileName),
                        ?assertEqual(
                           true,
                           filelib:is_file(FileName)),
                        ok = close(H),
                        ?assertEqual(
                           {ok, <<>>},
                           file:read_file(FileName)),
                        ok
                end,
    OpenClose(),
    OpenClose(),
    file:delete(FileName),
    file:del_dir(D),
    ok.

write_test() ->
    D = mochitemp:mkdtemp(),
    FileName = filename:join(D, "write_test.log"),
    F = fun () ->
                H = open(FileName),
                write(H, "test line"),
                close(H),
                ok
        end,
    F(),
    ?assertEqual(
       {ok, <<"test line\n">>},
       file:read_file(FileName)),
    F(),
    ?assertEqual(
       {ok, <<"test line\ntest line\n">>},
       file:read_file(FileName)),
    file:delete(FileName),
    file:del_dir(D),
    ok.

%% A trailing record without a newline must be dropped on open, whether it
%% follows a good record or is the only content of the file.
fix_log_test() ->
    D = mochitemp:mkdtemp(),
    FileName = filename:join(D, "write_test.log"),
    file:write_file(FileName, <<"first line good\nsecond line bad">>),
    F = fun () ->
                H = open(FileName),
                write(H, "test line"),
                close(H),
                ok
        end,
    F(),
    ?assertEqual(
       {ok, <<"first line good\ntest line\n">>},
       file:read_file(FileName)),
    file:write_file(FileName, <<"first line bad">>),
    F(),
    ?assertEqual(
       {ok, <<"test line\n">>},
       file:read_file(FileName)),
    F(),
    ?assertEqual(
       {ok, <<"test line\ntest line\n">>},
       file:read_file(FileName)),
    %% Clean up like the sibling tests do; previously the file and its
    %% temporary directory were left behind.
    file:delete(FileName),
    file:del_dir(D),
    ok.

-endif.
diff --git a/1.1.x/src/mochiweb/mochinum.erl b/1.1.x/src/mochiweb/mochinum.erl
new file mode 100644
index 00000000..a7e2bfbc
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochinum.erl
@@ -0,0 +1,331 @@
%% @copyright 2007 Mochi Media, Inc.
%% @author Bob Ippolito

%% @doc Useful numeric algorithms for floats that cover some deficiencies
%% in the math module. More interesting is digits/1, which implements
%% the algorithm from:
%% http://www.cs.indiana.edu/~burger/fp/index.html
%% See also "Printing Floating-Point Numbers Quickly and Accurately"
%% in Proceedings of the SIGPLAN '96 Conference on Programming Language
%% Design and Implementation.

-module(mochinum).
-author("Bob Ippolito ").
-export([digits/1, frexp/1, int_pow/2, int_ceil/1]).

%% IEEE 754 Float exponent bias
-define(FLOAT_BIAS, 1022).
-define(MIN_EXP, -1074).
-define(BIG_POW, 4503599627370496).

%% External API

%% @spec digits(number()) -> string()
%% @doc  Returns a string that accurately represents the given integer or float
%%       using a conservative amount of digits. Great for generating
%%       human-readable output, or compact ASCII serializations for floats.
digits(N) when is_integer(N) ->
    integer_to_list(N);
digits(0.0) ->
    "0.0";
digits(Float) ->
    {Frac, Exp} = frexp(Float),
    %% Rescale so that Frac1 is an integer: Float = +/- Frac1 * 2^Exp1.
    Exp1 = Exp - 53,
    Frac1 = trunc(abs(Frac) * (1 bsl 53)),
    %% digits1/3 returns the decimal point position followed by the digits.
    [Place | Digits] = digits1(Float, Exp1, Frac1),
    R = insert_decimal(Place, [$0 + D || D <- Digits]),
    case Float < 0 of
        true ->
            [$- | R];
        _ ->
            R
    end.

%% @spec frexp(F::float()) -> {Frac::float(), Exp::integer()}
%% @doc  Return the fractional and exponent part of an IEEE 754 double,
%%       equivalent to the libc function of the same name.
%%       F = Frac * pow(2, Exp).
frexp(F) ->
    frexp1(unpack(F)).

%% @spec int_pow(X::integer(), N::integer()) -> Y::integer()
%% @doc  Moderately efficient way to exponentiate integers.
%%       int_pow(10, 2) = 100.
%%       N must be non-negative; no clause matches a negative N.
int_pow(_X, 0) ->
    1;
int_pow(X, N) when N > 0 ->
    int_pow(X, N, 1).

%% @spec int_ceil(F::float()) -> integer()
%% @doc  Return the ceiling of F as an integer. The ceiling is defined as
%%       F when F == trunc(F);
%%       trunc(F) when F &lt; 0;
%%       trunc(F) + 1 when F > 0.
int_ceil(X) ->
    T = trunc(X),
    case (X - T) of
        Neg when Neg < 0 -> T;
        Pos when Pos > 0 -> T + 1;
        _ -> T
    end.


%% Internal API

%% Square-and-multiply helper for int_pow/2; R accumulates the result.
int_pow(X, N, R) when N < 2 ->
    R * X;
int_pow(X, N, R) ->
    int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end).

%% Place the decimal point in digit string S. Place is the number of digits
%% that belong before the point; exponent notation is used when the point
%% would need six or more padding zeroes on either side.
insert_decimal(0, S) ->
    "0." ++ S;
insert_decimal(Place, S) when Place > 0 ->
    L = length(S),
    case Place - L of
         0 ->
            S ++ ".0";
         N when N < 0 ->
            {S0, S1} = lists:split(L + N, S),
            S0 ++ "." ++ S1;
         N when N < 6 ->
            %% More places than digits
            S ++ lists:duplicate(N, $0) ++ ".0";
         _ ->
            insert_decimal_exp(Place, S)
    end;
insert_decimal(Place, S) when Place > -6 ->
    "0." ++ lists:duplicate(abs(Place), $0) ++ S;
insert_decimal(Place, S) ->
    insert_decimal_exp(Place, S).

%% Render S as d.ddd followed by "e+N" or "e-N" with N = abs(Place - 1).
insert_decimal_exp(Place, S) ->
    [C | S0] = S,
    S1 = case S0 of
             [] ->
                 "0";
             _ ->
                 S0
         end,
    Exp = case Place < 0 of
              true ->
                  "e-";
              false ->
                  "e+"
          end,
    [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)).


%% Set up the initial R, S, M+ and M- values for digit generation (see the
%% paper cited in the module doc) from Float = Frac * 2^Exp with integer Frac.
digits1(Float, Exp, Frac) ->
    Round = ((Frac band 1) =:= 0),
    case Exp >= 0 of
        true ->
            BExp = 1 bsl Exp,
            case (Frac =/= ?BIG_POW) of
                true ->
                    scale((Frac * BExp * 2), 2, BExp, BExp,
                          Round, Round, Float);
                false ->
                    scale((Frac * BExp * 4), 4, (BExp * 2), BExp,
                          Round, Round, Float)
            end;
        false ->
            case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of
                true ->
                    scale((Frac * 2), 1 bsl (1 - Exp), 1, 1,
                          Round, Round, Float);
                false ->
                    scale((Frac * 4), 1 bsl (2 - Exp), 2, 1,
                          Round, Round, Float)
            end
    end.

scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) ->
    %% Estimate of the decimal exponent; fixup/7 corrects it if it is low.
    Est = int_ceil(math:log10(abs(Float)) - 1.0e-10),
    %% Note that the scheme implementation uses a 326 element look-up table
    %% for int_pow(10, N) where we do not.
    case Est >= 0 of
        true ->
            fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est,
                  LowOk, HighOk);
        false ->
            Scale = int_pow(10, -Est),
            fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est,
                  LowOk, HighOk)
    end.

fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) ->
    TooLow = case HighOk of
                 true ->
                     (R + MPlus) >= S;
                 false ->
                     (R + MPlus) > S
             end,
    case TooLow of
        true ->
            [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)];
        false ->
            [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)]
    end.

%% Emit decimal digits one at a time until a termination condition holds.
generate(R0, S, MPlus, MMinus, LowOk, HighOk) ->
    D = R0 div S,
    R = R0 rem S,
    TC1 = case LowOk of
              true ->
                  R =< MMinus;
              false ->
                  R < MMinus
          end,
    TC2 = case HighOk of
              true ->
                  (R + MPlus) >= S;
              false ->
                  (R + MPlus) > S
          end,
    case TC1 of
        false ->
            case TC2 of
                false ->
                    [D | generate(R * 10, S, MPlus * 10, MMinus * 10,
                                  LowOk, HighOk)];
                true ->
                    [D + 1]
            end;
        true ->
            case TC2 of
                false ->
                    [D];
                true ->
                    case R * 2 < S of
                        true ->
                            [D];
                        false ->
                            [D + 1]
                    end
            end
    end.

%% Split a double into its sign bit, 11-bit biased exponent and 52-bit
%% fraction field. (The bit patterns here and in frexp1/1 had been stripped
%% to `<>` by extraction; restored.)
unpack(Float) ->
    <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>,
    {Sign, Exp, Frac}.

frexp1({_Sign, 0, 0}) ->
    {0.0, 0};
frexp1({Sign, 0, Frac}) ->
    %% Subnormal: there is no implicit leading one. Bits is the bit length
    %% of Frac; shift Frac left until its top bit falls into the implicit
    %% position and keep the remaining 52 bits as the mantissa, so that
    %% Frac1 * 2^Exp equals the input for every subnormal value.
    Bits = log2floor(Frac),
    Mantissa = (Frac bsl (53 - Bits)) band ((1 bsl 52) - 1),
    <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Mantissa:52>>,
    {Frac1, -(?FLOAT_BIAS) - 52 + Bits};
frexp1({Sign, Exp, Frac}) ->
    %% Normal: reuse the fraction bits with the exponent forced to
    %% ?FLOAT_BIAS, which places the result in [0.5, 1).
    <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>,
    {Frac1, Exp - ?FLOAT_BIAS}.

%% Number of right shifts needed to reduce Int to zero (its bit length).
log2floor(Int) ->
    log2floor(Int, 0).

log2floor(0, N) ->
    N;
log2floor(Int, N) ->
    log2floor(Int bsr 1, 1 + N).


%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).

int_ceil_test() ->
    1 = int_ceil(0.0001),
    0 = int_ceil(0.0),
    1 = int_ceil(0.99),
    1 = int_ceil(1.0),
    -1 = int_ceil(-1.5),
    -2 = int_ceil(-2.0),
    ok.

int_pow_test() ->
    1 = int_pow(1, 1),
    1 = int_pow(1, 0),
    1 = int_pow(10, 0),
    10 = int_pow(10, 1),
    100 = int_pow(10, 2),
    1000 = int_pow(10, 3),
    ok.

%% The boundary floats are built from raw IEEE 754 bytes. The
%% <<Var/float>> patterns had been stripped to `<>` by extraction; restored.
digits_test() ->
    ?assertEqual("0",
                 digits(0)),
    ?assertEqual("0.0",
                 digits(0.0)),
    ?assertEqual("1.0",
                 digits(1.0)),
    ?assertEqual("-1.0",
                 digits(-1.0)),
    ?assertEqual("0.1",
                 digits(0.1)),
    ?assertEqual("0.01",
                 digits(0.01)),
    ?assertEqual("0.001",
                 digits(0.001)),
    ?assertEqual("1.0e+6",
                 digits(1000000.0)),
    ?assertEqual("0.5",
                 digits(0.5)),
    ?assertEqual("4503599627370496.0",
                 digits(4503599627370496.0)),
    %% small denormalized number
    %% 4.94065645841246544177e-324
    <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
    ?assertEqual("4.9406564584124654e-324",
                 digits(SmallDenorm)),
    ?assertEqual(SmallDenorm,
                 list_to_float(digits(SmallDenorm))),
    %% large denormalized number
    %% 2.22507385850720088902e-308
    <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
    ?assertEqual("2.225073858507201e-308",
                 digits(BigDenorm)),
    ?assertEqual(BigDenorm,
                 list_to_float(digits(BigDenorm))),
    %% small normalized number
    %% 2.22507385850720138309e-308
    <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
    ?assertEqual("2.2250738585072014e-308",
                 digits(SmallNorm)),
    ?assertEqual(SmallNorm,
                 list_to_float(digits(SmallNorm))),
    %% large normalized number
    %% 1.79769313486231570815e+308
    <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
    ?assertEqual("1.7976931348623157e+308",
                 digits(LargeNorm)),
    ?assertEqual(LargeNorm,
                 list_to_float(digits(LargeNorm))),
    ok.

%% The <<Var/float>> patterns had been stripped to `<>` by extraction;
%% restored.
frexp_test() ->
    %% zero
    {0.0, 0} = frexp(0.0),
    %% one
    {0.5, 1} = frexp(1.0),
    %% negative one
    {-0.5, 1} = frexp(-1.0),
    %% small denormalized number
    %% 4.94065645841246544177e-324
    <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
    {0.5, -1073} = frexp(SmallDenorm),
    %% large denormalized number
    %% 2.22507385850720088902e-308
    <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
    {0.99999999999999978, -1022} = frexp(BigDenorm),
    %% small normalized number
    %% 2.22507385850720138309e-308
    <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
    {0.5, -1021} = frexp(SmallNorm),
    %% large normalized number
    %% 1.79769313486231570815e+308
    <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
    {0.99999999999999989, 1024} = frexp(LargeNorm),
    ok.

-endif.
diff --git a/1.1.x/src/mochiweb/mochitemp.erl b/1.1.x/src/mochiweb/mochitemp.erl
new file mode 100644
index 00000000..bb23d2a6
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochitemp.erl
@@ -0,0 +1,310 @@
%% @author Bob Ippolito
%% @copyright 2010 Mochi Media, Inc.

%% @doc Create temporary files and directories. Requires crypto to be started.

-module(mochitemp).
-export([gettempdir/0]).
-export([mkdtemp/0, mkdtemp/3]).
-export([rmtempdir/1]).
%% -export([mkstemp/4]).
-define(SAFE_CHARS, {$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m,
                     $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
                     $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M,
                     $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z,
                     $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $_}).
-define(TMP_MAX, 10000).

-include_lib("kernel/include/file.hrl").

%% TODO: An ugly wrapper over the mktemp tool with open_port and sadness?
%%       We can't implement this race-free in Erlang without the ability
%%       to issue O_CREAT|O_EXCL. I suppose we could hack something with
%%       mkdtemp, del_dir, open.
%% mkstemp(Suffix, Prefix, Dir, Options) ->
%%    ok.

%% Remove directory Dir. If file:del_dir/1 answers {error, eexist} (handled
%% here as "directory not empty"), the contents are removed recursively and
%% the delete is retried. Any other error is a case_clause crash.
rmtempdir(Dir) ->
    case file:del_dir(Dir) of
        {error, eexist} ->
            ok = rmtempdirfiles(Dir),
            ok = file:del_dir(Dir);
        ok ->
            ok
    end.

%% Delete everything inside Dir (but not Dir itself).
rmtempdirfiles(Dir) ->
    {ok, Files} = file:list_dir(Dir),
    ok = rmtempdirfiles(Dir, Files).

%% Sub-directories are removed via rmtempdir/1, plain entries via
%% file:delete/1; any failure is a badmatch.
rmtempdirfiles(_Dir, []) ->
    ok;
rmtempdirfiles(Dir, [Basename | Rest]) ->
    Path = filename:join([Dir, Basename]),
    case filelib:is_dir(Path) of
        true ->
            ok = rmtempdir(Path);
        false ->
            ok = file:delete(Path)
    end,
    rmtempdirfiles(Dir, Rest).

%% Create a directory named "tmp" ++ six random characters under
%% gettempdir() and return its path.
mkdtemp() ->
    mkdtemp("", "tmp", gettempdir()).

%% Create Dir/Prefix ++ Random ++ Suffix, trying up to ?TMP_MAX random names,
%% and return the path. Throws {error, eexist} if every attempt collides.
mkdtemp(Suffix, Prefix, Dir) ->
    %% rngpath_fun/3 takes (Prefix, Suffix, Dir). The arguments used to be
    %% passed as (Suffix, Prefix, Dir), which put the prefix at the end of
    %% the generated name and the suffix at the front.
    mkdtemp_n(rngpath_fun(Prefix, Suffix, Dir), ?TMP_MAX).



%% Try RngPath() up to N times; the last attempt lets the eexist throw
%% escape to the caller.
mkdtemp_n(RngPath, 1) ->
    make_dir(RngPath());
mkdtemp_n(RngPath, N) ->
    try make_dir(RngPath())
    catch throw:{error, eexist} ->
            mkdtemp_n(RngPath, N - 1)
    end.

%% Create Path with mode 8#0700 and return it. Throws {error, eexist} when
%% Path already exists; other make_dir errors are a case_clause crash.
make_dir(Path) ->
    case file:make_dir(Path) of
        ok ->
            ok;
        E={error, eexist} ->
            throw(E)
    end,
    %% Small window for a race condition here because dir is created 777
    ok = file:write_file_info(Path, #file_info{mode=8#0700}),
    Path.

%% Return a fun producing a fresh candidate path on every call.
rngpath_fun(Prefix, Suffix, Dir) ->
    fun () ->
            filename:join([Dir, Prefix ++ rngchars(6) ++ Suffix])
    end.

%% N random characters drawn from ?SAFE_CHARS.
rngchars(0) ->
    "";
rngchars(N) ->
    [rngchar() | rngchars(N - 1)].

rngchar() ->
    rngchar(crypto:rand_uniform(0, tuple_size(?SAFE_CHARS))).

%% Map a 0-based index onto ?SAFE_CHARS.
rngchar(C) ->
    element(1 + C, ?SAFE_CHARS).

%% @spec gettempdir() -> string()
%% @doc Get a usable temporary directory using the first of these that is a directory:
%%      $TMPDIR, $TMP, $TEMP, "/tmp", "/var/tmp", "/usr/tmp", ".".
gettempdir() ->
    gettempdir(gettempdir_checks(), fun normalize_dir/1).

%% Ordered list of {LookupFun, Arguments} candidates for gettempdir/2.
gettempdir_checks() ->
    [{fun os:getenv/1, ["TMPDIR", "TMP", "TEMP"]},
     {fun gettempdir_identity/1, ["/tmp", "/var/tmp", "/usr/tmp"]},
     {fun gettempdir_cwd/1, [cwd]}].

gettempdir_identity(L) ->
    L.

gettempdir_cwd(cwd) ->
    {ok, L} = file:get_cwd(),
    L.

%% Return the first candidate for which Normalize yields something other
%% than false. There is no clause for an exhausted candidate list, so that
%% case is a function_clause crash.
gettempdir([{_F, []} | RestF], Normalize) ->
    gettempdir(RestF, Normalize);
gettempdir([{F, [L | RestL]} | RestF], Normalize) ->
    case Normalize(F(L)) of
        false ->
            gettempdir([{F, RestL} | RestF], Normalize);
        Dir ->
            Dir
    end.

%% Turn a candidate into an absolute directory path, or false when the
%% candidate is false, the empty string, or not an existing directory.
normalize_dir(False) when False =:= false orelse False =:= "" ->
    %% Erlang doesn't have an unsetenv, wtf.
    false;
normalize_dir(L) ->
    Dir = filename:absname(L),
    case filelib:is_dir(Dir) of
        false ->
            false;
        true ->
            Dir
    end.

%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).
%% Snapshot the current values of the environment variables named in L.
pushenv(L) ->
    [{K, os:getenv(K)} || K <- L].
%% Apply a list of {Name, Value} pairs to the environment; a false value is
%% written as the empty string.
popenv(L) ->
    F = fun ({K, false}) ->
                %% Erlang doesn't have an unsetenv, wtf.
                os:putenv(K, "");
            ({K, V}) ->
                os:putenv(K, V)
        end,
    lists:foreach(F, L).

gettempdir_fallback_test() ->
    ?assertEqual(
       "/",
       gettempdir([{fun gettempdir_identity/1, ["/--not-here--/"]},
                   {fun gettempdir_identity/1, ["/"]}],
                  fun normalize_dir/1)),
    ?assertEqual(
       "/",
       %% simulate a true os:getenv unset env
       gettempdir([{fun gettempdir_identity/1, [false]},
                   {fun gettempdir_identity/1, ["/"]}],
                  fun normalize_dir/1)),
    ok.

gettempdir_identity_test() ->
    ?assertEqual(
       "/",
       gettempdir([{fun gettempdir_identity/1, ["/"]}], fun normalize_dir/1)),
    ok.

gettempdir_cwd_test() ->
    {ok, Cwd} = file:get_cwd(),
    ?assertEqual(
       normalize_dir(Cwd),
       gettempdir([{fun gettempdir_cwd/1, [cwd]}], fun normalize_dir/1)),
    ok.

rngchars_test() ->
    crypto:start(),
    ?assertEqual(
       "",
       rngchars(0)),
    ?assertEqual(
       10,
       length(rngchars(10))),
    ok.

%% Spot-check the 0-based index mapping at the first lowercase letter, the
%% first uppercase letter and the last entry.
rngchar_test() ->
    ?assertEqual(
       $a,
       rngchar(0)),
    ?assertEqual(
       $A,
       rngchar(26)),
    ?assertEqual(
       $_,
       rngchar(62)),
    ok.

mkdtemp_n_failonce_test() ->
    crypto:start(),
    D = mkdtemp(),
    Path = filename:join([D, "testdir"]),
    %% Toggle the existence of a dir so that it fails
    %% the first time and succeeds the second.
    F = fun () ->
                case filelib:is_dir(Path) of
                    true ->
                        file:del_dir(Path);
                    false ->
                        file:make_dir(Path)
                end,
                Path
        end,
    try
        %% Fails the first time
        ?assertThrow(
           {error, eexist},
           mkdtemp_n(F, 1)),
        %% Reset state
        file:del_dir(Path),
        %% Succeeds the second time
        ?assertEqual(
           Path,
           mkdtemp_n(F, 2))
    after rmtempdir(D)
    end,
    ok.

%% The current directory always exists, so every attempt must collide.
mkdtemp_n_fail_test() ->
    {ok, Cwd} = file:get_cwd(),
    ?assertThrow(
       {error, eexist},
       mkdtemp_n(fun () -> Cwd end, 1)),
    ?assertThrow(
       {error, eexist},
       mkdtemp_n(fun () -> Cwd end, 2)),
    ok.

make_dir_fail_test() ->
    {ok, Cwd} = file:get_cwd(),
    ?assertThrow(
      {error, eexist},
      make_dir(Cwd)),
    ok.

mkdtemp_test() ->
    crypto:start(),
    D = mkdtemp(),
    ?assertEqual(
       true,
       filelib:is_dir(D)),
    ?assertEqual(
       ok,
       file:del_dir(D)),
    ok.

%% Removes an empty directory, then a directory holding a file and a nested
%% temp directory that itself holds a file.
rmtempdir_test() ->
    crypto:start(),
    D1 = mkdtemp(),
    ?assertEqual(
       true,
       filelib:is_dir(D1)),
    ?assertEqual(
       ok,
       rmtempdir(D1)),
    D2 = mkdtemp(),
    ?assertEqual(
       true,
       filelib:is_dir(D2)),
    ok = file:write_file(filename:join([D2, "foo"]), <<"bytes">>),
    D3 = mkdtemp("suffix", "prefix", D2),
    ?assertEqual(
       true,
       filelib:is_dir(D3)),
    ok = file:write_file(filename:join([D3, "foo"]), <<"bytes">>),
    ?assertEqual(
       ok,
       rmtempdir(D2)),
    ?assertEqual(
       {error, enoent},
       file:consult(D3)),
    ?assertEqual(
       {error, enoent},
       file:consult(D2)),
    ok.

%% Temporarily rewrites TMPDIR/TEMP/TMP and restores the saved values in the
%% after clause.
gettempdir_env_test() ->
    Env = pushenv(["TMPDIR", "TEMP", "TMP"]),
    FalseEnv = [{"TMPDIR", false}, {"TEMP", false}, {"TMP", false}],
    try
        popenv(FalseEnv),
        popenv([{"TMPDIR", "/"}]),
        ?assertEqual(
           "/",
           os:getenv("TMPDIR")),
        ?assertEqual(
           "/",
           gettempdir()),
        {ok, Cwd} = file:get_cwd(),
        popenv(FalseEnv),
        popenv([{"TMP", Cwd}]),
        ?assertEqual(
           normalize_dir(Cwd),
           gettempdir())
    after popenv(Env)
    end,
    ok.

-endif.
diff --git a/1.1.x/src/mochiweb/mochiutf8.erl b/1.1.x/src/mochiweb/mochiutf8.erl
new file mode 100644
index 00000000..206e1186
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiutf8.erl
@@ -0,0 +1,316 @@
%% @copyright 2010 Mochi Media, Inc.
%% @author Bob Ippolito

%% @doc Algorithm to convert any binary to a valid UTF-8 sequence by ignoring
%%      invalid bytes.

-module(mochiutf8).
-export([valid_utf8_bytes/1, codepoint_to_bytes/1, bytes_to_codepoints/1]).
-export([bytes_foldl/3, codepoint_foldl/3, read_codepoint/1, len/1]).

%% External API

-type unichar_low() :: 0..16#d7ff.
-type unichar_high() :: 16#e000..16#10ffff.
-type unichar() :: unichar_low() | unichar_high().

-spec codepoint_to_bytes(unichar()) -> binary().
%% @doc Convert a unicode codepoint to UTF-8 bytes.
%%      Surrogates (U+D800 - U+DFFF) and values above U+10FFFF match no
%%      clause. (The <<C>>, <<C:16>> and <<C:24>> bit patterns had been
%%      stripped to `<>` by extraction; restored.)
codepoint_to_bytes(C) when (C >= 16#00 andalso C =< 16#7f) ->
    %% U+0000 - U+007F - 7 bits
    <<C>>;
codepoint_to_bytes(C) when (C >= 16#080 andalso C =< 16#07FF) ->
    %% U+0080 - U+07FF - 11 bits
    <<0:5, B1:5, B0:6>> = <<C:16>>,
    <<2#110:3, B1:5,
      2#10:2, B0:6>>;
codepoint_to_bytes(C) when (C >= 16#0800 andalso C =< 16#FFFF) andalso
                           (C < 16#D800 orelse C > 16#DFFF) ->
    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
    <<B2:4, B1:6, B0:6>> = <<C:16>>,
    <<2#1110:4, B2:4,
      2#10:2, B1:6,
      2#10:2, B0:6>>;
codepoint_to_bytes(C) when (C >= 16#010000 andalso C =< 16#10FFFF) ->
    %% U+10000 - U+10FFFF - 21 bits
    <<0:3, B3:3, B2:6, B1:6, B0:6>> = <<C:24>>,
    <<2#11110:5, B3:3,
      2#10:2, B2:6,
      2#10:2, B1:6,
      2#10:2, B0:6>>.

-spec codepoints_to_bytes([unichar()]) -> binary().
%% @doc Convert a list of codepoints to a UTF-8 binary.
codepoints_to_bytes(L) ->
    <<<<(codepoint_to_bytes(C))/binary>> || C <- L>>.

-spec read_codepoint(binary()) -> {unichar(), binary(), binary()}.
%% Decode the first code point of a UTF-8 binary. Returns {CodePoint, Bytes,
%% Rest} where Bytes is the encoded form that was consumed. Overlong forms,
%% surrogates and values above U+10FFFF match no clause and crash.
%% (The bit patterns here had been stripped to `<>` by extraction; restored.)
read_codepoint(Bin = <<2#0:1, C:7, Rest/binary>>) ->
    %% U+0000 - U+007F - 7 bits
    <<B:1/binary, _/binary>> = Bin,
    {C, B, Rest};
read_codepoint(Bin = <<2#110:3, B1:5,
                       2#10:2, B0:6,
                       Rest/binary>>) ->
    %% U+0080 - U+07FF - 11 bits
    case <<B1:5, B0:6>> of
        <<C:11>> when C >= 16#80 ->
            <<B:2/binary, _/binary>> = Bin,
            {C, B, Rest}
    end;
read_codepoint(Bin = <<2#1110:4, B2:4,
                       2#10:2, B1:6,
                       2#10:2, B0:6,
                       Rest/binary>>) ->
    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
    case <<B2:4, B1:6, B0:6>> of
        <<C:16>> when (C >= 16#0800 andalso C =< 16#FFFF) andalso
                      (C < 16#D800 orelse C > 16#DFFF) ->
            <<B:3/binary, _/binary>> = Bin,
            {C, B, Rest}
    end;
read_codepoint(Bin = <<2#11110:5, B3:3,
                       2#10:2, B2:6,
                       2#10:2, B1:6,
                       2#10:2, B0:6,
                       Rest/binary>>) ->
    %% U+10000 - U+10FFFF - 21 bits
    case <<B3:3, B2:6, B1:6, B0:6>> of
        <<C:21>> when (C >= 16#010000 andalso C =< 16#10FFFF) ->
            <<B:4/binary, _/binary>> = Bin,
            {C, B, Rest}
    end.

-spec codepoint_foldl(fun((unichar(), _) -> _), _, binary()) -> _.
%% Left fold of F over the code points of a UTF-8 binary.
codepoint_foldl(F, Acc, <<>>) when is_function(F, 2) ->
    Acc;
codepoint_foldl(F, Acc, Bin) ->
    {C, _, Rest} = read_codepoint(Bin),
    codepoint_foldl(F, F(C, Acc), Rest).

-spec bytes_foldl(fun((binary(), _) -> _), _, binary()) -> _.
%% Left fold of F over the encoded byte sequence of each code point.
bytes_foldl(F, Acc, <<>>) when is_function(F, 2) ->
    Acc;
bytes_foldl(F, Acc, Bin) ->
    {_, B, Rest} = read_codepoint(Bin),
    bytes_foldl(F, F(B, Acc), Rest).

-spec bytes_to_codepoints(binary()) -> [unichar()].
%% Decode a UTF-8 binary into its list of code points, in order.
bytes_to_codepoints(B) ->
    lists:reverse(codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], B)).

-spec len(binary()) -> non_neg_integer().
%% Number of code points in a UTF-8 binary (not tail recursive).
len(<<>>) ->
    0;
len(B) ->
    {_, _, Rest} = read_codepoint(B),
    1 + len(Rest).

-spec valid_utf8_bytes(B::binary()) -> binary().
%% @doc Return only the bytes in B that represent valid UTF-8. Uses
%%      the following recursive algorithm: skip one byte if B does not
%%      follow UTF-8 syntax (a 1-4 byte encoding of some number),
%%      skip sequence of 2-4 bytes if it represents an overlong encoding
%%      or bad code point (surrogate U+D800 - U+DFFF or > U+10FFFF).
valid_utf8_bytes(B) when is_binary(B) ->
    binary_skip_bytes(B, invalid_utf8_indexes(B)).

%% Internal API

-spec binary_skip_bytes(binary(), [non_neg_integer()]) -> binary().
%% @doc Return B, but skipping the 0-based indexes in L.
%%      The clauses below consume L front to back while the position counter
%%      only grows, so L is expected to be in ascending order.
binary_skip_bytes(B, []) ->
    B;
binary_skip_bytes(B, L) ->
    binary_skip_bytes(B, L, 0, []).

%% @private
%% N is the index of the next byte of the binary; Acc holds the kept bytes
%% in reverse. (The <<C, RestB/binary>> pattern had been stripped to `<>` by
%% extraction; restored.)
-spec binary_skip_bytes(binary(), [non_neg_integer()], non_neg_integer(), iolist()) -> binary().
binary_skip_bytes(B, [], _N, Acc) ->
    iolist_to_binary(lists:reverse([B | Acc]));
binary_skip_bytes(<<_, RestB/binary>>, [N | RestL], N, Acc) ->
    binary_skip_bytes(RestB, RestL, 1 + N, Acc);
binary_skip_bytes(<<C, RestB/binary>>, L, N, Acc) ->
    binary_skip_bytes(RestB, L, 1 + N, [C | Acc]).

-spec invalid_utf8_indexes(binary()) -> [non_neg_integer()].
%% @doc Return the 0-based indexes in B that are not valid UTF-8.
invalid_utf8_indexes(B) ->
    invalid_utf8_indexes(B, 0, []).

%% @private.
%% Clauses are tried from the 1-byte form up to the 4-byte form; a byte that
%% starts no well-formed sequence is flagged on its own and scanning resumes
%% at the next byte. (The head patterns had been stripped to `<>` by
%% extraction; restored from the variables each clause uses.)
-spec invalid_utf8_indexes(binary(), non_neg_integer(), [non_neg_integer()]) -> [non_neg_integer()].
invalid_utf8_indexes(<<C, Rest/binary>>, N, Acc) when C < 16#80 ->
    %% U+0000 - U+007F - 7 bits
    invalid_utf8_indexes(Rest, 1 + N, Acc);
invalid_utf8_indexes(<<C1, C2, Rest/binary>>, N, Acc)
  when C1 band 16#E0 =:= 16#C0,
       C2 band 16#C0 =:= 16#80 ->
    %% U+0080 - U+07FF - 11 bits
    case ((C1 band 16#1F) bsl 6) bor (C2 band 16#3F) of
        C when C < 16#80 ->
            %% Overlong encoding.
            invalid_utf8_indexes(Rest, 2 + N, [1 + N, N | Acc]);
        _ ->
            %% Upper bound U+07FF does not need to be checked
            invalid_utf8_indexes(Rest, 2 + N, Acc)
    end;
invalid_utf8_indexes(<<C1, C2, C3, Rest/binary>>, N, Acc)
  when C1 band 16#F0 =:= 16#E0,
       C2 band 16#C0 =:= 16#80,
       C3 band 16#C0 =:= 16#80 ->
    %% U+0800 - U+FFFF - 16 bits
    case ((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
        (C3 band 16#3F) of
        C when (C < 16#800) orelse (C >= 16#D800 andalso C =< 16#DFFF) ->
            %% Overlong encoding or surrogate.
            invalid_utf8_indexes(Rest, 3 + N, [2 + N, 1 + N, N | Acc]);
        _ ->
            %% Upper bound U+FFFF does not need to be checked
            invalid_utf8_indexes(Rest, 3 + N, Acc)
    end;
invalid_utf8_indexes(<<C1, C2, C3, C4, Rest/binary>>, N, Acc)
  when C1 band 16#F8 =:= 16#F0,
       C2 band 16#C0 =:= 16#80,
       C3 band 16#C0 =:= 16#80,
       C4 band 16#C0 =:= 16#80 ->
    %% U+10000 - U+10FFFF - 21 bits
    case ((((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
           (C3 band 16#3F)) bsl 6) bor (C4 band 16#3F) of
        C when (C < 16#10000) orelse (C > 16#10FFFF) ->
            %% Overlong encoding or invalid code point.
            invalid_utf8_indexes(Rest, 4 + N, [3 + N, 2 + N, 1 + N, N | Acc]);
        _ ->
            invalid_utf8_indexes(Rest, 4 + N, Acc)
    end;
invalid_utf8_indexes(<<_, Rest/binary>>, N, Acc) ->
    %% Invalid char
    invalid_utf8_indexes(Rest, 1 + N, [N | Acc]);
invalid_utf8_indexes(<<>>, _N, Acc) ->
    lists:reverse(Acc).

%%
%% Tests
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).

binary_skip_bytes_test() ->
    ?assertEqual(<<"foo">>,
                 binary_skip_bytes(<<"foo">>, [])),
    ?assertEqual(<<"foobar">>,
                 binary_skip_bytes(<<"foo bar">>, [3])),
    ?assertEqual(<<"foo">>,
                 binary_skip_bytes(<<"foo bar">>, [3, 4, 5, 6])),
    ?assertEqual(<<"oo bar">>,
                 binary_skip_bytes(<<"foo bar">>, [0])),
    ok.

invalid_utf8_indexes_test() ->
    ?assertEqual(
       [],
       invalid_utf8_indexes(<<"unicode snowman for you: ", 226, 152, 131>>)),
    ?assertEqual(
       [0],
       invalid_utf8_indexes(<<128>>)),
    ?assertEqual(
       [57,59,60,64,66,67],
       invalid_utf8_indexes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
                              167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
    ok.
+
+codepoint_to_bytes_test() ->
+    %% U+0000 - U+007F - 7 bits
+    %% U+0080 - U+07FF - 11 bits
+    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+    %% U+10000 - U+10FFFF - 21 bits
+    ?assertEqual(
+       <<"a">>,
+       codepoint_to_bytes($a)),
+    ?assertEqual(
+       <<16#c2, 16#80>>,
+       codepoint_to_bytes(16#80)),
+    ?assertEqual(
+       <<16#df, 16#bf>>,
+       codepoint_to_bytes(16#07ff)),
+    ?assertEqual(
+       <<16#ef, 16#bf, 16#bf>>,
+       codepoint_to_bytes(16#ffff)),
+    ?assertEqual(
+       <<16#f4, 16#8f, 16#bf, 16#bf>>,
+       codepoint_to_bytes(16#10ffff)),
+    ok.
+
+bytes_foldl_test() ->
+    %% The fun appends each chunk to the accumulator, so folding over valid
+    %% input must reproduce that input unchanged.
+    %% NOTE(review): the fun body was lost in extraction and is rebuilt here;
+    %% it assumes bytes_foldl/3 hands each code point to the fun as a binary
+    %% -- confirm against bytes_foldl/3.
+    ?assertEqual(
+       <<"abc">>,
+       bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>, <<"abc">>)),
+    ?assertEqual(
+       <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>,
+       bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>,
+                   <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+bytes_to_codepoints_test() ->
+    ?assertEqual(
+       "abc" ++ [16#2603, 16#4e2d, 16#85, 16#10ffff],
+       bytes_to_codepoints(<<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+codepoint_foldl_test() ->
+    %% Consing onto the accumulator yields the code points in reverse order.
+    ?assertEqual(
+       "cba",
+       codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], <<"abc">>)),
+    ?assertEqual(
+       [16#10ffff, 16#85, 16#4e2d, 16#2603 | "cba"],
+       codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [],
+                       <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+len_test() ->
+    %% 25 ASCII characters followed by four multi-byte code points.
+    ?assertEqual(
+       29,
+       len(<<"unicode snowman for you: ", 226, 152, 131, 228, 184, 173, 194, 133, 244, 143, 191, 191>>)),
+    ok.
+
+codepoints_to_bytes_test() ->
+    %% Encoding a list must equal encoding each code point on its own.
+    ?assertEqual(
+       iolist_to_binary(lists:map(fun codepoint_to_bytes/1, lists:seq(1, 1000))),
+       codepoints_to_bytes(lists:seq(1, 1000))),
+    ok.
+
+valid_utf8_bytes_test() ->
+    %% {Expected, Input} pairs: invalid sequences must be dropped in full,
+    %% valid input must come back untouched.
+    Cases =
+        [{<<"invalid U+11ffff: ">>,
+          <<"invalid U+11ffff: ", 244, 159, 191, 191>>},
+         {<<"U+10ffff: ", 244, 143, 191, 191>>,
+          <<"U+10ffff: ", 244, 143, 191, 191>>},
+         {<<"overlong 2-byte encoding (a): ">>,
+          <<"overlong 2-byte encoding (a): ", 2#11000001, 2#10100001>>},
+         {<<"overlong 2-byte encoding (!): ">>,
+          <<"overlong 2-byte encoding (!): ", 2#11000000, 2#10100001>>},
+         {<<"mu: ", 194, 181>>,
+          <<"mu: ", 194, 181>>},
+         {<<"bad coding bytes: ">>,
+          <<"bad coding bytes: ", 2#10011111, 2#10111111, 2#11111111>>},
+         {<<"low surrogate (unpaired): ">>,
+          <<"low surrogate (unpaired): ", 237, 176, 128>>},
+         {<<"high surrogate (unpaired): ">>,
+          <<"high surrogate (unpaired): ", 237, 191, 191>>},
+         {<<"unicode snowman for you: ", 226, 152, 131>>,
+          <<"unicode snowman for you: ", 226, 152, 131>>},
+         {<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (AISPW))">>,
+          <<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
+            167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>}],
+    lists:foreach(fun ({Expected, Input}) ->
+                          ?assertEqual(Expected, valid_utf8_bytes(Input))
+                  end,
+                  Cases),
+    ok.
+
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb.app.in b/1.1.x/src/mochiweb/mochiweb.app.in
new file mode 100644
index 00000000..c6a2630b
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb.app.in
@@ -0,0 +1,32 @@
+{application, mochiweb,
+ [{description, "MochiMedia Web Server"},
+  {vsn, "7c2bc2"},
+  {modules, [
+        mochihex,
+        mochijson,
+        mochijson2,
+        mochinum,
+        mochiweb,
+        mochiweb_app,
+        mochiweb_charref,
+        mochiweb_cookies,
+        mochiweb_echo,
+        mochiweb_headers,
+        mochiweb_html,
+        mochiweb_http,
+        mochiweb_multipart,
+        mochiweb_request,
+        mochiweb_response,
+        mochiweb_skel,
+        mochiweb_socket_server,
+        mochiweb_sup,
+        mochiweb_util,
+        reloader,
+        mochifmt,
+        mochifmt_std,
+        mochifmt_records
+        ]},
+  {registered, []},
+  {mod, {mochiweb_app, []}},
+  {env, []},
+  {applications, [kernel, stdlib]}]}.
diff --git a/1.1.x/src/mochiweb/mochiweb.app.src b/1.1.x/src/mochiweb/mochiweb.app.src
new file mode 100644
index 00000000..a1c95aae
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb.app.src
@@ -0,0 +1,9 @@
+%% This is generated from src/mochiweb.app.src
+{application, mochiweb,
+ [{description, "MochiMedia Web Server"},
+  {vsn, "7c2bc2"},
+  {modules, []},
+  {registered, []},
+  {mod, {mochiweb_app, []}},
+  {env, []},
+  {applications, [kernel, stdlib, crypto, inets]}]}.
diff --git a/1.1.x/src/mochiweb/mochiweb.erl b/1.1.x/src/mochiweb/mochiweb.erl
new file mode 100644
index 00000000..3118028b
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb.erl
@@ -0,0 +1,289 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Start and stop the MochiWeb server.
+
+-module(mochiweb).
+-author('bob@mochimedia.com').
+
+-export([start/0, stop/0]).
+-export([new_request/1, new_response/1]).
+-export([all_loaded/0, all_loaded/1, reload/0]).
+
+%% @spec start() -> ok
+%% @doc Start the MochiWeb server.
+%%      The crypto application is started first when it is not already
+%%      running; the value returned is whatever application:start(mochiweb)
+%%      returns.
+start() ->
+    ensure_started(crypto),
+    application:start(mochiweb).
+
+%% @spec stop() -> ok
+%% @doc Stop the MochiWeb server.
+stop() ->
+    %% mochiweb goes down first, then crypto (which start/0 brought up);
+    %% the caller gets the result of stopping mochiweb.
+    StopResult = application:stop(mochiweb),
+    application:stop(crypto),
+    StopResult.
+
+%% Reload (c:l/1) every module returned by all_loaded/0.
+reload() ->
+    lists:map(fun c:l/1, all_loaded()).
+
+%% Modules loaded from the directory this module's beam file lives in.
+all_loaded() ->
+    BeamPath = code:which(?MODULE),
+    all_loaded(filename:dirname(BeamPath)).
+
+%% Modules whose load path sits directly under Base. An atom Base yields
+%% [], and loaded entries whose location is an atom rather than a path
+%% are ignored.
+all_loaded(Base) when is_atom(Base) ->
+    [];
+all_loaded(Base) ->
+    Prefix = Base ++ "/",
+    %% Reversed so the order matches consing onto an accumulator while
+    %% walking code:all_loaded() front to back.
+    lists:reverse([Module || {Module, Path} <- code:all_loaded(),
+                             not is_atom(Path),
+                             lists:prefix(Prefix, Path)]).
+
+
+%% @spec new_request({Socket, Request, Headers}) -> MochiWebRequest
+%% @doc Return a mochiweb_request data structure.
+new_request({Socket, {Method, UriSpec, Version}, Headers}) ->
+    Uri = request_uri(UriSpec),
+    mochiweb_request:new(Socket,
+                         Method,
+                         Uri,
+                         Version,
+                         mochiweb_headers:make(Headers)).
+
+%% Pull the URI out of the three request-target shapes that are supported.
+request_uri({abs_path, Uri}) ->
+    Uri;
+% this case probably doesn't "exist".
+request_uri({absoluteURI, _Protocol, _Host, _Port, Uri}) ->
+    Uri;
+%% Request-URI is "*"
+%% From http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
+request_uri('*'=Uri) ->
+    Uri.
+
+%% @spec new_response({Request, integer(), Headers}) -> MochiWebResponse
+%% @doc Return a mochiweb_response data structure.
+new_response({Request, Code, Headers}) ->
+    ResponseHeaders = mochiweb_headers:make(Headers),
+    mochiweb_response:new(Request, Code, ResponseHeaders).
+
+%% Internal API
+
+%% Start App, treating "already started" as success. Any other failure
+%% crashes with a case_clause error.
+ensure_started(App) ->
+    case application:start(App) of
+        ok -> ok;
+        {error, {already_started, App}} -> ok
+    end.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+%% One scripted exchange: request path, request body, expected reply body.
+-record(treq, {path, body= <<>>, xreply= <<>>}).
+
+%% Certificate and key options for the SSL tests; the files are looked up
+%% in ../support/test-materials relative to this module's beam directory.
+ssl_cert_opts() ->
+    BeamDir = filename:dirname(code:which(?MODULE)),
+    Materials = filename:join([BeamDir, "..", "support", "test-materials"]),
+    [{certfile, filename:join(Materials, "test_ssl_cert.pem")},
+     {keyfile, filename:join(Materials, "test_ssl_key.pem")}].
+
+%% Start a server on an ephemeral local port, run ClientFun(Transport, Port)
+%% against it and stop the server again. The client's result is returned;
+%% a client crash comes back as a value because the call is wrapped in catch.
+with_server(Transport, ServerFun, ClientFun) ->
+    BaseOpts = [{ip, "127.0.0.1"}, {port, 0}, {loop, ServerFun}],
+    TransportOpts = case Transport of
+                        plain ->
+                            [];
+                        ssl ->
+                            [{ssl, true}, {ssl_opts, ssl_cert_opts()}]
+                    end,
+    {ok, Server} = mochiweb_http:start(BaseOpts ++ TransportOpts),
+    Port = mochiweb_socket_server:get(Server, port),
+    Outcome = (catch ClientFun(Transport, Port)),
+    mochiweb_http:stop(Server),
+    Outcome.
+
+request_test() ->
+    R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []),
+    "/foo/bar/baz wibble quux" = R:get(path),
+    ok.
+
+single_http_GET_test() ->
+    do_GET(plain, 1).
+
+single_https_GET_test() ->
+    do_GET(ssl, 1).
+
+multiple_http_GET_test() ->
+    do_GET(plain, 3).
+
+multiple_https_GET_test() ->
+    do_GET(ssl, 3).
+
+hundred_http_GET_test() ->
+    do_GET(plain, 100).
+
+hundred_https_GET_test() ->
+    do_GET(ssl, 100).
+
+single_128_http_POST_test() ->
+    do_POST(plain, 128, 1).
+
+single_128_https_POST_test() ->
+    do_POST(ssl, 128, 1).
+
+single_2k_http_POST_test() ->
+    do_POST(plain, 2048, 1).
+
+single_2k_https_POST_test() ->
+    do_POST(ssl, 2048, 1).
+
+single_100k_http_POST_test() ->
+    do_POST(plain, 102400, 1).
+
+single_100k_https_POST_test() ->
+    do_POST(ssl, 102400, 1).
+
+multiple_100k_http_POST_test() ->
+    do_POST(plain, 102400, 3).
+
+multiple_100K_https_POST_test() ->
+    do_POST(ssl, 102400, 3).
+
+hundred_128_http_POST_test() ->
+    do_POST(plain, 128, 100).
+
+hundred_128_https_POST_test() ->
+    do_POST(ssl, 128, 100).
+
+%% Issue Times GET requests over one connection; the server answers each
+%% with "You requested: " followed by the request path.
+do_GET(Transport, Times) ->
+    PathPrefix = "/whatever/",
+    ReplyPrefix = "You requested: ",
+    Handler = fun (Req) ->
+                      Req:ok({"text/plain", ReplyPrefix ++ Req:get(path)})
+              end,
+    MakeReq = fun (N) ->
+                      Path = PathPrefix ++ integer_to_list(N),
+                      #treq{path=Path,
+                            xreply=list_to_binary(ReplyPrefix ++ Path)}
+              end,
+    Reqs = lists:map(MakeReq, lists:seq(1, Times)),
+    ok = with_server(Transport, Handler, new_client_fun('GET', Reqs)),
+    ok.
+
+%% Issue Times POST requests of Size random bytes each; the server echoes
+%% every body back with status 201.
+do_POST(Transport, Size, Times) ->
+    Handler = fun (Req) ->
+                      Echo = Req:recv_body(),
+                      Req:respond({201,
+                                   [{"Content-Type", "application/octet-stream"}],
+                                   Echo})
+              end,
+    MakeReq = fun (N) ->
+                      Payload = crypto:rand_bytes(Size),
+                      #treq{path="/stuff/" ++ integer_to_list(N),
+                            body=Payload,
+                            xreply=Payload}
+              end,
+    Reqs = lists:map(MakeReq, lists:seq(1, Times)),
+    ok = with_server(Transport, Handler, new_client_fun('POST', Reqs)),
+    ok.
+
+%% Client callback for with_server/3 that replays TestReqs using Method.
+new_client_fun(Method, TestReqs) ->
+    fun (Transport, Port) ->
+            client_request(Transport, Port, Method, TestReqs)
+    end.
+
+%% Connect to the local test server over the given transport and run the
+%% scripted requests through a transport-neutral socket fun.
+client_request(Transport, Port, Method, TestReqs) ->
+    Opts = [binary, {active, false}, {packet, http}],
+    SockFun = case Transport of
+                  plain ->
+                      {ok, Socket} = gen_tcp:connect("127.0.0.1", Port, Opts),
+                      plain_sock_fun(Socket);
+                  ssl ->
+                      {ok, Socket} = ssl:connect("127.0.0.1", Port, [{ssl_imp, new} | Opts]),
+                      ssl_sock_fun(Socket)
+              end,
+    client_request(SockFun, Method, TestReqs).
+
+%% recv / {recv, Length} / {send, Data} / {setopts, L} over a gen_tcp socket.
+plain_sock_fun(Socket) ->
+    fun (recv) ->
+            gen_tcp:recv(Socket, 0);
+        ({recv, Length}) ->
+            gen_tcp:recv(Socket, Length);
+        ({send, Data}) ->
+            gen_tcp:send(Socket, Data);
+        ({setopts, L}) ->
+            inet:setopts(Socket, L)
+    end.
+
+%% The same four operations over an ssl socket.
+ssl_sock_fun(Socket) ->
+    fun (recv) ->
+            ssl:recv(Socket, 0);
+        ({recv, Length}) ->
+            ssl:recv(Socket, Length);
+        ({send, Data}) ->
+            ssl:send(Socket, Data);
+        ({setopts, L}) ->
+            ssl:setopts(Socket, L)
+    end.
+
+%% Send each scripted request in turn and check status line, headers and
+%% body of every reply. Once the script is exhausted the server must have
+%% closed the connection (the last request carries "Connection: close").
+client_request(SockFun, _Method, []) ->
+    {the_end, {error, closed}} = {the_end, SockFun(recv)},
+    ok;
+client_request(SockFun, Method,
+               [#treq{path=Path, body=Body, xreply=ExReply} | Rest]) ->
+    Request = [atom_to_list(Method), " ", Path, " HTTP/1.1\r\n",
+               client_headers(Body, Rest =:= []),
+               "\r\n",
+               Body],
+    ok = SockFun({send, Request}),
+    case Method of
+        'GET' ->
+            {ok, {http_response, {1,1}, 200, "OK"}} = SockFun(recv);
+        'POST' ->
+            {ok, {http_response, {1,1}, 201, "Created"}} = SockFun(recv)
+    end,
+    %% Header lines are decoded in httph mode, the body is read raw, then
+    %% the socket goes back to http mode for the next status line.
+    ok = SockFun({setopts, [{packet, httph}]}),
+    {ok, {http_header, _, 'Server', _, "MochiWeb" ++ _}} = SockFun(recv),
+    {ok, {http_header, _, 'Date', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Type', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Length', _, ConLenStr}} = SockFun(recv),
+    ContentLength = list_to_integer(ConLenStr),
+    {ok, http_eoh} = SockFun(recv),
+    ok = SockFun({setopts, [{packet, raw}]}),
+    {payload, ExReply} = {payload, drain_reply(SockFun, ContentLength, <<>>)},
+    ok = SockFun({setopts, [{packet, http}]}),
+    client_request(SockFun, Method, Rest).
+
+%% Request headers as an iolist: Host always, entity headers only when
+%% there is a body, and "Connection: close" on the final request.
+client_headers(Body, IsLastRequest) ->
+    ["Host: localhost\r\n",
+     case Body of
+         <<>> ->
+             "";
+         _ ->
+             ["Content-Type: application/octet-stream\r\n",
+              "Content-Length: ", integer_to_list(byte_size(Body)), "\r\n"]
+     end,
+     case IsLastRequest of
+         true ->
+             "Connection: close\r\n";
+         false ->
+             ""
+     end].
+
+%% Read exactly Length bytes from the socket in chunks of at most 1024
+%% bytes, appending each chunk to Acc, and return the accumulated binary.
+drain_reply(_SockFun, 0, Acc) ->
+    Acc;
+drain_reply(SockFun, Length, Acc) ->
+    Sz = erlang:min(Length, 1024),
+    {ok, B} = SockFun({recv, Sz}),
+    drain_reply(SockFun, Length - Sz, <<Acc/binary, B/binary>>).
+
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_acceptor.erl b/1.1.x/src/mochiweb/mochiweb_acceptor.erl
new file mode 100644
index 00000000..79d172c3
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_acceptor.erl
@@ -0,0 +1,48 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc MochiWeb acceptor.
+
+-module(mochiweb_acceptor).
+-author('bob@mochimedia.com').
+
+-include("internal.hrl").
+
+-export([start_link/3, init/3]).
+
+%% Spawn a linked acceptor process running init/3.
+start_link(Server, Listen, Loop) ->
+    proc_lib:spawn_link(?MODULE, init, [Server, Listen, Loop]).
+
+%% Accept one connection on Listen. On success Server is told about it,
+%% together with the time spent waiting in microseconds, and the socket
+%% is handed to Loop. A closed socket, a timeout or an SSL accept failure
+%% ends the process normally; anything else is logged and the process
+%% exits with {error, accept_failed}.
+init(Server, Listen, Loop) ->
+    Started = now(),
+    case catch mochiweb_socket:accept(Listen) of
+        {ok, Socket} ->
+            Waited = timer:now_diff(now(), Started),
+            gen_server:cast(Server, {accepted, self(), Waited}),
+            call_loop(Loop, Socket);
+        {error, Reason} when Reason =:= closed;
+                             Reason =:= timeout;
+                             Reason =:= esslaccept ->
+            exit(normal);
+        Other ->
+            report_accept_failure(Other),
+            exit({error, accept_failed})
+    end.
+
+%% Log an unexpected accept result through error_logger.
+report_accept_failure(Other) ->
+    error_logger:error_report(
+      [{application, mochiweb},
+       "Accept failed error",
+       lists:flatten(io_lib:format("~p", [Other]))]).
+
+%% Invoke the connection handler, which may be {Module, Function},
+%% {Module, Function, ExtraArgs} (the socket is prepended) or a fun.
+call_loop({M, F}, Socket) ->
+    M:F(Socket);
+call_loop({M, F, A}, Socket) ->
+    apply(M, F, [Socket | A]);
+call_loop(Loop, Socket) ->
+    Loop(Socket).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_app.erl b/1.1.x/src/mochiweb/mochiweb_app.erl
new file mode 100644
index 00000000..5d67787b
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_app.erl
@@ -0,0 +1,27 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Callbacks for the mochiweb application.
+
+-module(mochiweb_app).
+-author('bob@mochimedia.com').
+
+-behaviour(application).
+-export([start/2,stop/1]).
+
+%% @spec start(_Type, _StartArgs) -> ServerRet
+%% @doc application start callback for mochiweb.
+%%      Both arguments are ignored; the result is that of
+%%      mochiweb_sup:start_link/0.
+start(_Type, _StartArgs) ->
+    mochiweb_sup:start_link().
+
+%% @spec stop(_State) -> ServerRet
+%% @doc application stop callback for mochiweb. Always returns ok.
+stop(_State) ->
+    ok.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_charref.erl b/1.1.x/src/mochiweb/mochiweb_charref.erl
new file mode 100644
index 00000000..99cd5502
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_charref.erl
@@ -0,0 +1,308 @@
+%% @author Bob Ippolito
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Converts HTML 4 charrefs and entities to codepoints.
+-module(mochiweb_charref).
+-export([charref/1]).
+
+%% External API.
+
+%% @spec charref(S) -> integer() | undefined
+%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
+%%      codepoint, or return undefined on failure.
+%%      The input should not include an ampersand or semicolon.
+%%      charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38.
+%%      Numeric references carrying a sign ("#-38", "#+38", "#x-26") are
+%%      not codepoints and yield undefined.
+charref(B) when is_binary(B) ->
+    charref(binary_to_list(B));
+charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
+    numeric_charref(L, 16);
+charref([$# | L]) ->
+    numeric_charref(L, 10);
+charref(L) ->
+    entity(L).
+
+%% Internal API.
+
+%% Parse the digits of a numeric reference in the given base.
+%% list_to_integer accepts a leading "+" or "-", which would let a negative
+%% number through as a codepoint, so signed input is rejected up front.
+numeric_charref([Sign | _], _Base) when Sign =:= $-; Sign =:= $+ ->
+    undefined;
+numeric_charref(Digits, Base) ->
+    try erlang:list_to_integer(Digits, Base)
+    catch
+        error:badarg -> undefined
+    end.
+
+%% Map an HTML 4 entity name (a string, without ampersand or semicolon) to
+%% its code point; any other argument yields undefined.
+%% The table is kept as one literal clause per entity on purpose:
+%% exhaustive_entity_test reads these clauses back through
+%% mochiweb_cover:clause_lookup_table/2.
+entity("nbsp") -> 160;
+entity("iexcl") -> 161;
+entity("cent") -> 162;
+entity("pound") -> 163;
+entity("curren") -> 164;
+entity("yen") -> 165;
+entity("brvbar") -> 166;
+entity("sect") -> 167;
+entity("uml") -> 168;
+entity("copy") -> 169;
+entity("ordf") -> 170;
+entity("laquo") -> 171;
+entity("not") -> 172;
+entity("shy") -> 173;
+entity("reg") -> 174;
+entity("macr") -> 175;
+entity("deg") -> 176;
+entity("plusmn") -> 177;
+entity("sup2") -> 178;
+entity("sup3") -> 179;
+entity("acute") -> 180;
+entity("micro") -> 181;
+entity("para") -> 182;
+entity("middot") -> 183;
+entity("cedil") -> 184;
+entity("sup1") -> 185;
+entity("ordm") -> 186;
+entity("raquo") -> 187;
+entity("frac14") -> 188;
+entity("frac12") -> 189;
+entity("frac34") -> 190;
+entity("iquest") -> 191;
+entity("Agrave") -> 192;
+entity("Aacute") -> 193;
+entity("Acirc") -> 194;
+entity("Atilde") -> 195;
+entity("Auml") -> 196;
+entity("Aring") -> 197;
+entity("AElig") -> 198;
+entity("Ccedil") -> 199;
+entity("Egrave") -> 200;
+entity("Eacute") -> 201;
+entity("Ecirc") -> 202;
+entity("Euml") -> 203;
+entity("Igrave") -> 204;
+entity("Iacute") -> 205;
+entity("Icirc") -> 206;
+entity("Iuml") -> 207;
+entity("ETH") -> 208;
+entity("Ntilde") -> 209;
+entity("Ograve") -> 210;
+entity("Oacute") -> 211;
+entity("Ocirc") -> 212;
+entity("Otilde") -> 213;
+entity("Ouml") -> 214;
+entity("times") -> 215;
+entity("Oslash") -> 216;
+entity("Ugrave") -> 217;
+entity("Uacute") -> 218;
+entity("Ucirc") -> 219;
+entity("Uuml") -> 220;
+entity("Yacute") -> 221;
+entity("THORN") -> 222;
+entity("szlig") -> 223;
+entity("agrave") -> 224;
+entity("aacute") -> 225;
+entity("acirc") -> 226;
+entity("atilde") -> 227;
+entity("auml") -> 228;
+entity("aring") -> 229;
+entity("aelig") -> 230;
+entity("ccedil") -> 231;
+entity("egrave") -> 232;
+entity("eacute") -> 233;
+entity("ecirc") -> 234;
+entity("euml") -> 235;
+entity("igrave") -> 236;
+entity("iacute") -> 237;
+entity("icirc") -> 238;
+entity("iuml") -> 239;
+entity("eth") -> 240;
+entity("ntilde") -> 241;
+entity("ograve") -> 242;
+entity("oacute") -> 243;
+entity("ocirc") -> 244;
+entity("otilde") -> 245;
+entity("ouml") -> 246;
+entity("divide") -> 247;
+entity("oslash") -> 248;
+entity("ugrave") -> 249;
+entity("uacute") -> 250;
+entity("ucirc") -> 251;
+entity("uuml") -> 252;
+entity("yacute") -> 253;
+entity("thorn") -> 254;
+entity("yuml") -> 255;
+entity("fnof") -> 402;
+entity("Alpha") -> 913;
+entity("Beta") -> 914;
+entity("Gamma") -> 915;
+entity("Delta") -> 916;
+entity("Epsilon") -> 917;
+entity("Zeta") -> 918;
+entity("Eta") -> 919;
+entity("Theta") -> 920;
+entity("Iota") -> 921;
+entity("Kappa") -> 922;
+entity("Lambda") -> 923;
+entity("Mu") -> 924;
+entity("Nu") -> 925;
+entity("Xi") -> 926;
+entity("Omicron") -> 927;
+entity("Pi") -> 928;
+entity("Rho") -> 929;
+entity("Sigma") -> 931;
+entity("Tau") -> 932;
+entity("Upsilon") -> 933;
+entity("Phi") -> 934;
+entity("Chi") -> 935;
+entity("Psi") -> 936;
+entity("Omega") -> 937;
+entity("alpha") -> 945;
+entity("beta") -> 946;
+entity("gamma") -> 947;
+entity("delta") -> 948;
+entity("epsilon") -> 949;
+entity("zeta") -> 950;
+entity("eta") -> 951;
+entity("theta") -> 952;
+entity("iota") -> 953;
+entity("kappa") -> 954;
+entity("lambda") -> 955;
+entity("mu") -> 956;
+entity("nu") -> 957;
+entity("xi") -> 958;
+entity("omicron") -> 959;
+entity("pi") -> 960;
+entity("rho") -> 961;
+entity("sigmaf") -> 962;
+entity("sigma") -> 963;
+entity("tau") -> 964;
+entity("upsilon") -> 965;
+entity("phi") -> 966;
+entity("chi") -> 967;
+entity("psi") -> 968;
+entity("omega") -> 969;
+entity("thetasym") -> 977;
+entity("upsih") -> 978;
+entity("piv") -> 982;
+entity("bull") -> 8226;
+entity("hellip") -> 8230;
+entity("prime") -> 8242;
+entity("Prime") -> 8243;
+entity("oline") -> 8254;
+entity("frasl") -> 8260;
+entity("weierp") -> 8472;
+entity("image") -> 8465;
+entity("real") -> 8476;
+entity("trade") -> 8482;
+entity("alefsym") -> 8501;
+entity("larr") -> 8592;
+entity("uarr") -> 8593;
+entity("rarr") -> 8594;
+entity("darr") -> 8595;
+entity("harr") -> 8596;
+entity("crarr") -> 8629;
+entity("lArr") -> 8656;
+entity("uArr") -> 8657;
+entity("rArr") -> 8658;
+entity("dArr") -> 8659;
+entity("hArr") -> 8660;
+entity("forall") -> 8704;
+entity("part") -> 8706;
+entity("exist") -> 8707;
+entity("empty") -> 8709;
+entity("nabla") -> 8711;
+entity("isin") -> 8712;
+entity("notin") -> 8713;
+entity("ni") -> 8715;
+entity("prod") -> 8719;
+entity("sum") -> 8721;
+entity("minus") -> 8722;
+entity("lowast") -> 8727;
+entity("radic") -> 8730;
+entity("prop") -> 8733;
+entity("infin") -> 8734;
+entity("ang") -> 8736;
+entity("and") -> 8743;
+entity("or") -> 8744;
+entity("cap") -> 8745;
+entity("cup") -> 8746;
+entity("int") -> 8747;
+entity("there4") -> 8756;
+entity("sim") -> 8764;
+entity("cong") -> 8773;
+entity("asymp") -> 8776;
+entity("ne") -> 8800;
+entity("equiv") -> 8801;
+entity("le") -> 8804;
+entity("ge") -> 8805;
+entity("sub") -> 8834;
+entity("sup") -> 8835;
+entity("nsub") -> 8836;
+entity("sube") -> 8838;
+entity("supe") -> 8839;
+entity("oplus") -> 8853;
+entity("otimes") -> 8855;
+entity("perp") -> 8869;
+entity("sdot") -> 8901;
+entity("lceil") -> 8968;
+entity("rceil") -> 8969;
+entity("lfloor") -> 8970;
+entity("rfloor") -> 8971;
+entity("lang") -> 9001;
+entity("rang") -> 9002;
+entity("loz") -> 9674;
+entity("spades") -> 9824;
+entity("clubs") -> 9827;
+entity("hearts") -> 9829;
+entity("diams") -> 9830;
+entity("quot") -> 34;
+entity("amp") -> 38;
+entity("lt") -> 60;
+entity("gt") -> 62;
+entity("OElig") -> 338;
+entity("oelig") -> 339;
+entity("Scaron") -> 352;
+entity("scaron") -> 353;
+entity("Yuml") -> 376;
+entity("circ") -> 710;
+entity("tilde") -> 732;
+entity("ensp") -> 8194;
+entity("emsp") -> 8195;
+entity("thinsp") -> 8201;
+entity("zwnj") -> 8204;
+entity("zwj") -> 8205;
+entity("lrm") -> 8206;
+entity("rlm") -> 8207;
+entity("ndash") -> 8211;
+entity("mdash") -> 8212;
+entity("lsquo") -> 8216;
+entity("rsquo") -> 8217;
+entity("sbquo") -> 8218;
+entity("ldquo") -> 8220;
+entity("rdquo") -> 8221;
+entity("bdquo") -> 8222;
+entity("dagger") -> 8224;
+entity("Dagger") -> 8225;
+entity("permil") -> 8240;
+entity("lsaquo") -> 8249;
+entity("rsaquo") -> 8250;
+entity("euro") -> 8364;
+entity(_) -> undefined.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+%% Every {Name, Codepoint} pair recovered from the entity/1 clauses must
+%% be what entity/1 actually returns for that name.
+exhaustive_entity_test() ->
+    T = mochiweb_cover:clause_lookup_table(?MODULE, entity),
+    [?assertEqual(V, entity(K)) || {K, V} <- T].
+
+%% Decimal, hex (either case of x and of the digits), named and malformed
+%% references, given both as strings and as binaries.
+charref_test() ->
+    1234 = charref("#1234"),
+    255 = charref("#xfF"),
+    255 = charref(<<"#XFf">>),
+    38 = charref("amp"),
+    38 = charref(<<"amp">>),
+    undefined = charref("not_an_entity"),
+    undefined = charref("#not_an_entity"),
+    undefined = charref("#xnot_an_entity"),
+    ok.
+
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_cookies.erl b/1.1.x/src/mochiweb/mochiweb_cookies.erl
new file mode 100644
index 00000000..c090b714
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_cookies.erl
@@ -0,0 +1,309 @@
+%% @author Emad El-Haraty
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc HTTP Cookie parsing and generating (RFC 2109, RFC 2965).
+
+-module(mochiweb_cookies).
+-export([parse_cookie/1, cookie/3, cookie/2]).
+
+%% The double-quote character.
+-define(QUOTE, $\").
+
+%% True for space, tab, carriage return and line feed.
+-define(IS_WHITESPACE(C),
+        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
+
+%% RFC 2616 separators (called tspecials in RFC 2068)
+%% Anything below 32 is treated as a separator here as well.
+-define(IS_SEPARATOR(C),
+        (C < 32 orelse
+         C =:= $\s orelse C =:= $\t orelse
+         C =:= $( orelse C =:= $) orelse C =:= $< orelse C =:= $> orelse
+         C =:= $@ orelse C =:= $, orelse C =:= $; orelse C =:= $: orelse
+         C =:= $\\ orelse C =:= $\" orelse C =:= $/ orelse
+         C =:= $[ orelse C =:= $] orelse C =:= $? orelse C =:= $= orelse
+         C =:= ${ orelse C =:= $})).
+
+%% @type proplist() = [{Key::string(), Value::string()}].
+%% @type header() = {Name::string(), Value::string()}.
+
+%% @spec cookie(Key::string(), Value::string()) -> header()
+%% @doc Short-hand for cookie(Key, Value, []).
+cookie(Key, Value) ->
+    cookie(Key, Value, []).
+
+%% @spec cookie(Key::string(), Value::string(), Options::[Option]) -> header()
+%% where Option = {max_age, integer()} | {local_time, {date(), time()}}
+%%                | {domain, string()} | {path, string()}
+%%                | {secure, true | false} | {http_only, true | false}
+%%
+%% @doc Generate a Set-Cookie header field tuple.
+%%      The attributes are emitted in a fixed order: Version, Expires and
+%%      Max-Age (only with max_age), Secure, Domain, Path, HttpOnly.
+%%      A negative max_age is treated as 0; local_time overrides the
+%%      current local time used to compute Expires.
+cookie(Key, Value, Options) ->
+    %% Set-Cookie:
+    %%    Comment, Domain, Max-Age, Path, Secure, Version
+    %% Set-Cookie2:
+    %%    Comment, CommentURL, Discard, Domain, Max-Age, Path, Port, Secure,
+    %%    Version
+    NameValue = [any_to_list(Key), "=", quote(Value), "; Version=1"],
+    Expires = expires_part(proplists:get_value(max_age, Options), Options),
+    Secure = flag_part(secure, Options, "; Secure"),
+    Domain = attr_part(domain, Options, "; Domain="),
+    Path = attr_part(path, Options, "; Path="),
+    HttpOnly = flag_part(http_only, Options, "; HttpOnly"),
+    {"Set-Cookie",
+     lists:flatten([NameValue, Expires, Secure, Domain, Path, HttpOnly])}.
+
+%% "; Expires=...; Max-Age=..." for the given max_age, or "" without one.
+expires_part(undefined, _Options) ->
+    "";
+expires_part(RawAge, Options) ->
+    When = case proplists:get_value(local_time, Options) of
+               undefined ->
+                   calendar:local_time();
+               LocalTime ->
+                   LocalTime
+           end,
+    Age = if
+              RawAge < 0 -> 0;
+              true -> RawAge
+          end,
+    ["; Expires=", age_to_cookie_date(Age, When),
+     "; Max-Age=", quote(Age)].
+
+%% Text when the option is exactly true, otherwise "".
+flag_part(Name, Options, Text) ->
+    case proplists:get_value(Name, Options) of
+        true ->
+            Text;
+        _ ->
+            ""
+    end.
+
+%% Label followed by the checked option value, or "" when it is not set.
+attr_part(Name, Options, Label) ->
+    case proplists:get_value(Name, Options) of
+        undefined ->
+            "";
+        Value ->
+            [Label, quote(Value)]
+    end.
+
+
+%% Every major browser incorrectly handles quoted strings in a
+%% different and (worse) incompatible manner.  Instead of wasting time
+%% writing redundant code for each browser, we restrict cookies to
+%% only contain characters that browsers handle compatibly.
+%%
+%% By replacing the definition of quote with this, we generate
+%% RFC-compliant cookies:
+%%
+%%     quote(V) ->
+%%         Fun = fun(?QUOTE, Acc) -> [$\\, ?QUOTE | Acc];
+%%                  (Ch, Acc) -> [Ch | Acc]
+%%               end,
+%%         [?QUOTE | lists:foldr(Fun, [?QUOTE], V)].
+
+%% Convert to a string and raise an error if quoting is required.
+%% "/" is the one separator character that is let through.
+quote(V0) ->
+    Str = any_to_list(V0),
+    NeedsQuoting = lists:any(fun (Ch) ->
+                                     Ch =/= $/ andalso ?IS_SEPARATOR(Ch)
+                             end,
+                             Str),
+    case NeedsQuoting of
+        true ->
+            erlang:error({cookie_quoting_required, Str});
+        false ->
+            Str
+    end.
+
+%% Shift a local datetime by Secs seconds.
+add_seconds(Secs, LocalTime) ->
+    Base = calendar:datetime_to_gregorian_seconds(LocalTime),
+    calendar:gregorian_seconds_to_datetime(Base + Secs).
+
+%% RFC 1123 date string for the moment Age seconds after LocalTime.
+age_to_cookie_date(Age, LocalTime) ->
+    Expiry = add_seconds(Age, LocalTime),
+    httpd_util:rfc1123_date(Expiry).
+
+%% @spec parse_cookie(string()) -> [{K::string(), V::string()}]
+%% @doc Parse the contents of a Cookie header field, ignoring cookie
+%% attributes, and return a simple property list.
+parse_cookie("") ->
+    [];
+parse_cookie(Cookie) ->
+    parse_cookie(Cookie, []).
+
+%% Internal API
+
+%% Read name/value pairs until the input is exhausted; Acc is reversed.
+parse_cookie([], Acc) ->
+    lists:reverse(Acc);
+parse_cookie(String, Acc) ->
+    {Pair, Rest} = read_pair(String),
+    parse_cookie(Rest, add_pair(Pair, Acc)).
+
+%% Keep a pair unless its name is empty or starts with "$" (an attribute).
+add_pair({"", _Value}, Acc) ->
+    Acc;
+add_pair({"$" ++ _, _Value}, Acc) ->
+    Acc;
+add_pair(Pair, Acc) ->
+    [Pair | Acc].
+
+%% One "name[=value]" pair plus the input following its separator.
+read_pair(String) ->
+    {Token, AfterToken} = read_token(skip_whitespace(String)),
+    {Value, AfterValue} = read_value(skip_whitespace(AfterToken)),
+    {{Token, Value}, skip_past_separator(AfterValue)}.
+
+%% The value after "=", quoted or bare; "" when there is no "=".
+read_value([$= | Value]) ->
+    read_bare_or_quoted(skip_whitespace(Value));
+read_value(String) ->
+    {"", String}.
+
+read_bare_or_quoted([?QUOTE | _] = Quoted) ->
+    read_quoted(Quoted);
+read_bare_or_quoted(Bare) ->
+    read_token(Bare).
+
+%% Contents of a quoted string, with backslash escapes resolved.
+read_quoted([?QUOTE | String]) ->
+    read_quoted(String, []).
+
+read_quoted([], Acc) ->
+    %% Unterminated quote: take everything that was read.
+    {lists:reverse(Acc), []};
+read_quoted([?QUOTE | Rest], Acc) ->
+    {lists:reverse(Acc), Rest};
+read_quoted([$\\, Escaped | Rest], Acc) ->
+    read_quoted(Rest, [Escaped | Acc]);
+read_quoted([Ch | Rest], Acc) ->
+    read_quoted(Rest, [Ch | Acc]).
+
+%% Drop leading whitespace (space, tab, CR, LF).
+skip_whitespace(String) ->
+    F = fun (C) -> ?IS_WHITESPACE(C) end,
+    lists:dropwhile(F, String).
+
+%% Split off the longest prefix that contains no separator character.
+read_token(String) ->
+    F = fun (C) -> not ?IS_SEPARATOR(C) end,
+    lists:splitwith(F, String).
+
+%% Discard input up to and including the next ";" or ",".
+skip_past_separator([]) ->
+    [];
+skip_past_separator([$; | Rest]) ->
+    Rest;
+skip_past_separator([$, | Rest]) ->
+    Rest;
+skip_past_separator([_ | Rest]) ->
+    skip_past_separator(Rest).
+
+%% Convert a list, atom, binary or integer to a list.
+any_to_list(V) when is_list(V) ->
+    V;
+any_to_list(V) when is_atom(V) ->
+    atom_to_list(V);
+any_to_list(V) when is_binary(V) ->
+    binary_to_list(V);
+any_to_list(V) when is_integer(V) ->
+    integer_to_list(V).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+quote_test() ->
+    %% ?assertError eunit macro is not compatible with coverage module
+    %% Matching the try against ok makes the test fail with badmatch when
+    %% quote/1 returns instead of raising; a bare try would pass silently.
+    ok = try quote(":wq")
+         catch error:{cookie_quoting_required, ":wq"} -> ok
+         end,
+    ?assertEqual(
+       "foo",
+       quote(foo)),
+    ok.
+
+parse_cookie_test() ->
+    %% RFC example
+    %% (the literal spans lines; the embedded line breaks and indentation
+    %% are whitespace that the parser skips)
+    C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";
+    Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";
+    Shipping=\"FedEx\"; $Path=\"/acme\"",
+    ?assertEqual(
+       [{"Customer","WILE_E_COYOTE"},
+        {"Part_Number","Rocket_Launcher_0001"},
+        {"Shipping","FedEx"}],
+       parse_cookie(C1)),
+    %% Potential edge cases
+    ?assertEqual(
+       [{"foo", "x"}],
+       parse_cookie("foo=\"\\x\"")),
+    ?assertEqual(
+       [],
+       parse_cookie("=")),
+    ?assertEqual(
+       [{"foo", ""}, {"bar", ""}],
+       parse_cookie("  foo ; bar  ")),
+    ?assertEqual(
+       [{"foo", ""}, {"bar", ""}],
+       parse_cookie("foo=;bar=")),
+    ?assertEqual(
+       [{"foo", "\";"}, {"bar", ""}],
+       parse_cookie("foo = \"\\\";\";bar ")),
+    ?assertEqual(
+       [{"foo", "\";bar"}],
+       parse_cookie("foo=\"\\\";bar")),
+    ?assertEqual(
+       [],
+       parse_cookie([])),
+    ?assertEqual(
+       [{"foo", "bar"}, {"baz", "wibble"}],
+       parse_cookie("foo=bar , baz=wibble ")),
+    ok.
+
+%% Domain and HttpOnly come out in the fixed attribute order, whatever the
+%% order of the options.
+domain_test() ->
+    Expected = {"Set-Cookie",
+                "Customer=WILE_E_COYOTE; "
+                "Version=1; "
+                "Domain=acme.com; "
+                "HttpOnly"},
+    Actual = cookie("Customer", "WILE_E_COYOTE",
+                    [{http_only, true}, {domain, "acme.com"}]),
+    ?assertEqual(Expected, Actual),
+    ok.
+
+%% Without local_time the Expires value depends on the clock, so only the
+%% shape of the header is checked.
+local_time_test() ->
+    {"Set-Cookie", Value} = cookie("Customer", "WILE_E_COYOTE",
+                                   [{max_age, 111}, {secure, true}]),
+    Parts = string:tokens(Value, ";"),
+    ?assertMatch(
+       ["Customer=WILE_E_COYOTE",
+        " Version=1",
+        " Expires=" ++ _,
+        " Max-Age=111",
+        " Secure"],
+       Parts),
+    ok.
+
+cookie_test() ->
+    %% The same header must result from strings, atoms and binaries, and
+    %% unknown options must be ignored.
+    WithPath = {"Set-Cookie",
+                "Customer=WILE_E_COYOTE; "
+                "Version=1; "
+                "Path=/acme"},
+    WithPath = cookie("Customer", "WILE_E_COYOTE", [{path, "/acme"}]),
+    WithPath = cookie("Customer", "WILE_E_COYOTE",
+                      [{path, "/acme"}, {badoption, "negatory"}]),
+    WithPath = cookie('Customer', 'WILE_E_COYOTE', [{path, '/acme'}]),
+    WithPath = cookie(<<"Customer">>, <<"WILE_E_COYOTE">>, [{path, <<"/acme">>}]),
+
+    %% An empty key is allowed.
+    {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey", []),
+    {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey"),
+
+    %% A fixed reference time makes the Expires attribute predictable.
+    LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}),
+    %% A negative max_age is clamped to 0.
+    Expired = {"Set-Cookie",
+               "Customer=WILE_E_COYOTE; "
+               "Version=1; "
+               "Expires=Tue, 15 May 2007 13:45:33 GMT; "
+               "Max-Age=0"},
+    Expired = cookie("Customer", "WILE_E_COYOTE",
+                     [{max_age, -111}, {local_time, LocalTime}]),
+    NextDay = {"Set-Cookie",
+               "Customer=WILE_E_COYOTE; "
+               "Version=1; "
+               "Expires=Wed, 16 May 2007 13:45:50 GMT; "
+               "Max-Age=86417"},
+    NextDay = cookie("Customer", "WILE_E_COYOTE",
+                     [{max_age, 86417}, {local_time, LocalTime}]),
+    ok.
+
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_cover.erl b/1.1.x/src/mochiweb/mochiweb_cover.erl
new file mode 100644
index 00000000..6a14ef51
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_cover.erl
@@ -0,0 +1,75 @@
+%% @author Bob Ippolito
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc Workarounds for various cover deficiencies.
+-module(mochiweb_cover).
+-export([get_beam/1, get_abstract_code/1,
+         get_clauses/2, clause_lookup_table/1]).
+-export([clause_lookup_table/2]).
+
+%% Internal
+
+%% Object code of a module, as reported by the code server.
+get_beam(Module) ->
+    {Module, Binary, _Filename} = code:get_object_code(Module),
+    Binary.
+
+%% Abstract forms stored in a beam; crashes with badmatch unless the beam
+%% carries a raw_abstract_v1 abstract_code chunk.
+get_abstract_code(Beam) ->
+    {ok, {_Module, Chunks}} = beam_lib:chunks(Beam, [abstract_code]),
+    [{abstract_code, {raw_abstract_v1, Forms}}] = Chunks,
+    Forms.
+
+%% Clauses of the one function called Function; crashes with badmatch when
+%% the name matches no function or more than one.
+get_clauses(Function, Code) ->
+    [Clauses] = [Cs || {function, _, Name, _, Cs} <- Code,
+                       Name =:= Function],
+    Clauses.
+
+%% {Input, Output} table recovered from the clauses of Module:Function.
+clause_lookup_table(Module, Function) ->
+    Forms = get_abstract_code(get_beam(Module)),
+    clause_lookup_table(get_clauses(Function, Forms)).
+
+%% Turn clauses into {Input, Output} pairs, in clause order. Only clauses
+%% with one argument, no guard and a one-expression body made of literal
+%% terms contribute a pair.
+clause_lookup_table(Clauses) ->
+    lists:foldr(fun clause_fold/2, [], Clauses).
+
+clause_fold({clause, _,
+             [InTerm],
+             _Guards=[],
+             [OutTerm]},
+            Acc) ->
+    %% normalise raises on anything that is not a literal term (a variable,
+    %% say); such clauses are left out of the table.
+    try [{erl_parse:normalise(InTerm), erl_parse:normalise(OutTerm)} | Acc]
+    catch error:_ -> Acc
+    end;
+clause_fold(_, Acc) ->
+    Acc.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+%% Test fixture: the clause shapes here are the data under test, so they
+%% are kept exactly as written.
+foo_table(a) -> b;
+foo_table("a") -> <<"b">>;
+foo_table(123) -> {4, 3, 2};
+foo_table([list]) -> [];
+foo_table([list1, list2]) -> [list1, list2, list3];
+foo_table(ignored) -> some, code, ignored;
+foo_table(Var) -> Var.
+
+foo_table_test() ->
+    T = clause_lookup_table(?MODULE, foo_table),
+    [?assertEqual(V, foo_table(K)) || {K, V} <- T].
+
+clause_lookup_table_test() ->
+    ?assertEqual(b, foo_table(a)),
+    ?assertEqual(ignored, foo_table(ignored)),
+    ?assertEqual('Var', foo_table('Var')),
+    ?assertEqual(
+       [{a, b},
+        {"a", <<"b">>},
+        {123, {4, 3, 2}},
+        {[list], []},
+        {[list1, list2], [list1, list2, list3]}],
+       clause_lookup_table(?MODULE, foo_table)).
+
+-endif.
diff --git a/1.1.x/src/mochiweb/mochiweb_echo.erl b/1.1.x/src/mochiweb/mochiweb_echo.erl
new file mode 100644
index 00000000..6f7872b9
--- /dev/null
+++ b/1.1.x/src/mochiweb/mochiweb_echo.erl
@@ -0,0 +1,38 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Simple and stupid echo server to demo mochiweb_socket_server.
+
+-module(mochiweb_echo).
+-author('bob@mochimedia.com').
+-export([start/0, stop/0, loop/1]). + +stop() -> + mochiweb_socket_server:stop(?MODULE). + +start() -> + mochiweb_socket_server:start([{name, ?MODULE}, + {port, 6789}, + {ip, "127.0.0.1"}, + {max, 1}, + {loop, {?MODULE, loop}}]). + +loop(Socket) -> + case mochiweb_socket:recv(Socket, 0, 30000) of + {ok, Data} -> + case mochiweb_socket:send(Socket, Data) of + ok -> + loop(Socket); + _ -> + exit(normal) + end; + _Other -> + exit(normal) + end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/1.1.x/src/mochiweb/mochiweb_headers.erl b/1.1.x/src/mochiweb/mochiweb_headers.erl new file mode 100644 index 00000000..4fce9838 --- /dev/null +++ b/1.1.x/src/mochiweb/mochiweb_headers.erl @@ -0,0 +1,299 @@ +%% @author Bob Ippolito +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Case preserving (but case insensitive) HTTP Header dictionary. + +-module(mochiweb_headers). +-author('bob@mochimedia.com'). +-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]). +-export([delete_any/2, get_primary_value/2]). +-export([default/3, enter_from_list/2, default_from_list/2]). +-export([to_list/1, make/1]). +-export([from_binary/1]). + +%% @type headers(). +%% @type key() = atom() | binary() | string(). +%% @type value() = atom() | binary() | string() | integer(). + +%% @spec empty() -> headers() +%% @doc Create an empty headers structure. +empty() -> + gb_trees:empty(). + +%% @spec make(headers() | [{key(), value()}]) -> headers() +%% @doc Construct a headers() from the given list. +make(L) when is_list(L) -> + from_list(L); +%% assume a tuple is already mochiweb_headers. +make(T) when is_tuple(T) -> + T. + +%% @spec from_binary(iolist()) -> headers() +%% @doc Transforms a raw HTTP header into a mochiweb headers structure. +%% +%% The given raw HTTP header can be one of the following: +%% +%% 1) A string or a binary representing a full HTTP header ending with +%% double CRLF. 
%%    Examples:
%% ```
%% "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
%% <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>'''
%%
%% 2) A list of binaries or strings where each element represents a raw
%%    HTTP header line ending with a single CRLF.
%%    Examples:
%% ```
%% [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>]
%% ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"]
%% ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]'''
%%
%% Repeated header fields are merged in the order in which they appear in
%% the input.
from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
    from_binary(RawHttpHeader, []);
from_binary(RawHttpHeaderList) ->
    %% A list of header lines has no terminating blank line, so one CRLF is
    %% appended before decoding. For input that already ends in CRLF CRLF the
    %% extra CRLF is simply left unread.
    from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])).

%% Decode one header line at a time, collecting {Name, Value} pairs in Acc
%% (newest first). Decoding stops at the first result that is not an
%% http_header tuple; whatever has been collected up to that point is
%% turned into a headers structure.
from_binary(RawHttpHeader, Acc) ->
    case erlang:decode_packet(httph, RawHttpHeader, []) of
        {ok, {http_header, _, H, _, V}, Rest} ->
            from_binary(Rest, [{H, V} | Acc]);
        _ ->
            %% Restore wire order before building the structure: insert/3
            %% merges repeated fields as "existing, new" and keeps the key of
            %% the first insertion, so feeding it the reversed list would
            %% yield "second, first" for duplicated headers.
            make(lists:reverse(Acc))
    end.

%% @spec from_list([{key(), value()}]) -> headers()
%% @doc Construct a headers() from the given list. Pairs are added with
%%      insert/3, in list order.
from_list(List) ->
    lists:foldl(fun ({K, V}, T) -> insert(K, V, T) end, empty(), List).

%% @spec enter_from_list([{key(), value()}], headers()) -> headers()
%% @doc Insert pairs into the headers, replace any values for existing keys.
enter_from_list(List, T) ->
    lists:foldl(fun ({K, V}, T1) -> enter(K, V, T1) end, T, List).

%% @spec default_from_list([{key(), value()}], headers()) -> headers()
%% @doc Insert pairs into the headers for keys that do not already exist.
default_from_list(List, T) ->
    lists:foldl(fun ({K, V}, T1) -> default(K, V, T1) end, T, List).

%% @spec to_list(headers()) -> [{key(), string()}]
%% @doc Return the contents of the headers. The keys will be the exact key
%%      that was first inserted (e.g. may be an atom or binary, case is
%%      preserved).
to_list(T) ->
    Unpack = fun ({Key, {array, NewestFirst}}) ->
                     %% Multi-valued entries keep their values newest first;
                     %% emit one pair per value, oldest first.
                     [{Key, Val} || Val <- lists:reverse(NewestFirst)];
                 (Entry) ->
                     [Entry]
             end,
    lists:flatmap(Unpack, gb_trees:values(T)).

%% @spec get_value(key(), headers()) -> string() | undefined
%% @doc Case insensitive fetch of a header value; the atom undefined is
%%      returned when the header is not present.
get_value(K, T) ->
    case lookup(K, T) of
        none ->
            undefined;
        {value, {_StoredKey, Val}} ->
            expand(Val)
    end.

%% @spec get_primary_value(key(), headers()) -> string() | undefined
%% @doc Like get_value/2, but the value is cut off just before its first
%%      semicolon. undefined is returned when the header is not present.
get_primary_value(K, T) ->
    case get_value(K, T) of
        undefined ->
            undefined;
        Whole ->
            NotSemicolon = fun (Char) -> Char =/= $; end,
            lists:takewhile(NotSemicolon, Whole)
    end.

%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none
%% @doc Case insensitive search that returns the key exactly as it was
%%      stored together with the value, or none when the header is not
%%      present.
lookup(K, T) ->
    Normalized = normalize(K),
    case gb_trees:lookup(Normalized, T) of
        none ->
            none;
        {value, {StoredKey, Stored}} ->
            {value, {StoredKey, expand(Stored)}}
    end.

%% @spec default(key(), value(), headers()) -> headers()
%% @doc Add the pair only when no header with that (case insensitive) name
%%      is stored yet; otherwise the headers are returned unchanged.
default(K, V, T) ->
    Normalized = normalize(K),
    Text = any_to_list(V),
    case gb_trees:is_defined(Normalized, T) of
        true ->
            T;
        false ->
            gb_trees:insert(Normalized, {K, Text}, T)
    end.

%% @spec enter(key(), value(), headers()) -> headers()
%% @doc Store the pair, overwriting whatever was stored under the same
%%      (case insensitive) name.
enter(K, V, T) ->
    Normalized = normalize(K),
    Text = any_to_list(V),
    gb_trees:enter(Normalized, {K, Text}, T).

%% @spec insert(key(), value(), headers()) -> headers()
%% @doc Insert the pair into the headers, merging with any pre-existing key.
%%      A merge is done with Value = V0 ++ ", " ++ V1.
insert(K, V, T) ->
    Normalized = normalize(K),
    Incoming = any_to_list(V),
    case gb_trees:lookup(Normalized, T) of
        none ->
            gb_trees:insert(Normalized, {K, Incoming}, T);
        {value, {StoredKey, Existing}} ->
            %% Keep the key as first stored; only the value is merged.
            Merged = merge(Normalized, Incoming, Existing),
            gb_trees:update(Normalized, {StoredKey, Merged}, T)
    end.

%% @spec delete_any(key(), headers()) -> headers()
%% @doc Remove the header stored under the given (case insensitive) name;
%%      the headers are returned unchanged when there is none.
delete_any(K, T) ->
    gb_trees:delete_any(normalize(K), T).

%% Internal API

%% Turn a stored value into a string: multi-valued entries (kept newest
%% first) are joined oldest first with ", "; anything else is returned as is.
expand({array, NewestFirst}) ->
    mochiweb_util:join(lists:reverse(NewestFirst), ", ");
expand(Single) ->
    Single.

%% Combine a new value with the one already stored under the normalized name.
%% "set-cookie" values are collected in an {array, _} (newest first); all
%% other headers are concatenated as "Existing, Incoming".
merge("set-cookie", Incoming, Existing) ->
    case Existing of
        {array, Earlier} ->
            {array, [Incoming | Earlier]};
        _ ->
            {array, [Incoming, Existing]}
    end;
merge(_Name, Incoming, Existing) ->
    Existing ++ ", " ++ Incoming.

%% Lower-cased string form of a key given as string, atom or binary.
normalize(K) when is_atom(K) ->
    string:to_lower(atom_to_list(K));
normalize(K) when is_binary(K) ->
    string:to_lower(binary_to_list(K));
normalize(K) when is_list(K) ->
    string:to_lower(K).

%% String form of a value given as string, atom, binary or integer.
any_to_list(V) when is_integer(V) ->
    integer_to_list(V);
any_to_list(V) when is_binary(V) ->
    binary_to_list(V);
any_to_list(V) when is_atom(V) ->
    atom_to_list(V);
any_to_list(V) when is_list(V) ->
    V.

%%
%% Tests.
%%
-include_lib("eunit/include/eunit.hrl").
-ifdef(TEST).

make_test() ->
    Headers = make([{hdr, foo}]),
    ?assertEqual(Headers, make(Headers)).

enter_from_list_test() ->
    Base = make([{hdr, foo}]),
    Added = enter_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    Replaced = enter_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "bar"}], to_list(Replaced)),
    ok.

default_from_list_test() ->
    Base = make([{hdr, foo}]),
    Added = default_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    Untouched = default_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "foo"}], to_list(Untouched)),
    ok.
+ +get_primary_value_test() -> + H = make([{hdr, foo}, {baz, <<"wibble;taco">>}]), + ?assertEqual( + "foo", + get_primary_value(hdr, H)), + ?assertEqual( + undefined, + get_primary_value(bar, H)), + ?assertEqual( + "wibble", + get_primary_value(<<"baz">>, H)), + ok. + +set_cookie_test() -> + H = make([{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}]), + ?assertEqual( + [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}], + to_list(H)), + ok. + +headers_test() -> + H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]), + [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H), + H1 = ?MODULE:insert(taco, grande, H), + [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1), + H2 = ?MODULE:make([{"Set-Cookie", "foo"}]), + [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2), + H3 = ?MODULE:insert("Set-Cookie", "bar", H2), + [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3), + "foo, bar" = ?MODULE:get_value("set-cookie", H3), + {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3), + undefined = ?MODULE:get_value("shibby", H3), + none = ?MODULE:lookup("shibby", H3), + H4 = ?MODULE:insert("content-type", + "application/x-www-form-urlencoded; charset=utf8", + H3), + "application/x-www-form-urlencoded" = ?MODULE:get_primary_value( + "content-type", H4), + H4 = ?MODULE:delete_any("nonexistent-header", H4), + H3 = ?MODULE:delete_any("content-type", H4), + HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>, + H_HB = ?MODULE:from_binary(HB), + H_HB = ?MODULE:from_binary(binary_to_list(HB)), + "47" = ?MODULE:get_value("Content-Length", H_HB), + "text/plain" = ?MODULE:get_value("Content-Type", H_HB), + L_H_HB = ?MODULE:to_list(H_HB), + 2 = length(L_H_HB), + true = lists:member({'Content-Length', "47"}, L_H_HB), + true = lists:member({'Content-Type', "text/plain"}, L_H_HB), + HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ], + HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: 
text/plain\r\n">> ], + HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ], + H_HL = ?MODULE:from_binary(HL), + H_HL = ?MODULE:from_binary(HL2), + H_HL = ?MODULE:from_binary(HL3), + "47" = ?MODULE:get_value("Content-Length", H_HL), + "text/plain" = ?MODULE:get_value("Content-Type", H_HL), + L_H_HL = ?MODULE:to_list(H_HL), + 2 = length(L_H_HL), + true = lists:member({'Content-Length', "47"}, L_H_HL), + true = lists:member({'Content-Type', "text/plain"}, L_H_HL), + [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary("")), + [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])), + ok. + +-endif. diff --git a/1.1.x/src/mochiweb/mochiweb_html.erl b/1.1.x/src/mochiweb/mochiweb_html.erl new file mode 100644 index 00000000..a15c359c --- /dev/null +++ b/1.1.x/src/mochiweb/mochiweb_html.erl @@ -0,0 +1,1061 @@ +%% @author Bob Ippolito +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Loosely tokenizes and generates parse trees for HTML 4. +-module(mochiweb_html). +-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1, + escape_attr/1, to_html/1]). + +%% This is a macro to placate syntax highlighters.. +-define(QUOTE, $\"). +-define(SQUOTE, $\'). +-define(ADV_COL(S, N), + S#decoder{column=N+S#decoder.column, + offset=N+S#decoder.offset}). +-define(INC_COL(S), + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset}). +-define(INC_LINE(S), + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}). 
%% Advance decoder state S past the single character C: a newline starts a
%% new line (column back to 1, line + 1), any other character moves one
%% column to the right. The offset grows by one either way.
-define(INC_CHAR(S, C),
        case C of
            $\n ->
                S#decoder{column=1,
                          line=1+S#decoder.line,
                          offset=1+S#decoder.offset};
            _ ->
                S#decoder{column=1+S#decoder.column,
                          offset=1+S#decoder.offset}
        end).

%% True for space, tab, carriage return and line feed.
-define(IS_WHITESPACE(C),
        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
%% True for ASCII letters and digits.
-define(IS_LITERAL_SAFE(C),
        ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
         orelse (C >= $0 andalso C =< $9))).
%% True when C is `>' or whitespace.
-define(PROBABLE_CLOSE(C),
        (C =:= $> orelse ?IS_WHITESPACE(C))).

%% Position in the input: line and column start at 1, offset starts at 0.
-record(decoder, {line=1,
                  column=1,
                  offset=0}).

%% @type html_node() = {string(), [html_attr()], [html_node() | string()]}
%% @type html_attr() = {string(), string()}
%% @type html_token() = html_data() | start_tag() | end_tag() | inline_html() | html_comment() | html_doctype()
%% @type html_data() = {data, string(), Whitespace::boolean()}
%% @type start_tag() = {start_tag, Name, [html_attr()], Singleton::boolean()}
%% @type end_tag() = {end_tag, Name}
%% @type html_comment() = {comment, Comment}
%% @type html_doctype() = {doctype, [Doctype]}
%% @type inline_html() = {'=', iolist()}

%% External API.

%% @spec parse(string() | binary()) -> html_node()
%% @doc Tokenize the input, then build a HTML tree from the tokens.
parse(Input) ->
    Tokens = tokens(Input),
    parse_tokens(Tokens).

%% @spec parse_tokens([html_token()]) -> html_node()
%% @doc Build a HTML tree from the output of tokens(Doc). Every token in
%%      front of the first non-singleton start tag (doctype, processing
%%      instructions, ...) is dropped; that start tag becomes the root.
parse_tokens(Tokens) when is_list(Tokens) ->
    BeforeRoot = fun ({start_tag, _, _, false}) -> false;
                     (_) -> true
                 end,
    [{start_tag, RootTag, RootAttrs, false} | Following] =
        lists:dropwhile(BeforeRoot, Tokens),
    Root = norm({RootTag, RootAttrs}),
    {Tree, _} = tree(Following, [Root]),
    Tree.

%% @spec tokens(StringOrBinary) -> [html_token()]
%% @doc Turn UTF-8 HTML input into a token stream.
tokens(Input) ->
    Bin = iolist_to_binary(Input),
    tokens(Bin, #decoder{}, []).
%% @spec to_tokens(html_node()) -> [html_token()]
%% @doc Flatten a html_node() tree into its list of tokens.
to_tokens({Tag0}) ->
    to_tokens({Tag0, [], []});
to_tokens({Kind, _} = Token) when Kind =:= '=';
                                  Kind =:= doctype;
                                  Kind =:= comment ->
    %% Raw content, doctypes and comments already are tokens.
    [Token];
to_tokens({Tag0, Acc}) ->
    %% At the top level the second element of a pair is always content; the
    %% attribute-only shorthand {p, [{"class", "foo"}]} is only allowed in
    %% sub-tags.
    to_tokens({Tag0, [], Acc});
to_tokens({Tag0, Attrs, Acc}) ->
    Tag = to_tag(Tag0),
    Open = {start_tag, Tag, Attrs, is_singleton(Tag)},
    to_tokens([{Tag, Acc}], [Open]).

%% @spec to_html([html_token()] | html_node()) -> iolist()
%% @doc Render a token list, or a html_node() tree after converting it to
%%      tokens, as a HTML document.
to_html(Tokens) when is_list(Tokens) ->
    to_html(Tokens, []);
to_html(Node) when is_tuple(Node) ->
    to_html(to_tokens(Node), []).

%% @spec escape(string() | atom() | binary()) -> binary()
%% @doc Escape a string such that it's safe for HTML (amp; lt; gt;).
escape(Text) when is_list(Text) ->
    escape(Text, []);
escape(Text) when is_atom(Text) ->
    escape(atom_to_list(Text), []);
escape(Text) when is_binary(Text) ->
    escape(binary_to_list(Text), []).

%% @spec escape_attr(string() | binary() | atom() | integer() | float()) -> binary()
%% @doc Escape a string such that it's safe for HTML attrs
%%      (amp; lt; gt; quot;). Numbers are converted to text first.
escape_attr(Value) when is_list(Value) ->
    escape_attr(Value, []);
escape_attr(Value) when is_binary(Value) ->
    escape_attr(binary_to_list(Value), []);
escape_attr(Value) when is_atom(Value) ->
    escape_attr(atom_to_list(Value), []);
escape_attr(Value) when is_integer(Value) ->
    escape_attr(integer_to_list(Value), []);
escape_attr(Value) when is_float(Value) ->
    escape_attr(mochinum:digits(Value), []).
%% Render a list of tokens as an iolist. Acc holds the fragments rendered so
%% far, newest first.
to_html([], Acc) ->
    lists:reverse(Acc);
to_html([{'=', Content} | Rest], Acc) ->
    %% Inline HTML is emitted verbatim, without escaping.
    to_html(Rest, [Content | Acc]);
to_html([{pi, Tag, Attrs} | Rest], Acc) ->
    Open = [<<"<?">>,
            Tag,
            attrs_to_html(Attrs, []),
            <<"?>">>],
    to_html(Rest, [Open | Acc]);
to_html([{comment, Comment} | Rest], Acc) ->
    to_html(Rest, [[<<"<!--">>, Comment, <<"-->">>] | Acc]);
to_html([{doctype, Parts} | Rest], Acc) ->
    %% Start from an empty accumulator: seeding doctype_to_html/2 with Acc
    %% would copy everything rendered so far into the doctype declaration
    %% whenever the doctype is not the very first token.
    Inside = doctype_to_html(Parts, []),
    to_html(Rest, [[<<"<!DOCTYPE">>, Inside, <<">">>] | Acc]);
to_html([{data, Data, _Whitespace} | Rest], Acc) ->
    to_html(Rest, [escape(Data) | Acc]);
to_html([{start_tag, Tag, Attrs, Singleton} | Rest], Acc) ->
    Open = [<<"<">>,
            Tag,
            attrs_to_html(Attrs, []),
            case Singleton of
                true -> <<" />">>;
                false -> <<">">>
            end],
    to_html(Rest, [Open | Acc]);
to_html([{end_tag, Tag} | Rest], Acc) ->
    to_html(Rest, [[<<"</">>, Tag, <<">">>] | Acc]).

%% Render the words of a doctype declaration, each preceded by a space.
%% Words made up of ASCII letters and digits only are written bare; every
%% other word is attribute-escaped and wrapped in double quotes.
doctype_to_html([], Acc) ->
    lists:reverse(Acc);
doctype_to_html([Word | Rest], Acc) ->
    case lists:all(fun (C) -> ?IS_LITERAL_SAFE(C) end,
                   binary_to_list(iolist_to_binary(Word))) of
        true ->
            doctype_to_html(Rest, [[<<" ">>, Word] | Acc]);
        false ->
            doctype_to_html(Rest, [[<<" \"">>, escape_attr(Word), ?QUOTE] | Acc])
    end.

%% Render attributes as ` name="value"' fragments, in list order.
attrs_to_html([], Acc) ->
    lists:reverse(Acc);
attrs_to_html([{K, V} | Rest], Acc) ->
    attrs_to_html(Rest,
                  [[<<" ">>, escape(K), <<"=\"">>,
                    escape_attr(V), <<"\"">>] | Acc]).

%% Replace `<', `>' and `&' with their character entity references. Acc is
%% the output built so far in reverse, so each entity is pushed reversed
%% with lists:reverse/2.
escape([], Acc) ->
    list_to_binary(lists:reverse(Acc));
escape("<" ++ Rest, Acc) ->
    escape(Rest, lists:reverse("&lt;", Acc));
escape(">" ++ Rest, Acc) ->
    escape(Rest, lists:reverse("&gt;", Acc));
escape("&" ++ Rest, Acc) ->
    escape(Rest, lists:reverse("&amp;", Acc));
escape([C | Rest], Acc) ->
    escape(Rest, [C | Acc]).
+ +escape_attr([], Acc) -> + list_to_binary(lists:reverse(Acc)); +escape_attr("<" ++ Rest, Acc) -> + escape_attr(Rest, lists:reverse("<", Acc)); +escape_attr(">" ++ Rest, Acc) -> + escape_attr(Rest, lists:reverse(">", Acc)); +escape_attr("&" ++ Rest, Acc) -> + escape_attr(Rest, lists:reverse("&", Acc)); +escape_attr([?QUOTE | Rest], Acc) -> + escape_attr(Rest, lists:reverse(""", Acc)); +escape_attr([C | Rest], Acc) -> + escape_attr(Rest, [C | Acc]). + +to_tag(A) when is_atom(A) -> + norm(atom_to_list(A)); +to_tag(L) -> + norm(L). + +to_tokens([], Acc) -> + lists:reverse(Acc); +to_tokens([{Tag, []} | Rest], Acc) -> + to_tokens(Rest, [{end_tag, to_tag(Tag)} | Acc]); +to_tokens([{Tag0, [{T0} | R1]} | Rest], Acc) -> + %% Allow {br} + to_tokens([{Tag0, [{T0, [], []} | R1]} | Rest], Acc); +to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) -> + %% Allow {'=', iolist()} + to_tokens([{Tag0, R1} | Rest], [T0 | Acc]); +to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) -> + %% Allow {comment, iolist()} + to_tokens([{Tag0, R1} | Rest], [T0 | Acc]); +to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) -> + %% Allow {pi, binary(), list()} + to_tokens([{Tag0, R1} | Rest], [T0 | Acc]); +to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) -> + %% Allow {p, [{"class", "foo"}]} + to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc); +to_tokens([{Tag0, [{T0, C0} | R1]} | Rest], Acc) -> + %% Allow {p, "content"} and {p, <<"content">>} + to_tokens([{Tag0, [{T0, [], C0} | R1]} | Rest], Acc); +to_tokens([{Tag0, [{T0, A1, C0} | R1]} | Rest], Acc) when is_binary(C0) -> + %% Allow {"p", [{"class", "foo"}], <<"content">>} + to_tokens([{Tag0, [{T0, A1, binary_to_list(C0)} | R1]} | Rest], Acc); +to_tokens([{Tag0, [{T0, A1, C0=[C | _]} | R1]} | Rest], Acc) + when is_integer(C) -> + %% Allow {"p", [{"class", "foo"}], "content"} + to_tokens([{Tag0, [{T0, A1, [C0]} | R1]} | Rest], Acc); +to_tokens([{Tag0, [{T0, A1, C1} | R1]} | Rest], Acc) -> + %% Native {"p", [{"class", 
"foo"}], ["content"]} + Tag = to_tag(Tag0), + T1 = to_tag(T0), + case is_singleton(norm(T1)) of + true -> + to_tokens([{Tag, R1} | Rest], [{start_tag, T1, A1, true} | Acc]); + false -> + to_tokens([{T1, C1}, {Tag, R1} | Rest], + [{start_tag, T1, A1, false} | Acc]) + end; +to_tokens([{Tag0, [L | R1]} | Rest], Acc) when is_list(L) -> + %% List text + Tag = to_tag(Tag0), + to_tokens([{Tag, R1} | Rest], [{data, iolist_to_binary(L), false} | Acc]); +to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) -> + %% Binary text + Tag = to_tag(Tag0), + to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]). + +tokens(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary>> -> + lists:reverse(Acc); + _ -> + {Tag, S1} = tokenize(B, S), + case parse_flag(Tag) of + script -> + {Tag2, S2} = tokenize_script(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + textarea -> + {Tag2, S2} = tokenize_textarea(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + none -> + tokens(B, S1, [Tag | Acc]) + end + end. + +parse_flag({start_tag, B, _, false}) -> + case string:to_lower(binary_to_list(B)) of + "script" -> + script; + "textarea" -> + textarea; + _ -> + none + end; +parse_flag(_) -> + none. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, "", _/binary>> -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{comment, Raw}, ?ADV_COL(S, 3)}; + <<_:O/binary, C, _/binary>> -> + tokenize_comment(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{comment, Raw}, S} + end. + +tokenize_script(Bin, S=#decoder{offset=O}) -> + tokenize_script(Bin, S, O). 
+ +tokenize_script(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>> + when (SS =:= $s orelse SS =:= $S) andalso + (CC =:= $c orelse CC =:= $C) andalso + (RR =:= $r orelse RR =:= $R) andalso + (II =:= $i orelse II =:= $I) andalso + (PP =:= $p orelse PP =:= $P) andalso + (TT=:= $t orelse TT =:= $T) andalso + ?PROBABLE_CLOSE(ZZ) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_script(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. + +tokenize_textarea(Bin, S=#decoder{offset=O}) -> + tokenize_textarea(Bin, S, O). + +tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>> + when (TT =:= $t orelse TT =:= $T) andalso + (EE =:= $e orelse EE =:= $E) andalso + (XX =:= $x orelse XX =:= $X) andalso + (TT2 =:= $t orelse TT2 =:= $T) andalso + (AA =:= $a orelse AA =:= $A) andalso + (RR =:= $r orelse RR =:= $R) andalso + (EE2 =:= $e orelse EE2 =:= $E) andalso + (AA2 =:= $a orelse AA2 =:= $A) andalso + ?PROBABLE_CLOSE(ZZ) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_textarea(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +to_html_test() -> + ?assertEqual( + <<"hey!

what's up

sucka
RAW!">>, + iolist_to_binary( + to_html({html, [], + [{<<"head">>, [], + [{title, <<"hey!">>}]}, + {body, [], + [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]}, + {'div', <<"sucka">>}, + {'=', <<"RAW!">>}, + {comment, <<" comment! ">>}]}]}))), + ?assertEqual( + <<"">>, + iolist_to_binary( + to_html({doctype, + [<<"html">>, <<"PUBLIC">>, + <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>, + <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))), + ?assertEqual( + <<"">>, + iolist_to_binary( + to_html({<<"html">>,[], + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))), + ok. + +escape_test() -> + ?assertEqual( + <<"&quot;\"word ><<up!&quot;">>, + escape(<<""\"word ><>)), + ?assertEqual( + <<"&quot;\"word ><<up!&quot;">>, + escape(""\"word ><>, + escape('"\"word >< + ?assertEqual( + <<"&quot;"word ><<up!&quot;">>, + escape_attr(<<""\"word ><>)), + ?assertEqual( + <<"&quot;"word ><<up!&quot;">>, + escape_attr(""\"word ><>, + escape_attr('"\"word ><>, + escape_attr(12345)), + ?assertEqual( + <<"1.5">>, + escape_attr(1.5)), + ok. 
+ +tokens_test() -> + ?assertEqual( + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}], + tokens(<<"">>)), + ?assertEqual( + [{comment, <<"[if lt IE 7]>\n\n>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"textarea">>, [], false}, + {data, <<"">>, false}, + {end_tag, <<"textarea">>}], + tokens(<<"">>)), + ?assertEqual( + [{start_tag, <<"textarea">>, [], false}, + {data, <<"">>, false}], + tokens(<<"