diff options
Diffstat (limited to 'src/mochiweb/mochijson2.erl')
-rw-r--r-- | src/mochiweb/mochijson2.erl | 225 |
1 files changed, 182 insertions, 43 deletions
diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl index 66f68bf0..64cabc86 100644 --- a/src/mochiweb/mochijson2.erl +++ b/src/mochiweb/mochijson2.erl @@ -9,7 +9,6 @@ -author('bob@mochimedia.com'). -export([encoder/1, encode/1]). -export([decoder/1, decode/1]). --export([test/0]). % This is a macro to placate syntax highlighters.. -define(Q, $\"). @@ -39,8 +38,9 @@ %% @type json_number() = integer() | float() %% @type json_array() = [json_term()] %% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} %% @type json_term() = json_string() | json_number() | json_array() | -%% json_object() +%% json_object() | json_iolist() -record(encoder, {handler=null, utf8=false}). @@ -75,9 +75,6 @@ decoder(Options) -> decode(S) -> json_decode(S, #decoder{}). -test() -> - test_all(). - %% Internal API parse_encoder_options([], State) -> @@ -98,11 +95,8 @@ json_encode(false, _State) -> <<"false">>; json_encode(null, _State) -> <<"null">>; -json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 -> - %% Anything outside of 32-bit integers should be encoded as a float - integer_to_list(I); json_encode(I, _State) when is_integer(I) -> - mochinum:digits(float(I)); + integer_to_list(I); json_encode(F, _State) when is_float(F) -> mochinum:digits(F); json_encode(S, State) when is_binary(S); is_atom(S) -> @@ -111,6 +105,8 @@ json_encode(Array, State) when is_list(Array) -> json_encode_array(Array, State); json_encode({struct, Props}, State) when is_list(Props) -> json_encode_proplist(Props, State); +json_encode({json, IoList}, _State) -> + IoList; json_encode(Bad, #encoder{handler=null}) -> exit({json_encode, {bad_term, Bad}}); json_encode(Bad, State=#encoder{handler=Handler}) -> @@ -205,12 +201,10 @@ json_bin_is_safe(<<C, Rest/binary>>) -> false; $\t -> false; - C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + C when C >= 0, C < $\s; C >= 16#7f -> false; C when C < 16#7f -> - json_bin_is_safe(Rest); - _ -> - false + json_bin_is_safe(Rest) end. json_encode_string_unicode([], _State, Acc) -> @@ -408,8 +402,22 @@ tokenize_string(B, S=#decoder{offset=O}, Acc) -> Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), tokenize_string(B, ?ADV_COL(S, 6), Acc1) end; - <<_:O/binary, C, _/binary>> -> - tokenize_string(B, ?INC_CHAR(S, C), [C | Acc]) + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) end. tokenize_number(B, S) -> @@ -510,6 +518,12 @@ tokenize(B, S=#decoder{offset=O}) -> trim = S#decoder.state, {eof, S} end. +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + %% testing constructs borrowed from the Yaws JSON implementation. @@ -519,19 +533,13 @@ obj_new() -> {struct, []}. is_obj({struct, Props}) -> - F = fun ({K, _}) when is_binary(K) -> - true; - (_) -> - false - end, + F = fun ({K, _}) when is_binary(K) -> true end, lists:all(F, Props). obj_from_list(Props) -> Obj = {struct, Props}, - case is_obj(Obj) of - true -> Obj; - false -> exit({json_bad_object, Obj}) - end. + ?assert(is_obj(Obj)), + Obj. %% Test for equivalence of Erlang terms. %% Due to arbitrary order of construction, equivalent objects might @@ -544,9 +552,7 @@ equiv(L1, L2) when is_list(L1), is_list(L2) -> equiv_list(L1, L2); equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; -equiv(true, true) -> true; -equiv(false, false) -> true; -equiv(null, null) -> true. +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. %% Object representation and traversal order is unknown. %% Use the sledgehammer and sort property lists. @@ -566,11 +572,11 @@ equiv_list([], []) -> equiv_list([V1 | L1], [V2 | L2]) -> equiv(V1, V2) andalso equiv_list(L1, L2). -test_all() -> +decode_test() -> [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), - <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]), - test_encoder_utf8(), - test_input_validation(), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> test_one(e2j_test_vec(utf8), 1). test_one([], _N) -> @@ -627,7 +633,7 @@ e2j_test_vec(utf8) -> ]. %% test utf8 encoding -test_encoder_utf8() -> +encoder_utf8_test() -> %% safe conversion case (default) [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = encode(<<1,"\321\202\320\265\321\201\321\202">>), @@ -637,11 +643,11 @@ test_encoder_utf8() -> [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = Enc(<<1,"\321\202\320\265\321\201\321\202">>). -test_input_validation() -> +input_validation_test() -> Good = [ - {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound - {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro - {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius ], lists:foreach(fun({CodePoint, UTF8}) -> Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), @@ -649,15 +655,148 @@ test_input_validation() -> end, Good), Bad = [ - % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte <<?Q, 16#80, ?Q>>, - % missing continuations, last byte in each should be 80-BF + %% missing continuations, last byte in each should be 80-BF <<?Q, 16#C2, 16#7F, ?Q>>, <<?Q, 16#E0, 16#80,16#7F, ?Q>>, <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, - % we don't support code points > 10FFFF per RFC 3629 - <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>> + %% we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, + %% escape characters trigger a different code path + <<?Q, $\\, $\n, 16#80, ?Q>> ], - lists:foreach(fun(X) -> - ok = try decode(X) catch invalid_utf8 -> ok end - end, Bad). + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. + +big_unicode_test() -> + UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. + +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. + +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<<C>>)), + ?assertEqual(<<C>>, decode(encode(<<C>>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))), + ?assertEqual( + [16#0001d120], + xmerl_ucs:from_utf8( + binary_to_list( + decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))), + ?assertEqual( + false, + json_string_is_safe([16#110000])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{}}}}, + catch encode({})), + F = fun ({}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({}))), + ok. + +-endif. |