diff options
Diffstat (limited to 'src/mochiweb/mochijson2.erl')
-rw-r--r-- | src/mochiweb/mochijson2.erl | 102 |
1 files changed, 72 insertions, 30 deletions
diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl index ee19458c..66f68bf0 100644 --- a/src/mochiweb/mochijson2.erl +++ b/src/mochiweb/mochijson2.erl @@ -42,7 +42,8 @@ %% @type json_term() = json_string() | json_number() | json_array() | %% json_object() --record(encoder, {handler=null}). +-record(encoder, {handler=null, + utf8=false}). -record(decoder, {object_hook=null, offset=0, @@ -52,6 +53,8 @@ %% @spec encoder([encoder_option()]) -> function() %% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) encoder(Options) -> State = parse_encoder_options(Options, #encoder{}), fun (O) -> json_encode(O, State) end. @@ -70,10 +73,7 @@ decoder(Options) -> %% @spec decode(iolist()) -> json_term() %% @doc Decode the given iolist to Erlang terms. decode(S) -> - try json_decode(S, #decoder{}) - catch - _:_ -> throw({invalid_json, S}) - end. + json_decode(S, #decoder{}). test() -> test_all(). @@ -83,7 +83,9 @@ test() -> parse_encoder_options([], State) -> State; parse_encoder_options([{handler, Handler} | Rest], State) -> - parse_encoder_options(Rest, State#encoder{handler=Handler}). + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). parse_decoder_options([], State) -> State; @@ -96,15 +98,18 @@ json_encode(false, _State) -> <<"false">>; json_encode(null, _State) -> <<"null">>; -json_encode(I, _State) when is_integer(I) -> +json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 -> + %% Anything outside of 32-bit integers should be encoded as a float integer_to_list(I); +json_encode(I, _State) when is_integer(I) -> + mochinum:digits(float(I)); json_encode(F, _State) when is_float(F) -> mochinum:digits(F); json_encode(S, State) when is_binary(S); is_atom(S) -> json_encode_string(S, State); json_encode(Array, State) when is_list(Array) -> json_encode_array(Array, State); -json_encode({Props}, State) when is_list(Props) -> +json_encode({struct, Props}, State) when is_list(Props) -> json_encode_proplist(Props, State); json_encode(Bad, #encoder{handler=null}) -> exit({json_encode, {bad_term, Bad}}); @@ -131,29 +136,29 @@ json_encode_proplist(Props, State) -> [$, | Acc1] = lists:foldl(F, "{", Props), lists:reverse([$\} | Acc1]). -json_encode_string(A, _State) when is_atom(A) -> +json_encode_string(A, State) when is_atom(A) -> L = atom_to_list(A), case json_string_is_safe(L) of true -> [?Q, L, ?Q]; false -> - json_encode_string_unicode(xmerl_ucs:from_utf8(L), [?Q]) + json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) end; -json_encode_string(B, _State) when is_binary(B) -> +json_encode_string(B, State) when is_binary(B) -> case json_bin_is_safe(B) of true -> [?Q, B, ?Q]; false -> - json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q]) + json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) end; json_encode_string(I, _State) when is_integer(I) -> [?Q, integer_to_list(I), ?Q]; -json_encode_string(L, _State) when is_list(L) -> +json_encode_string(L, State) when is_list(L) -> case json_string_is_safe(L) of true -> [?Q, L, ?Q]; false -> - json_encode_string_unicode(L, [?Q]) + json_encode_string_unicode(L, State, [?Q]) end. json_string_is_safe([]) -> @@ -208,9 +213,9 @@ json_bin_is_safe(<<C, Rest/binary>>) -> false end. -json_encode_string_unicode([], Acc) -> +json_encode_string_unicode([], _State, Acc) -> lists:reverse([$\" | Acc]); -json_encode_string_unicode([C | Cs], Acc) -> +json_encode_string_unicode([C | Cs], State, Acc) -> Acc1 = case C of ?Q -> [?Q, $\\ | Acc]; @@ -236,14 +241,18 @@ json_encode_string_unicode([C | Cs], Acc) -> [$r, $\\ | Acc]; $\t -> [$t, $\\ | Acc]; - C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [xmerl_ucs:to_utf8(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> [unihex(C) | Acc]; C when C < 16#7f -> [C | Acc]; _ -> exit({json_encode, {bad_char, C}}) end, - json_encode_string_unicode(Cs, Acc1). + json_encode_string_unicode(Cs, State, Acc1). hexdigit(C) when C >= 0, C =< 9 -> C + $0; @@ -288,7 +297,7 @@ decode_object(B, S) -> decode_object(B, S=#decoder{state=key}, Acc) -> case tokenize(B, S) of {end_object, S1} -> - V = make_object({lists:reverse(Acc)}, S1), + V = make_object({struct, lists:reverse(Acc)}, S1), {V, S1#decoder{state=null}}; {{const, K}, S1} -> {colon, S2} = tokenize(B, S1), @@ -298,7 +307,7 @@ decode_object(B, S=#decoder{state=key}, Acc) -> decode_object(B, S=#decoder{state=comma}, Acc) -> case tokenize(B, S) of {end_object, S1} -> - V = make_object({lists:reverse(Acc)}, S1), + V = make_object({struct, lists:reverse(Acc)}, S1), {V, S1#decoder{state=null}}; {comma, S1} -> decode_object(B, S1#decoder{state=key}, Acc) @@ -507,9 +516,9 @@ tokenize(B, S=#decoder{offset=O}) -> %% Create an object from a list of Key/Value pairs. obj_new() -> - {[]}. + {struct, []}. -is_obj({Props}) -> +is_obj({struct, Props}) -> F = fun ({K, _}) when is_binary(K) -> true; (_) -> @@ -518,7 +527,7 @@ is_obj({Props}) -> lists:all(F, Props). obj_from_list(Props) -> - Obj = {Props}, + Obj = {struct, Props}, case is_obj(Obj) of true -> Obj; false -> exit({json_bad_object, Obj}) @@ -529,7 +538,7 @@ obj_from_list(Props) -> %% compare unequal as erlang terms, so we need to carefully recurse %% through aggregates (tuples and objects). -equiv({Props1}, {Props2}) -> +equiv({struct, Props1}, {struct, Props2}) -> equiv_object(Props1, Props2); equiv(L1, L2) when is_list(L1), is_list(L2) -> equiv_list(L1, L2); @@ -555,16 +564,13 @@ equiv_object(Props1, Props2) -> equiv_list([], []) -> true; equiv_list([V1 | L1], [V2 | L2]) -> - case equiv(V1, V2) of - true -> - equiv_list(L1, L2); - false -> - false - end. + equiv(V1, V2) andalso equiv_list(L1, L2). test_all() -> [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]), + test_encoder_utf8(), + test_input_validation(), test_one(e2j_test_vec(utf8), 1). test_one([], _N) -> @@ -619,3 +625,39 @@ e2j_test_vec(utf8) -> {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], "[-123,\"foo\",{\"bar\":[]},null]"} ]. + +%% test utf8 encoding +test_encoder_utf8() -> + %% safe conversion case (default) + [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = + encode(<<1,"\321\202\320\265\321\201\321\202">>), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = + Enc(<<1,"\321\202\320\265\321\201\321\202">>). + +test_input_validation() -> + Good = [ + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), + Expect = decode(UTF8) + end, Good), + + Bad = [ + % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <<?Q, 16#80, ?Q>>, + % missing continuations, last byte in each should be 80-BF + <<?Q, 16#C2, 16#7F, ?Q>>, + <<?Q, 16#E0, 16#80,16#7F, ?Q>>, + <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, + % we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>> + ], + lists:foreach(fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end + end, Bad). |