summary | refs | log | tree | commit | diff
path: root/src/mochiweb/mochijson2.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/mochiweb/mochijson2.erl')
-rw-r--r-- src/mochiweb/mochijson2.erl 102
1 file changed, 72 insertions, 30 deletions
diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl
index ee19458c..66f68bf0 100644
--- a/src/mochiweb/mochijson2.erl
+++ b/src/mochiweb/mochijson2.erl
@@ -42,7 +42,8 @@
%% @type json_term() = json_string() | json_number() | json_array() |
%% json_object()
--record(encoder, {handler=null}).
+-record(encoder, {handler=null,
+ utf8=false}).
-record(decoder, {object_hook=null,
offset=0,
@@ -52,6 +53,8 @@
%% @spec encoder([encoder_option()]) -> function()
%% @doc Create an encoder/1 with the given options.
+%% @type encoder_option() = handler_option() | utf8_option()
+%% @type utf8_option() = {utf8, boolean()}. Emit code points >= 16#7f as raw UTF-8 instead of \uXXXX escapes (default: false)
encoder(Options) ->
State = parse_encoder_options(Options, #encoder{}),
fun (O) -> json_encode(O, State) end.
@@ -70,10 +73,7 @@ decoder(Options) ->
%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist to Erlang terms.
decode(S) ->
- try json_decode(S, #decoder{})
- catch
- _:_ -> throw({invalid_json, S})
- end.
+ json_decode(S, #decoder{}).
test() ->
test_all().
@@ -83,7 +83,9 @@ test() ->
parse_encoder_options([], State) ->
State;
parse_encoder_options([{handler, Handler} | Rest], State) ->
- parse_encoder_options(Rest, State#encoder{handler=Handler}).
+ parse_encoder_options(Rest, State#encoder{handler=Handler});
+parse_encoder_options([{utf8, Switch} | Rest], State) ->
+ parse_encoder_options(Rest, State#encoder{utf8=Switch}).
parse_decoder_options([], State) ->
State;
@@ -96,15 +98,18 @@ json_encode(false, _State) ->
<<"false">>;
json_encode(null, _State) ->
<<"null">>;
-json_encode(I, _State) when is_integer(I) ->
+json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 ->
+ %% Anything outside of 32-bit integers should be encoded as a float
integer_to_list(I);
+json_encode(I, _State) when is_integer(I) ->
+ mochinum:digits(float(I));
json_encode(F, _State) when is_float(F) ->
mochinum:digits(F);
json_encode(S, State) when is_binary(S); is_atom(S) ->
json_encode_string(S, State);
json_encode(Array, State) when is_list(Array) ->
json_encode_array(Array, State);
-json_encode({Props}, State) when is_list(Props) ->
+json_encode({struct, Props}, State) when is_list(Props) ->
json_encode_proplist(Props, State);
json_encode(Bad, #encoder{handler=null}) ->
exit({json_encode, {bad_term, Bad}});
@@ -131,29 +136,29 @@ json_encode_proplist(Props, State) ->
[$, | Acc1] = lists:foldl(F, "{", Props),
lists:reverse([$\} | Acc1]).
-json_encode_string(A, _State) when is_atom(A) ->
+json_encode_string(A, State) when is_atom(A) ->
L = atom_to_list(A),
case json_string_is_safe(L) of
true ->
[?Q, L, ?Q];
false ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(L), [?Q])
+ json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
end;
-json_encode_string(B, _State) when is_binary(B) ->
+json_encode_string(B, State) when is_binary(B) ->
case json_bin_is_safe(B) of
true ->
[?Q, B, ?Q];
false ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q])
+ json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
end;
json_encode_string(I, _State) when is_integer(I) ->
[?Q, integer_to_list(I), ?Q];
-json_encode_string(L, _State) when is_list(L) ->
+json_encode_string(L, State) when is_list(L) ->
case json_string_is_safe(L) of
true ->
[?Q, L, ?Q];
false ->
- json_encode_string_unicode(L, [?Q])
+ json_encode_string_unicode(L, State, [?Q])
end.
json_string_is_safe([]) ->
@@ -208,9 +213,9 @@ json_bin_is_safe(<<C, Rest/binary>>) ->
false
end.
-json_encode_string_unicode([], Acc) ->
+json_encode_string_unicode([], _State, Acc) ->
lists:reverse([$\" | Acc]);
-json_encode_string_unicode([C | Cs], Acc) ->
+json_encode_string_unicode([C | Cs], State, Acc) ->
Acc1 = case C of
?Q ->
[?Q, $\\ | Acc];
@@ -236,14 +241,18 @@ json_encode_string_unicode([C | Cs], Acc) ->
[$r, $\\ | Acc];
$\t ->
[$t, $\\ | Acc];
- C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
+ C when C >= 0, C < $\s ->
+ [unihex(C) | Acc];
+ C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
+ [xmerl_ucs:to_utf8(C) | Acc];
+ C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
[unihex(C) | Acc];
C when C < 16#7f ->
[C | Acc];
_ ->
exit({json_encode, {bad_char, C}})
end,
- json_encode_string_unicode(Cs, Acc1).
+ json_encode_string_unicode(Cs, State, Acc1).
hexdigit(C) when C >= 0, C =< 9 ->
C + $0;
@@ -288,7 +297,7 @@ decode_object(B, S) ->
decode_object(B, S=#decoder{state=key}, Acc) ->
case tokenize(B, S) of
{end_object, S1} ->
- V = make_object({lists:reverse(Acc)}, S1),
+ V = make_object({struct, lists:reverse(Acc)}, S1),
{V, S1#decoder{state=null}};
{{const, K}, S1} ->
{colon, S2} = tokenize(B, S1),
@@ -298,7 +307,7 @@ decode_object(B, S=#decoder{state=key}, Acc) ->
decode_object(B, S=#decoder{state=comma}, Acc) ->
case tokenize(B, S) of
{end_object, S1} ->
- V = make_object({lists:reverse(Acc)}, S1),
+ V = make_object({struct, lists:reverse(Acc)}, S1),
{V, S1#decoder{state=null}};
{comma, S1} ->
decode_object(B, S1#decoder{state=key}, Acc)
@@ -507,9 +516,9 @@ tokenize(B, S=#decoder{offset=O}) ->
%% Create an object from a list of Key/Value pairs.
obj_new() ->
- {[]}.
+ {struct, []}.
-is_obj({Props}) ->
+is_obj({struct, Props}) ->
F = fun ({K, _}) when is_binary(K) ->
true;
(_) ->
@@ -518,7 +527,7 @@ is_obj({Props}) ->
lists:all(F, Props).
obj_from_list(Props) ->
- Obj = {Props},
+ Obj = {struct, Props},
case is_obj(Obj) of
true -> Obj;
false -> exit({json_bad_object, Obj})
@@ -529,7 +538,7 @@ obj_from_list(Props) ->
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).
-equiv({Props1}, {Props2}) ->
+equiv({struct, Props1}, {struct, Props2}) ->
equiv_object(Props1, Props2);
equiv(L1, L2) when is_list(L1), is_list(L2) ->
equiv_list(L1, L2);
@@ -555,16 +564,13 @@ equiv_object(Props1, Props2) ->
equiv_list([], []) ->
true;
equiv_list([V1 | L1], [V2 | L2]) ->
- case equiv(V1, V2) of
- true ->
- equiv_list(L1, L2);
- false ->
- false
- end.
+ equiv(V1, V2) andalso equiv_list(L1, L2).
test_all() ->
[1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
<<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]),
+ test_encoder_utf8(),
+ test_input_validation(),
test_one(e2j_test_vec(utf8), 1).
test_one([], _N) ->
@@ -619,3 +625,39 @@ e2j_test_vec(utf8) ->
{[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
"[-123,\"foo\",{\"bar\":[]},null]"}
].
+
+%% test utf8 encoding
+test_encoder_utf8() ->
+ %% safe conversion case (default)
+ [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
+ encode(<<1,"\321\202\320\265\321\201\321\202">>),
+
+ %% raw utf8 output (optional)
+ Enc = mochijson2:encoder([{utf8, true}]),
+ [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
+ Enc(<<1,"\321\202\320\265\321\201\321\202">>).
+
+test_input_validation() ->
+ Good = [
+ {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound
+ {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro
+ {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius
+ ],
+ lists:foreach(fun({CodePoint, UTF8}) ->
+ Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
+ Expect = decode(UTF8)
+ end, Good),
+
+ Bad = [
+ % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
+ <<?Q, 16#80, ?Q>>,
+ % missing continuations, last byte in each should be 80-BF
+ <<?Q, 16#C2, 16#7F, ?Q>>,
+ <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
+ <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
+ % we don't support code points > 10FFFF per RFC 3629
+ <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>
+ ],
+ lists:foreach(fun(X) ->
+ ok = try decode(X) catch invalid_utf8 -> ok end
+ end, Bad).