%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2006 Mochi Media, Inc.
%%
%% Permission is hereby granted, free of charge, to any person
%% obtaining a copy of this software and associated documentation
%% files (the "Software"), to deal in the Software without restriction,
%% including without limitation the rights to use, copy, modify, merge,
%% publish, distribute, sublicense, and/or sell copies of the Software,
%% and to permit persons to whom the Software is furnished to do
%% so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be included
%% in all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
%% IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
%% CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
%% TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
%% SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

%% @doc Yet another JSON (RFC 4627) library for Erlang.

-module(cjson).
-author('bob@mochimedia.com').
-export([encoder/1, encode/1]).
-export([decoder/1, decode/1]).
-export([test/0]).

%
% NOTE: This file was originally mochijson.erl and has been adapted for
% use with CouchDB.
%
% The changes are:
%   {array, [...]}
% is now
%   {...}
% and:
%   {struct, [...]}
% is now
%   {obj, [...]}
%

% This is a macro to placate syntax highlighters..
-define(Q, $\").
-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}).
-define(INC_COL(S), S#decoder{column=1+S#decoder.column}).
-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}).

%% @type iolist() = [char() | binary() | iolist()]
%% @type iodata() = iolist() | binary()
%% @type json_string() = atom | string() | binary()
%% @type json_number() = integer() | float()
%% @type json_array() = {json_term()}
%% @type json_object() = {struct, [{json_string(), json_term()}]}
%% @type json_term() = json_string() | json_number() | json_array() |
%%                     json_object()
%% @type encoding() = utf8 | unicode
%% @type encoder_option() = {input_encoding, encoding()} |
%%                          {handler, function()}
%% @type decoder_option() = {input_encoding, encoding()} |
%%                          {object_hook, function()}

-record(encoder, {input_encoding=utf8,
          handler=null}).

-record(decoder, {input_encoding=utf8,
          object_hook=null,
          line=1,
          column=1,
          state=null}).

%% @spec encoder([encoder_option()]) -> function()
%% @doc Create an encoder/1 with the given options.
encoder(Options) ->
    State = parse_encoder_options(Options, #encoder{}),
    fun (O) -> json_encode(O, State) end.

%% @spec encode(json_term()) -> iolist()
%% @doc Encode the given as JSON to an iolist.
encode(Any) ->
    json_encode(Any, #encoder{}).

%% @spec decoder([decoder_option()]) -> function()
%% @doc Create a decoder/1 with the given options.
decoder(Options) ->
    State = parse_decoder_options(Options, #decoder{}),
    fun (O) -> json_decode(O, State) end.

%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist to Erlang terms.
decode(S) ->
    json_decode(S, #decoder{}).

test() ->
    test_all().

%% Internal API

parse_encoder_options([], State) ->
    State;
parse_encoder_options([{input_encoding, Encoding} | Rest], State) ->
    parse_encoder_options(Rest, State#encoder{input_encoding=Encoding});
parse_encoder_options([{handler, Handler} | Rest], State) ->
    parse_encoder_options(Rest, State#encoder{handler=Handler}).

parse_decoder_options([], State) ->
    State;
parse_decoder_options([{input_encoding, Encoding} | Rest], State) ->
    parse_decoder_options(Rest, State#decoder{input_encoding=Encoding});
parse_decoder_options([{object_hook, Hook} | Rest], State) ->
    parse_decoder_options(Rest, State#decoder{object_hook=Hook}).


format_float(F) ->
    format_float1(lists:reverse(float_to_list(F)), []).

format_float1([$0, $0, _, $e | Rest], []) ->
    strip_zeros(Rest, []);
format_float1([Sign, $e | Rest], Acc) ->
    strip_zeros(Rest, [$e, Sign | Acc]);
format_float1([C | Rest], Acc) ->
    format_float1(Rest, [C | Acc]).

strip_zeros(L=[$0, $. | _], Acc) ->
    lists:reverse(L, Acc);
strip_zeros([$0 | Rest], Acc) ->
    strip_zeros(Rest, Acc);
strip_zeros(L, Acc) ->
    lists:reverse(L, Acc).

json_encode(true, _State) ->
    "true";
json_encode(false, _State) ->
    "false";
json_encode(null, _State) ->
    "null";
json_encode(I, _State) when is_integer(I) ->
    integer_to_list(I);
json_encode(F, _State) when is_float(F) ->
    format_float(F);
json_encode(L, State) when is_list(L); is_binary(L); is_atom(L) ->
    json_encode_string(L, State);
json_encode({obj, Props}, State) when is_list(Props) ->
    json_encode_proplist(Props, State);
json_encode(Array, State) when is_tuple(Array) ->
    json_encode_array(Array, State);
json_encode(Bad, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Bad}});
json_encode(Bad, State=#encoder{handler=Handler}) ->
    json_encode(Handler(Bad), State).

json_encode_array({}, _State) ->
    "[]";
json_encode_array(Tuple, State) ->
    F = fun (O, Acc) ->
        [$,, json_encode(O, State) | Acc]
    end,
    [$, | Acc1] = lists:foldl(F, "[", tuple_to_list(Tuple)),
    lists:reverse([$\] | Acc1]).

json_encode_proplist([], _State) ->
    "{}";
json_encode_proplist(Props, State) ->
    F = fun ({K, V}, Acc) ->
        KS = case K of
             K when is_atom(K) ->
                 json_encode_string_utf8(atom_to_list(K), [?Q]);
             K when is_integer(K) ->
                 json_encode_string(integer_to_list(K), State);
             K when is_list(K); is_binary(K) ->
                 json_encode_string(K, State)
             end,
        VS = json_encode(V, State),
        [$,, VS, $:, KS | Acc]
    end,
    [$, | Acc1] = lists:foldl(F, "{", Props),
    lists:reverse([$\} | Acc1]).

json_encode_string(A, _State) when is_atom(A) ->
    json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A)), [?Q]);
json_encode_string(B, _State) when is_binary(B) ->
    json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q]);
json_encode_string(S, #encoder{input_encoding=utf8}) ->
    json_encode_string_utf8(S, [?Q]);
json_encode_string(S, #encoder{input_encoding=unicode}) ->
    json_encode_string_unicode(S, [?Q]).

json_encode_string_utf8([], Acc) ->
    lists:reverse([$\" | Acc]);
json_encode_string_utf8(All=[C | Cs], Acc) ->
    case C of
    C when C >= 16#7f ->
        json_encode_string_unicode(xmerl_ucs:from_utf8(All), Acc);
    _ ->
        Acc1 = case C of
               ?Q ->
               [?Q, $\\ | Acc];
               $/ ->
               [$/, $\\ | Acc];
               $\\ ->
               [$\\, $\\ | Acc];
               $\b ->
               [$b, $\\ | Acc];
               $\f ->
               [$f, $\\ | Acc];
               $\n ->
               [$n, $\\ | Acc];
               $\r ->
               [$r, $\\ | Acc];
               $\t ->
               [$t, $\\ | Acc];
               C when C >= 0, C < $\s ->
               [unihex(C) | Acc];
               C when C >= $\s ->
               [C | Acc];
               _ ->
               exit({json_encode, {bad_char, C}})
           end,
        json_encode_string_utf8(Cs, Acc1)
    end.

json_encode_string_unicode([], Acc) ->
    lists:reverse([$\" | Acc]);
json_encode_string_unicode([C | Cs], Acc) ->
    Acc1 = case C of
           ?Q ->
           [?Q, $\\ | Acc];
           $/ ->
           [$/, $\\ | Acc];
           $\\ ->
           [$\\, $\\ | Acc];
           $\b ->
           [$b, $\\ | Acc];
           $\f ->
           [$f, $\\ | Acc];
           $\n ->
           [$n, $\\ | Acc];
           $\r ->
           [$r, $\\ | Acc];
           $\t ->
           [$t, $\\ | Acc];
           C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
           [unihex(C) | Acc];
           C when C < 16#7f ->
           [C | Acc];
           _ ->
           exit({json_encode, {bad_char, C}})
       end,
    json_encode_string_unicode(Cs, Acc1).

dehex(C) when C >= $0, C =< $9 ->
    C - $0;
dehex(C) when C >= $a, C =< $f ->
    C - $a + 10;
dehex(C) when C >= $A, C =< $F ->
    C - $A + 10.

hexdigit(C) when C >= 0, C =< 9 ->
    C + $0;
hexdigit(C) when C =< 15 ->
    C + $a - 10.

unihex(C) when C < 16#10000 ->
    <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
    Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
    [$\\, $u | Digits];
unihex(C) when C =< 16#10FFFF ->
    N = C - 16#10000,
    S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
    S2 = 16#dc00 bor (N band 16#3ff),
    [unihex(S1), unihex(S2)].

json_decode(B, S) when is_binary(B) ->
    json_decode([B], S);
json_decode(L, S) ->
    {Res, L1, S1} = decode1(L, S),
    {eof, [], _} = tokenize(L1, S1#decoder{state=trim}),
    Res.

decode1(L, S=#decoder{state=null}) ->
    case tokenize(L, S#decoder{state=any}) of
    {{const, C}, L1, S1} ->
        {C, L1, S1};
    {start_array, L1, S1} ->
        decode_array(L1, S1#decoder{state=any}, []);
    {start_object, L1, S1} ->
        decode_object(L1, S1#decoder{state=key}, [])
    end.

make_object(V, #decoder{object_hook=null}) ->
    V;
make_object(V, #decoder{object_hook=Hook}) ->
    Hook(V).

decode_object(L, S=#decoder{state=key}, Acc) ->
    case tokenize(L, S) of
    {end_object, Rest, S1} ->
        V = make_object({obj, lists:reverse(Acc)}, S1),
        {V, Rest, S1#decoder{state=null}};
    {{const, K}, Rest, S1} when is_list(K) ->
        {colon, L2, S2} = tokenize(Rest, S1),
        {V, L3, S3} = decode1(L2, S2#decoder{state=null}),
        decode_object(L3, S3#decoder{state=comma}, [{K, V} | Acc])
    end;
decode_object(L, S=#decoder{state=comma}, Acc) ->
    case tokenize(L, S) of
    {end_object, Rest, S1} ->
        V = make_object({obj, lists:reverse(Acc)}, S1),
        {V, Rest, S1#decoder{state=null}};
    {comma, Rest, S1} ->
        decode_object(Rest, S1#decoder{state=key}, Acc)
    end.

decode_array(L, S=#decoder{state=any}, Acc) ->
    case tokenize(L, S) of
    {end_array, Rest, S1} ->
        {list_to_tuple(lists:reverse(Acc)), Rest, S1#decoder{state=null}};
    {start_array, Rest, S1} ->
        {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []),
        decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
    {start_object, Rest, S1} ->
        {Array, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []),
        decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
    {{const, Const}, Rest, S1} ->
        decode_array(Rest, S1#decoder{state=comma}, [Const | Acc])
    end;
decode_array(L, S=#decoder{state=comma}, Acc) ->
    case tokenize(L, S) of
    {end_array, Rest, S1} ->
        {list_to_tuple(lists:reverse(Acc)), Rest, S1#decoder{state=null}};
    {comma, Rest, S1} ->
        decode_array(Rest, S1#decoder{state=any}, Acc)
    end.

tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(list_to_binary(lists:flatten(IoList))),
    tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
tokenize_string("\"" ++ Rest, S, Acc) ->
    {lists:reverse(Acc), Rest, ?INC_COL(S)};
tokenize_string("\\\"" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
tokenize_string("\\\\" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
tokenize_string("\\/" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
tokenize_string("\\b" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
tokenize_string("\\f" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
tokenize_string("\\n" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
tokenize_string("\\r" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
tokenize_string("\\t" ++ Rest, S, Acc) ->
    tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
    % coalesce UTF-16 surrogate pair?
    C = dehex(C0) bor
    (dehex(C1) bsl 4) bor
    (dehex(C2) bsl 8) bor
    (dehex(C3) bsl 12),
    tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]);
tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
    tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).

tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc)
  when is_list(C); is_binary(C); C >= 16#7f ->
    List = xmerl_ucs:from_utf8(list_to_binary(lists:flatten(IoList))),
    tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc);
tokenize_number([$- | Rest], sign, S, []) ->
    tokenize_number(Rest, int, ?INC_COL(S), [$-]);
tokenize_number(Rest, sign, S, []) ->
    tokenize_number(Rest, int, S, []);
tokenize_number([$0 | Rest], int, S, Acc) ->
    tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]);
tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, int1, S, Acc) ->
    tokenize_number(Rest, frac, S, Acc);
tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E ->
    tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
tokenize_number(Rest, frac, S, Acc) ->
    {{int, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]);
tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E ->
    tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]);
tokenize_number(Rest, frac1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S};
tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ ->
    tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, esign, S, Acc) ->
    tokenize_number(Rest, eint, S, Acc);
tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 ->
    tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
tokenize_number(Rest, eint1, S, Acc) ->
    {{float, lists:reverse(Acc)}, Rest, S}.

tokenize([], S=#decoder{state=trim}) ->
    {eof, [], S};
tokenize([L | Rest], S) when is_list(L) ->
    tokenize(L ++ Rest, S);
tokenize([B | Rest], S) when is_binary(B) ->
    tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S#decoder{input_encoding=unicode});
tokenize("\r\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize("\n" ++ Rest, S) ->
    tokenize(Rest, ?INC_LINE(S));
tokenize([C | Rest], S) when C == $\s; C == $\t ->
    tokenize(Rest, ?INC_COL(S));
tokenize("{" ++ Rest, S) ->
    {start_object, Rest, ?INC_COL(S)};
tokenize("}" ++ Rest, S) ->
    {end_object, Rest, ?INC_COL(S)};
tokenize("[" ++ Rest, S) ->
    {start_array, Rest, ?INC_COL(S)};
tokenize("]" ++ Rest, S) ->
    {end_array, Rest, ?INC_COL(S)};
tokenize("," ++ Rest, S) ->
    {comma, Rest, ?INC_COL(S)};
tokenize(":" ++ Rest, S) ->
    {colon, Rest, ?INC_COL(S)};
tokenize("null" ++ Rest, S) ->
    {{const, null}, Rest, ?ADV_COL(S, 4)};
tokenize("true" ++ Rest, S) ->
    {{const, true}, Rest, ?ADV_COL(S, 4)};
tokenize("false" ++ Rest, S) ->
    {{const, false}, Rest, ?ADV_COL(S, 5)};
tokenize("\"" ++ Rest, S) ->
    {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
    {{const, xmerl_ucs:to_utf8(String)}, Rest1, S1};
tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
    case tokenize_number(L, sign, S, []) of
    {{int, Int}, Rest, S1} ->
        {{const, list_to_integer(Int)}, Rest, S1};
    {{float, Float}, Rest, S1} ->
        {{const, list_to_float(Float)}, Rest, S1}
    end.

%% testing constructs borrowed from the Yaws JSON implementation.

%% Create an object from a list of Key/Value pairs.

obj_new() ->
    {obj, []}.

is_obj({obj, Props}) ->
    F = fun ({K, _}) when is_list(K) ->
        true;
        (_) ->
        false
    end,
    lists:all(F, Props).

obj_from_list(Props) ->
    Obj = {obj, Props},
    case is_obj(Obj) of
        true -> Obj;
        false -> exit(json_bad_object)
    end.

%% Test for equivalence of Erlang terms.
%% Due to arbitrary order of construction, equivalent objects might
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).

equiv({obj, Props1}, {obj, Props2}) ->
    equiv_object(Props1, Props2);
equiv(T1, T2) when is_tuple(T1), is_tuple(T2) ->
    equiv_list(tuple_to_list(T1), tuple_to_list(T2));
equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
equiv(true, true) -> true;
equiv(false, false) -> true;
equiv(null, null) -> true.

%% Object representation and traversal order is unknown.
%% Use the sledgehammer and sort property lists.

equiv_object(Props1, Props2) ->
    L1 = lists:keysort(1, Props1),
    L2 = lists:keysort(1, Props2),
    Pairs = lists:zip(L1, L2),
    true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
    equiv(K1, K2) and equiv(V1, V2)
    end, Pairs).

%% Recursively compare tuple elements for equivalence.

equiv_list([], []) ->
    true;
equiv_list([V1 | L1], [V2 | L2]) ->
    case equiv(V1, V2) of
    true ->
        equiv_list(L1, L2);
    false ->
        false
    end.

test_all() ->
    test_one(e2j_test_vec(utf8), 1).

test_one([], N) ->
    io:format("~p tests passed~n", [N-1]),
    ok;
test_one([{E, J} | Rest], N) ->
    io:format("[~p] ~p ~p~n", [N, E, J]),
    true = equiv(E, decode(J)),
    true = equiv(E, decode(encode(E))),
    test_one(Rest, 1+N).

e2j_test_vec(unicode) ->
    [
     {"foo" ++ [500] ++ "bar", [$", $f, $o, $o, 500, $b, $a, $r, $"]}
    ];
e2j_test_vec(utf8) ->
    [
    {1, "1"},
    {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
    {-1, "-1"},
    {-3.1416, "-3.14160"},
    {12.0e10, "1.20000e+11"},
    {1.234E+10, "1.23400e+10"},
    {-1.234E-10, "-1.23400e-10"},
    {10.0, "1.0e+01"},
    {123.456, "1.23456E+2"},
    {10.0, "1e1"},
    {"foo", "\"foo\""},
    {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
    {"", "\"\""},
    {[], "\"\""},
    {"\n\n\n", "\"\\n\\n\\n\""},
    {obj_new(), "{}"},
    {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
    {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
     "{\"foo\":\"bar\",\"baz\":123}"},
    {{}, "[]"},
    {{{}}, "[[]]"},
    {{1, "foo"}, "[1,\"foo\"]"},

    % json array in a json object
    {obj_from_list([{"foo", {123}}]),
     "{\"foo\":[123]}"},

    % json object in a json object
    {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
     "{\"foo\":{\"bar\":true}}"},

    % fold evaluation order
    {obj_from_list([{"foo", {}},
                     {"bar", obj_from_list([{"baz", true}])},
                     {"alice", "bob"}]),
     "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

    % json object in a json array
    {{-123, "foo", obj_from_list([{"bar", {}}]), null},
     "[-123,\"foo\",{\"bar\":[]},null]"}
    ].