diff options
Diffstat (limited to 'src/couchdb/cjson.erl')
-rw-r--r-- | src/couchdb/cjson.erl | 567 |
1 files changed, 0 insertions, 567 deletions
diff --git a/src/couchdb/cjson.erl b/src/couchdb/cjson.erl deleted file mode 100644 index 6e144c9e..00000000 --- a/src/couchdb/cjson.erl +++ /dev/null @@ -1,567 +0,0 @@ -%% @author Bob Ippolito <bob@mochimedia.com> -%% @copyright 2006 Mochi Media, Inc. -%% -%% Permission is hereby granted, free of charge, to any person -%% obtaining a copy of this software and associated documentation -%% files (the "Software"), to deal in the Software without restriction, -%% including without limitation the rights to use, copy, modify, merge, -%% publish, distribute, sublicense, and/or sell copies of the Software, -%% and to permit persons to whom the Software is furnished to do -%% so, subject to the following conditions: -%% -%% The above copyright notice and this permission notice shall be included -%% in all copies or substantial portions of the Software. -%% -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -%% IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -%% CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -%% TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -%% SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -%% @doc Yet another JSON (RFC 4627) library for Erlang. - --module(cjson). --author('bob@mochimedia.com'). --export([encoder/1, encode/1]). --export([decoder/1, decode/1]). --export([test/0]). - -% -% NOTE: This file was originally mochijson.erl and has been adapted for -% use with CouchDB. -% -% The changes are: -% {array, [...]} -% is now -% {...} -% and: -% {struct, [...]} -% is now -% {obj, [...]} -% - -% This is a macro to placate syntax highlighters.. --define(Q, $\"). --define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}). --define(INC_COL(S), S#decoder{column=1+S#decoder.column}). --define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}). - -%% @type iolist() = [char() | binary() | iolist()] -%% @type iodata() = iolist() | binary() -%% @type json_string() = atom | string() | binary() -%% @type json_number() = integer() | float() -%% @type json_array() = {json_term()} -%% @type json_object() = {struct, [{json_string(), json_term()}]} -%% @type json_term() = json_string() | json_number() | json_array() | -%% json_object() -%% @type encoding() = utf8 | unicode -%% @type encoder_option() = {input_encoding, encoding()} | -%% {handler, function()} -%% @type decoder_option() = {input_encoding, encoding()} | -%% {object_hook, function()} - --record(encoder, {input_encoding=utf8, - handler=null}). - --record(decoder, {input_encoding=utf8, - object_hook=null, - line=1, - column=1, - state=null}). - -%% @spec encoder([encoder_option()]) -> function() -%% @doc Create an encoder/1 with the given options. -encoder(Options) -> - State = parse_encoder_options(Options, #encoder{}), - fun (O) -> json_encode(O, State) end. - -%% @spec encode(json_term()) -> iolist() -%% @doc Encode the given as JSON to an iolist. -encode(Any) -> - json_encode(Any, #encoder{}). - -%% @spec decoder([decoder_option()]) -> function() -%% @doc Create a decoder/1 with the given options. -decoder(Options) -> - State = parse_decoder_options(Options, #decoder{}), - fun (O) -> json_decode(O, State) end. - -%% @spec decode(iolist()) -> json_term() -%% @doc Decode the given iolist to Erlang terms. -decode(S) -> - json_decode(S, #decoder{}). - -test() -> - test_all(). - -%% Internal API - -parse_encoder_options([], State) -> - State; -parse_encoder_options([{input_encoding, Encoding} | Rest], State) -> - parse_encoder_options(Rest, State#encoder{input_encoding=Encoding}); -parse_encoder_options([{handler, Handler} | Rest], State) -> - parse_encoder_options(Rest, State#encoder{handler=Handler}). - -parse_decoder_options([], State) -> - State; -parse_decoder_options([{input_encoding, Encoding} | Rest], State) -> - parse_decoder_options(Rest, State#decoder{input_encoding=Encoding}); -parse_decoder_options([{object_hook, Hook} | Rest], State) -> - parse_decoder_options(Rest, State#decoder{object_hook=Hook}). - - -format_float(F) -> - format_float1(lists:reverse(float_to_list(F)), []). - -format_float1([$0, $0, _, $e | Rest], []) -> - strip_zeros(Rest, []); -format_float1([Sign, $e | Rest], Acc) -> - strip_zeros(Rest, [$e, Sign | Acc]); -format_float1([C | Rest], Acc) -> - format_float1(Rest, [C | Acc]). - -strip_zeros(L=[$0, $. | _], Acc) -> - lists:reverse(L, Acc); -strip_zeros([$0 | Rest], Acc) -> - strip_zeros(Rest, Acc); -strip_zeros(L, Acc) -> - lists:reverse(L, Acc). - -json_encode(true, _State) -> - "true"; -json_encode(false, _State) -> - "false"; -json_encode(null, _State) -> - "null"; -json_encode(I, _State) when is_integer(I) -> - integer_to_list(I); -json_encode(F, _State) when is_float(F) -> - format_float(F); -json_encode(L, State) when is_list(L); is_binary(L); is_atom(L) -> - json_encode_string(L, State); -json_encode({obj, Props}, State) when is_list(Props) -> - json_encode_proplist(Props, State); -json_encode(Array, State) when is_tuple(Array) -> - json_encode_array(Array, State); -json_encode(Bad, #encoder{handler=null}) -> - exit({json_encode, {bad_term, Bad}}); -json_encode(Bad, State=#encoder{handler=Handler}) -> - json_encode(Handler(Bad), State). - -json_encode_array({}, _State) -> - "[]"; -json_encode_array(Tuple, State) -> - F = fun (O, Acc) -> - [$,, json_encode(O, State) | Acc] - end, - [$, | Acc1] = lists:foldl(F, "[", tuple_to_list(Tuple)), - lists:reverse([$\] | Acc1]). - -json_encode_proplist([], _State) -> - "{}"; -json_encode_proplist(Props, State) -> - F = fun ({K, V}, Acc) -> - KS = case K of - K when is_atom(K) -> - json_encode_string_utf8(atom_to_list(K), [?Q]); - K when is_integer(K) -> - json_encode_string(integer_to_list(K), State); - K when is_list(K); is_binary(K) -> - json_encode_string(K, State) - end, - VS = json_encode(V, State), - [$,, VS, $:, KS | Acc] - end, - [$, | Acc1] = lists:foldl(F, "{", Props), - lists:reverse([$\} | Acc1]). - -json_encode_string(A, _State) when is_atom(A) -> - json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A)), [?Q]); -json_encode_string(B, _State) when is_binary(B) -> - json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q]); -json_encode_string(S, #encoder{input_encoding=utf8}) -> - json_encode_string_utf8(S, [?Q]); -json_encode_string(S, #encoder{input_encoding=unicode}) -> - json_encode_string_unicode(S, [?Q]). - -json_encode_string_utf8([], Acc) -> - lists:reverse([$\" | Acc]); -json_encode_string_utf8(All=[C | Cs], Acc) -> - case C of - C when C >= 16#7f -> - json_encode_string_unicode(xmerl_ucs:from_utf8(All), Acc); - _ -> - Acc1 = case C of - ?Q -> - [?Q, $\\ | Acc]; - $/ -> - [$/, $\\ | Acc]; - $\\ -> - [$\\, $\\ | Acc]; - $\b -> - [$b, $\\ | Acc]; - $\f -> - [$f, $\\ | Acc]; - $\n -> - [$n, $\\ | Acc]; - $\r -> - [$r, $\\ | Acc]; - $\t -> - [$t, $\\ | Acc]; - C when C >= 0, C < $\s -> - [unihex(C) | Acc]; - C when C >= $\s -> - [C | Acc]; - _ -> - exit({json_encode, {bad_char, C}}) - end, - json_encode_string_utf8(Cs, Acc1) - end. - -json_encode_string_unicode([], Acc) -> - lists:reverse([$\" | Acc]); -json_encode_string_unicode([C | Cs], Acc) -> - Acc1 = case C of - ?Q -> - [?Q, $\\ | Acc]; - $/ -> - [$/, $\\ | Acc]; - $\\ -> - [$\\, $\\ | Acc]; - $\b -> - [$b, $\\ | Acc]; - $\f -> - [$f, $\\ | Acc]; - $\n -> - [$n, $\\ | Acc]; - $\r -> - [$r, $\\ | Acc]; - $\t -> - [$t, $\\ | Acc]; - C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> - [unihex(C) | Acc]; - C when C < 16#7f -> - [C | Acc]; - _ -> - exit({json_encode, {bad_char, C}}) - end, - json_encode_string_unicode(Cs, Acc1). - -dehex(C) when C >= $0, C =< $9 -> - C - $0; -dehex(C) when C >= $a, C =< $f -> - C - $a + 10; -dehex(C) when C >= $A, C =< $F -> - C - $A + 10. - -hexdigit(C) when C >= 0, C =< 9 -> - C + $0; -hexdigit(C) when C =< 15 -> - C + $a - 10. - -unihex(C) when C < 16#10000 -> - <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, - Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], - [$\\, $u | Digits]; -unihex(C) when C =< 16#10FFFF -> - N = C - 16#10000, - S1 = 16#d800 bor ((N bsr 10) band 16#3ff), - S2 = 16#dc00 bor (N band 16#3ff), - [unihex(S1), unihex(S2)]. - -json_decode(B, S) when is_binary(B) -> - json_decode([B], S); -json_decode(L, S) -> - {Res, L1, S1} = decode1(L, S), - {eof, [], _} = tokenize(L1, S1#decoder{state=trim}), - Res. - -decode1(L, S=#decoder{state=null}) -> - case tokenize(L, S#decoder{state=any}) of - {{const, C}, L1, S1} -> - {C, L1, S1}; - {start_array, L1, S1} -> - decode_array(L1, S1#decoder{state=any}, []); - {start_object, L1, S1} -> - decode_object(L1, S1#decoder{state=key}, []) - end. - -make_object(V, #decoder{object_hook=null}) -> - V; -make_object(V, #decoder{object_hook=Hook}) -> - Hook(V). - -decode_object(L, S=#decoder{state=key}, Acc) -> - case tokenize(L, S) of - {end_object, Rest, S1} -> - V = make_object({obj, lists:reverse(Acc)}, S1), - {V, Rest, S1#decoder{state=null}}; - {{const, K}, Rest, S1} when is_list(K) -> - {colon, L2, S2} = tokenize(Rest, S1), - {V, L3, S3} = decode1(L2, S2#decoder{state=null}), - decode_object(L3, S3#decoder{state=comma}, [{K, V} | Acc]) - end; -decode_object(L, S=#decoder{state=comma}, Acc) -> - case tokenize(L, S) of - {end_object, Rest, S1} -> - V = make_object({obj, lists:reverse(Acc)}, S1), - {V, Rest, S1#decoder{state=null}}; - {comma, Rest, S1} -> - decode_object(Rest, S1#decoder{state=key}, Acc) - end. - -decode_array(L, S=#decoder{state=any}, Acc) -> - case tokenize(L, S) of - {end_array, Rest, S1} -> - {list_to_tuple(lists:reverse(Acc)), Rest, S1#decoder{state=null}}; - {start_array, Rest, S1} -> - {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []), - decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]); - {start_object, Rest, S1} -> - {Array, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []), - decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]); - {{const, Const}, Rest, S1} -> - decode_array(Rest, S1#decoder{state=comma}, [Const | Acc]) - end; -decode_array(L, S=#decoder{state=comma}, Acc) -> - case tokenize(L, S) of - {end_array, Rest, S1} -> - {list_to_tuple(lists:reverse(Acc)), Rest, S1#decoder{state=null}}; - {comma, Rest, S1} -> - decode_array(Rest, S1#decoder{state=any}, Acc) - end. - -tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc) - when is_list(C); is_binary(C); C >= 16#7f -> - List = xmerl_ucs:from_utf8(list_to_binary(lists:flatten(IoList))), - tokenize_string(List, S#decoder{input_encoding=unicode}, Acc); -tokenize_string("\"" ++ Rest, S, Acc) -> - {lists:reverse(Acc), Rest, ?INC_COL(S)}; -tokenize_string("\\\"" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]); -tokenize_string("\\\\" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]); -tokenize_string("\\/" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]); -tokenize_string("\\b" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]); -tokenize_string("\\f" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]); -tokenize_string("\\n" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]); -tokenize_string("\\r" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]); -tokenize_string("\\t" ++ Rest, S, Acc) -> - tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]); -tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) -> - % coalesce UTF-16 surrogate pair? - C = dehex(C0) bor - (dehex(C1) bsl 4) bor - (dehex(C2) bsl 8) bor - (dehex(C3) bsl 12), - tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]); -tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF -> - tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]). - -tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc) - when is_list(C); is_binary(C); C >= 16#7f -> - List = xmerl_ucs:from_utf8(list_to_binary(lists:flatten(IoList))), - tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc); -tokenize_number([$- | Rest], sign, S, []) -> - tokenize_number(Rest, int, ?INC_COL(S), [$-]); -tokenize_number(Rest, sign, S, []) -> - tokenize_number(Rest, int, S, []); -tokenize_number([$0 | Rest], int, S, Acc) -> - tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]); -tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 -> - tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]); -tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 -> - tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]); -tokenize_number(Rest, int1, S, Acc) -> - tokenize_number(Rest, frac, S, Acc); -tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 -> - tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); -tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E -> - tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]); -tokenize_number(Rest, frac, S, Acc) -> - {{int, lists:reverse(Acc)}, Rest, S}; -tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 -> - tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]); -tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E -> - tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]); -tokenize_number(Rest, frac1, S, Acc) -> - {{float, lists:reverse(Acc)}, Rest, S}; -tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ -> - tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]); -tokenize_number(Rest, esign, S, Acc) -> - tokenize_number(Rest, eint, S, Acc); -tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 -> - tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]); -tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 -> - tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]); -tokenize_number(Rest, eint1, S, Acc) -> - {{float, lists:reverse(Acc)}, Rest, S}. - -tokenize([], S=#decoder{state=trim}) -> - {eof, [], S}; -tokenize([L | Rest], S) when is_list(L) -> - tokenize(L ++ Rest, S); -tokenize([B | Rest], S) when is_binary(B) -> - tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S#decoder{input_encoding=unicode}); -tokenize("\r\n" ++ Rest, S) -> - tokenize(Rest, ?INC_LINE(S)); -tokenize("\n" ++ Rest, S) -> - tokenize(Rest, ?INC_LINE(S)); -tokenize([C | Rest], S) when C == $\s; C == $\t -> - tokenize(Rest, ?INC_COL(S)); -tokenize("{" ++ Rest, S) -> - {start_object, Rest, ?INC_COL(S)}; -tokenize("}" ++ Rest, S) -> - {end_object, Rest, ?INC_COL(S)}; -tokenize("[" ++ Rest, S) -> - {start_array, Rest, ?INC_COL(S)}; -tokenize("]" ++ Rest, S) -> - {end_array, Rest, ?INC_COL(S)}; -tokenize("," ++ Rest, S) -> - {comma, Rest, ?INC_COL(S)}; -tokenize(":" ++ Rest, S) -> - {colon, Rest, ?INC_COL(S)}; -tokenize("null" ++ Rest, S) -> - {{const, null}, Rest, ?ADV_COL(S, 4)}; -tokenize("true" ++ Rest, S) -> - {{const, true}, Rest, ?ADV_COL(S, 4)}; -tokenize("false" ++ Rest, S) -> - {{const, false}, Rest, ?ADV_COL(S, 5)}; -tokenize("\"" ++ Rest, S) -> - {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []), - {{const, xmerl_ucs:to_utf8(String)}, Rest1, S1}; -tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- -> - case tokenize_number(L, sign, S, []) of - {{int, Int}, Rest, S1} -> - {{const, list_to_integer(Int)}, Rest, S1}; - {{float, Float}, Rest, S1} -> - {{const, list_to_float(Float)}, Rest, S1} - end. - -%% testing constructs borrowed from the Yaws JSON implementation. - -%% Create an object from a list of Key/Value pairs. - -obj_new() -> - {obj, []}. - -is_obj({obj, Props}) -> - F = fun ({K, _}) when is_list(K) -> - true; - (_) -> - false - end, - lists:all(F, Props). - -obj_from_list(Props) -> - Obj = {obj, Props}, - case is_obj(Obj) of - true -> Obj; - false -> exit(json_bad_object) - end. - -%% Test for equivalence of Erlang terms. -%% Due to arbitrary order of construction, equivalent objects might -%% compare unequal as erlang terms, so we need to carefully recurse -%% through aggregates (tuples and objects). - -equiv({obj, Props1}, {obj, Props2}) -> - equiv_object(Props1, Props2); -equiv(T1, T2) when is_tuple(T1), is_tuple(T2) -> - equiv_list(tuple_to_list(T1), tuple_to_list(T2)); -equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; -equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2; -equiv(true, true) -> true; -equiv(false, false) -> true; -equiv(null, null) -> true. - -%% Object representation and traversal order is unknown. -%% Use the sledgehammer and sort property lists. - -equiv_object(Props1, Props2) -> - L1 = lists:keysort(1, Props1), - L2 = lists:keysort(1, Props2), - Pairs = lists:zip(L1, L2), - true = lists:all(fun({{K1, V1}, {K2, V2}}) -> - equiv(K1, K2) and equiv(V1, V2) - end, Pairs). - -%% Recursively compare tuple elements for equivalence. - -equiv_list([], []) -> - true; -equiv_list([V1 | L1], [V2 | L2]) -> - case equiv(V1, V2) of - true -> - equiv_list(L1, L2); - false -> - false - end. - -test_all() -> - test_one(e2j_test_vec(utf8), 1). - -test_one([], N) -> - io:format("~p tests passed~n", [N-1]), - ok; -test_one([{E, J} | Rest], N) -> - io:format("[~p] ~p ~p~n", [N, E, J]), - true = equiv(E, decode(J)), - true = equiv(E, decode(encode(E))), - test_one(Rest, 1+N). - -e2j_test_vec(unicode) -> - [ - {"foo" ++ [500] ++ "bar", [$", $f, $o, $o, 500, $b, $a, $r, $"]} - ]; -e2j_test_vec(utf8) -> - [ - {1, "1"}, - {3.1416, "3.14160"}, % text representation may truncate, trail zeroes - {-1, "-1"}, - {-3.1416, "-3.14160"}, - {12.0e10, "1.20000e+11"}, - {1.234E+10, "1.23400e+10"}, - {-1.234E-10, "-1.23400e-10"}, - {10.0, "1.0e+01"}, - {123.456, "1.23456E+2"}, - {10.0, "1e1"}, - {"foo", "\"foo\""}, - {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""}, - {"", "\"\""}, - {[], "\"\""}, - {"\n\n\n", "\"\\n\\n\\n\""}, - {obj_new(), "{}"}, - {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"}, - {obj_from_list([{"foo", "bar"}, {"baz", 123}]), - "{\"foo\":\"bar\",\"baz\":123}"}, - {{}, "[]"}, - {{{}}, "[[]]"}, - {{1, "foo"}, "[1,\"foo\"]"}, - - % json array in a json object - {obj_from_list([{"foo", {123}}]), - "{\"foo\":[123]}"}, - - % json object in a json object - {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]), - "{\"foo\":{\"bar\":true}}"}, - - % fold evaluation order - {obj_from_list([{"foo", {}}, - {"bar", obj_from_list([{"baz", true}])}, - {"alice", "bob"}]), - "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, - - % json object in a json array - {{-123, "foo", obj_from_list([{"bar", {}}]), null}, - "[-123,\"foo\",{\"bar\":[]},null]"} - ]. |