From cce81d89f7630f7d9b95757419d4ab91faf8217e Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 31 Aug 2010 04:21:47 +0000 Subject: validate input strings containing escape characters, COUCHDB-875 git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@991073 13f79535-47bb-0310-9956-ffa450edef68 --- src/mochiweb/mochijson2.erl | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl index eeb25b5b..64cabc86 100644 --- a/src/mochiweb/mochijson2.erl +++ b/src/mochiweb/mochijson2.erl @@ -402,8 +402,22 @@ tokenize_string(B, S=#decoder{offset=O}, Acc) -> Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), tokenize_string(B, ?ADV_COL(S, 6), Acc1) end; - <<_:O/binary, C, _/binary>> -> - tokenize_string(B, ?INC_CHAR(S, C), [C | Acc]) + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) end. tokenize_number(B, S) -> @@ -648,7 +662,9 @@ input_validation_test() -> <>, <>, %% we don't support code points > 10FFFF per RFC 3629 - <> + <>, + %% escape characters trigger a different code path + <> ], lists:foreach( fun(X) -> -- cgit v1.2.3