From 219cd4db7e62878b3bd2cb537c73cb8acf04bf47 Mon Sep 17 00:00:00 2001
From: Adam Kocoloski
Date: Wed, 23 Sep 2009 20:43:35 +0000
Subject: reject attempts to upload invalid UTF-8 JSON. Closes COUCHDB-345

This patch requires JSON to be encoded using UTF-8. In the future we will
accept other encodings. Thanks Joan Touzet and James Dumay for the bug
reports and Curt Arnold for patches and discussion.

git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@818249 13f79535-47bb-0310-9956-ffa450edef68
---
 src/mochiweb/mochijson2.erl | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'src/mochiweb')

diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl
index 8b6adb1f..ee19458c 100644
--- a/src/mochiweb/mochijson2.erl
+++ b/src/mochiweb/mochijson2.erl
@@ -345,10 +345,24 @@ tokenize_string_fast(B, O) ->
     case B of
         <<_:O/binary, ?Q, _/binary>> ->
             O;
-        <<_:O/binary, C, _/binary>> when C =/= $\\ ->
+        <<_:O/binary, $\\, _/binary>> ->
+            {escape, O};
+        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
             tokenize_string_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            tokenize_string_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            tokenize_string_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            tokenize_string_fast(B, 4 + O);
         _ ->
-            {escape, O}
+            throw(invalid_utf8)
     end.
 
 tokenize_string(B, S=#decoder{offset=O}, Acc) ->
-- 
cgit v1.2.3