From 219cd4db7e62878b3bd2cb537c73cb8acf04bf47 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 23 Sep 2009 20:43:35 +0000 Subject: reject attempts to upload invalid UTF-8 JSON. Closes COUCHDB-345 This patch requires JSON to be encoded using UTF-8. In the future we will accept other encodings. Thanks Joan Touzet and James Dumay for the bug reports and Curt Arnold for patches and discussion. git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@818249 13f79535-47bb-0310-9956-ffa450edef68 --- THANKS | 1 + share/www/script/test/view_errors.js | 2 +- src/couchdb/couch_httpd.erl | 3 +++ src/mochiweb/mochijson2.erl | 18 ++++++++++++++++-- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/THANKS b/THANKS index 1ec6f31a..28f0fbb7 100644 --- a/THANKS +++ b/THANKS @@ -35,5 +35,6 @@ suggesting improvements or submitting changes. Some of these people are: * Sebastian Cohnen * Sven Helmberger * Dan Walters + * Curt Arnold For a list of authors see the `AUTHORS` file. diff --git a/share/www/script/test/view_errors.js b/share/www/script/test/view_errors.js index 256e9569..0f90c46f 100644 --- a/share/www/script/test/view_errors.js +++ b/share/www/script/test/view_errors.js @@ -55,7 +55,7 @@ couchTests.view_errors = function(debug) { map : "function(doc){emit(doc.integer)}" }) }); - T(JSON.parse(xhr.responseText).error == "invalid_json"); + T(JSON.parse(xhr.responseText).error == "bad_request"); // views should ignore Content-Type, like the rest of CouchDB var xhr = CouchDB.request("POST", "/test_suite_db/_temp_view", { diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index bdceab33..ac9ec723 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -181,6 +181,9 @@ handle_request(MochiReq, DefaultFun, catch throw:{http_head_abort, Resp0} -> {ok, Resp0}; + throw:{invalid_json, S} -> + ?LOG_ERROR("attempted upload of invalid JSON ~s", [S]), + send_error(HttpReq, {bad_request, "invalid UTF-8 JSON"}); exit:normal -> exit(normal); throw:Error -> diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl index 8b6adb1f..ee19458c 100644 --- a/src/mochiweb/mochijson2.erl +++ b/src/mochiweb/mochijson2.erl @@ -345,10 +345,24 @@ tokenize_string_fast(B, O) -> case B of <<_:O/binary, ?Q, _/binary>> -> O; - <<_:O/binary, C, _/binary>> when C =/= $\\ -> + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); _ -> - {escape, O} + throw(invalid_utf8) end. tokenize_string(B, S=#decoder{offset=O}, Acc) -> -- cgit v1.2.3