summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/couchdb/couch_doc.erl4
-rw-r--r--src/couchdb/couch_util.erl33
2 files changed, 36 insertions, 1 deletions
diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
index 7c64b8a9..e3d66145 100644
--- a/src/couchdb/couch_doc.erl
+++ b/src/couchdb/couch_doc.erl
@@ -165,6 +165,10 @@ parse_revs([Rev | Rest]) ->
validate_docid(Id) when is_binary(Id) ->
+ case couch_util:validate_utf8(Id) of
+ false -> throw({bad_request, <<"Document id must be valid UTF-8">>});
+ true -> ok
+ end,
case Id of
<<"_design/", _/binary>> -> ok;
<<"_local/", _/binary>> -> ok;
diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl
index 0480f230..3de7739b 100644
--- a/src/couchdb/couch_util.erl
+++ b/src/couchdb/couch_util.erl
@@ -17,7 +17,7 @@
-export([rand32/0, implode/2, collate/2, collate/3]).
-export([abs_pathname/1,abs_pathname/2, trim/1]).
-export([encodeBase64Url/1, decodeBase64Url/1]).
--export([to_hex/1, parse_term/1, dict_find/3]).
+-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
-export([get_nested_json_value/2, json_user_ctx/1]).
-export([proplist_apply_field/2, json_apply_field/2]).
-export([to_binary/1, to_integer/1, to_list/1, url_encode/1]).
@@ -107,6 +107,37 @@ simple_call(Pid, Message) ->
erlang:demonitor(MRef, [flush])
end.
+validate_utf8(Data) when is_list(Data) ->
+ validate_utf8(?l2b(Data));
+validate_utf8(Bin) when is_binary(Bin) ->
+ validate_utf8_fast(Bin, 0).
+
+validate_utf8_fast(B, O) ->
+ case B of
+ <<_:O/binary>> ->
+ true;
+ <<_:O/binary, C1, _/binary>> when
+ C1 < 128 ->
+ validate_utf8_fast(B, 1 + O);
+ <<_:O/binary, C1, C2, _/binary>> when
+ C1 >= 194, C1 =< 223,
+ C2 >= 128, C2 =< 191 ->
+ validate_utf8_fast(B, 2 + O);
+ <<_:O/binary, C1, C2, C3, _/binary>> when
+ C1 >= 224, C1 =< 239,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191 ->
+ validate_utf8_fast(B, 3 + O);
+ <<_:O/binary, C1, C2, C3, C4, _/binary>> when
+ C1 >= 240, C1 =< 244,
+ C2 >= 128, C2 =< 191,
+ C3 >= 128, C3 =< 191,
+ C4 >= 128, C4 =< 191 ->
+ validate_utf8_fast(B, 4 + O);
+ _ ->
+ false
+ end.
+
to_hex([]) ->
[];
to_hex(Bin) when is_binary(Bin) ->