diff options
-rw-r--r-- | THANKS | 1 | ||||
-rw-r--r-- | src/couchdb/couch_db.erl | 34 | ||||
-rw-r--r-- | src/couchdb/couch_httpd.erl | 3 | ||||
-rw-r--r-- | src/couchdb/couch_httpd_db.erl | 26 | ||||
-rw-r--r-- | src/couchdb/couch_util.erl | 5 | ||||
-rw-r--r-- | src/mochiweb/mochiweb_headers.erl | 65 | ||||
-rwxr-xr-x | test/etap/130-attachments-md5.t | 252 |
7 files changed, 378 insertions, 8 deletions
@@ -40,5 +40,6 @@ suggesting improvements or submitting changes. Some of these people are: * Joshua Bronson <jabronson@gmail.com> * Kostis Sagonas <kostis@cs.ntua.gr> * Matthew Hooker <mwhooker@gmail.com> + * Filipe Manana <fdmanana@gmail.com> For a list of authors see the `AUTHORS` file. diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl index f0827334..79e00ff8 100644 --- a/src/couchdb/couch_db.erl +++ b/src/couchdb/couch_db.erl @@ -687,7 +687,7 @@ doc_flush_atts(Doc, Fd) -> check_md5(_NewSig, <<>>) -> ok; check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok; -check_md5(_, _) -> throw(data_corruption). +check_md5(_, _) -> throw(md5_mismatch). flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd -> % already written to our file, nothing to write @@ -713,8 +713,14 @@ flush_att(Fd, #att{data=Fun,len=undefined}=Att) when is_function(Fun) -> % WriterFun({0, _Footers}, State) % Called with Length == 0 on the last time. % WriterFun returns NewState. - fun({0, _Footers}, _) -> - ok; + fun({0, Footers}, _) -> + F = mochiweb_headers:from_binary(Footers), + case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> + ok; + Md5 -> + {md5, base64:decode(Md5)} + end; ({_Length, Chunk}, _) -> couch_stream:write(OutputStream, Chunk) end, ok) @@ -725,11 +731,29 @@ flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) -> write_streamed_attachment(OutputStream, Fun, Len) end). +% From RFC 2616 3.6.1 - Chunked Transfer Coding +% +% In other words, the origin server is willing to accept +% the possibility that the trailer fields might be silently +% discarded along the path to the client. +% +% I take this to mean that if "Trailers: Content-MD5\r\n" +% is present in the request, but there is no Content-MD5 +% trailer, we're free to ignore this inconsistency and +% pretend that no Content-MD5 exists. 
with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
     {ok, OutputStream} = couch_stream:open(Fd),
-    Fun(OutputStream),
+    % Fun streams the attachment body into OutputStream. It may return
+    % {md5, Digest} when the body carried a Content-MD5 trailer. InMd5 is
+    % whatever the caller stored in the #att record: a digest, <<>> for
+    % "nothing to verify", or the placeholder md5_in_footer when the
+    % request announced that the digest would arrive as a trailer.
+    ReqMd5 = case {Fun(OutputStream), InMd5} of
+        {{md5, FooterMd5}, md5_in_footer} ->
+            FooterMd5;
+        {_, md5_in_footer} ->
+            % The announced trailer never arrived. Use <<>> so that
+            % check_md5/2 skips verification; comparing the computed
+            % digest against the md5_in_footer placeholder could never
+            % match and would reject a valid upload with md5_mismatch.
+            <<>>;
+        {_, _} ->
+            % A digest from the request header (or <<>>) always wins,
+            % even if a trailer digest was also supplied.
+            InMd5
+    end,
     {StreamInfo, Len, Md5} = couch_stream:close(OutputStream),
-    check_md5(Md5, InMd5),
+    check_md5(Md5, ReqMd5),
     Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}.
 
diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl
index d0b2e6c2..a61b29fb 100644
--- a/src/couchdb/couch_httpd.erl
+++ b/src/couchdb/couch_httpd.erl
@@ -541,6 +541,9 @@ error_info({bad_request, Reason}) ->
     {400, <<"bad_request">>, Reason};
 error_info({query_parse_error, Reason}) ->
     {400, <<"query_parse_error">>, Reason};
+% Prior art for md5 mismatch resulting in a 400 is from AWS S3
+error_info(md5_mismatch) ->
+    {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>};
 error_info(not_found) ->
     {404, <<"not_found">>, <<"missing">>};
 error_info({not_found, Reason}) ->
diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
index e1c778a9..8b955c88 100644
--- a/src/couchdb/couch_httpd_db.erl
+++ b/src/couchdb/couch_httpd_db.erl
@@ -1043,8 +1043,9 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
                     undefined;
                 Length ->
                     list_to_integer(Length)
-                end
-            }]
+                end,
+                md5 = get_md5_header(Req)
+            }]
     end,
 
     Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of
@@ -1084,6 +1085,27 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
 db_attachment_req(Req, _Db, _DocId, _FileNameParts) ->
     send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT").
+
+% Works out how the request announces the MD5 digest of the attachment.
+%
+% Returns:
+%   - the base64-decoded digest when a Content-MD5 request header is set;
+%   - md5_in_footer when the body length is chunked and the Trailer
+%     header names Content-MD5;
+%   - <<>> in every other case.
+%
+% Throws {bad_request, Reason} when the Content-MD5 header cannot be
+% base64-decoded.
+get_md5_header(Req) ->
+    ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"),
+    Length = couch_httpd:body_length(Req),
+    Trailer = couch_httpd:header_value(Req, "Trailer"),
+    case {ContentMD5, Length, Trailer} of
+        _ when is_list(ContentMD5) orelse is_binary(ContentMD5) ->
+            % The header value is supplied by the client. A malformed
+            % value must be reported as a bad request rather than crash
+            % the request with an error from base64:decode/1.
+            try
+                base64:decode(ContentMD5)
+            catch
+                error:_ ->
+                    throw({bad_request,
+                        <<"Content-MD5 header is not valid base64.">>})
+            end;
+        {_, chunked, undefined} ->
+            <<>>;
+        {_, chunked, _} ->
+            case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of
+                {match, _} ->
+                    md5_in_footer;
+                _ ->
+                    <<>>
+            end;
+        _ ->
+            <<>>
+    end.
+
 parse_doc_format(FormatStr) when is_binary(FormatStr) ->
     parse_doc_format(?b2l(FormatStr));
 parse_doc_format(FormatStr) when is_list(FormatStr) ->
diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl
index 8e2c66df..6edfb781 100644
--- a/src/couchdb/couch_util.erl
+++ b/src/couchdb/couch_util.erl
@@ -419,4 +419,7 @@ json_encode(V) ->
 json_decode(V) ->
     try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V)
-    catch _:_ -> throw({invalid_json,V}) end.
+    catch
+        _Type:_Error ->
+            throw({invalid_json,V})
+    end.
diff --git a/src/mochiweb/mochiweb_headers.erl b/src/mochiweb/mochiweb_headers.erl
index 6fcec7c3..d90fd679 100644
--- a/src/mochiweb/mochiweb_headers.erl
+++ b/src/mochiweb/mochiweb_headers.erl
@@ -9,6 +9,7 @@
 -export([delete_any/2, get_primary_value/2]).
 -export([default/3, enter_from_list/2, default_from_list/2]).
 -export([to_list/1, make/1]).
+-export([from_binary/1]).
 -export([test/0]).
 
 %% @type headers().
@@ -37,6 +38,36 @@ test() -> "content-type", H4), H4 = ?MODULE:delete_any("nonexistent-header", H4), H3 = ?MODULE:delete_any("content-type", H4), + HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>, + H_HB = ?MODULE:from_binary(HB), + H_HB = ?MODULE:from_binary(binary_to_list(HB)), + "47" = ?MODULE:get_value("Content-Length", H_HB), + "text/plain" = ?MODULE:get_value("Content-Type", H_HB), + L_H_HB = ?MODULE:to_list(H_HB), + 2 = length(L_H_HB), + true = lists:member({'Content-Length', "47"}, L_H_HB), + true = lists:member({'Content-Type', "text/plain"}, L_H_HB), + HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ], + HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ], + HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ], + H_HL = ?MODULE:from_binary(HL), + H_HL = ?MODULE:from_binary(HL2), + H_HL = ?MODULE:from_binary(HL3), + "47" = ?MODULE:get_value("Content-Length", H_HL), + "text/plain" = ?MODULE:get_value("Content-Type", H_HL), + L_H_HL = ?MODULE:to_list(H_HL), + 2 = length(L_H_HL), + true = lists:member({'Content-Length', "47"}, L_H_HL), + true = lists:member({'Content-Type', "text/plain"}, L_H_HL), + [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary("")), + [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])), ok. %% @spec empty() -> headers() @@ -52,6 +83,40 @@ make(L) when is_list(L) -> make(T) when is_tuple(T) -> T. 
+%% @spec from_binary(raw_http_header()) -> headers()
+%% @type raw_http_header() = string() | binary() | [ string() | binary() ]
+%%
+%% @doc Builds a mochiweb headers structure from raw HTTP header text.
+%%
+%% Two input shapes are accepted:
+%%
+%% 1) A single string or binary holding a complete header block that is
+%%    terminated by a double CRLF, for example:
+%%    "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
+%%    <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>
+%%
+%% 2) A list whose elements are strings and/or binaries, each one a raw
+%%    header line terminated by a single CRLF, for example:
+%%    [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ]
+%%    [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ]
+%%    [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ]
+%%
+%% Non-binary input is turned into one binary with a closing CRLF appended
+%% before it is parsed. Parsing stops at the first position where
+%% erlang:decode_packet/3 does not yield a header; the headers collected up
+%% to that point are returned.
+from_binary(Raw) when is_binary(Raw) ->
+    from_binary(Raw, []);
+from_binary(RawList) ->
+    from_binary(list_to_binary([RawList, "\r\n"]), []).
+
+%% Decodes one header per step, accumulating {Name, Value} pairs.
+from_binary(Bin, Pairs) ->
+    case erlang:decode_packet(httph, Bin, []) of
+        {ok, {http_header, _, Name, _, Value}, Remaining} ->
+            from_binary(Remaining, [{Name, Value} | Pairs]);
+        _NoFurtherHeader ->
+            make(Pairs)
+    end.
+
 %% @spec from_list([{key(), value()}]) -> headers()
 %% @doc Construct a headers() from the given list.
 from_list(List) ->
diff --git a/test/etap/130-attachments-md5.t b/test/etap/130-attachments-md5.t
new file mode 100755
index 00000000..fe6732d6
--- /dev/null
+++ b/test/etap/130-attachments-md5.t
@@ -0,0 +1,252 @@
+#!/usr/bin/env escript
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License.
You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+% Configuration file handed to couch_server_sup when the server is booted.
+default_config() ->
+    test_util:build_file("etc/couchdb/default_dev.ini").
+
+% Name of the database that is deleted, created and written to by test/0.
+test_db_name() ->
+    <<"etap-test-db">>.
+
+% Returns a new document id ("1", "2", ...) on every call. The running
+% counter lives in the process dictionary under the key docid.
+docid() ->
+    Next = case get(docid) of
+        undefined -> 1;
+        Last -> Last + 1
+    end,
+    put(docid, Next),
+    integer_to_list(Next).
+
+% escript entry point. Plans 16 assertions, runs test/0 and bails out with
+% a diagnostic when test/0 yields anything other than ok.
+main(_) ->
+    test_util:init_code_path(),
+    etap:plan(16),
+    Outcome = (catch test()),
+    case Outcome of
+        ok ->
+            etap:end_tests();
+        _ ->
+            etap:diag(
+                io_lib:format("Test died abnormally: ~p", [Outcome])),
+            etap:bail(Outcome)
+    end,
+    ok.
+
+% Boots the server, stores the configured HTTP address and port in the
+% process dictionary (keys addr and port), recreates the test database,
+% runs every test case in order and then cleans up.
+test() ->
+    couch_server_sup:start_link([default_config()]),
+    put(addr, couch_config:get("httpd", "bind_address", any)),
+    put(port, list_to_integer(couch_config:get("httpd", "port", "5984"))),
+    timer:sleep(1000),
+
+    couch_server:delete(test_db_name(), []),
+    couch_db:create(test_db_name(), []),
+
+    lists:foreach(fun(TestCase) -> TestCase() end, [
+        fun test_identity_without_md5/0,
+        fun test_chunked_without_md5/0,
+
+        fun test_identity_with_valid_md5/0,
+        fun test_chunked_with_valid_md5_header/0,
+        fun test_chunked_with_valid_md5_trailer/0,
+
+        fun test_identity_with_invalid_md5/0,
+        fun test_chunked_with_invalid_md5_header/0,
+        fun test_chunked_with_invalid_md5_trailer/0
+    ]),
+
+    couch_server:delete(test_db_name(), []),
+    couch_server_sup:stop(),
+    ok.
+
+% PUT with a Content-Length body and no Content-MD5 header; asserts a 201
+% status and an "ok": true reply.
+test_identity_without_md5() ->
+    Headers = [
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n"],
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        Headers,
+        "\r\n",
+        "We all live in a yellow submarine!"],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 201, "Stored with identity encoding and no MD5"),
+    etap:is(get_json(Reply, [<<"ok">>]), true, "Body indicates success.").
+
+% PUT with a two-chunk chunked body and no Content-MD5 anywhere; asserts a
+% 201 status and an "ok": true reply.
+test_chunked_without_md5() ->
+    <<Head:21/binary, Tail:13/binary>> =
+        <<"We all live in a yellow submarine!">>,
+    Chunks = [
+        to_hex(byte_size(Head)), "\r\n", Head, "\r\n",
+        to_hex(byte_size(Tail)), "\r\n", Tail, "\r\n",
+        "0\r\n",
+        "\r\n"],
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "\r\n",
+        Chunks],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 201, "Stored with chunked encoding and no MD5"),
+    etap:is(get_json(Reply, [<<"ok">>]), true, "Body indicates success.").
+
+% PUT with a Content-Length body and a Content-MD5 header that matches the
+% body; asserts a 201 status and an "ok": true reply.
+test_identity_with_valid_md5() ->
+    Body = "We all live in a yellow submarine!",
+    Digest = base64:encode(erlang:md5(Body)),
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n",
+        "Content-MD5: ", Digest, "\r\n",
+        "\r\n",
+        Body],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 201, "Stored with identity encoding and valid MD5"),
+    etap:is(get_json(Reply, [<<"ok">>]), true, "Body indicates success.").
+
+% Chunked PUT whose Content-MD5 request header matches the body; asserts a
+% 201 status and an "ok": true reply.
+test_chunked_with_valid_md5_header() ->
+    Body = <<"We all live in a yellow submarine!">>,
+    <<Head:21/binary, Tail:13/binary>> = Body,
+    Digest = base64:encode(erlang:md5(Body)),
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Content-MD5: ", Digest, "\r\n",
+        "\r\n",
+        to_hex(byte_size(Head)), "\r\n", Head, "\r\n",
+        to_hex(byte_size(Tail)), "\r\n", Tail, "\r\n",
+        "0\r\n",
+        "\r\n"],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 201, "Stored with chunked encoding and valid MD5 header."),
+    etap:is(get_json(Reply, [<<"ok">>]), true, "Body indicates success.").
+
+% Chunked PUT that announces "Trailer: Content-MD5" and sends a matching
+% digest after the last chunk; asserts a 201 status and an "ok": true reply.
+test_chunked_with_valid_md5_trailer() ->
+    Body = <<"We all live in a yellow submarine!">>,
+    <<Head:21/binary, Tail:13/binary>> = Body,
+    Digest = base64:encode(erlang:md5(Body)),
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Trailer: Content-MD5\r\n",
+        "\r\n",
+        to_hex(byte_size(Head)), "\r\n", Head, "\r\n",
+        to_hex(byte_size(Tail)), "\r\n", Tail, "\r\n",
+        "0\r\n",
+        "Content-MD5: ", Digest, "\r\n",
+        "\r\n"],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 201, "Stored with chunked encoding and valid MD5 trailer."),
+    etap:is(get_json(Reply, [<<"ok">>]), true, "Body indicates success.").
+
+% PUT with a Content-Length body and a Content-MD5 header that does not
+% match the body; asserts a 400 status and the content_md5_mismatch error.
+test_identity_with_invalid_md5() ->
+    BogusDigest = base64:encode(<<"foobar!">>),
+    Request = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n",
+        "Content-MD5: ", BogusDigest, "\r\n",
+        "\r\n",
+        "We all live in a yellow submarine!"],
+
+    {Status, Reply} = do_request(Request),
+    etap:is(Status, 400, "Invalid MD5 header causes an error: identity"),
+    etap:is(
+        get_json(Reply, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+% Chunked PUT whose Content-MD5 request header does not match the body;
+% asserts a 400 status and the content_md5_mismatch error.
+test_chunked_with_invalid_md5_header() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Content-MD5: ", base64:encode(<<"so sneaky...">>), "\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 400, "Invalid MD5 header causes an error: chunked"),
+    etap:is(
+        get_json(Json, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+% Chunked PUT that announces "Trailer: Content-MD5" and sends a digest that
+% does not match the body; asserts a 400 status and the
+% content_md5_mismatch error.
+test_chunked_with_invalid_md5_trailer() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Trailer: Content-MD5\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "Content-MD5: ", base64:encode(<<"Kool-Aid Fountain!">>), "\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 400, "Invalid MD5 Trailer causes an error"),
+    etap:is(
+        get_json(Json, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+
+% Opens a passive, binary-mode TCP connection to the address and port that
+% test/0 stored in the process dictionary.
+get_socket() ->
+    Options = [binary, {packet, 0}, {active, false}],
+    {ok, Sock} = gen_tcp:connect(get(addr), get(port), Options),
+    Sock.
+
+% Sends a raw HTTP request and returns {StatusCode, DecodedJsonBody}.
+% Request is iodata: strings, binaries and nested lists of both.
+do_request(Request) ->
+    Sock = get_socket(),
+    % gen_tcp:send/2 accepts iodata, so no flattening or copying into a
+    % single binary is needed first.
+    gen_tcp:send(Sock, Request),
+    timer:sleep(100),
+    {ok, R} = gen_tcp:recv(Sock, 0),
+    gen_tcp:close(Sock),
+    % Split at the first blank line only, so a body that itself contains
+    % "\r\n\r\n" cannot break the two-element match below.
+    [Header, Body] =
+        re:split(R, "\r\n\r\n", [{return, binary}, {parts, 2}]),
+    {ok, {http_response, _, Code, _}, _} =
+        erlang:decode_packet(http, Header, []),
+    Json = couch_util:json_decode(Body),
+    {Code, Json}.
+
+% Looks up the value found at Path inside a decoded JSON term.
+get_json(Json, Path) ->
+    couch_util:get_nested_json_value(Json, Path).
+
+% Renders a non-negative integer as an uppercase hexadecimal string, the
+% form needed for the chunk-size lines of a chunked request body.
+% integer_to_list/2 renders 0 as "0"; the hand-rolled conversion this
+% replaces returned an empty string for 0.
+to_hex(Val) ->
+    integer_to_list(Val, 16).
+ |