From 22c551bb103072826c0299265670d1483c753dde Mon Sep 17 00:00:00 2001 From: Paul Joseph Davis Date: Wed, 16 Dec 2009 00:05:35 +0000 Subject: Provide Content-MD5 header support for attachments. Fixes COUCHDB-558. Thanks to Filipe Manana we now have checks for attachment transfer integrity using the Content-MD5 header (or trailer). Use of this integrity check is triggered by specifying a Content-MD5 header in your request with a value that is a base64 encoded md5. For requests that are using a chunked Transfer-Encoding it is also possible to use a trailer so that the Content-MD5 doesn't need to be known before transfer. This works by specifying a header "Trailer: Content-MD5" and then in the final chunk (the one with a size of zero) you can specify a Content-MD5 with exactly the same format as in the request headers. See the ETap test 130-attachments-md5.t for explicit examples of the request messages. git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@891077 13f79535-47bb-0310-9956-ffa450edef68 --- src/couchdb/couch_db.erl | 34 +++++++++++++++++++++++++++++----- src/couchdb/couch_httpd.erl | 3 +++ src/couchdb/couch_httpd_db.erl | 26 ++++++++++++++++++++++++-- src/couchdb/couch_util.erl | 5 ++++- 4 files changed, 60 insertions(+), 8 deletions(-) (limited to 'src/couchdb') diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl index f0827334..79e00ff8 100644 --- a/src/couchdb/couch_db.erl +++ b/src/couchdb/couch_db.erl @@ -687,7 +687,7 @@ doc_flush_atts(Doc, Fd) -> check_md5(_NewSig, <<>>) -> ok; check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok; -check_md5(_, _) -> throw(data_corruption). +check_md5(_, _) -> throw(md5_mismatch). flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd -> % already written to our file, nothing to write @@ -713,8 +713,14 @@ flush_att(Fd, #att{data=Fun,len=undefined}=Att) when is_function(Fun) -> % WriterFun({0, _Footers}, State) % Called with Length == 0 on the last time. % WriterFun returns NewState. 
- fun({0, _Footers}, _) -> - ok; + fun({0, Footers}, _) -> + F = mochiweb_headers:from_binary(Footers), + case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> + ok; + Md5 -> + {md5, base64:decode(Md5)} + end; ({_Length, Chunk}, _) -> couch_stream:write(OutputStream, Chunk) end, ok) @@ -725,11 +731,29 @@ flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) -> write_streamed_attachment(OutputStream, Fun, Len) end). +% From RFC 2616 3.6.1 - Chunked Transfer Coding +% +% In other words, the origin server is willing to accept +% the possibility that the trailer fields might be silently +% discarded along the path to the client. +% +% I take this to mean that if "Trailer: Content-MD5\r\n" +% is present in the request, but there is no Content-MD5 +% trailer, we're free to ignore this inconsistency and +% pretend that no Content-MD5 exists. with_stream(Fd, #att{md5=InMd5}=Att, Fun) -> {ok, OutputStream} = couch_stream:open(Fd), - Fun(OutputStream), + ReqMd5 = case Fun(OutputStream) of + {md5, FooterMd5} -> + case InMd5 of + md5_in_footer -> FooterMd5; + _ -> InMd5 + end; + _ -> + InMd5 + end, {StreamInfo, Len, Md5} = couch_stream:close(OutputStream), - check_md5(Md5, InMd5), + check_md5(Md5, ReqMd5), Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}. 
diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index d0b2e6c2..a61b29fb 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -541,6 +541,9 @@ error_info({bad_request, Reason}) -> {400, <<"bad_request">>, Reason}; error_info({query_parse_error, Reason}) -> {400, <<"query_parse_error">>, Reason}; +% Prior art for md5 mismatch resulting in a 400 is from AWS S3 +error_info(md5_mismatch) -> + {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>}; error_info(not_found) -> {404, <<"not_found">>, <<"missing">>}; error_info({not_found, Reason}) -> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl index e1c778a9..8b955c88 100644 --- a/src/couchdb/couch_httpd_db.erl +++ b/src/couchdb/couch_httpd_db.erl @@ -1043,8 +1043,9 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) undefined; Length -> list_to_integer(Length) - end - }] + end, + md5 = get_md5_header(Req) + }] end, Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of @@ -1084,6 +1085,27 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) db_attachment_req(Req, _Db, _DocId, _FileNameParts) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT"). + +get_md5_header(Req) -> + ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"), + Length = couch_httpd:body_length(Req), + Trailer = couch_httpd:header_value(Req, "Trailer"), + case {ContentMD5, Length, Trailer} of + _ when is_list(ContentMD5) orelse is_binary(ContentMD5) -> + base64:decode(ContentMD5); + {_, chunked, undefined} -> + <<>>; + {_, chunked, _} -> + case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of + {match, _} -> + md5_in_footer; + _ -> + <<>> + end; + _ -> + <<>> + end. 
+ parse_doc_format(FormatStr) when is_binary(FormatStr) -> parse_doc_format(?b2l(FormatStr)); parse_doc_format(FormatStr) when is_list(FormatStr) -> diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl index 8e2c66df..6edfb781 100644 --- a/src/couchdb/couch_util.erl +++ b/src/couchdb/couch_util.erl @@ -419,4 +419,7 @@ json_encode(V) -> json_decode(V) -> try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V) - catch _:_ -> throw({invalid_json,V}) end. + catch + _Type:_Error -> + throw({invalid_json,V}) + end. -- cgit v1.2.3