summaryrefslogtreecommitdiff
path: root/src/couchdb
diff options
context:
space:
mode:
authorPaul Joseph Davis <davisp@apache.org>2009-12-16 00:05:35 +0000
committerPaul Joseph Davis <davisp@apache.org>2009-12-16 00:05:35 +0000
commit22c551bb103072826c0299265670d1483c753dde (patch)
treeda3ede0f1b8e784bcd5f519fa3927f49e06e4ccd /src/couchdb
parent04404e2dbc2fcdb08dbd967fb99d674d12b85c75 (diff)
Provide Content-MD5 header support for attachments.
Fixes COUCHDB-558. Thanks to Filipe Manana we now have checks for attachment transfer integrity using the Content-MD5 header (or trailer). Use of this integrity check is triggered by specifying a Content-MD5 header in your request with a value that is a base64 encoded md5. For requests that are using a chunked Transfer-Encoding it is also possible to use a trailer so that the Content-MD5 doesn't need to be known before transfer. This works by specifying a header "Trailer: Content-MD5" and then in the final chunk (the one with a size of zero) you can specify a Content-MD5 with exactly the same format as in the request headers. See the ETap test 130-attachments-md5.t for explicit examples of the request messages. git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@891077 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/couchdb')
-rw-r--r--src/couchdb/couch_db.erl34
-rw-r--r--src/couchdb/couch_httpd.erl3
-rw-r--r--src/couchdb/couch_httpd_db.erl26
-rw-r--r--src/couchdb/couch_util.erl5
4 files changed, 60 insertions, 8 deletions
diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl
index f0827334..79e00ff8 100644
--- a/src/couchdb/couch_db.erl
+++ b/src/couchdb/couch_db.erl
@@ -687,7 +687,7 @@ doc_flush_atts(Doc, Fd) ->
check_md5(_NewSig, <<>>) -> ok;
check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok;
-check_md5(_, _) -> throw(data_corruption).
+check_md5(_, _) -> throw(md5_mismatch).
flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd ->
% already written to our file, nothing to write
@@ -713,8 +713,14 @@ flush_att(Fd, #att{data=Fun,len=undefined}=Att) when is_function(Fun) ->
% WriterFun({0, _Footers}, State)
% Called with Length == 0 on the last time.
% WriterFun returns NewState.
- fun({0, _Footers}, _) ->
- ok;
+ fun({0, Footers}, _) ->
+ F = mochiweb_headers:from_binary(Footers),
+ case mochiweb_headers:get_value("Content-MD5", F) of
+ undefined ->
+ ok;
+ Md5 ->
+ {md5, base64:decode(Md5)}
+ end;
({_Length, Chunk}, _) ->
couch_stream:write(OutputStream, Chunk)
end, ok)
@@ -725,11 +731,29 @@ flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) ->
write_streamed_attachment(OutputStream, Fun, Len)
end).
+% From RFC 2616 3.6.1 - Chunked Transfer Coding
+%
+% In other words, the origin server is willing to accept
+% the possibility that the trailer fields might be silently
+% discarded along the path to the client.
+%
+% I take this to mean that if "Trailers: Content-MD5\r\n"
+% is present in the request, but there is no Content-MD5
+% trailer, we're free to ignore this inconsistency and
+% pretend that no Content-MD5 exists.
with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
{ok, OutputStream} = couch_stream:open(Fd),
- Fun(OutputStream),
+ ReqMd5 = case Fun(OutputStream) of
+ {md5, FooterMd5} ->
+ case InMd5 of
+ md5_in_footer -> FooterMd5;
+ _ -> InMd5
+ end;
+ _ ->
+ InMd5
+ end,
{StreamInfo, Len, Md5} = couch_stream:close(OutputStream),
- check_md5(Md5, InMd5),
+ check_md5(Md5, ReqMd5),
Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}.
diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl
index d0b2e6c2..a61b29fb 100644
--- a/src/couchdb/couch_httpd.erl
+++ b/src/couchdb/couch_httpd.erl
@@ -541,6 +541,9 @@ error_info({bad_request, Reason}) ->
{400, <<"bad_request">>, Reason};
error_info({query_parse_error, Reason}) ->
{400, <<"query_parse_error">>, Reason};
+% Prior art for md5 mismatch resulting in a 400 is from AWS S3
+error_info(md5_mismatch) ->
+ {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>};
error_info(not_found) ->
{404, <<"not_found">>, <<"missing">>};
error_info({not_found, Reason}) ->
diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
index e1c778a9..8b955c88 100644
--- a/src/couchdb/couch_httpd_db.erl
+++ b/src/couchdb/couch_httpd_db.erl
@@ -1043,8 +1043,9 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
undefined;
Length ->
list_to_integer(Length)
- end
- }]
+ end,
+ md5 = get_md5_header(Req)
+ }]
end,
Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of
@@ -1084,6 +1085,27 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
db_attachment_req(Req, _Db, _DocId, _FileNameParts) ->
send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT").
+
+get_md5_header(Req) ->
+ ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"),
+ Length = couch_httpd:body_length(Req),
+ Trailer = couch_httpd:header_value(Req, "Trailer"),
+ case {ContentMD5, Length, Trailer} of
+ _ when is_list(ContentMD5) orelse is_binary(ContentMD5) ->
+ base64:decode(ContentMD5);
+ {_, chunked, undefined} ->
+ <<>>;
+ {_, chunked, _} ->
+ case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of
+ {match, _} ->
+ md5_in_footer;
+ _ ->
+ <<>>
+ end;
+ _ ->
+ <<>>
+ end.
+
parse_doc_format(FormatStr) when is_binary(FormatStr) ->
parse_doc_format(?b2l(FormatStr));
parse_doc_format(FormatStr) when is_list(FormatStr) ->
diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl
index 8e2c66df..6edfb781 100644
--- a/src/couchdb/couch_util.erl
+++ b/src/couchdb/couch_util.erl
@@ -419,4 +419,7 @@ json_encode(V) ->
json_decode(V) ->
try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V)
- catch _:_ -> throw({invalid_json,V}) end.
+ catch
+ _Type:_Error ->
+ throw({invalid_json,V})
+ end.