diff options
author | Adam Kocoloski <kocolosk@apache.org> | 2010-03-05 16:27:00 +0000 |
---|---|---|
committer | Adam Kocoloski <kocolosk@apache.org> | 2010-03-05 16:27:00 +0000 |
commit | 64481d0117baba9fce06384addff168912c83546 (patch) | |
tree | e945c4dae6663f4c359e179a50baf88d0ad71ef3 /src/couchdb/couch_doc.erl | |
parent | 52c9cec5c6715139cf06a99be9779e2f677bceae (diff) |
efficient attachment replication. Patch by Filipe Manana. Closes COUCHDB-639
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@919469 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/couchdb/couch_doc.erl')
-rw-r--r-- | src/couchdb/couch_doc.erl | 91 |
1 files changed, 61 insertions, 30 deletions
diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl index f5c47bf7..71d21dda 100644 --- a/src/couchdb/couch_doc.erl +++ b/src/couchdb/couch_doc.erl @@ -17,7 +17,7 @@ -export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]). -export([validate_docid/1]). -export([doc_from_multi_part_stream/2]). --export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]). +-export([doc_to_multi_part_stream/6, len_doc_to_multi_part_stream/5]). -include("couch_db.hrl"). @@ -73,21 +73,26 @@ to_json_meta(Meta) -> end, Meta). to_json_attachments(Attachments, Options) -> - case lists:member(attachments, Options) of + RevPos = case lists:member(attachments, Options) of true -> % return all the binaries - to_json_attachments(Attachments, 0, lists:member(follows, Options)); + 0; false -> % note the default is [], because this sorts higher than all numbers. % and will return all the binaries. - RevPos = proplists:get_value(atts_after_revpos, Options, []), - to_json_attachments(Attachments, RevPos, lists:member(follows, Options)) - end. + proplists:get_value(atts_after_revpos, Options, []) + end, + to_json_attachments( + Attachments, + RevPos, + lists:member(follows, Options), + lists:member(att_gzip_length, Options) + ). -to_json_attachments([], _RevPosIncludeAfter, _DataToFollow) -> +to_json_attachments([], _RevPosIncludeAfter, _DataToFollow, _ShowGzipLen) -> []; -to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) -> +to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow, ShowGzipLen) -> AttProps = lists:map( - fun(#att{disk_len=DiskLen}=Att) -> + fun(#att{disk_len=DiskLen, att_len=AttLen, comp=Comp}=Att) -> {Att#att.name, {[ {<<"content_type">>, Att#att.type}, {<<"revpos">>, Att#att.revpos} @@ -96,7 +101,7 @@ to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) -> if DataToFollow -> [{<<"length">>, DiskLen}, {<<"follows">>, true}]; true -> - AttData = case Att#att.comp of + AttData = case Comp of true -> zlib:gunzip(att_to_bin(Att)); _ -> @@ -106,7 +111,13 @@ to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) -> end; true -> [{<<"length">>, DiskLen}, {<<"stub">>, true}] - end + end ++ + case {ShowGzipLen, Comp} of + {true, true} -> + [{<<"gzip_length">>, AttLen}]; + _ -> + [] + end }} end, Atts), [{<<"_attachments">>, {AttProps}}]. @@ -190,19 +201,19 @@ transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) -> case proplists:get_value(<<"stub">>, BinProps) of true -> Type = proplists:get_value(<<"content_type">>, BinProps), - Length = proplists:get_value(<<"length">>, BinProps), RevPos = proplists:get_value(<<"revpos">>, BinProps, 0), - #att{name=Name, data=stub, type=Type, att_len=Length, - disk_len=Length, revpos=RevPos}; + {AttLen, DiskLen, Comp} = att_lengths(BinProps), + #att{name=Name, data=stub, type=Type, att_len=AttLen, + disk_len=DiskLen, comp=Comp, revpos=RevPos}; _ -> Type = proplists:get_value(<<"content_type">>, BinProps, ?DEFAULT_ATTACHMENT_CONTENT_TYPE), RevPos = proplists:get_value(<<"revpos">>, BinProps, 0), case proplists:get_value(<<"follows">>, BinProps) of true -> - Len = proplists:get_value(<<"length">>, BinProps), - #att{name=Name, data=follows, type=Type, - att_len=Len, disk_len=Len, revpos=RevPos}; + {AttLen, DiskLen, Comp} = att_lengths(BinProps), + #att{name=Name, data=follows, type=Type, comp=Comp, + att_len=AttLen, disk_len=DiskLen, revpos=RevPos}; _ -> Value = proplists:get_value(<<"data">>, BinProps), Bin = base64:decode(Value), @@ -250,6 +261,16 @@ transfer_fields([{<<"_",Name/binary>>, _} | _], _) -> transfer_fields([Field | Rest], #doc{body=Fields}=Doc) -> transfer_fields(Rest, Doc#doc{body=[Field|Fields]}). +att_lengths(BinProps) -> + DiskLen = proplists:get_value(<<"length">>, BinProps), + GzipLen = proplists:get_value(<<"gzip_length">>, BinProps), + case GzipLen of + undefined -> + {DiskLen, DiskLen, false}; + _ -> + {GzipLen, DiskLen, true} + end. + to_doc_info(FullDocInfo) -> {DocInfo, _Path} = to_doc_info_path(FullDocInfo), DocInfo. @@ -355,18 +376,24 @@ fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> ResultAcc = Fun(Bin, Acc), fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). -len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos) -> +len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, AttsSinceRevPos, + SendGzipAtts) -> 2 + % "--" size(Boundary) + 36 + % "\r\ncontent-type: application/json\r\n\r\n" iolist_size(JsonBytes) + 4 + % "\r\n--" size(Boundary) + - + lists:foldl(fun(#att{revpos=RevPos,disk_len=DiskLen}, AccAttsSize) -> + + lists:foldl(fun(#att{revpos=RevPos} = Att, AccAttsSize) -> if RevPos > AttsSinceRevPos -> AccAttsSize + 4 + % "\r\n\r\n" - DiskLen + + case SendGzipAtts of + true -> + Att#att.att_len; + _ -> + Att#att.disk_len + end + 4 + % "\r\n--" size(Boundary); true -> @@ -374,29 +401,33 @@ len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos) -> end end, 0, Atts) + 2. % "--" - -doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos,WriteFun) -> + +doc_to_multi_part_stream(Boundary, JsonBytes, Atts, AttsSinceRevPos, WriteFun, + SendGzipAtts) -> WriteFun([<<"--", Boundary/binary, "\r\ncontent-type: application/json\r\n\r\n">>, JsonBytes, <<"\r\n--", Boundary/binary>>]), - atts_to_mp(Atts, Boundary, WriteFun, AttsSinceRevPos). + atts_to_mp(Atts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts). -atts_to_mp([], _Boundary, WriteFun, _AttsSinceRevPos) -> +atts_to_mp([], _Boundary, WriteFun, _AttsSinceRevPos, _SendGzipAtts) -> WriteFun(<<"--">>); atts_to_mp([#att{revpos=RevPos} = Att | RestAtts], Boundary, WriteFun, - AttsSinceRevPos) when RevPos > AttsSinceRevPos -> + AttsSinceRevPos, SendGzipAtts) when RevPos > AttsSinceRevPos -> WriteFun(<<"\r\n\r\n">>), - AttFun = case Att#att.comp of - true -> + AttFun = case {Att#att.comp, SendGzipAtts} of + {true, false} -> fun att_foldl_unzip/3; _ -> + % receiver knows that the attachment is compressed by checking that the + % "gzip_length" field is present in the corresponding JSON attachment + % object found within the JSON doc fun att_foldl/3 end, AttFun(Att, fun(Data, ok) -> WriteFun(Data) end, ok), WriteFun(<<"\r\n--", Boundary/binary>>), - atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos); -atts_to_mp([_ | RestAtts], Boundary, WriteFun, AttsSinceRevPos) -> - atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos). + atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts); +atts_to_mp([_ | RestAtts], Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts) -> + atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts). doc_from_multi_part_stream(ContentType, DataFun) -> |