summaryrefslogtreecommitdiff
path: root/src/couchdb/couch_doc.erl
diff options
context:
space:
mode:
authorAdam Kocoloski <kocolosk@apache.org>2010-03-05 16:27:00 +0000
committerAdam Kocoloski <kocolosk@apache.org>2010-03-05 16:27:00 +0000
commit64481d0117baba9fce06384addff168912c83546 (patch)
treee945c4dae6663f4c359e179a50baf88d0ad71ef3 /src/couchdb/couch_doc.erl
parent52c9cec5c6715139cf06a99be9779e2f677bceae (diff)
efficient attachment replication. Patch by Filipe Manana. Closes COUCHDB-639
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@919469 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/couchdb/couch_doc.erl')
-rw-r--r--src/couchdb/couch_doc.erl91
1 files changed, 61 insertions, 30 deletions
diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
index f5c47bf7..71d21dda 100644
--- a/src/couchdb/couch_doc.erl
+++ b/src/couchdb/couch_doc.erl
@@ -17,7 +17,7 @@
-export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]).
-export([validate_docid/1]).
-export([doc_from_multi_part_stream/2]).
--export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]).
+-export([doc_to_multi_part_stream/6, len_doc_to_multi_part_stream/5]).
-include("couch_db.hrl").
@@ -73,21 +73,26 @@ to_json_meta(Meta) ->
end, Meta).
to_json_attachments(Attachments, Options) ->
- case lists:member(attachments, Options) of
+ RevPos = case lists:member(attachments, Options) of
true -> % return all the binaries
- to_json_attachments(Attachments, 0, lists:member(follows, Options));
+ 0;
false ->
% note the default is [], because this sorts higher than all numbers.
% and will return all the binaries.
- RevPos = proplists:get_value(atts_after_revpos, Options, []),
- to_json_attachments(Attachments, RevPos, lists:member(follows, Options))
- end.
+ proplists:get_value(atts_after_revpos, Options, [])
+ end,
+ to_json_attachments(
+ Attachments,
+ RevPos,
+ lists:member(follows, Options),
+ lists:member(att_gzip_length, Options)
+ ).
-to_json_attachments([], _RevPosIncludeAfter, _DataToFollow) ->
+to_json_attachments([], _RevPosIncludeAfter, _DataToFollow, _ShowGzipLen) ->
[];
-to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) ->
+to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow, ShowGzipLen) ->
AttProps = lists:map(
- fun(#att{disk_len=DiskLen}=Att) ->
+ fun(#att{disk_len=DiskLen, att_len=AttLen, comp=Comp}=Att) ->
{Att#att.name, {[
{<<"content_type">>, Att#att.type},
{<<"revpos">>, Att#att.revpos}
@@ -96,7 +101,7 @@ to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) ->
if DataToFollow ->
[{<<"length">>, DiskLen}, {<<"follows">>, true}];
true ->
- AttData = case Att#att.comp of
+ AttData = case Comp of
true ->
zlib:gunzip(att_to_bin(Att));
_ ->
@@ -106,7 +111,13 @@ to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) ->
end;
true ->
[{<<"length">>, DiskLen}, {<<"stub">>, true}]
- end
+ end ++
+ case {ShowGzipLen, Comp} of
+ {true, true} ->
+ [{<<"gzip_length">>, AttLen}];
+ _ ->
+ []
+ end
}}
end, Atts),
[{<<"_attachments">>, {AttProps}}].
@@ -190,19 +201,19 @@ transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) ->
case proplists:get_value(<<"stub">>, BinProps) of
true ->
Type = proplists:get_value(<<"content_type">>, BinProps),
- Length = proplists:get_value(<<"length">>, BinProps),
RevPos = proplists:get_value(<<"revpos">>, BinProps, 0),
- #att{name=Name, data=stub, type=Type, att_len=Length,
- disk_len=Length, revpos=RevPos};
+ {AttLen, DiskLen, Comp} = att_lengths(BinProps),
+ #att{name=Name, data=stub, type=Type, att_len=AttLen,
+ disk_len=DiskLen, comp=Comp, revpos=RevPos};
_ ->
Type = proplists:get_value(<<"content_type">>, BinProps,
?DEFAULT_ATTACHMENT_CONTENT_TYPE),
RevPos = proplists:get_value(<<"revpos">>, BinProps, 0),
case proplists:get_value(<<"follows">>, BinProps) of
true ->
- Len = proplists:get_value(<<"length">>, BinProps),
- #att{name=Name, data=follows, type=Type,
- att_len=Len, disk_len=Len, revpos=RevPos};
+ {AttLen, DiskLen, Comp} = att_lengths(BinProps),
+ #att{name=Name, data=follows, type=Type, comp=Comp,
+ att_len=AttLen, disk_len=DiskLen, revpos=RevPos};
_ ->
Value = proplists:get_value(<<"data">>, BinProps),
Bin = base64:decode(Value),
@@ -250,6 +261,16 @@ transfer_fields([{<<"_",Name/binary>>, _} | _], _) ->
transfer_fields([Field | Rest], #doc{body=Fields}=Doc) ->
transfer_fields(Rest, Doc#doc{body=[Field|Fields]}).
+att_lengths(BinProps) ->
+ DiskLen = proplists:get_value(<<"length">>, BinProps),
+ GzipLen = proplists:get_value(<<"gzip_length">>, BinProps),
+ case GzipLen of
+ undefined ->
+ {DiskLen, DiskLen, false};
+ _ ->
+ {GzipLen, DiskLen, true}
+ end.
+
to_doc_info(FullDocInfo) ->
{DocInfo, _Path} = to_doc_info_path(FullDocInfo),
DocInfo.
@@ -355,18 +376,24 @@ fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0->
ResultAcc = Fun(Bin, Acc),
fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc).
-len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos) ->
+len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, AttsSinceRevPos,
+ SendGzipAtts) ->
2 + % "--"
size(Boundary) +
36 + % "\r\ncontent-type: application/json\r\n\r\n"
iolist_size(JsonBytes) +
4 + % "\r\n--"
size(Boundary) +
- + lists:foldl(fun(#att{revpos=RevPos,disk_len=DiskLen}, AccAttsSize) ->
+ + lists:foldl(fun(#att{revpos=RevPos} = Att, AccAttsSize) ->
if RevPos > AttsSinceRevPos ->
AccAttsSize +
4 + % "\r\n\r\n"
- DiskLen +
+ case SendGzipAtts of
+ true ->
+ Att#att.att_len;
+ _ ->
+ Att#att.disk_len
+ end +
4 + % "\r\n--"
size(Boundary);
true ->
@@ -374,29 +401,33 @@ len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos) ->
end
end, 0, Atts) +
2. % "--"
-
-doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos,WriteFun) ->
+
+doc_to_multi_part_stream(Boundary, JsonBytes, Atts, AttsSinceRevPos, WriteFun,
+ SendGzipAtts) ->
WriteFun([<<"--", Boundary/binary,
"\r\ncontent-type: application/json\r\n\r\n">>,
JsonBytes, <<"\r\n--", Boundary/binary>>]),
- atts_to_mp(Atts, Boundary, WriteFun, AttsSinceRevPos).
+ atts_to_mp(Atts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts).
-atts_to_mp([], _Boundary, WriteFun, _AttsSinceRevPos) ->
+atts_to_mp([], _Boundary, WriteFun, _AttsSinceRevPos, _SendGzipAtts) ->
WriteFun(<<"--">>);
atts_to_mp([#att{revpos=RevPos} = Att | RestAtts], Boundary, WriteFun,
- AttsSinceRevPos) when RevPos > AttsSinceRevPos ->
+ AttsSinceRevPos, SendGzipAtts) when RevPos > AttsSinceRevPos ->
WriteFun(<<"\r\n\r\n">>),
- AttFun = case Att#att.comp of
- true ->
+ AttFun = case {Att#att.comp, SendGzipAtts} of
+ {true, false} ->
fun att_foldl_unzip/3;
_ ->
+ % receiver knows that the attachment is compressed by checking that the
+ % "gzip_length" field is present in the corresponding JSON attachment
+ % object found within the JSON doc
fun att_foldl/3
end,
AttFun(Att, fun(Data, ok) -> WriteFun(Data) end, ok),
WriteFun(<<"\r\n--", Boundary/binary>>),
- atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos);
-atts_to_mp([_ | RestAtts], Boundary, WriteFun, AttsSinceRevPos) ->
- atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos).
+ atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts);
+atts_to_mp([_ | RestAtts], Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts) ->
+ atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos, SendGzipAtts).
doc_from_multi_part_stream(ContentType, DataFun) ->