From ee09a0de9f8356abe24a0ac0f26cdff35f8fa704 Mon Sep 17 00:00:00 2001 From: John Christopher Anderson Date: Fri, 29 Jan 2010 22:43:33 +0000 Subject: Allow storing attachments in compressed form. Closes COUCHDB-583. Thanks Filipe Manana git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@904650 13f79535-47bb-0310-9956-ffa450edef68 --- test/etap/030-doc-from-json.t | 14 +- test/etap/031-doc-to-json.t | 17 +- test/etap/050-stream.t | 6 +- test/etap/140-attachment-comp.t | 553 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 580 insertions(+), 10 deletions(-) create mode 100755 test/etap/140-attachment-comp.t (limited to 'test') diff --git a/test/etap/030-doc-from-json.t b/test/etap/030-doc-from-json.t index dc3327aa..359a75f9 100755 --- a/test/etap/030-doc-from-json.t +++ b/test/etap/030-doc-from-json.t @@ -17,7 +17,11 @@ %% XXX: Figure out how to -include("couch_db.hrl") -record(doc, {id= <<"">>, revs={0, []}, body={[]}, atts=[], deleted=false, meta=[]}). --record(att, {name, type, len, md5= <<>>, revpos=0, data}). +-record(att, {name, type, att_len, disk_len, md5= <<>>, revpos=0, data, + comp=false}). + +default_config() -> + test_util:build_file("etc/couchdb/default_dev.ini"). main(_) -> test_util:init_code_path(), @@ -32,6 +36,8 @@ main(_) -> ok. test() -> + couch_config:start_link([default_config()]), + couch_config:set("attachments", "compression_level", "0"), ok = test_from_json_success(), ok = test_from_json_errors(), ok. @@ -85,13 +91,15 @@ test_from_json_success() -> name = <<"my_attachment.fu">>, data = stub, type = <<"application/awesome">>, - len = 45 + att_len = 45, + disk_len = 45 }, #att{ name = <<"noahs_private_key.gpg">>, data = <<"I have a pet fish!">>, type = <<"application/pgp-signature">>, - len = 18 + att_len = 18, + disk_len = 18 } ]}, "Attachments are parsed correctly." diff --git a/test/etap/031-doc-to-json.t b/test/etap/031-doc-to-json.t index 4e7a175f..89cf4bc9 100755 --- a/test/etap/031-doc-to-json.t +++ b/test/etap/031-doc-to-json.t @@ -17,7 +17,11 @@ %% XXX: Figure out how to -include("couch_db.hrl") -record(doc, {id= <<"">>, revs={0, []}, body={[]}, atts=[], deleted=false, meta=[]}). --record(att, {name, type, len, md5= <<>>, revpos=0, data}). +-record(att, {name, type, att_len, disk_len, md5= <<>>, revpos=0, data, + comp=false}). + +default_config() -> + test_util:build_file("etc/couchdb/default_dev.ini"). main(_) -> test_util:init_code_path(), @@ -32,6 +36,8 @@ main(_) -> ok. test() -> + couch_config:start_link([default_config()]), + couch_config:set("attachments", "compression_level", "0"), ok = test_to_json_success(), ok. @@ -116,14 +122,16 @@ test_to_json_success() -> type = <<"xml/sucks">>, data = fun() -> ok end, revpos = 1, - len = 400 + att_len = 400, + disk_len = 400 }, #att{ name = <<"fast.json">>, type = <<"json/ftw">>, data = <<"{\"so\": \"there!\"}">>, revpos = 1, - len = 16 + att_len = 16, + disk_len = 16 } ]}, {[ @@ -153,7 +161,8 @@ test_to_json_success() -> type = <<"text/plain">>, data = fun() -> <<"diet pepsi">> end, revpos = 1, - len = 10 + att_len = 10, + disk_len = 10 }, #att{ name = <<"food.now">>, diff --git a/test/etap/050-stream.t b/test/etap/050-stream.t index 9324916c..545dd524 100755 --- a/test/etap/050-stream.t +++ b/test/etap/050-stream.t @@ -42,7 +42,7 @@ test() -> etap:is(ok, couch_stream:write(Stream, <<>>), "Writing an empty binary does nothing."), - {Ptrs, Length, _} = couch_stream:close(Stream), + {Ptrs, Length, _, _, _} = couch_stream:close(Stream), etap:is(Ptrs, [0], "Close returns the file pointers."), etap:is(Length, 8, "Close also returns the number of bytes written."), etap:is(<<"foodfoob">>, read_all(Fd, Ptrs), "Returned pointers are valid."), @@ -58,7 +58,7 @@ test() -> etap:is(ok, couch_stream:write(Stream2, ZeroBits), "Successfully wrote 80 0 bits."), - {Ptrs2, Length2, _} = couch_stream:close(Stream2), + {Ptrs2, Length2, _, _, _} = couch_stream:close(Stream2), etap:is(Ptrs2, [ExpPtr], "Closing stream returns the file pointers."), etap:is(Length2, 20, "Length written is 160 bytes."), @@ -73,7 +73,7 @@ test() -> couch_stream:write(Stream3, Data), [Data | Acc] end, [], lists:seq(1, 1024)), - {Ptrs3, Length3, _} = couch_stream:close(Stream3), + {Ptrs3, Length3, _, _, _} = couch_stream:close(Stream3), % 4095 because of 5 * 4096 rem 5 (last write before exceeding threshold) % + 5 puts us over the threshold diff --git a/test/etap/140-attachment-comp.t b/test/etap/140-attachment-comp.t new file mode 100755 index 00000000..cb3039e6 --- /dev/null +++ b/test/etap/140-attachment-comp.t @@ -0,0 +1,553 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + test_util:build_file("etc/couchdb/default_dev.ini"). + +test_db_name() -> + <<"couch_test_atts_compression">>. + +main(_) -> + test_util:init_code_path(), + + etap:plan(57), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + couch_server_sup:start_link([default_config()]), + put(addr, couch_config:get("httpd", "bind_address", "127.0.0.1")), + put(port, couch_config:get("httpd", "port", "5984")), + application:start(inets), + timer:sleep(1000), + couch_server:delete(test_db_name(), []), + couch_db:create(test_db_name(), []), + + couch_config:set("attachments", "compression_level", "8"), + couch_config:set("attachments", "compressible_types", "text/*"), + + create_1st_text_att(), + create_1st_png_att(), + create_2nd_text_att(), + create_2nd_png_att(), + + tests_for_1st_text_att(), + tests_for_1st_png_att(), + tests_for_2nd_text_att(), + tests_for_2nd_png_att(), + + timer:sleep(3000), % to avoid mochiweb socket closed exceptions + couch_server:delete(test_db_name(), []), + couch_server_sup:stop(), + ok. + +db_url() -> + "http://" ++ get(addr) ++ ":" ++ get(port) ++ "/" ++ + binary_to_list(test_db_name()). + +create_1st_text_att() -> + {ok, {{_, Code, _}, _Headers, _Body}} = http:request( + put, + {db_url() ++ "/testdoc1/readme.txt", [], + "text/plain", test_text_data()}, + [], + [{sync, true}]), + etap:is(Code, 201, "Created text attachment using the standalone api"), + ok. + +create_1st_png_att() -> + {ok, {{_, Code, _}, _Headers, _Body}} = http:request( + put, + {db_url() ++ "/testdoc2/icon.png", [], + "image/png", test_png_data()}, + [], + [{sync, true}]), + etap:is(Code, 201, "Created png attachment using the standalone api"), + ok. + +% create a text attachment using the non-standalone attachment api +create_2nd_text_att() -> + DocJson = {[ + {<<"_attachments">>, {[ + {<<"readme.txt">>, {[ + {<<"content_type">>, <<"text/plain">>}, + {<<"data">>, base64:encode(test_text_data())} + ]} + }]}} + ]}, + {ok, {{_, Code, _}, _Headers, _Body}} = http:request( + put, + {db_url() ++ "/testdoc3", [], + "application/json", list_to_binary(couch_util:json_encode(DocJson))}, + [], + [{sync, true}]), + etap:is(Code, 201, "Created text attachment using the non-standalone api"), + ok. + +% create a png attachment using the non-standalone attachment api +create_2nd_png_att() -> + DocJson = {[ + {<<"_attachments">>, {[ + {<<"icon.png">>, {[ + {<<"content_type">>, <<"image/png">>}, + {<<"data">>, base64:encode(test_png_data())} + ]} + }]}} + ]}, + {ok, {{_, Code, _}, _Headers, _Body}} = http:request( + put, + {db_url() ++ "/testdoc4", [], + "application/json", list_to_binary(couch_util:json_encode(DocJson))}, + [], + [{sync, true}]), + etap:is(Code, 201, "Created png attachment using the non-standalone api"), + ok. + +tests_for_1st_text_att() -> + test_get_1st_text_att_with_accept_encoding_gzip(), + test_get_1st_text_att_without_accept_encoding_header(), + test_get_1st_text_att_with_accept_encoding_deflate(), + test_get_1st_text_att_with_accept_encoding_deflate_only(), + test_get_doc_with_1st_text_att(), + test_length_1st_text_att_stub(). + +tests_for_1st_png_att() -> + test_get_1st_png_att_without_accept_encoding_header(), + test_get_1st_png_att_with_accept_encoding_gzip(), + test_get_1st_png_att_with_accept_encoding_deflate(), + test_get_doc_with_1st_png_att(), + test_length_1st_png_att_stub(). + +tests_for_2nd_text_att() -> + test_get_2nd_text_att_with_accept_encoding_gzip(), + test_get_2nd_text_att_without_accept_encoding_header(), + test_get_doc_with_2nd_text_att(), + test_length_2nd_text_att_stub(). + +tests_for_2nd_png_att() -> + test_get_2nd_png_att_without_accept_encoding_header(), + test_get_2nd_png_att_with_accept_encoding_gzip(), + test_get_doc_with_2nd_png_att(), + test_length_2nd_png_att_stub(). + +test_get_1st_text_att_with_accept_encoding_gzip() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc1/readme.txt", [{"Accept-Encoding", "gzip"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, true, "received body is gziped"), + Uncompressed = binary_to_list(zlib:gunzip(list_to_binary(Body))), + etap:is( + Uncompressed, + test_text_data(), + "received data for the 1st text attachment is ok" + ), + ok. + +test_get_1st_text_att_without_accept_encoding_header() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc1/readme.txt", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_text_data(), + "received data for the 1st text attachment is ok" + ), + ok. + +test_get_1st_text_att_with_accept_encoding_deflate() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc1/readme.txt", [{"Accept-Encoding", "deflate"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + Deflated = lists:member({"content-encoding", "deflate"}, Headers), + etap:is(Deflated, false, "received body is not deflated"), + etap:is( + Body, + test_text_data(), + "received data for the 1st text attachment is ok" + ), + ok. + +test_get_1st_text_att_with_accept_encoding_deflate_only() -> + {ok, {{_, Code, _}, _Headers, _Body}} = http:request( + get, + {db_url() ++ "/testdoc1/readme.txt", + [{"Accept-Encoding", "deflate, *;q=0"}]}, + [], + [{sync, true}]), + etap:is( + Code, + 406, + "HTTP response code is 406 for an unsupported content encoding request" + ), + ok. + +test_get_1st_png_att_without_accept_encoding_header() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc2/icon.png", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_png_data(), + "received data for the 1st png attachment is ok" + ), + ok. + +test_get_1st_png_att_with_accept_encoding_gzip() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc2/icon.png", [{"Accept-Encoding", "gzip"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_png_data(), + "received data for the 1st png attachment is ok" + ), + ok. + +test_get_1st_png_att_with_accept_encoding_deflate() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc2/icon.png", [{"Accept-Encoding", "deflate"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Deflated = lists:member({"content-encoding", "deflate"}, Headers), + etap:is(Deflated, false, "received body is not deflated"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_png_data(), + "received data for the 1st png attachment is ok" + ), + ok. + +test_get_doc_with_1st_text_att() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc1?attachments=true", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + TextAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"readme.txt">>] + ), + TextAttType = couch_util:get_nested_json_value( + TextAttJson, + [<<"content_type">>] + ), + TextAttData = couch_util:get_nested_json_value( + TextAttJson, + [<<"data">>] + ), + etap:is( + TextAttType, + <<"text/plain">>, + "1st text attachment has type text/plain" + ), + %% check the attachment's data is the base64 encoding of the plain text + %% and not the base64 encoding of the gziped plain text + etap:is( + TextAttData, + base64:encode(test_text_data()), + "1st text attachment data is properly base64 encoded" + ), + ok. + +test_length_1st_text_att_stub() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc1", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + TextAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"readme.txt">>] + ), + TextAttLength = couch_util:get_nested_json_value( + TextAttJson, + [<<"length">>] + ), + etap:is( + TextAttLength, + length(test_text_data()), + "1st text attachment stub length matches the uncompressed length" + ), + ok. + +test_get_doc_with_1st_png_att() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc2?attachments=true", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + PngAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"icon.png">>] + ), + PngAttType = couch_util:get_nested_json_value( + PngAttJson, + [<<"content_type">>] + ), + PngAttData = couch_util:get_nested_json_value( + PngAttJson, + [<<"data">>] + ), + etap:is(PngAttType, <<"image/png">>, "attachment has type image/png"), + etap:is( + PngAttData, + base64:encode(test_png_data()), + "1st png attachment data is properly base64 encoded" + ), + ok. + +test_length_1st_png_att_stub() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc2", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + PngAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"icon.png">>] + ), + PngAttLength = couch_util:get_nested_json_value( + PngAttJson, + [<<"length">>] + ), + etap:is( + PngAttLength, + length(test_png_data()), + "1st png attachment stub length matches the uncompressed length" + ), + ok. + +test_get_2nd_text_att_with_accept_encoding_gzip() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc3/readme.txt", [{"Accept-Encoding", "gzip"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, true, "received body is gziped"), + Uncompressed = binary_to_list(zlib:gunzip(list_to_binary(Body))), + etap:is( + Uncompressed, + test_text_data(), + "received data for the 2nd text attachment is ok" + ), + ok. + +test_get_2nd_text_att_without_accept_encoding_header() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc3/readme.txt", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_text_data(), + "received data for the 2nd text attachment is ok" + ), + ok. + +test_get_2nd_png_att_without_accept_encoding_header() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc4/icon.png", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_png_data(), + "received data for the 2nd png attachment is ok" + ), + ok. + +test_get_2nd_png_att_with_accept_encoding_gzip() -> + {ok, {{_, Code, _}, Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc4/icon.png", [{"Accept-Encoding", "gzip"}]}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Gziped = lists:member({"content-encoding", "gzip"}, Headers), + etap:is(Gziped, false, "received body is not gziped"), + etap:is( + Body, + test_png_data(), + "received data for the 2nd png attachment is ok" + ), + ok. + +test_get_doc_with_2nd_text_att() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc3?attachments=true", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + TextAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"readme.txt">>] + ), + TextAttType = couch_util:get_nested_json_value( + TextAttJson, + [<<"content_type">>] + ), + TextAttData = couch_util:get_nested_json_value( + TextAttJson, + [<<"data">>] + ), + etap:is(TextAttType, <<"text/plain">>, "attachment has type text/plain"), + %% check the attachment's data is the base64 encoding of the plain text + %% and not the base64 encoding of the gziped plain text + etap:is( + TextAttData, + base64:encode(test_text_data()), + "2nd text attachment data is properly base64 encoded" + ), + ok. + +test_length_2nd_text_att_stub() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc3", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + TextAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"readme.txt">>] + ), + TextAttLength = couch_util:get_nested_json_value( + TextAttJson, + [<<"length">>] + ), + etap:is( + TextAttLength, + length(test_text_data()), + "2nd text attachment stub length matches the uncompressed length" + ), + ok. + +test_get_doc_with_2nd_png_att() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc4?attachments=true", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + PngAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"icon.png">>] + ), + PngAttType = couch_util:get_nested_json_value( + PngAttJson, + [<<"content_type">>] + ), + PngAttData = couch_util:get_nested_json_value( + PngAttJson, + [<<"data">>] + ), + etap:is(PngAttType, <<"image/png">>, "attachment has type image/png"), + etap:is( + PngAttData, + base64:encode(test_png_data()), + "2nd png attachment data is properly base64 encoded" + ), + ok. + +test_length_2nd_png_att_stub() -> + {ok, {{_, Code, _}, _Headers, Body}} = http:request( + get, + {db_url() ++ "/testdoc4", []}, + [], + [{sync, true}]), + etap:is(Code, 200, "HTTP response code is 200"), + Json = couch_util:json_decode(Body), + PngAttJson = couch_util:get_nested_json_value( + Json, + [<<"_attachments">>, <<"icon.png">>] + ), + PngAttLength = couch_util:get_nested_json_value( + PngAttJson, + [<<"length">>] + ), + etap:is( + PngAttLength, + length(test_png_data()), + "2nd png attachment stub length matches the uncompressed length" + ), + ok. + +test_png_data() -> + {ok, Data} = file:read_file( + test_util:source_file("share/www/image/logo.png") + ), + binary_to_list(Data). + +test_text_data() -> + {ok, Data} = file:read_file( + test_util:source_file("README") + ), + binary_to_list(Data). -- cgit v1.2.3