diff options
Diffstat (limited to 'src/mochiweb/mochiweb_multipart.erl')
-rw-r--r-- | src/mochiweb/mochiweb_multipart.erl | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/src/mochiweb/mochiweb_multipart.erl b/src/mochiweb/mochiweb_multipart.erl new file mode 100644 index 00000000..804273cb --- /dev/null +++ b/src/mochiweb/mochiweb_multipart.erl @@ -0,0 +1,428 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Utilities for parsing multipart/form-data. + +-module(mochiweb_multipart). +-author('bob@mochimedia.com'). + +-export([parse_form/2]). +-export([parse_multipart_request/2]). +-export([test/0]). + +-define(CHUNKSIZE, 4096). + +-record(mp, {state, boundary, length, buffer, callback, req}). + +%% TODO: DOCUMENT THIS MODULE. + +parse_form(Req, FileHandler) -> + Callback = fun (Next) -> parse_form_outer(Next, FileHandler, []) end, + {_, _, Res} = parse_multipart_request(Req, Callback), + Res. + +parse_form_outer(eof, _, Acc) -> + lists:reverse(Acc); +parse_form_outer({headers, H}, FileHandler, State) -> + {"form-data", H1} = proplists:get_value("content-disposition", H), + Name = proplists:get_value("name", H1), + Filename = proplists:get_value("filename", H1), + case Filename of + undefined -> + fun (Next) -> + parse_form_value(Next, {Name, []}, FileHandler, State) + end; + _ -> + ContentType = proplists:get_value("content-type", H), + Handler = FileHandler(Filename, ContentType), + fun (Next) -> + parse_form_file(Next, {Name, Handler}, FileHandler, State) + end + end. + +parse_form_value(body_end, {Name, Acc}, FileHandler, State) -> + Value = binary_to_list(iolist_to_binary(lists:reverse(Acc))), + State1 = [{Name, Value} | State], + fun (Next) -> parse_form_outer(Next, FileHandler, State1) end; +parse_form_value({body, Data}, {Name, Acc}, FileHandler, State) -> + Acc1 = [Data | Acc], + fun (Next) -> parse_form_value(Next, {Name, Acc1}, FileHandler, State) end. + +parse_form_file(body_end, {Name, Handler}, FileHandler, State) -> + Value = Handler(eof), + State1 = [{Name, Value} | State], + fun (Next) -> parse_form_outer(Next, FileHandler, State1) end; +parse_form_file({body, Data}, {Name, Handler}, FileHandler, State) -> + H1 = Handler(Data), + fun (Next) -> parse_form_file(Next, {Name, H1}, FileHandler, State) end. + +parse_multipart_request(Req, Callback) -> + %% TODO: Support chunked? + Length = list_to_integer(Req:get_header_value("content-length")), + Boundary = iolist_to_binary( + get_boundary(Req:get_header_value("content-type"))), + Prefix = <<"\r\n--", Boundary/binary>>, + BS = size(Boundary), + Chunk = read_chunk(Req, Length), + Length1 = Length - size(Chunk), + <<"--", Boundary:BS/binary, "\r\n", Rest/binary>> = Chunk, + feed_mp(headers, #mp{boundary=Prefix, + length=Length1, + buffer=Rest, + callback=Callback, + req=Req}). + +parse_headers(<<>>) -> + []; +parse_headers(Binary) -> + parse_headers(Binary, []). + +parse_headers(Binary, Acc) -> + case find_in_binary(<<"\r\n">>, Binary) of + {exact, N} -> + <<Line:N/binary, "\r\n", Rest/binary>> = Binary, + parse_headers(Rest, [split_header(Line) | Acc]); + not_found -> + lists:reverse([split_header(Binary) | Acc]) + end. + +split_header(Line) -> + {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end, + binary_to_list(Line)), + {string:to_lower(string:strip(Name)), + mochiweb_util:parse_header(Value)}. + +read_chunk(Req, Length) when Length > 0 -> + case Length of + Length when Length < ?CHUNKSIZE -> + Req:recv(Length); + _ -> + Req:recv(?CHUNKSIZE) + end. + +read_more(State=#mp{length=Length, buffer=Buffer, req=Req}) -> + Data = read_chunk(Req, Length), + Buffer1 = <<Buffer/binary, Data/binary>>, + State#mp{length=Length - size(Data), + buffer=Buffer1}. + +feed_mp(headers, State=#mp{buffer=Buffer, callback=Callback}) -> + {State1, P} = case find_in_binary(<<"\r\n\r\n">>, Buffer) of + {exact, N} -> + {State, N}; + _ -> + S1 = read_more(State), + %% Assume headers must be less than ?CHUNKSIZE + {exact, N} = find_in_binary(<<"\r\n\r\n">>, + S1#mp.buffer), + {S1, N} + end, + <<Headers:P/binary, "\r\n\r\n", Rest/binary>> = State1#mp.buffer, + NextCallback = Callback({headers, parse_headers(Headers)}), + feed_mp(body, State1#mp{buffer=Rest, + callback=NextCallback}); +feed_mp(body, State=#mp{boundary=Prefix, buffer=Buffer, callback=Callback}) -> + case find_boundary(Prefix, Buffer) of + {end_boundary, Start, Skip} -> + <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer, + C1 = Callback({body, Data}), + C2 = C1(body_end), + {State#mp.length, Rest, C2(eof)}; + {next_boundary, Start, Skip} -> + <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer, + C1 = Callback({body, Data}), + feed_mp(headers, State#mp{callback=C1(body_end), + buffer=Rest}); + {maybe, Start} -> + <<Data:Start/binary, Rest/binary>> = Buffer, + feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), + buffer=Rest})); + not_found -> + {Data, Rest} = {Buffer, <<>>}, + feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), + buffer=Rest})) + end. + +get_boundary(ContentType) -> + {"multipart/form-data", Opts} = mochiweb_util:parse_header(ContentType), + case proplists:get_value("boundary", Opts) of + S when is_list(S) -> + S + end. + +find_in_binary(B, Data) when size(B) > 0 -> + case size(Data) - size(B) of + Last when Last < 0 -> + partial_find(B, Data, 0, size(Data)); + Last -> + find_in_binary(B, size(B), Data, 0, Last) + end. + +find_in_binary(B, BS, D, N, Last) when N =< Last-> + case D of + <<_:N/binary, B:BS/binary, _/binary>> -> + {exact, N}; + _ -> + find_in_binary(B, BS, D, 1 + N, Last) + end; +find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last -> + partial_find(B, D, N, BS - 1). + +partial_find(_B, _D, _N, 0) -> + not_found; +partial_find(B, D, N, K) -> + <<B1:K/binary, _/binary>> = B, + case D of + <<_Skip:N/binary, B1:K/binary>> -> + {partial, N, K}; + _ -> + partial_find(B, D, 1 + N, K - 1) + end. + +find_boundary(Prefix, Data) -> + case find_in_binary(Prefix, Data) of + {exact, Skip} -> + PrefixSkip = Skip + size(Prefix), + case Data of + <<_:PrefixSkip/binary, "\r\n", _/binary>> -> + {next_boundary, Skip, size(Prefix) + 2}; + <<_:PrefixSkip/binary, "--\r\n", _/binary>> -> + {end_boundary, Skip, size(Prefix) + 4}; + _ when size(Data) < PrefixSkip + 4 -> + %% Underflow + {maybe, Skip}; + _ -> + %% False positive + not_found + end; + {partial, Skip, Length} when (Skip + Length) =:= size(Data) -> + %% Underflow + {maybe, Skip}; + _ -> + not_found + end. + +with_socket_server(ServerFun, ClientFun) -> + {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"}, + {port, 0}, + {loop, ServerFun}]), + Port = mochiweb_socket_server:get(Server, port), + {ok, Client} = gen_tcp:connect("127.0.0.1", Port, + [binary, {active, false}]), + Res = (catch ClientFun(Client)), + mochiweb_socket_server:stop(Server), + Res. + +fake_request(Socket, ContentType, Length) -> + mochiweb_request:new(Socket, + 'POST', + "/multipart", + {1,1}, + mochiweb_headers:make( + [{"content-type", ContentType}, + {"content-length", Length}])). + +test_callback(Expect, [Expect | Rest]) -> + case Rest of + [] -> + ok; + _ -> + fun (Next) -> test_callback(Next, Rest) end + end. + +test_parse3() -> + ContentType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882", + BinContent = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "hidden"}]}}]}, + {body, <<"multipart message">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", "test_file.txt"}]}}, + {"content-type", {"text/plain", []}}]}, + {body, <<"Woo multiline text file\n\nLa la la">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + case gen_tcp:send(Socket, BinContent) of + ok -> + exit(normal) + end + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + + +test_parse2() -> + ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024", + BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "hidden"}]}}]}, + {body, <<"multipart message">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", ""}]}}, + {"content-type", {"application/octet-stream", []}}]}, + {body, <<>>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + case gen_tcp:send(Socket, BinContent) of + ok -> + exit(normal) + end + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +handler_test(Filename, ContentType) -> + fun (Next) -> + handler_test_read(Next, {Filename, ContentType}, []) + end. + +handler_test_read(eof, {Filename, ContentType}, Acc) -> + Value = iolist_to_binary(lists:reverse(Acc)), + {Filename, ContentType, Value}; +handler_test_read(Data, H, Acc) -> + Acc1 = [Data | Acc], + fun (Next) -> handler_test_read(Next, H, Acc1) end. + + +test_parse_form() -> + ContentType = "multipart/form-data; boundary=AaB03x", + "AaB03x" = get_boundary(ContentType), + Content = mochiweb_util:join( + ["--AaB03x", + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--AaB03x", + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "", + "... contents of file1.txt ...", + "--AaB03x--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + ServerFun = fun (Socket) -> + case gen_tcp:send(Socket, BinContent) of + ok -> + exit(normal) + end + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + size(BinContent)), + Res = parse_form(Req, fun handler_test/2), + [{"submit-name", "Larry"}, + {"files", {"file1.txt", {"text/plain",[]}, + <<"... contents of file1.txt ...">>} + }] = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +test_parse() -> + ContentType = "multipart/form-data; boundary=AaB03x", + "AaB03x" = get_boundary(ContentType), + Content = mochiweb_util:join( + ["--AaB03x", + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--AaB03x", + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "", + "... contents of file1.txt ...", + "--AaB03x--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "submit-name"}]}}]}, + {body, <<"Larry">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}}, + {"content-type", {"text/plain", []}}]}, + {body, <<"... contents of file1.txt ...">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + case gen_tcp:send(Socket, BinContent) of + ok -> + exit(normal) + end + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +test_find_boundary() -> + B = <<"\r\n--X">>, + {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>), + {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>), + {end_boundary, 0, 9} = find_boundary(B, <<"\r\n--X--\r\nRest">>), + {end_boundary, 1, 9} = find_boundary(B, <<"!\r\n--X--\r\nRest">>), + not_found = find_boundary(B, <<"--X\r\nRest">>), + {maybe, 0} = find_boundary(B, <<"\r\n--X\r">>), + {maybe, 1} = find_boundary(B, <<"!\r\n--X\r">>), + P = <<"\r\n-----------------------------16037454351082272548568224146">>, + B0 = <<55,212,131,77,206,23,216,198,35,87,252,118,252,8,25,211,132,229, + 182,42,29,188,62,175,247,243,4,4,0,59, 13,10,45,45,45,45,45,45,45, + 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45, + 49,54,48,51,55,52,53,52,51,53,49>>, + {maybe, 30} = find_boundary(P, B0), + ok. + +test_find_in_binary() -> + {exact, 0} = find_in_binary(<<"foo">>, <<"foobarbaz">>), + {exact, 1} = find_in_binary(<<"oo">>, <<"foobarbaz">>), + {exact, 8} = find_in_binary(<<"z">>, <<"foobarbaz">>), + not_found = find_in_binary(<<"q">>, <<"foobarbaz">>), + {partial, 7, 2} = find_in_binary(<<"azul">>, <<"foobarbaz">>), + {exact, 0} = find_in_binary(<<"foobarbaz">>, <<"foobarbaz">>), + {partial, 0, 3} = find_in_binary(<<"foobar">>, <<"foo">>), + {partial, 1, 3} = find_in_binary(<<"foobar">>, <<"afoo">>), + ok. + +test() -> + test_find_in_binary(), + test_find_boundary(), + test_parse(), + test_parse2(), + test_parse3(), + test_parse_form(), + ok. |