From 4b0948ddb3a428f8a5330e05745b2fbd4ccf9375 Mon Sep 17 00:00:00 2001
From: Robert Newson
Date: Mon, 26 Jul 2010 17:21:30 +0000
Subject: Add SSL support to CouchDB.

To enable SSL you need to do three things:

 1) Enable the httpsd daemon in local.ini (you can just uncomment the
    line).
 2) Supply your PEM-encoded cert and key files in the [ssl] section (a
    sample local.ini sketch appears after the patch below).
 3) Start CouchDB.

CouchDB will now, in addition to handling HTTP on port 5984, accept SSL
connections on port 6984.

The patch itself adds SSL support by updating the local version of
Mochiweb to the latest release. That upstream release includes our local
tweaks to support large numbers and to handle Accept-Encoding headers.
Our local Mochiweb fork had changed the default idle timeout from 10
seconds to 5 minutes, and it was agreed on IRC to revert this change.
The only tweaks to Mochiweb were in mochiweb.app.src (to record the git
commit I built from) and the removal of Makefile (replaced by
Makefile.am).

Futon received many tweaks, as it had 'http://' hardcoded all over; all
such instances now use window.location.protocol + '//'. CouchDB received
a tweak to use the right scheme in couch_httpd:absolute_uri (it now gets
the scheme from the Mochiweb request and not mochiweb_socket_server).

git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@979368 13f79535-47bb-0310-9956-ffa450edef68
---
 src/mochiweb/mochiweb_html.erl | 668 ++++++++++++++++++++++++++---------------
 1 file changed, 418 insertions(+), 250 deletions(-)

diff --git a/src/mochiweb/mochiweb_html.erl b/src/mochiweb/mochiweb_html.erl
index 77100d50..a15c359c 100644
--- a/src/mochiweb/mochiweb_html.erl
+++ b/src/mochiweb/mochiweb_html.erl
@@ -4,9 +4,9 @@
 %% @doc Loosely tokenizes and generates parse trees for HTML 4.
 -module(mochiweb_html).
 -export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
-         escape_attr/1, to_html/1, test/0]).
+         escape_attr/1, to_html/1]).
 
-% This is a macro to placate syntax highlighters..
+%% This is a macro to placate syntax highlighters..
 -define(QUOTE, $\").
 -define(SQUOTE, $\').
 -define(ADV_COL(S, N),
@@ -35,6 +35,8 @@
 -define(IS_LITERAL_SAFE(C),
         ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
          orelse (C >= $0 andalso C =< $9))).
+-define(PROBABLE_CLOSE(C),
+        (C =:= $> orelse ?IS_WHITESPACE(C))).
 
 -record(decoder, {line=1,
                   column=1,
@@ -89,6 +91,7 @@ to_tokens(T={doctype, _}) ->
 to_tokens(T={comment, _}) ->
     [T];
 to_tokens({Tag0, Acc}) ->
+    %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
     to_tokens({Tag0, [], Acc});
 to_tokens({Tag0, Attrs, Acc}) ->
     Tag = to_tag(Tag0),
@@ -124,40 +127,6 @@ escape_attr(I) when is_integer(I) ->
 escape_attr(F) when is_float(F) ->
     escape_attr(mochinum:digits(F), []).
 
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_html.
-test() ->
-    test_destack(),
-    test_tokens(),
-    test_tokens2(),
-    test_parse(),
-    test_parse2(),
-    test_parse_tokens(),
-    test_escape(),
-    test_escape_attr(),
-    test_to_html(),
-    ok.
-
-
-%% Internal API
-
-test_to_html() ->
-    Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
-    Expect = iolist_to_binary(
-                 to_html({html, [],
-                          [{<<"head">>, [],
-                            [{title, <<"hey!">>}]},
-                           {body, [],
-                            [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
-                             {'div', <<"sucka">>},
-                             {comment, <<" comment! ">>}]}]})),
-    Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
-    Expect1 = iolist_to_binary(
-                  to_html({doctype,
-                           [<<"html">>, <<"PUBLIC">>,
-                            <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
-                            <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
-    ok.
 to_html([], Acc) ->
     lists:reverse(Acc);
 to_html([{'=', Content} | Rest], Acc) ->
@@ -205,16 +174,6 @@ attrs_to_html([{K, V} | Rest], Acc) ->
                   [[<<" ">>, escape(K), <<"=\"">>, escape_attr(V), <<"\"">>] | Acc]).
 
-test_escape() ->
-    <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
-        escape(<<"&quot;\"word <<up!&quot;">>),
-    ok.
-
-test_escape_attr() ->
-    <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
-        escape_attr(<<"&quot;\"word <<up!&quot;">>),
-    ok.
-
 escape([], Acc) ->
     list_to_binary(lists:reverse(Acc));
 escape("<" ++ Rest, Acc) ->
@@ -257,6 +216,9 @@ to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
 to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
     %% Allow {comment, iolist()}
     to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+    %% Allow {pi, binary(), list()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
 to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
     %% Allow {p, [{"class", "foo"}]}
     to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
@@ -290,39 +252,6 @@ to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
     Tag = to_tag(Tag0),
     to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
 
-test_tokens() ->
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>),
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>),
-    [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] =
-        tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"textarea">>, [], false},
-     {data, <<"<html></body>">>, false},
-     {end_tag, <<"textarea">>}] =
-        tokens(<<"<textarea><html></body></textarea>">>),
-    ok.
-
 tokens(B, S=#decoder{offset=O}, Acc) ->
     case B of
         <<_:O/binary>> ->
@@ -374,7 +303,8 @@ tokenize(B, S=#decoder{offset=O}) ->
             {{end_tag, Tag}, S2};
         <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) ->
             %% This isn't really strict HTML
-            tokenize_data(B, ?INC_COL(S));
+            {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)),
+            {{data, <<$<, Data/binary>>, false}, S1};
         <<_:O/binary, "<", _/binary>> ->
             {Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
             {Attrs, S2} = tokenize_attributes(B, S1),
@@ -385,149 +315,6 @@ tokenize(B, S=#decoder{offset=O}) ->
             tokenize_data(B, S)
     end.
 
-test_parse() ->
-    D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">
-<html>
- <head>
-   <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
-   <title>Foo</title>
-   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
-   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
-   <!--[if lt IE 7]>
-   <style type=\"text/css\">
-     .no_ie { display: none; }
-   </style>
-   <![endif]-->
-   <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
-   <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
- </head>
- <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
-</html>">>,
-    Expect = {<<"html">>, [],
-              [{<<"head">>, [],
-                [{<<"meta">>,
-                  [{<<"http-equiv">>,<<"Content-Type">>},
-                   {<<"content">>,<<"text/html; charset=UTF-8">>}],
-                  []},
-                 {<<"title">>,[],[<<"Foo">>]},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/foo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"shortcut icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []}]},
-               {<<"body">>,
-                [{<<"id">>,<<"home">>},
-                 {<<"class">>,<<"tundra">>}],
-                [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
-    Expect = parse(D0),
-    ok.
-
-test_tokens2() ->
-    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
-    Expect = [{start_tag,<<"channel">>,[],false},
-              {start_tag,<<"title">>,[],false},
-              {data,<<"from __future__ import *">>,false},
-              {end_tag,<<"title">>},
-              {start_tag,<<"link">>,[],true},
-              {data,<<"http://bob.pythonmac.org">>,false},
-              {end_tag,<<"link">>},
-              {start_tag,<<"description">>,[],false},
-              {data,<<"Bob's Rants">>,false},
-              {end_tag,<<"description">>},
-              {end_tag,<<"channel">>}],
-    Expect = tokens(D0),
-    ok.
-
-test_parse2() ->
-    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
-    Expect = {<<"channel">>,[],
-              [{<<"title">>,[],[<<"from __future__ import *">>]},
-               {<<"link">>,[],[
-                 <<"http://bob.pythonmac.org">>,
-                 {<<"br">>,[],[]},
-                 <<"foo">>]},
-               {<<"description">>,[],[<<"Bob's Rants">>]}]},
-    Expect = parse(D0),
-    ok.
-
-test_parse_tokens() ->
-    D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
-          {data,<<"\n">>,true},
-          {start_tag,<<"html">>,[],false}],
-    {<<"html">>, [], []} = parse_tokens(D0),
-    D1 = D0 ++ [{end_tag, <<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D1),
-    D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2),
-    D3 = D0 ++ [{start_tag, <<"head">>, [], false},
-                {end_tag, <<"head">>},
-                {start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3),
-    D4 = D3 ++ [{data,<<"\n">>,true},
-                {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
-                {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
-                {end_tag,<<"a">>},
-                {end_tag,<<"div">>},
-                {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
-                {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
-                {end_tag,<<"div">>},
-                {end_tag,<<"div">>}],
-    {<<"html">>, [],
-     [{<<"head">>, [], []},
-      {<<"body">>, [],
-       [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
-        {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
-       ]}]} = parse_tokens(D4),
-    D5 = [{start_tag,<<"html">>,[],false},
-          {data,<<"\n">>,true},
-          {data,<<"boo">>,false},
-          {data,<<"hoo">>,false},
-          {data,<<"\n">>,true},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5),
-    D6 = [{start_tag,<<"html">>,[],false},
-          {data,<<"\n">>,true},
-          {data,<<"\n">>,true},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D6),
-    D7 = [{start_tag,<<"html">>,[],false},
-          {start_tag,<<"ul">>,[],false},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"word">>,false},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"up">>,false},
-          {end_tag,<<"li">>},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"fdsa">>,false},
-          {start_tag,<<"br">>,[],true},
-          {data,<<"asdf">>,false},
-          {end_tag,<<"ul">>},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [],
-     [{<<"ul">>, [],
-       [{<<"li">>, [], [<<"word">>]},
-        {<<"li">>, [], [<<"up">>]},
-        {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7),
-    ok.
-
 tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) ->
     tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]);
 tree_data(Rest, AllWhitespace, Acc) ->
@@ -556,7 +343,9 @@ tree(L=[{data, _Data, _Whitespace} | _], S) ->
             tree(Rest, S);
         {Data, false, Rest} ->
             tree(Rest, append_stack_child(Data, S))
-    end.
+    end;
+tree([{doctype, _} | Rest], Stack) ->
+    tree(Rest, Stack).
 
 norm({Tag, Attrs}) ->
     {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []};
@@ -565,21 +354,6 @@ norm(Tag) when is_binary(Tag) ->
 norm(Tag) ->
     list_to_binary(string:to_lower(Tag)).
 
-test_destack() ->
-    {<<"a">>, [], []} =
-        destack([{<<"a">>, [], []}]),
-    {<<"a">>, [], [{<<"b">>, [], []}]} =
-        destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
-    {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
-        destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
-    [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
-        destack(<<"b">>,
-                [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
-    [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
-        destack(<<"c">>,
-                [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
-    ok.
-
 stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest])
         when TN =:= <<"li">> orelse TN =:= <<"option">> ->
     [T1 | destack(TN, Stack)];
@@ -719,9 +493,10 @@ find_qgt(Bin, S=#decoder{offset=O}) ->
     case Bin of
         <<_:O/binary, "?>", _/binary>> ->
             ?ADV_COL(S, 2);
-        <<_:O/binary, C, _/binary>> ->
-            find_qgt(Bin, ?INC_CHAR(S, C));
-        _ ->
+        %% tokenize_attributes takes care of this state:
+        %% <<_:O/binary, C, _/binary>> ->
+        %%     find_qgt(Bin, ?INC_CHAR(S, C));
+        <<_:O/binary>> ->
             S
     end.
 
@@ -766,7 +541,7 @@ tokenize_charref(Bin, S=#decoder{offset=O}, Start) ->
                            <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
                            R;
                        Unichar ->
-                           list_to_binary(xmerl_ucs:to_utf8(Unichar))
+                           mochiutf8:codepoint_to_bytes(Unichar)
                    end,
             {{data, Data, false}, ?INC_COL(S)};
         _ ->
@@ -791,11 +566,10 @@ tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) ->
 
 tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
     case Bin of
-        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
-            {error, {whitespace, [C], S}};
         <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
            tokenize_word(Bin, ?INC_COL(S), C);
-        _ ->
+        <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) ->
+            %% Sanity check for whitespace
            tokenize_literal(Bin, S, [])
     end.
 
@@ -852,13 +626,14 @@ tokenize_script(Bin, S=#decoder{offset=O}) ->
 tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>>
+        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>>
             when (SS =:= $s orelse SS =:= $S) andalso
                  (CC =:= $c orelse CC =:= $C) andalso
                  (RR =:= $r orelse RR =:= $R) andalso
                  (II =:= $i orelse II =:= $I) andalso
                  (PP =:= $p orelse PP =:= $P) andalso
-                 (TT=:= $t orelse TT =:= $T) ->
+                 (TT=:= $t orelse TT =:= $T) andalso
+                 ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
@@ -874,7 +649,7 @@ tokenize_textarea(Bin, S=#decoder{offset=O}) ->
 tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>>
+        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>>
             when (TT =:= $t orelse TT =:= $T) andalso
                  (EE =:= $e orelse EE =:= $E) andalso
                  (XX =:= $x orelse XX =:= $X) andalso
@@ -882,7 +657,8 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
                  (AA =:= $a orelse AA =:= $A) andalso
                  (RR =:= $r orelse RR =:= $R) andalso
                  (EE2 =:= $e orelse EE2 =:= $E) andalso
-                 (AA2 =:= $a orelse AA2 =:= $A) ->
+                 (AA2 =:= $a orelse AA2 =:= $A) andalso
+                 ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
@@ -891,3 +667,395 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
         <<_:Start/binary, Raw/binary>> ->
             {{data, Raw, false}, S}
     end.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+to_html_test() ->
+    ?assertEqual(
+       <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>,
+       iolist_to_binary(
+         to_html({html, [],
+                  [{<<"head">>, [],
+                    [{title, <<"hey!">>}]},
+                   {body, [],
+                    [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+                     {'div', <<"sucka">>},
+                     {'=', <<"RAW!">>},
+                     {comment, <<" comment! ">>}]}]}))),
+    ?assertEqual(
+       <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+       iolist_to_binary(
+         to_html({doctype,
+                  [<<"html">>, <<"PUBLIC">>,
+                   <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+                   <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))),
+    ?assertEqual(
+       <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>,
+       iolist_to_binary(
+         to_html({<<"html">>,[],
+                  [{pi, <<"xml:namespace">>,
+                    [{<<"prefix">>,<<"o">>},
+                     {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))),
+    ok.
+
+escape_test() ->
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape('&quot;\"word ><<up!&quot;')),
+    ok.
+
+escape_attr_test() ->
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr('&quot;\"word ><<up!&quot;')),
+    ?assertEqual(
+       <<"12345">>,
+       escape_attr(12345)),
+    ?assertEqual(
+       <<"1.5">>,
+       escape_attr(1.5)),
+    ok.
+
+tokens_test() ->
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)),
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)),
+    ?assertEqual(
+       [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}],
+       tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body>">>, false},
+        {end_tag, <<"textarea">>}],
+       tokens(<<"<textarea><html></body></textarea>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body></textareaz>">>, false}],
+       tokens(<<"
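
For reference, steps 1 and 2 from the commit message map onto local.ini
roughly as follows. This is a minimal sketch, assuming the daemon and
[ssl] option names shipped in the default local.ini of this era of
CouchDB (httpsd, cert_file, key_file); the .pem paths are placeholders
for your own files:

    [daemons]
    ; step 1: uncomment (or add) this line to enable the SSL listener
    httpsd = {couch_httpd, start_link, [https]}

    [ssl]
    ; step 2: point at your PEM-encoded certificate and private key
    cert_file = /full/path/to/server_cert.pem
    key_file = /full/path/to/server_key.pem

After step 3 (restarting CouchDB), the same node should answer HTTP on
port 5984 and HTTPS on port 6984, as described above.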