summaryrefslogtreecommitdiff
path: root/src/mochiweb/mochiweb_html.erl
diff options
context:
space:
mode:
authorRobert Newson <rnewson@apache.org>2010-07-26 17:21:30 +0000
committerRobert Newson <rnewson@apache.org>2010-07-26 17:21:30 +0000
commit4b0948ddb3a428f8a5330e05745b2fbd4ccf9375 (patch)
tree5ab1dde286028653d5569ceae6dfc883fa365b7a /src/mochiweb/mochiweb_html.erl
parentcd214b23e8129868d4a7020ddafd55a16e496652 (diff)
Add SSL support to CouchDB.
To enable SSL you need to do three things; 1) enable the httpsd daemon in local.ini (you can just uncomment the line). 2) supply your PEM-encoded cert and key files in the [ssl] section. 3) start CouchDB. CouchDB will now, in addition to handling HTTP on port 5984, accept SSL connections on port 6984. The patch itself adds SSL support by updating the local version of Mochiweb to the latest. The upstream release includes our local tweak to support large numbers and to handle Accept-Encoding headers. Our local Mochiweb fork changed the default idle timeout from 10 seconds to 5 minutes, and it was agreed on #irc to revert this change. The only tweaks to Mochiweb were in mochiweb.app.src (to record the git commit I built from) and the removal of Makefile (replaced by Makefile.am). Futon received many tweaks as we have 'http://' hardcoded all over. All such instances now use window.location.protocol + '//'. CouchDB received a tweak to use the right scheme in couch_httpd:absolute_uri (it now gets it from the Mochireq and not mochiweb_socket_server). git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@979368 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/mochiweb/mochiweb_html.erl')
-rw-r--r--src/mochiweb/mochiweb_html.erl668
1 files changed, 418 insertions, 250 deletions
diff --git a/src/mochiweb/mochiweb_html.erl b/src/mochiweb/mochiweb_html.erl
index 77100d50..a15c359c 100644
--- a/src/mochiweb/mochiweb_html.erl
+++ b/src/mochiweb/mochiweb_html.erl
@@ -4,9 +4,9 @@
%% @doc Loosely tokenizes and generates parse trees for HTML 4.
-module(mochiweb_html).
-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
- escape_attr/1, to_html/1, test/0]).
+ escape_attr/1, to_html/1]).
-% This is a macro to placate syntax highlighters..
+%% This is a macro to placate syntax highlighters..
-define(QUOTE, $\").
-define(SQUOTE, $\').
-define(ADV_COL(S, N),
@@ -35,6 +35,8 @@
-define(IS_LITERAL_SAFE(C),
((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
orelse (C >= $0 andalso C =< $9))).
+-define(PROBABLE_CLOSE(C),
+ (C =:= $> orelse ?IS_WHITESPACE(C))).
-record(decoder, {line=1,
column=1,
@@ -89,6 +91,7 @@ to_tokens(T={doctype, _}) ->
to_tokens(T={comment, _}) ->
[T];
to_tokens({Tag0, Acc}) ->
+ %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
to_tokens({Tag0, [], Acc});
to_tokens({Tag0, Attrs, Acc}) ->
Tag = to_tag(Tag0),
@@ -124,40 +127,6 @@ escape_attr(I) when is_integer(I) ->
escape_attr(F) when is_float(F) ->
escape_attr(mochinum:digits(F), []).
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_html.
-test() ->
- test_destack(),
- test_tokens(),
- test_tokens2(),
- test_parse(),
- test_parse2(),
- test_parse_tokens(),
- test_escape(),
- test_escape_attr(),
- test_to_html(),
- ok.
-
-
-%% Internal API
-
-test_to_html() ->
- Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
- Expect = iolist_to_binary(
- to_html({html, [],
- [{<<"head">>, [],
- [{title, <<"hey!">>}]},
- {body, [],
- [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
- {'div', <<"sucka">>},
- {comment, <<" comment! ">>}]}]})),
- Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
- Expect1 = iolist_to_binary(
- to_html({doctype,
- [<<"html">>, <<"PUBLIC">>,
- <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
- <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
- ok.
to_html([], Acc) ->
lists:reverse(Acc);
to_html([{'=', Content} | Rest], Acc) ->
@@ -205,16 +174,6 @@ attrs_to_html([{K, V} | Rest], Acc) ->
[[<<" ">>, escape(K), <<"=\"">>,
escape_attr(V), <<"\"">>] | Acc]).
-test_escape() ->
- <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
- escape(<<"&quot;\"word <<up!&quot;">>),
- ok.
-
-test_escape_attr() ->
- <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
- escape_attr(<<"&quot;\"word <<up!&quot;">>),
- ok.
-
escape([], Acc) ->
list_to_binary(lists:reverse(Acc));
escape("<" ++ Rest, Acc) ->
@@ -257,6 +216,9 @@ to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
%% Allow {comment, iolist()}
to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+ %% Allow {pi, binary(), list()}
+ to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
%% Allow {p, [{"class", "foo"}]}
to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
@@ -290,39 +252,6 @@ to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
Tag = to_tag(Tag0),
to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
-test_tokens() ->
- [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
- {<<"wibble">>, <<"wibble">>},
- {<<"alice">>, <<"bob">>}], true}] =
- tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>),
- [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
- {<<"wibble">>, <<"wibble">>},
- {<<"alice">>, <<"bob">>}], true}] =
- tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>),
- [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] =
- tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>),
- [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
- {data, <<" A= B <= C ">>, false},
- {end_tag, <<"script">>}] =
- tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>),
- [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
- {data, <<" A= B <= C ">>, false},
- {end_tag, <<"script">>}] =
- tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>),
- [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
- {data, <<" A= B <= C ">>, false},
- {end_tag, <<"script">>}] =
- tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>),
- [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
- {data, <<" A= B <= C ">>, false},
- {end_tag, <<"script">>}] =
- tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>),
- [{start_tag, <<"textarea">>, [], false},
- {data, <<"<html></body>">>, false},
- {end_tag, <<"textarea">>}] =
- tokens(<<"<textarea><html></body></textarea>">>),
- ok.
-
tokens(B, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary>> ->
@@ -374,7 +303,8 @@ tokenize(B, S=#decoder{offset=O}) ->
{{end_tag, Tag}, S2};
<<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) ->
%% This isn't really strict HTML
- tokenize_data(B, ?INC_COL(S));
+ {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)),
+ {{data, <<$<, Data/binary>>, false}, S1};
<<_:O/binary, "<", _/binary>> ->
{Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
{Attrs, S2} = tokenize_attributes(B, S1),
@@ -385,149 +315,6 @@ tokenize(B, S=#decoder{offset=O}) ->
tokenize_data(B, S)
end.
-test_parse() ->
- D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
-<html>
- <head>
- <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
- <title>Foo</title>
- <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
- <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
- <!--[if lt IE 7]>
- <style type=\"text/css\">
- .no_ie { display: none; }
- </style>
- <![endif]-->
- <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
- <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
- </head>
- <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
-</html>">>,
- Expect = {<<"html">>, [],
- [{<<"head">>, [],
- [{<<"meta">>,
- [{<<"http-equiv">>,<<"Content-Type">>},
- {<<"content">>,<<"text/html; charset=UTF-8">>}],
- []},
- {<<"title">>,[],[<<"Foo">>]},
- {<<"link">>,
- [{<<"rel">>,<<"stylesheet">>},
- {<<"type">>,<<"text/css">>},
- {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
- {<<"media">>,<<"screen">>}],
- []},
- {<<"link">>,
- [{<<"rel">>,<<"stylesheet">>},
- {<<"type">>,<<"text/css">>},
- {<<"href">>,<<"/static/foo.css">>},
- {<<"media">>,<<"screen">>}],
- []},
- {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>},
- {<<"link">>,
- [{<<"rel">>,<<"icon">>},
- {<<"href">>,<<"/static/images/favicon.ico">>},
- {<<"type">>,<<"image/x-icon">>}],
- []},
- {<<"link">>,
- [{<<"rel">>,<<"shortcut icon">>},
- {<<"href">>,<<"/static/images/favicon.ico">>},
- {<<"type">>,<<"image/x-icon">>}],
- []}]},
- {<<"body">>,
- [{<<"id">>,<<"home">>},
- {<<"class">>,<<"tundra">>}],
- [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
- Expect = parse(D0),
- ok.
-
-test_tokens2() ->
- D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
- Expect = [{start_tag,<<"channel">>,[],false},
- {start_tag,<<"title">>,[],false},
- {data,<<"from __future__ import *">>,false},
- {end_tag,<<"title">>},
- {start_tag,<<"link">>,[],true},
- {data,<<"http://bob.pythonmac.org">>,false},
- {end_tag,<<"link">>},
- {start_tag,<<"description">>,[],false},
- {data,<<"Bob's Rants">>,false},
- {end_tag,<<"description">>},
- {end_tag,<<"channel">>}],
- Expect = tokens(D0),
- ok.
-
-test_parse2() ->
- D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
- Expect = {<<"channel">>,[],
- [{<<"title">>,[],[<<"from __future__ import *">>]},
- {<<"link">>,[],[
- <<"http://bob.pythonmac.org">>,
- {<<"br">>,[],[]},
- <<"foo">>]},
- {<<"description">>,[],[<<"Bob's Rants">>]}]},
- Expect = parse(D0),
- ok.
-
-test_parse_tokens() ->
- D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
- {data,<<"\n">>,true},
- {start_tag,<<"html">>,[],false}],
- {<<"html">>, [], []} = parse_tokens(D0),
- D1 = D0 ++ [{end_tag, <<"html">>}],
- {<<"html">>, [], []} = parse_tokens(D1),
- D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
- {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2),
- D3 = D0 ++ [{start_tag, <<"head">>, [], false},
- {end_tag, <<"head">>},
- {start_tag, <<"body">>, [], false}],
- {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3),
- D4 = D3 ++ [{data,<<"\n">>,true},
- {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
- {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
- {end_tag,<<"a">>},
- {end_tag,<<"div">>},
- {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
- {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
- {end_tag,<<"div">>},
- {end_tag,<<"div">>}],
- {<<"html">>, [],
- [{<<"head">>, [], []},
- {<<"body">>, [],
- [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
- {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
- ]}]} = parse_tokens(D4),
- D5 = [{start_tag,<<"html">>,[],false},
- {data,<<"\n">>,true},
- {data,<<"boo">>,false},
- {data,<<"hoo">>,false},
- {data,<<"\n">>,true},
- {end_tag,<<"html">>}],
- {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5),
- D6 = [{start_tag,<<"html">>,[],false},
- {data,<<"\n">>,true},
- {data,<<"\n">>,true},
- {end_tag,<<"html">>}],
- {<<"html">>, [], []} = parse_tokens(D6),
- D7 = [{start_tag,<<"html">>,[],false},
- {start_tag,<<"ul">>,[],false},
- {start_tag,<<"li">>,[],false},
- {data,<<"word">>,false},
- {start_tag,<<"li">>,[],false},
- {data,<<"up">>,false},
- {end_tag,<<"li">>},
- {start_tag,<<"li">>,[],false},
- {data,<<"fdsa">>,false},
- {start_tag,<<"br">>,[],true},
- {data,<<"asdf">>,false},
- {end_tag,<<"ul">>},
- {end_tag,<<"html">>}],
- {<<"html">>, [],
- [{<<"ul">>, [],
- [{<<"li">>, [], [<<"word">>]},
- {<<"li">>, [], [<<"up">>]},
- {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7),
- ok.
-
tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) ->
tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]);
tree_data(Rest, AllWhitespace, Acc) ->
@@ -556,7 +343,9 @@ tree(L=[{data, _Data, _Whitespace} | _], S) ->
tree(Rest, S);
{Data, false, Rest} ->
tree(Rest, append_stack_child(Data, S))
- end.
+ end;
+tree([{doctype, _} | Rest], Stack) ->
+ tree(Rest, Stack).
norm({Tag, Attrs}) ->
{norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []};
@@ -565,21 +354,6 @@ norm(Tag) when is_binary(Tag) ->
norm(Tag) ->
list_to_binary(string:to_lower(Tag)).
-test_destack() ->
- {<<"a">>, [], []} =
- destack([{<<"a">>, [], []}]),
- {<<"a">>, [], [{<<"b">>, [], []}]} =
- destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
- {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
- destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
- [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
- destack(<<"b">>,
- [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
- [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
- destack(<<"c">>,
- [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
- ok.
-
stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest])
when TN =:= <<"li">> orelse TN =:= <<"option">> ->
[T1 | destack(TN, Stack)];
@@ -719,9 +493,10 @@ find_qgt(Bin, S=#decoder{offset=O}) ->
case Bin of
<<_:O/binary, "?>", _/binary>> ->
?ADV_COL(S, 2);
- <<_:O/binary, C, _/binary>> ->
- find_qgt(Bin, ?INC_CHAR(S, C));
- _ ->
+ %% tokenize_attributes takes care of this state:
+ %% <<_:O/binary, C, _/binary>> ->
+ %% find_qgt(Bin, ?INC_CHAR(S, C));
+ <<_:O/binary>> ->
S
end.
@@ -766,7 +541,7 @@ tokenize_charref(Bin, S=#decoder{offset=O}, Start) ->
<<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
R;
Unichar ->
- list_to_binary(xmerl_ucs:to_utf8(Unichar))
+ mochiutf8:codepoint_to_bytes(Unichar)
end,
{{data, Data, false}, ?INC_COL(S)};
_ ->
@@ -791,11 +566,10 @@ tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) ->
tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
case Bin of
- <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
- {error, {whitespace, [C], S}};
<<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
tokenize_word(Bin, ?INC_COL(S), C);
- _ ->
+ <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) ->
+ %% Sanity check for whitespace
tokenize_literal(Bin, S, [])
end.
@@ -852,13 +626,14 @@ tokenize_script(Bin, S=#decoder{offset=O}) ->
tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
case Bin of
%% Just a look-ahead, we want the end_tag separately
- <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>>
+ <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>>
when (SS =:= $s orelse SS =:= $S) andalso
(CC =:= $c orelse CC =:= $C) andalso
(RR =:= $r orelse RR =:= $R) andalso
(II =:= $i orelse II =:= $I) andalso
(PP =:= $p orelse PP =:= $P) andalso
- (TT=:= $t orelse TT =:= $T) ->
+ (TT=:= $t orelse TT =:= $T) andalso
+ ?PROBABLE_CLOSE(ZZ) ->
Len = O - Start,
<<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
{{data, Raw, false}, S};
@@ -874,7 +649,7 @@ tokenize_textarea(Bin, S=#decoder{offset=O}) ->
tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
case Bin of
%% Just a look-ahead, we want the end_tag separately
- <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>>
+ <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>>
when (TT =:= $t orelse TT =:= $T) andalso
(EE =:= $e orelse EE =:= $E) andalso
(XX =:= $x orelse XX =:= $X) andalso
@@ -882,7 +657,8 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
(AA =:= $a orelse AA =:= $A) andalso
(RR =:= $r orelse RR =:= $R) andalso
(EE2 =:= $e orelse EE2 =:= $E) andalso
- (AA2 =:= $a orelse AA2 =:= $A) ->
+ (AA2 =:= $a orelse AA2 =:= $A) andalso
+ ?PROBABLE_CLOSE(ZZ) ->
Len = O - Start,
<<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
{{data, Raw, false}, S};
@@ -891,3 +667,395 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
<<_:Start/binary, Raw/binary>> ->
{{data, Raw, false}, S}
end.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+to_html_test() ->
+ ?assertEqual(
+ <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>,
+ iolist_to_binary(
+ to_html({html, [],
+ [{<<"head">>, [],
+ [{title, <<"hey!">>}]},
+ {body, [],
+ [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+ {'div', <<"sucka">>},
+ {'=', <<"RAW!">>},
+ {comment, <<" comment! ">>}]}]}))),
+ ?assertEqual(
+ <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+ iolist_to_binary(
+ to_html({doctype,
+ [<<"html">>, <<"PUBLIC">>,
+ <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+ <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))),
+ ?assertEqual(
+ <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>,
+ iolist_to_binary(
+ to_html({<<"html">>,[],
+ [{pi, <<"xml:namespace">>,
+ [{<<"prefix">>,<<"o">>},
+ {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))),
+ ok.
+
+escape_test() ->
+ ?assertEqual(
+ <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape(<<"&quot;\"word ><<up!&quot;">>)),
+ ?assertEqual(
+ <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape("&quot;\"word ><<up!&quot;")),
+ ?assertEqual(
+ <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape('&quot;\"word ><<up!&quot;')),
+ ok.
+
+escape_attr_test() ->
+ ?assertEqual(
+ <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape_attr(<<"&quot;\"word ><<up!&quot;">>)),
+ ?assertEqual(
+ <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape_attr("&quot;\"word ><<up!&quot;")),
+ ?assertEqual(
+ <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+ escape_attr('&quot;\"word ><<up!&quot;')),
+ ?assertEqual(
+ <<"12345">>,
+ escape_attr(12345)),
+ ?assertEqual(
+ <<"1.5">>,
+ escape_attr(1.5)),
+ ok.
+
+tokens_test() ->
+ ?assertEqual(
+ [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+ {<<"wibble">>, <<"wibble">>},
+ {<<"alice">>, <<"bob">>}], true}],
+ tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)),
+ ?assertEqual(
+ [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+ {<<"wibble">>, <<"wibble">>},
+ {<<"alice">>, <<"bob">>}], true}],
+ tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)),
+ ?assertEqual(
+ [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}],
+ tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)),
+ ?assertEqual(
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}],
+ tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)),
+ ?assertEqual(
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}],
+ tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)),
+ ?assertEqual(
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}],
+ tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)),
+ ?assertEqual(
+ [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+ {data, <<" A= B <= C ">>, false},
+ {end_tag, <<"script">>}],
+ tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)),
+ ?assertEqual(
+ [{start_tag, <<"textarea">>, [], false},
+ {data, <<"<html></body>">>, false},
+ {end_tag, <<"textarea">>}],
+ tokens(<<"<textarea><html></body></textarea>">>)),
+ ?assertEqual(
+ [{start_tag, <<"textarea">>, [], false},
+ {data, <<"<html></body></textareaz>">>, false}],
+ tokens(<<"<textarea ><html></body></textareaz>">>)),
+ ?assertEqual(
+ [{pi, <<"xml:namespace">>,
+ [{<<"prefix">>,<<"o">>},
+ {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+ tokens(<<"<?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?>">>)),
+ ?assertEqual(
+ [{pi, <<"xml:namespace">>,
+ [{<<"prefix">>,<<"o">>},
+ {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+ tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office \n?>">>)),
+ ?assertEqual(
+ [{pi, <<"xml:namespace">>,
+ [{<<"prefix">>,<<"o">>},
+ {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+ tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office">>)),
+ ?assertEqual(
+ [{data, <<"<">>, false}],
+ tokens(<<"&lt;">>)),
+ ?assertEqual(
+ [{data, <<"not html ">>, false},
+ {data, <<"< at all">>, false}],
+ tokens(<<"not html < at all">>)),
+ ok.
+
+parse_test() ->
+ D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
+<html>
+ <head>
+ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
+ <title>Foo</title>
+ <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
+ <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
+ <!--[if lt IE 7]>
+ <style type=\"text/css\">
+ .no_ie { display: none; }
+ </style>
+ <![endif]-->
+ <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ </head>
+ <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
+</html>">>,
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"head">>, [],
+ [{<<"meta">>,
+ [{<<"http-equiv">>,<<"Content-Type">>},
+ {<<"content">>,<<"text/html; charset=UTF-8">>}],
+ []},
+ {<<"title">>,[],[<<"Foo">>]},
+ {<<"link">>,
+ [{<<"rel">>,<<"stylesheet">>},
+ {<<"type">>,<<"text/css">>},
+ {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
+ {<<"media">>,<<"screen">>}],
+ []},
+ {<<"link">>,
+ [{<<"rel">>,<<"stylesheet">>},
+ {<<"type">>,<<"text/css">>},
+ {<<"href">>,<<"/static/foo.css">>},
+ {<<"media">>,<<"screen">>}],
+ []},
+ {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>},
+ {<<"link">>,
+ [{<<"rel">>,<<"icon">>},
+ {<<"href">>,<<"/static/images/favicon.ico">>},
+ {<<"type">>,<<"image/x-icon">>}],
+ []},
+ {<<"link">>,
+ [{<<"rel">>,<<"shortcut icon">>},
+ {<<"href">>,<<"/static/images/favicon.ico">>},
+ {<<"type">>,<<"image/x-icon">>}],
+ []}]},
+ {<<"body">>,
+ [{<<"id">>,<<"home">>},
+ {<<"class">>,<<"tundra">>}],
+ [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
+ parse(D0)),
+ ?assertEqual(
+ {<<"html">>,[],
+ [{pi, <<"xml:namespace">>,
+ [{<<"prefix">>,<<"o">>},
+ {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]},
+ parse(
+ <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>)),
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"dd">>, [], [<<"foo">>]},
+ {<<"dt">>, [], [<<"bar">>]}]},
+ parse(<<"<html><dd>foo<dt>bar</html>">>)),
+ %% Singleton sadness
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"link">>, [], []},
+ <<"foo">>,
+ {<<"br">>, [], []},
+ <<"bar">>]},
+ parse(<<"<html><link>foo<br>bar</html>">>)),
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"link">>, [], [<<"foo">>,
+ {<<"br">>, [], []},
+ <<"bar">>]}]},
+ parse(<<"<html><link>foo<br>bar</link></html>">>)),
+ ok.
+
+exhaustive_is_singleton_test() ->
+ T = mochiweb_cover:clause_lookup_table(?MODULE, is_singleton),
+ [?assertEqual(V, is_singleton(K)) || {K, V} <- T].
+
+tokenize_attributes_test() ->
+ ?assertEqual(
+ {<<"foo">>,
+ [{<<"bar">>, <<"b\"az">>},
+ {<<"wibble">>, <<"wibble">>},
+ {<<"taco", 16#c2, 16#a9>>, <<"bell">>},
+ {<<"quux">>, <<"quux">>}],
+ []},
+ parse(<<"<foo bar=\"b&quot;az\" wibble taco&copy;=bell quux">>)),
+ ok.
+
+tokens2_test() ->
+ D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
+ ?assertEqual(
+ [{start_tag,<<"channel">>,[],false},
+ {start_tag,<<"title">>,[],false},
+ {data,<<"from __future__ import *">>,false},
+ {end_tag,<<"title">>},
+ {start_tag,<<"link">>,[],true},
+ {data,<<"http://bob.pythonmac.org">>,false},
+ {end_tag,<<"link">>},
+ {start_tag,<<"description">>,[],false},
+ {data,<<"Bob's Rants">>,false},
+ {end_tag,<<"description">>},
+ {end_tag,<<"channel">>}],
+ tokens(D0)),
+ ok.
+
+to_tokens_test() ->
+ ?assertEqual(
+ [{start_tag, <<"p">>, [{class, 1}], false},
+ {end_tag, <<"p">>}],
+ to_tokens({p, [{class, 1}], []})),
+ ?assertEqual(
+ [{start_tag, <<"p">>, [], false},
+ {end_tag, <<"p">>}],
+ to_tokens({p})),
+ ?assertEqual(
+ [{'=', <<"data">>}],
+ to_tokens({'=', <<"data">>})),
+ ?assertEqual(
+ [{comment, <<"comment">>}],
+ to_tokens({comment, <<"comment">>})),
+ %% This is only allowed in sub-tags:
+ %% {p, [{"class", "foo"}]} as {p, [{"class", "foo"}], []}
+ %% On the outside it's always treated as follows:
+ %% {p, [], [{"class", "foo"}]} as {p, [], [{"class", "foo"}]}
+ ?assertEqual(
+ [{start_tag, <<"html">>, [], false},
+ {start_tag, <<"p">>, [{class, 1}], false},
+ {end_tag, <<"p">>},
+ {end_tag, <<"html">>}],
+ to_tokens({html, [{p, [{class, 1}]}]})),
+ ok.
+
+parse2_test() ->
+ D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
+ ?assertEqual(
+ {<<"channel">>,[],
+ [{<<"title">>,[],[<<"from __future__ import *">>]},
+ {<<"link">>,[],[
+ <<"http://bob.pythonmac.org">>,
+ {<<"br">>,[],[]},
+ <<"foo">>]},
+ {<<"description">>,[],[<<"Bob's Rants">>]}]},
+ parse(D0)),
+ ok.
+
+parse_tokens_test() ->
+ D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
+ {data,<<"\n">>,true},
+ {start_tag,<<"html">>,[],false}],
+ ?assertEqual(
+ {<<"html">>, [], []},
+ parse_tokens(D0)),
+ D1 = D0 ++ [{end_tag, <<"html">>}],
+ ?assertEqual(
+ {<<"html">>, [], []},
+ parse_tokens(D1)),
+ D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
+ ?assertEqual(
+ {<<"html">>, [], [{<<"body">>, [], []}]},
+ parse_tokens(D2)),
+ D3 = D0 ++ [{start_tag, <<"head">>, [], false},
+ {end_tag, <<"head">>},
+ {start_tag, <<"body">>, [], false}],
+ ?assertEqual(
+ {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]},
+ parse_tokens(D3)),
+ D4 = D3 ++ [{data,<<"\n">>,true},
+ {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
+ {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
+ {end_tag,<<"a">>},
+ {end_tag,<<"div">>},
+ {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
+ {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
+ {end_tag,<<"div">>},
+ {end_tag,<<"div">>}],
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"head">>, [], []},
+ {<<"body">>, [],
+ [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
+ {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
+ ]}]},
+ parse_tokens(D4)),
+ D5 = [{start_tag,<<"html">>,[],false},
+ {data,<<"\n">>,true},
+ {data,<<"boo">>,false},
+ {data,<<"hoo">>,false},
+ {data,<<"\n">>,true},
+ {end_tag,<<"html">>}],
+ ?assertEqual(
+ {<<"html">>, [], [<<"\nboohoo\n">>]},
+ parse_tokens(D5)),
+ D6 = [{start_tag,<<"html">>,[],false},
+ {data,<<"\n">>,true},
+ {data,<<"\n">>,true},
+ {end_tag,<<"html">>}],
+ ?assertEqual(
+ {<<"html">>, [], []},
+ parse_tokens(D6)),
+ D7 = [{start_tag,<<"html">>,[],false},
+ {start_tag,<<"ul">>,[],false},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"word">>,false},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"up">>,false},
+ {end_tag,<<"li">>},
+ {start_tag,<<"li">>,[],false},
+ {data,<<"fdsa">>,false},
+ {start_tag,<<"br">>,[],true},
+ {data,<<"asdf">>,false},
+ {end_tag,<<"ul">>},
+ {end_tag,<<"html">>}],
+ ?assertEqual(
+ {<<"html">>, [],
+ [{<<"ul">>, [],
+ [{<<"li">>, [], [<<"word">>]},
+ {<<"li">>, [], [<<"up">>]},
+ {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]},
+ parse_tokens(D7)),
+ ok.
+
+destack_test() ->
+ {<<"a">>, [], []} =
+ destack([{<<"a">>, [], []}]),
+ {<<"a">>, [], [{<<"b">>, [], []}]} =
+ destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
+ {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
+ destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+ [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
+ destack(<<"b">>,
+ [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+ [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
+ destack(<<"c">>,
+ [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
+ ok.
+
+doctype_test() ->
+ ?assertEqual(
+ {<<"html">>,[],[{<<"head">>,[],[]}]},
+ mochiweb_html:parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+ "<html><head></head></body></html>")),
+ %% http://code.google.com/p/mochiweb/issues/detail?id=52
+ ?assertEqual(
+ {<<"html">>,[],[{<<"head">>,[],[]}]},
+ mochiweb_html:parse("<html>"
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+ "<head></head></body></html>")),
+ ok.
+
+-endif.