diff options
Diffstat (limited to 'src/mochiweb/mochiweb_html.erl')
-rw-r--r-- | src/mochiweb/mochiweb_html.erl | 89 |
1 files changed, 82 insertions, 7 deletions
diff --git a/src/mochiweb/mochiweb_html.erl b/src/mochiweb/mochiweb_html.erl index 3c7f9dfc..0ac4826e 100644 --- a/src/mochiweb/mochiweb_html.erl +++ b/src/mochiweb/mochiweb_html.erl @@ -35,7 +35,7 @@ -define(IS_LITERAL_SAFE(C), ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z) orelse (C >= $0 andalso C =< $9))). - + -record(decoder, {line=1, column=1, offset=0}). @@ -202,7 +202,7 @@ attrs_to_html([{K, V} | Rest], Acc) -> attrs_to_html(Rest, [[<<" ">>, escape(K), <<"=\"">>, escape_attr(V), <<"\"">>] | Acc]). - + test_escape() -> <<"&quot;\"word <<up!&quot;">> = escape(<<""\"word <<up!"">>), @@ -299,6 +299,14 @@ test_tokens() -> tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>), [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] = tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>), + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}] = + tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>), + [{start_tag, <<"textarea">>, [], false}, + {data, <<"<html></body>">>, false}, + {end_tag, <<"textarea">>}] = + tokens(<<"<textarea><html></body></textarea>">>), ok. tokens(B, S=#decoder{offset=O}, Acc) -> @@ -307,9 +315,30 @@ tokens(B, S=#decoder{offset=O}, Acc) -> lists:reverse(Acc); _ -> {Tag, S1} = tokenize(B, S), - tokens(B, S1, [Tag | Acc]) + case parse_flag(Tag) of + script -> + {Tag2, S2} = tokenize_script(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + textarea -> + {Tag2, S2} = tokenize_textarea(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + none -> + tokens(B, S1, [Tag | Acc]) + end end. +parse_flag({start_tag, B, _, false}) -> + case string:to_lower(binary_to_list(B)) of + "script" -> + script; + "textarea" -> + textarea; + _ -> + none + end; +parse_flag(_) -> + none. + tokenize(B, S=#decoder{offset=O}) -> case B of <<_:O/binary, "<!--", _/binary>> -> @@ -330,7 +359,7 @@ tokenize(B, S=#decoder{offset=O}) -> {S2, _} = find_gt(B, S1), {{end_tag, Tag}, S2}; <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) -> - %% This isn't really strict HTML but we want this for markdown + %% This isn't really strict HTML tokenize_data(B, ?INC_COL(S)); <<_:O/binary, "<", _/binary>> -> {Tag, S1} = tokenize_literal(B, ?INC_COL(S)), @@ -524,7 +553,7 @@ append_stack_child(StartTag, [{Name, Attrs, Acc} | Stack]) -> destack(TagName, Stack) when is_list(Stack) -> F = fun (X) -> - case X of + case X of {TagName, _, _} -> false; _ -> @@ -542,7 +571,7 @@ destack(TagName, Stack) when is_list(Stack) -> %% Unfurl up to the tag, then accumulate it [{T0, A0, [destack(Pre ++ [T]) | Acc0]} | Post] end. - + destack([{Tag, Attrs, Acc}]) -> {Tag, Attrs, lists:reverse(Acc)}; destack([{T1, A1, Acc1}, {T0, A0, Acc0} | Rest]) -> @@ -655,7 +684,7 @@ find_gt(Bin, S=#decoder{offset=O}, HasSlash) -> tokenize_charref(Bin, S=#decoder{offset=O}) -> tokenize_charref(Bin, S, O). - + tokenize_charref(Bin, S=#decoder{offset=O}, Start) -> case Bin of <<_:O/binary>> -> @@ -758,3 +787,49 @@ tokenize_comment(Bin, S=#decoder{offset=O}, Start) -> <<_:Start/binary, Raw/binary>> -> {{comment, Raw}, S} end. + +tokenize_script(Bin, S=#decoder{offset=O}) -> + tokenize_script(Bin, S, O). + +tokenize_script(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>> + when (SS =:= $s orelse SS =:= $S) andalso + (CC =:= $c orelse CC =:= $C) andalso + (RR =:= $r orelse RR =:= $R) andalso + (II =:= $i orelse II =:= $I) andalso + (PP =:= $p orelse PP =:= $P) andalso + (TT=:= $t orelse TT =:= $T) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_script(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. + +tokenize_textarea(Bin, S=#decoder{offset=O}) -> + tokenize_textarea(Bin, S, O). + +tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>> + when (TT =:= $t orelse TT =:= $T) andalso + (EE =:= $e orelse EE =:= $E) andalso + (XX =:= $x orelse XX =:= $X) andalso + (TT2 =:= $t orelse TT2 =:= $T) andalso + (AA =:= $a orelse AA =:= $A) andalso + (RR =:= $r orelse RR =:= $R) andalso + (EE2 =:= $e orelse EE2 =:= $E) andalso + (AA2 =:= $a orelse AA2 =:= $A) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_textarea(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. |