summaryrefslogtreecommitdiff
path: root/deps/mochiweb/src/mochiweb_charref.erl
diff options
context:
space:
mode:
Diffstat (limited to 'deps/mochiweb/src/mochiweb_charref.erl')
-rw-r--r--deps/mochiweb/src/mochiweb_charref.erl308
1 files changed, 308 insertions, 0 deletions
diff --git a/deps/mochiweb/src/mochiweb_charref.erl b/deps/mochiweb/src/mochiweb_charref.erl
new file mode 100644
index 00000000..99cd5502
--- /dev/null
+++ b/deps/mochiweb/src/mochiweb_charref.erl
@@ -0,0 +1,308 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Converts HTML 4 charrefs and entities to codepoints.
+-module(mochiweb_charref).
+-export([charref/1]).
+
+%% External API.
+
+%% @spec charref(S) -> integer() | undefined
+%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
+%% codepoint, or return undefined on failure.
+%% The input should not include an ampersand or semicolon.
+%% charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38.
+charref(B) when is_binary(B) ->
+ charref(binary_to_list(B));
+charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
+ try erlang:list_to_integer(L, 16)
+ catch
+ error:badarg -> undefined
+ end;
+charref([$# | L]) ->
+ try list_to_integer(L)
+ catch
+ error:badarg -> undefined
+ end;
+charref(L) ->
+ entity(L).
+
+%% Internal API.
+
+entity("nbsp") -> 160;
+entity("iexcl") -> 161;
+entity("cent") -> 162;
+entity("pound") -> 163;
+entity("curren") -> 164;
+entity("yen") -> 165;
+entity("brvbar") -> 166;
+entity("sect") -> 167;
+entity("uml") -> 168;
+entity("copy") -> 169;
+entity("ordf") -> 170;
+entity("laquo") -> 171;
+entity("not") -> 172;
+entity("shy") -> 173;
+entity("reg") -> 174;
+entity("macr") -> 175;
+entity("deg") -> 176;
+entity("plusmn") -> 177;
+entity("sup2") -> 178;
+entity("sup3") -> 179;
+entity("acute") -> 180;
+entity("micro") -> 181;
+entity("para") -> 182;
+entity("middot") -> 183;
+entity("cedil") -> 184;
+entity("sup1") -> 185;
+entity("ordm") -> 186;
+entity("raquo") -> 187;
+entity("frac14") -> 188;
+entity("frac12") -> 189;
+entity("frac34") -> 190;
+entity("iquest") -> 191;
+entity("Agrave") -> 192;
+entity("Aacute") -> 193;
+entity("Acirc") -> 194;
+entity("Atilde") -> 195;
+entity("Auml") -> 196;
+entity("Aring") -> 197;
+entity("AElig") -> 198;
+entity("Ccedil") -> 199;
+entity("Egrave") -> 200;
+entity("Eacute") -> 201;
+entity("Ecirc") -> 202;
+entity("Euml") -> 203;
+entity("Igrave") -> 204;
+entity("Iacute") -> 205;
+entity("Icirc") -> 206;
+entity("Iuml") -> 207;
+entity("ETH") -> 208;
+entity("Ntilde") -> 209;
+entity("Ograve") -> 210;
+entity("Oacute") -> 211;
+entity("Ocirc") -> 212;
+entity("Otilde") -> 213;
+entity("Ouml") -> 214;
+entity("times") -> 215;
+entity("Oslash") -> 216;
+entity("Ugrave") -> 217;
+entity("Uacute") -> 218;
+entity("Ucirc") -> 219;
+entity("Uuml") -> 220;
+entity("Yacute") -> 221;
+entity("THORN") -> 222;
+entity("szlig") -> 223;
+entity("agrave") -> 224;
+entity("aacute") -> 225;
+entity("acirc") -> 226;
+entity("atilde") -> 227;
+entity("auml") -> 228;
+entity("aring") -> 229;
+entity("aelig") -> 230;
+entity("ccedil") -> 231;
+entity("egrave") -> 232;
+entity("eacute") -> 233;
+entity("ecirc") -> 234;
+entity("euml") -> 235;
+entity("igrave") -> 236;
+entity("iacute") -> 237;
+entity("icirc") -> 238;
+entity("iuml") -> 239;
+entity("eth") -> 240;
+entity("ntilde") -> 241;
+entity("ograve") -> 242;
+entity("oacute") -> 243;
+entity("ocirc") -> 244;
+entity("otilde") -> 245;
+entity("ouml") -> 246;
+entity("divide") -> 247;
+entity("oslash") -> 248;
+entity("ugrave") -> 249;
+entity("uacute") -> 250;
+entity("ucirc") -> 251;
+entity("uuml") -> 252;
+entity("yacute") -> 253;
+entity("thorn") -> 254;
+entity("yuml") -> 255;
+entity("fnof") -> 402;
+entity("Alpha") -> 913;
+entity("Beta") -> 914;
+entity("Gamma") -> 915;
+entity("Delta") -> 916;
+entity("Epsilon") -> 917;
+entity("Zeta") -> 918;
+entity("Eta") -> 919;
+entity("Theta") -> 920;
+entity("Iota") -> 921;
+entity("Kappa") -> 922;
+entity("Lambda") -> 923;
+entity("Mu") -> 924;
+entity("Nu") -> 925;
+entity("Xi") -> 926;
+entity("Omicron") -> 927;
+entity("Pi") -> 928;
+entity("Rho") -> 929;
+entity("Sigma") -> 931;
+entity("Tau") -> 932;
+entity("Upsilon") -> 933;
+entity("Phi") -> 934;
+entity("Chi") -> 935;
+entity("Psi") -> 936;
+entity("Omega") -> 937;
+entity("alpha") -> 945;
+entity("beta") -> 946;
+entity("gamma") -> 947;
+entity("delta") -> 948;
+entity("epsilon") -> 949;
+entity("zeta") -> 950;
+entity("eta") -> 951;
+entity("theta") -> 952;
+entity("iota") -> 953;
+entity("kappa") -> 954;
+entity("lambda") -> 955;
+entity("mu") -> 956;
+entity("nu") -> 957;
+entity("xi") -> 958;
+entity("omicron") -> 959;
+entity("pi") -> 960;
+entity("rho") -> 961;
+entity("sigmaf") -> 962;
+entity("sigma") -> 963;
+entity("tau") -> 964;
+entity("upsilon") -> 965;
+entity("phi") -> 966;
+entity("chi") -> 967;
+entity("psi") -> 968;
+entity("omega") -> 969;
+entity("thetasym") -> 977;
+entity("upsih") -> 978;
+entity("piv") -> 982;
+entity("bull") -> 8226;
+entity("hellip") -> 8230;
+entity("prime") -> 8242;
+entity("Prime") -> 8243;
+entity("oline") -> 8254;
+entity("frasl") -> 8260;
+entity("weierp") -> 8472;
+entity("image") -> 8465;
+entity("real") -> 8476;
+entity("trade") -> 8482;
+entity("alefsym") -> 8501;
+entity("larr") -> 8592;
+entity("uarr") -> 8593;
+entity("rarr") -> 8594;
+entity("darr") -> 8595;
+entity("harr") -> 8596;
+entity("crarr") -> 8629;
+entity("lArr") -> 8656;
+entity("uArr") -> 8657;
+entity("rArr") -> 8658;
+entity("dArr") -> 8659;
+entity("hArr") -> 8660;
+entity("forall") -> 8704;
+entity("part") -> 8706;
+entity("exist") -> 8707;
+entity("empty") -> 8709;
+entity("nabla") -> 8711;
+entity("isin") -> 8712;
+entity("notin") -> 8713;
+entity("ni") -> 8715;
+entity("prod") -> 8719;
+entity("sum") -> 8721;
+entity("minus") -> 8722;
+entity("lowast") -> 8727;
+entity("radic") -> 8730;
+entity("prop") -> 8733;
+entity("infin") -> 8734;
+entity("ang") -> 8736;
+entity("and") -> 8743;
+entity("or") -> 8744;
+entity("cap") -> 8745;
+entity("cup") -> 8746;
+entity("int") -> 8747;
+entity("there4") -> 8756;
+entity("sim") -> 8764;
+entity("cong") -> 8773;
+entity("asymp") -> 8776;
+entity("ne") -> 8800;
+entity("equiv") -> 8801;
+entity("le") -> 8804;
+entity("ge") -> 8805;
+entity("sub") -> 8834;
+entity("sup") -> 8835;
+entity("nsub") -> 8836;
+entity("sube") -> 8838;
+entity("supe") -> 8839;
+entity("oplus") -> 8853;
+entity("otimes") -> 8855;
+entity("perp") -> 8869;
+entity("sdot") -> 8901;
+entity("lceil") -> 8968;
+entity("rceil") -> 8969;
+entity("lfloor") -> 8970;
+entity("rfloor") -> 8971;
+entity("lang") -> 9001;
+entity("rang") -> 9002;
+entity("loz") -> 9674;
+entity("spades") -> 9824;
+entity("clubs") -> 9827;
+entity("hearts") -> 9829;
+entity("diams") -> 9830;
+entity("quot") -> 34;
+entity("amp") -> 38;
+entity("lt") -> 60;
+entity("gt") -> 62;
+entity("OElig") -> 338;
+entity("oelig") -> 339;
+entity("Scaron") -> 352;
+entity("scaron") -> 353;
+entity("Yuml") -> 376;
+entity("circ") -> 710;
+entity("tilde") -> 732;
+entity("ensp") -> 8194;
+entity("emsp") -> 8195;
+entity("thinsp") -> 8201;
+entity("zwnj") -> 8204;
+entity("zwj") -> 8205;
+entity("lrm") -> 8206;
+entity("rlm") -> 8207;
+entity("ndash") -> 8211;
+entity("mdash") -> 8212;
+entity("lsquo") -> 8216;
+entity("rsquo") -> 8217;
+entity("sbquo") -> 8218;
+entity("ldquo") -> 8220;
+entity("rdquo") -> 8221;
+entity("bdquo") -> 8222;
+entity("dagger") -> 8224;
+entity("Dagger") -> 8225;
+entity("permil") -> 8240;
+entity("lsaquo") -> 8249;
+entity("rsaquo") -> 8250;
+entity("euro") -> 8364;
+entity(_) -> undefined.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+exhaustive_entity_test() ->
+ T = mochiweb_cover:clause_lookup_table(?MODULE, entity),
+ [?assertEqual(V, entity(K)) || {K, V} <- T].
+
+charref_test() ->
+ 1234 = charref("#1234"),
+ 255 = charref("#xfF"),
+ 255 = charref(<<"#XFf">>),
+ 38 = charref("amp"),
+ 38 = charref(<<"amp">>),
+ undefined = charref("not_an_entity"),
+ undefined = charref("#not_an_entity"),
+ undefined = charref("#xnot_an_entity"),
+ ok.
+
+-endif.