1 |
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
|
2 |
// Version 3.2
|
3 |
// Copyright (C) 2004-2009 Martin Jericho
|
4 |
// http://jericho.htmlparser.net/
|
5 |
//
|
6 |
// This library is free software; you can redistribute it and/or
|
7 |
// modify it under the terms of either one of the following licences:
|
8 |
//
|
9 |
// 1. The Eclipse Public License (EPL) version 1.0,
|
10 |
// included in this distribution in the file licence-epl-1.0.html
|
11 |
// or available at http://www.eclipse.org/legal/epl-v10.html
|
12 |
//
|
13 |
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
|
14 |
// included in this distribution in the file licence-lgpl-2.1.txt
|
15 |
// or available at http://www.gnu.org/licenses/lgpl.txt
|
16 |
//
|
17 |
// This library is distributed on an "AS IS" basis,
|
18 |
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
19 |
// See the individual licence texts for more details.
|
20 |
|
21 |
package net.htmlparser.jericho;
|
22 |
|
23 |
import java.util.*;
|
24 |
import java.io.*;
|
25 |
|
26 |
/**
|
27 |
* Represents an HTML <a target="_blank" href="http://www.w3.org/TR/REC-html40/charset.html#h-5.3.2">Character Entity Reference</a>.
|
28 |
* <p>
|
29 |
* <b>Click <a href="#method_summary">here</a> to scroll down to the method summary.</b>
|
30 |
* <p>
|
31 |
* The full list of HTML character entity references can be found at the following URL:<br />
|
32 |
* <a target="_blank" href="http://www.w3.org/TR/REC-html40/sgml/entities.html">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>.
|
33 |
* <p>
|
34 |
* There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.
|
35 |
* <p>
|
36 |
* Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings
|
37 |
* and single characters can be found in the {@link CharacterReference} superclass.
|
38 |
* <p>
|
39 |
* The {@link #_apos &amp;apos;} entity reference is not defined for use in HTML.
|
40 |
* It is defined in the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>,
|
41 |
* and is the only one that is not included in both HTML and XHTML.
|
42 |
* For this reason, the <code>&amp;apos;</code> entity reference is recognised by this library in decoding functions, but in encoding functions
|
43 |
* the numeric character reference <code>&amp;#39;</code> is used instead.
|
44 |
* Most modern browsers support it in both XHTML and HTML, with the notable exception
|
45 |
* of Microsoft Internet Explorer 6.0, which doesn't support it in either.
|
46 |
* <p>
|
47 |
* <code>CharacterEntityReference</code> instances are obtained using one of the following methods:
|
48 |
* <ul>
|
49 |
* <li>{@link CharacterReference#parse(CharSequence characterReferenceText)}
|
50 |
* <li>{@link Source#getNextCharacterReference(int pos)}
|
51 |
* <li>{@link Source#getPreviousCharacterReference(int pos)}
|
52 |
* <li>{@link Segment#getAllCharacterReferences()}
|
53 |
* </ul>
|
54 |
*
|
55 |
* @see CharacterReference
|
56 |
* @see NumericCharacterReference
|
57 |
*/
|
58 |
public class CharacterEntityReference extends CharacterReference {
|
59 |
private String name;
|
60 |
|
61 |
/** <samp>&nbsp;</samp> <code>&amp;nbsp; = &amp;#160;</code> -- no-break space = non-breaking space, U+00A0 ISOnum. */
|
62 |
public static final char _nbsp='\u00A0';
|
63 |
/** <samp>¡</samp> <code>&iexcl; = &#161;</code> -- inverted exclamation mark, U+00A1 ISOnum. */
|
64 |
public static final char _iexcl='\u00A1';
|
65 |
/** <samp>¢</samp> <code>&cent; = &#162;</code> -- cent sign, U+00A2 ISOnum. */
|
66 |
public static final char _cent='\u00A2';
|
67 |
/** <samp>£</samp> <code>&pound; = &#163;</code> -- pound sign, U+00A3 ISOnum. */
|
68 |
public static final char _pound='\u00A3';
|
69 |
/** <samp>¤</samp> <code>&curren; = &#164;</code> -- currency sign, U+00A4 ISOnum. */
|
70 |
public static final char _curren='\u00A4';
|
71 |
/** <samp>¥</samp> <code>&yen; = &#165;</code> -- yen sign = yuan sign, U+00A5 ISOnum. */
|
72 |
public static final char _yen='\u00A5';
|
73 |
/** <samp>¦</samp> <code>&brvbar; = &#166;</code> -- broken bar = broken vertical bar, U+00A6 ISOnum. */
|
74 |
public static final char _brvbar='\u00A6';
|
75 |
/** <samp>§</samp> <code>&sect; = &#167;</code> -- section sign, U+00A7 ISOnum. */
|
76 |
public static final char _sect='\u00A7';
|
77 |
/** <samp>¨</samp> <code>&uml; = &#168;</code> -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */
|
78 |
public static final char _uml='\u00A8';
|
79 |
/** <samp>©</samp> <code>&copy; = &#169;</code> -- copyright sign, U+00A9 ISOnum. */
|
80 |
public static final char _copy='\u00A9';
|
81 |
/** <samp>ª</samp> <code>&ordf; = &#170;</code> -- feminine ordinal indicator, U+00AA ISOnum. */
|
82 |
public static final char _ordf='\u00AA';
|
83 |
/** <samp>«</samp> <code>&laquo; = &#171;</code> -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */
|
84 |
public static final char _laquo='\u00AB';
|
85 |
/** <samp>¬</samp> <code>&not; = &#172;</code> -- not sign = angled dash, U+00AC ISOnum. */
|
86 |
public static final char _not='\u00AC';
|
87 |
/** <samp>&shy;</samp> <code>&amp;shy; = &amp;#173;</code> -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */
|
88 |
public static final char _shy='\u00AD';
|
89 |
/** <samp>®</samp> <code>&reg; = &#174;</code> -- registered sign = registered trade mark sign, U+00AE ISOnum. */
|
90 |
public static final char _reg='\u00AE';
|
91 |
/** <samp>¯</samp> <code>&macr; = &#175;</code> -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. */
|
92 |
public static final char _macr='\u00AF';
|
93 |
/** <samp>°</samp> <code>&deg; = &#176;</code> -- degree sign, U+00B0 ISOnum. */
|
94 |
public static final char _deg='\u00B0';
|
95 |
/** <samp>±</samp> <code>&plusmn; = &#177;</code> -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */
|
96 |
public static final char _plusmn='\u00B1';
|
97 |
/** <samp>²</samp> <code>&sup2; = &#178;</code> -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */
|
98 |
public static final char _sup2='\u00B2';
|
99 |
/** <samp>³</samp> <code>&sup3; = &#179;</code> -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */
|
100 |
public static final char _sup3='\u00B3';
|
101 |
/** <samp>´</samp> <code>&acute; = &#180;</code> -- acute accent = spacing acute, U+00B4 ISOdia. */
|
102 |
public static final char _acute='\u00B4';
|
103 |
/** <samp>µ</samp> <code>&micro; = &#181;</code> -- micro sign, U+00B5 ISOnum. */
|
104 |
public static final char _micro='\u00B5';
|
105 |
/** <samp>¶</samp> <code>&para; = &#182;</code> -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */
|
106 |
public static final char _para='\u00B6';
|
107 |
/** <samp>·</samp> <code>&middot; = &#183;</code> -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */
|
108 |
public static final char _middot='\u00B7';
|
109 |
/** <samp>¸</samp> <code>&cedil; = &#184;</code> -- cedilla = spacing cedilla, U+00B8 ISOdia. */
|
110 |
public static final char _cedil='\u00B8';
|
111 |
/** <samp>¹</samp> <code>&sup1; = &#185;</code> -- superscript one = superscript digit one, U+00B9 ISOnum. */
|
112 |
public static final char _sup1='\u00B9';
|
113 |
/** <samp>º</samp> <code>&ordm; = &#186;</code> -- masculine ordinal indicator, U+00BA ISOnum. */
|
114 |
public static final char _ordm='\u00BA';
|
115 |
/** <samp>»</samp> <code>&raquo; = &#187;</code> -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */
|
116 |
public static final char _raquo='\u00BB';
|
117 |
/** <samp>¼</samp> <code>&frac14; = &#188;</code> -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */
|
118 |
public static final char _frac14='\u00BC';
|
119 |
/** <samp>½</samp> <code>&frac12; = &#189;</code> -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */
|
120 |
public static final char _frac12='\u00BD';
|
121 |
/** <samp>¾</samp> <code>&frac34; = &#190;</code> -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. */
|
122 |
public static final char _frac34='\u00BE';
|
123 |
/** <samp>¿</samp> <code>&iquest; = &#191;</code> -- inverted question mark = turned question mark, U+00BF ISOnum. */
|
124 |
public static final char _iquest='\u00BF';
|
125 |
/** <samp>À</samp> <code>&Agrave; = &#192;</code> -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */
|
126 |
public static final char _Agrave='\u00C0';
|
127 |
/** <samp>Á</samp> <code>&Aacute; = &#193;</code> -- latin capital letter A with acute, U+00C1 ISOlat1. */
|
128 |
public static final char _Aacute='\u00C1';
|
129 |
/** <samp>Â</samp> <code>&Acirc; = &#194;</code> -- latin capital letter A with circumflex, U+00C2 ISOlat1. */
|
130 |
public static final char _Acirc='\u00C2';
|
131 |
/** <samp>Ã</samp> <code>&Atilde; = &#195;</code> -- latin capital letter A with tilde, U+00C3 ISOlat1. */
|
132 |
public static final char _Atilde='\u00C3';
|
133 |
/** <samp>Ä</samp> <code>&Auml; = &#196;</code> -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */
|
134 |
public static final char _Auml='\u00C4';
|
135 |
/** <samp>Å</samp> <code>&Aring; = &#197;</code> -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */
|
136 |
public static final char _Aring='\u00C5';
|
137 |
/** <samp>Æ</samp> <code>&AElig; = &#198;</code> -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */
|
138 |
public static final char _AElig='\u00C6';
|
139 |
/** <samp>Ç</samp> <code>&Ccedil; = &#199;</code> -- latin capital letter C with cedilla, U+00C7 ISOlat1. */
|
140 |
public static final char _Ccedil='\u00C7';
|
141 |
/** <samp>È</samp> <code>&Egrave; = &#200;</code> -- latin capital letter E with grave, U+00C8 ISOlat1. */
|
142 |
public static final char _Egrave='\u00C8';
|
143 |
/** <samp>É</samp> <code>&Eacute; = &#201;</code> -- latin capital letter E with acute, U+00C9 ISOlat1. */
|
144 |
public static final char _Eacute='\u00C9';
|
145 |
/** <samp>Ê</samp> <code>&Ecirc; = &#202;</code> -- latin capital letter E with circumflex, U+00CA ISOlat1. */
|
146 |
public static final char _Ecirc='\u00CA';
|
147 |
/** <samp>Ë</samp> <code>&Euml; = &#203;</code> -- latin capital letter E with diaeresis, U+00CB ISOlat1. */
|
148 |
public static final char _Euml='\u00CB';
|
149 |
/** <samp>Ì</samp> <code>&Igrave; = &#204;</code> -- latin capital letter I with grave, U+00CC ISOlat1. */
|
150 |
public static final char _Igrave='\u00CC';
|
151 |
/** <samp>Í</samp> <code>&Iacute; = &#205;</code> -- latin capital letter I with acute, U+00CD ISOlat1. */
|
152 |
public static final char _Iacute='\u00CD';
|
153 |
/** <samp>Î</samp> <code>&Icirc; = &#206;</code> -- latin capital letter I with circumflex, U+00CE ISOlat1. */
|
154 |
public static final char _Icirc='\u00CE';
|
155 |
/** <samp>Ï</samp> <code>&Iuml; = &#207;</code> -- latin capital letter I with diaeresis, U+00CF ISOlat1. */
|
156 |
public static final char _Iuml='\u00CF';
|
157 |
/** <samp>Ð</samp> <code>&ETH; = &#208;</code> -- latin capital letter ETH, U+00D0 ISOlat1. */
|
158 |
public static final char _ETH='\u00D0';
|
159 |
/** <samp>Ñ</samp> <code>&Ntilde; = &#209;</code> -- latin capital letter N with tilde, U+00D1 ISOlat1. */
|
160 |
public static final char _Ntilde='\u00D1';
|
161 |
/** <samp>Ò</samp> <code>&Ograve; = &#210;</code> -- latin capital letter O with grave, U+00D2 ISOlat1. */
|
162 |
public static final char _Ograve='\u00D2';
|
163 |
/** <samp>Ó</samp> <code>&Oacute; = &#211;</code> -- latin capital letter O with acute, U+00D3 ISOlat1. */
|
164 |
public static final char _Oacute='\u00D3';
|
165 |
/** <samp>Ô</samp> <code>&Ocirc; = &#212;</code> -- latin capital letter O with circumflex, U+00D4 ISOlat1. */
|
166 |
public static final char _Ocirc='\u00D4';
|
167 |
/** <samp>Õ</samp> <code>&Otilde; = &#213;</code> -- latin capital letter O with tilde, U+00D5 ISOlat1. */
|
168 |
public static final char _Otilde='\u00D5';
|
169 |
/** <samp>Ö</samp> <code>&Ouml; = &#214;</code> -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */
|
170 |
public static final char _Ouml='\u00D6';
|
171 |
/** <samp>×</samp> <code>&times; = &#215;</code> -- multiplication sign, U+00D7 ISOnum. */
|
172 |
public static final char _times='\u00D7';
|
173 |
/** <samp>Ø</samp> <code>&Oslash; = &#216;</code> -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */
|
174 |
public static final char _Oslash='\u00D8';
|
175 |
/** <samp>Ù</samp> <code>&Ugrave; = &#217;</code> -- latin capital letter U with grave, U+00D9 ISOlat1. */
|
176 |
public static final char _Ugrave='\u00D9';
|
177 |
/** <samp>Ú</samp> <code>&Uacute; = &#218;</code> -- latin capital letter U with acute, U+00DA ISOlat1. */
|
178 |
public static final char _Uacute='\u00DA';
|
179 |
/** <samp>Û</samp> <code>&Ucirc; = &#219;</code> -- latin capital letter U with circumflex, U+00DB ISOlat1. */
|
180 |
public static final char _Ucirc='\u00DB';
|
181 |
/** <samp>Ü</samp> <code>&Uuml; = &#220;</code> -- latin capital letter U with diaeresis, U+00DC ISOlat1. */
|
182 |
public static final char _Uuml='\u00DC';
|
183 |
/** <samp>Ý</samp> <code>&Yacute; = &#221;</code> -- latin capital letter Y with acute, U+00DD ISOlat1. */
|
184 |
public static final char _Yacute='\u00DD';
|
185 |
/** <samp>Þ</samp> <code>&THORN; = &#222;</code> -- latin capital letter THORN, U+00DE ISOlat1. */
|
186 |
public static final char _THORN='\u00DE';
|
187 |
/** <samp>ß</samp> <code>&szlig; = &#223;</code> -- latin small letter sharp s = ess-zed, U+00DF ISOlat1. */
|
188 |
public static final char _szlig='\u00DF';
|
189 |
/** <samp>à</samp> <code>&agrave; = &#224;</code> -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1. */
|
190 |
public static final char _agrave='\u00E0';
|
191 |
/** <samp>á</samp> <code>&aacute; = &#225;</code> -- latin small letter a with acute, U+00E1 ISOlat1. */
|
192 |
public static final char _aacute='\u00E1';
|
193 |
/** <samp>â</samp> <code>&acirc; = &#226;</code> -- latin small letter a with circumflex, U+00E2 ISOlat1. */
|
194 |
public static final char _acirc='\u00E2';
|
195 |
/** <samp>ã</samp> <code>&atilde; = &#227;</code> -- latin small letter a with tilde, U+00E3 ISOlat1. */
|
196 |
public static final char _atilde='\u00E3';
|
197 |
/** <samp>ä</samp> <code>&auml; = &#228;</code> -- latin small letter a with diaeresis, U+00E4 ISOlat1. */
|
198 |
public static final char _auml='\u00E4';
|
199 |
/** <samp>å</samp> <code>&aring; = &#229;</code> -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1. */
|
200 |
public static final char _aring='\u00E5';
|
201 |
/** <samp>æ</samp> <code>&aelig; = &#230;</code> -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1. */
|
202 |
public static final char _aelig='\u00E6';
|
203 |
/** <samp>ç</samp> <code>&ccedil; = &#231;</code> -- latin small letter c with cedilla, U+00E7 ISOlat1. */
|
204 |
public static final char _ccedil='\u00E7';
|
205 |
/** <samp>è</samp> <code>&egrave; = &#232;</code> -- latin small letter e with grave, U+00E8 ISOlat1. */
|
206 |
public static final char _egrave='\u00E8';
|
207 |
/** <samp>é</samp> <code>&eacute; = &#233;</code> -- latin small letter e with acute, U+00E9 ISOlat1. */
|
208 |
public static final char _eacute='\u00E9';
|
209 |
/** <samp>ê</samp> <code>&ecirc; = &#234;</code> -- latin small letter e with circumflex, U+00EA ISOlat1. */
|
210 |
public static final char _ecirc='\u00EA';
|
211 |
/** <samp>ë</samp> <code>&euml; = &#235;</code> -- latin small letter e with diaeresis, U+00EB ISOlat1. */
|
212 |
public static final char _euml='\u00EB';
|
213 |
/** <samp>ì</samp> <code>&igrave; = &#236;</code> -- latin small letter i with grave, U+00EC ISOlat1. */
|
214 |
public static final char _igrave='\u00EC';
|
215 |
/** <samp>í</samp> <code>&iacute; = &#237;</code> -- latin small letter i with acute, U+00ED ISOlat1. */
|
216 |
public static final char _iacute='\u00ED';
|
217 |
/** <samp>î</samp> <code>&icirc; = &#238;</code> -- latin small letter i with circumflex, U+00EE ISOlat1. */
|
218 |
public static final char _icirc='\u00EE';
|
219 |
/** <samp>ï</samp> <code>&iuml; = &#239;</code> -- latin small letter i with diaeresis, U+00EF ISOlat1. */
|
220 |
public static final char _iuml='\u00EF';
|
221 |
/** <samp>ð</samp> <code>&eth; = &#240;</code> -- latin small letter eth, U+00F0 ISOlat1. */
|
222 |
public static final char _eth='\u00F0';
|
223 |
/** <samp>ñ</samp> <code>&ntilde; = &#241;</code> -- latin small letter n with tilde, U+00F1 ISOlat1. */
|
224 |
public static final char _ntilde='\u00F1';
|
225 |
/** <samp>ò</samp> <code>&ograve; = &#242;</code> -- latin small letter o with grave, U+00F2 ISOlat1. */
|
226 |
public static final char _ograve='\u00F2';
|
227 |
/** <samp>ó</samp> <code>&oacute; = &#243;</code> -- latin small letter o with acute, U+00F3 ISOlat1. */
|
228 |
public static final char _oacute='\u00F3';
|
229 |
/** <samp>ô</samp> <code>&ocirc; = &#244;</code> -- latin small letter o with circumflex, U+00F4 ISOlat1. */
|
230 |
public static final char _ocirc='\u00F4';
|
231 |
/** <samp>õ</samp> <code>&otilde; = &#245;</code> -- latin small letter o with tilde, U+00F5 ISOlat1. */
|
232 |
public static final char _otilde='\u00F5';
|
233 |
/** <samp>ö</samp> <code>&ouml; = &#246;</code> -- latin small letter o with diaeresis, U+00F6 ISOlat1. */
|
234 |
public static final char _ouml='\u00F6';
|
235 |
/** <samp>÷</samp> <code>&divide; = &#247;</code> -- division sign, U+00F7 ISOnum. */
|
236 |
public static final char _divide='\u00F7';
|
237 |
/** <samp>ø</samp> <code>&amp;oslash; = &amp;#248;</code> -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1. */
|
238 |
public static final char _oslash='\u00F8';
|
239 |
/** <samp>ù</samp> <code>&ugrave; = &#249;</code> -- latin small letter u with grave, U+00F9 ISOlat1. */
|
240 |
public static final char _ugrave='\u00F9';
|
241 |
/** <samp>ú</samp> <code>&uacute; = &#250;</code> -- latin small letter u with acute, U+00FA ISOlat1. */
|
242 |
public static final char _uacute='\u00FA';
|
243 |
/** <samp>û</samp> <code>&ucirc; = &#251;</code> -- latin small letter u with circumflex, U+00FB ISOlat1. */
|
244 |
public static final char _ucirc='\u00FB';
|
245 |
/** <samp>ü</samp> <code>&uuml; = &#252;</code> -- latin small letter u with diaeresis, U+00FC ISOlat1. */
|
246 |
public static final char _uuml='\u00FC';
|
247 |
/** <samp>ý</samp> <code>&yacute; = &#253;</code> -- latin small letter y with acute, U+00FD ISOlat1. */
|
248 |
public static final char _yacute='\u00FD';
|
249 |
/** <samp>þ</samp> <code>&thorn; = &#254;</code> -- latin small letter thorn, U+00FE ISOlat1. */
|
250 |
public static final char _thorn='\u00FE';
|
251 |
/** <samp>ÿ</samp> <code>&yuml; = &#255;</code> -- latin small letter y with diaeresis, U+00FF ISOlat1. */
|
252 |
public static final char _yuml='\u00FF';
|
253 |
/** <samp>ƒ</samp> <code>&fnof; = &#402;</code> -- latin small letter f with hook = function = florin, U+0192 ISOtech. */
|
254 |
public static final char _fnof='\u0192';
|
255 |
/** <samp>Α</samp> <code>&Alpha; = &#913;</code> -- greek capital letter alpha, U+0391. */
|
256 |
public static final char _Alpha='\u0391';
|
257 |
/** <samp>Β</samp> <code>&Beta; = &#914;</code> -- greek capital letter beta, U+0392. */
|
258 |
public static final char _Beta='\u0392';
|
259 |
/** <samp>Γ</samp> <code>&Gamma; = &#915;</code> -- greek capital letter gamma, U+0393 ISOgrk3. */
|
260 |
public static final char _Gamma='\u0393';
|
261 |
/** <samp>Δ</samp> <code>&Delta; = &#916;</code> -- greek capital letter delta, U+0394 ISOgrk3. */
|
262 |
public static final char _Delta='\u0394';
|
263 |
/** <samp>Ε</samp> <code>&Epsilon; = &#917;</code> -- greek capital letter epsilon, U+0395. */
|
264 |
public static final char _Epsilon='\u0395';
|
265 |
/** <samp>Ζ</samp> <code>&Zeta; = &#918;</code> -- greek capital letter zeta, U+0396. */
|
266 |
public static final char _Zeta='\u0396';
|
267 |
/** <samp>Η</samp> <code>&Eta; = &#919;</code> -- greek capital letter eta, U+0397. */
|
268 |
public static final char _Eta='\u0397';
|
269 |
/** <samp>Θ</samp> <code>&Theta; = &#920;</code> -- greek capital letter theta, U+0398 ISOgrk3. */
|
270 |
public static final char _Theta='\u0398';
|
271 |
/** <samp>Ι</samp> <code>&Iota; = &#921;</code> -- greek capital letter iota, U+0399. */
|
272 |
public static final char _Iota='\u0399';
|
273 |
/** <samp>Κ</samp> <code>&Kappa; = &#922;</code> -- greek capital letter kappa, U+039A. */
|
274 |
public static final char _Kappa='\u039A';
|
275 |
/** <samp>Λ</samp> <code>&Lambda; = &#923;</code> -- greek capital letter lambda, U+039B ISOgrk3. */
|
276 |
public static final char _Lambda='\u039B';
|
277 |
/** <samp>Μ</samp> <code>&Mu; = &#924;</code> -- greek capital letter mu, U+039C. */
|
278 |
public static final char _Mu='\u039C';
|
279 |
/** <samp>Ν</samp> <code>&Nu; = &#925;</code> -- greek capital letter nu, U+039D. */
|
280 |
public static final char _Nu='\u039D';
|
281 |
/** <samp>Ξ</samp> <code>&Xi; = &#926;</code> -- greek capital letter xi, U+039E ISOgrk3. */
|
282 |
public static final char _Xi='\u039E';
|
283 |
/** <samp>Ο</samp> <code>&Omicron; = &#927;</code> -- greek capital letter omicron, U+039F. */
|
284 |
public static final char _Omicron='\u039F';
|
285 |
/** <samp>Π</samp> <code>&Pi; = &#928;</code> -- greek capital letter pi, U+03A0 ISOgrk3. */
|
286 |
public static final char _Pi='\u03A0';
|
287 |
/** <samp>Ρ</samp> <code>&Rho; = &#929;</code> -- greek capital letter rho, U+03A1. */
|
288 |
public static final char _Rho='\u03A1';
|
289 |
/** <samp>Σ</samp> <code>&Sigma; = &#931;</code> -- greek capital letter sigma, U+03A3 ISOgrk3. */
|
290 |
public static final char _Sigma='\u03A3';
|
291 |
/** <samp>Τ</samp> <code>&Tau; = &#932;</code> -- greek capital letter tau, U+03A4. */
|
292 |
public static final char _Tau='\u03A4';
|
293 |
/** <samp>Υ</samp> <code>&Upsilon; = &#933;</code> -- greek capital letter upsilon, U+03A5 ISOgrk3. */
|
294 |
public static final char _Upsilon='\u03A5';
|
295 |
/** <samp>Φ</samp> <code>&Phi; = &#934;</code> -- greek capital letter phi, U+03A6 ISOgrk3. */
|
296 |
public static final char _Phi='\u03A6';
|
297 |
/** <samp>Χ</samp> <code>&Chi; = &#935;</code> -- greek capital letter chi, U+03A7. */
|
298 |
public static final char _Chi='\u03A7';
|
299 |
/** <samp>Ψ</samp> <code>&Psi; = &#936;</code> -- greek capital letter psi, U+03A8 ISOgrk3. */
|
300 |
public static final char _Psi='\u03A8';
|
301 |
/** <samp>Ω</samp> <code>&Omega; = &#937;</code> -- greek capital letter omega, U+03A9 ISOgrk3. */
|
302 |
public static final char _Omega='\u03A9';
|
303 |
/** <samp>α</samp> <code>&alpha; = &#945;</code> -- greek small letter alpha, U+03B1 ISOgrk3. */
|
304 |
public static final char _alpha='\u03B1';
|
305 |
/** <samp>β</samp> <code>&beta; = &#946;</code> -- greek small letter beta, U+03B2 ISOgrk3. */
|
306 |
public static final char _beta='\u03B2';
|
307 |
/** <samp>γ</samp> <code>&gamma; = &#947;</code> -- greek small letter gamma, U+03B3 ISOgrk3. */
|
308 |
public static final char _gamma='\u03B3';
|
309 |
/** <samp>δ</samp> <code>&delta; = &#948;</code> -- greek small letter delta, U+03B4 ISOgrk3. */
|
310 |
public static final char _delta='\u03B4';
|
311 |
/** <samp>ε</samp> <code>&epsilon; = &#949;</code> -- greek small letter epsilon, U+03B5 ISOgrk3. */
|
312 |
public static final char _epsilon='\u03B5';
|
313 |
/** <samp>ζ</samp> <code>&zeta; = &#950;</code> -- greek small letter zeta, U+03B6 ISOgrk3. */
|
314 |
public static final char _zeta='\u03B6';
|
315 |
/** <samp>η</samp> <code>&eta; = &#951;</code> -- greek small letter eta, U+03B7 ISOgrk3. */
|
316 |
public static final char _eta='\u03B7';
|
317 |
/** <samp>θ</samp> <code>&theta; = &#952;</code> -- greek small letter theta, U+03B8 ISOgrk3. */
|
318 |
public static final char _theta='\u03B8';
|
319 |
/** <samp>ι</samp> <code>&iota; = &#953;</code> -- greek small letter iota, U+03B9 ISOgrk3. */
|
320 |
public static final char _iota='\u03B9';
|
321 |
/** <samp>κ</samp> <code>&kappa; = &#954;</code> -- greek small letter kappa, U+03BA ISOgrk3. */
|
322 |
public static final char _kappa='\u03BA';
|
323 |
/** <samp>λ</samp> <code>&lambda; = &#955;</code> -- greek small letter lambda, U+03BB ISOgrk3. */
|
324 |
public static final char _lambda='\u03BB';
|
325 |
/** <samp>μ</samp> <code>&mu; = &#956;</code> -- greek small letter mu, U+03BC ISOgrk3. */
|
326 |
public static final char _mu='\u03BC';
|
327 |
/** <samp>ν</samp> <code>&nu; = &#957;</code> -- greek small letter nu, U+03BD ISOgrk3. */
|
328 |
public static final char _nu='\u03BD';
|
329 |
/** <samp>ξ</samp> <code>&xi; = &#958;</code> -- greek small letter xi, U+03BE ISOgrk3. */
|
330 |
public static final char _xi='\u03BE';
|
331 |
/** <samp>ο</samp> <code>&omicron; = &#959;</code> -- greek small letter omicron, U+03BF NEW. */
|
332 |
public static final char _omicron='\u03BF';
|
333 |
/** <samp>π</samp> <code>&pi; = &#960;</code> -- greek small letter pi, U+03C0 ISOgrk3. */
|
334 |
public static final char _pi='\u03C0';
|
335 |
/** <samp>ρ</samp> <code>&rho; = &#961;</code> -- greek small letter rho, U+03C1 ISOgrk3. */
|
336 |
public static final char _rho='\u03C1';
|
337 |
/** <samp>ς</samp> <code>&sigmaf; = &#962;</code> -- greek small letter final sigma, U+03C2 ISOgrk3. */
|
338 |
public static final char _sigmaf='\u03C2';
|
339 |
/** <samp>σ</samp> <code>&sigma; = &#963;</code> -- greek small letter sigma, U+03C3 ISOgrk3. */
|
340 |
public static final char _sigma='\u03C3';
|
341 |
/** <samp>τ</samp> <code>&tau; = &#964;</code> -- greek small letter tau, U+03C4 ISOgrk3. */
|
342 |
public static final char _tau='\u03C4';
|
343 |
/** <samp>υ</samp> <code>&upsilon; = &#965;</code> -- greek small letter upsilon, U+03C5 ISOgrk3. */
|
344 |
public static final char _upsilon='\u03C5';
|
345 |
/** <samp>φ</samp> <code>&phi; = &#966;</code> -- greek small letter phi, U+03C6 ISOgrk3. */
|
346 |
public static final char _phi='\u03C6';
|
347 |
/** <samp>χ</samp> <code>&chi; = &#967;</code> -- greek small letter chi, U+03C7 ISOgrk3. */
|
348 |
public static final char _chi='\u03C7';
|
349 |
/** <samp>ψ</samp> <code>&psi; = &#968;</code> -- greek small letter psi, U+03C8 ISOgrk3. */
|
350 |
public static final char _psi='\u03C8';
|
351 |
/** <samp>ω</samp> <code>&omega; = &#969;</code> -- greek small letter omega, U+03C9 ISOgrk3. */
|
352 |
public static final char _omega='\u03C9';
|
353 |
/** <samp>ϑ</samp> <code>&thetasym; = &#977;</code> -- greek small letter theta symbol, U+03D1 NEW. */
|
354 |
public static final char _thetasym='\u03D1';
|
355 |
/** <samp>ϒ</samp> <code>&upsih; = &#978;</code> -- greek upsilon with hook symbol, U+03D2 NEW. */
|
356 |
public static final char _upsih='\u03D2';
|
357 |
/** <samp>ϖ</samp> <code>&piv; = &#982;</code> -- greek pi symbol, U+03D6 ISOgrk3. */
|
358 |
public static final char _piv='\u03D6';
|
359 |
/** <samp>•</samp> <code>&bull; = &#8226;</code> -- bullet = black small circle, U+2022 ISOpub<br />(see <a href="#_bull">comments</a>).<p>bullet is NOT the same as bullet operator, U+2219</p> */
|
360 |
public static final char _bull='\u2022';
|
361 |
/** <samp>…</samp> <code>&hellip; = &#8230;</code> -- horizontal ellipsis = three dot leader, U+2026 ISOpub. */
|
362 |
public static final char _hellip='\u2026';
|
363 |
/** <samp>′</samp> <code>&prime; = &#8242;</code> -- prime = minutes = feet, U+2032 ISOtech. */
|
364 |
public static final char _prime='\u2032';
|
365 |
/** <samp>″</samp> <code>&Prime; = &#8243;</code> -- double prime = seconds = inches, U+2033 ISOtech. */
|
366 |
public static final char _Prime='\u2033';
|
367 |
/** <samp>‾</samp> <code>&oline; = &#8254;</code> -- overline = spacing overscore, U+203E NEW. */
|
368 |
public static final char _oline='\u203E';
|
369 |
/** <samp>⁄</samp> <code>&frasl; = &#8260;</code> -- fraction slash, U+2044 NEW. */
|
370 |
public static final char _frasl='\u2044';
|
371 |
/** <samp>℘</samp> <code>&weierp; = &#8472;</code> -- script capital P = power set = Weierstrass p, U+2118 ISOamso. */
|
372 |
public static final char _weierp='\u2118';
|
373 |
/** <samp>ℑ</samp> <code>&image; = &#8465;</code> -- black-letter capital I = imaginary part, U+2111 ISOamso. */
|
374 |
public static final char _image='\u2111';
|
375 |
/** <samp>ℜ</samp> <code>&real; = &#8476;</code> -- black-letter capital R = real part symbol, U+211C ISOamso. */
|
376 |
public static final char _real='\u211C';
|
377 |
/** <samp>™</samp> <code>&trade; = &#8482;</code> -- trade mark sign, U+2122 ISOnum. */
|
378 |
public static final char _trade='\u2122';
|
379 |
/** <samp>ℵ</samp> <code>&amp;alefsym; = &amp;#8501;</code> -- alef symbol = first transfinite cardinal, U+2135 NEW<br />(see <a href="#_alefsym">comments</a>).<p>alef symbol is NOT the same as hebrew letter alef, U+05D0, although the same glyph could be used to depict both characters.</p> */
|
380 |
public static final char _alefsym='\u2135';
|
381 |
/** <samp>←</samp> <code>&larr; = &#8592;</code> -- leftwards arrow, U+2190 ISOnum. */
|
382 |
public static final char _larr='\u2190';
|
383 |
/** <samp>↑</samp> <code>&uarr; = &#8593;</code> -- upwards arrow, U+2191 ISOnum. */
|
384 |
public static final char _uarr='\u2191';
|
385 |
/** <samp>→</samp> <code>&rarr; = &#8594;</code> -- rightwards arrow, U+2192 ISOnum. */
|
386 |
public static final char _rarr='\u2192';
|
387 |
/** <samp>↓</samp> <code>&darr; = &#8595;</code> -- downwards arrow, U+2193 ISOnum. */
|
388 |
public static final char _darr='\u2193';
|
389 |
/** <samp>↔</samp> <code>&harr; = &#8596;</code> -- left right arrow, U+2194 ISOamsa. */
|
390 |
public static final char _harr='\u2194';
|
391 |
/** <samp>↵</samp> <code>&crarr; = &#8629;</code> -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW. */
|
392 |
public static final char _crarr='\u21B5';
|
393 |
/** <samp>⇐</samp> <code>&amp;lArr; = &amp;#8656;</code> -- leftwards double arrow, U+21D0 ISOtech<br />(see <a href="#_lArr">comments</a>).<p>ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function, so lArr can be used for 'is implied by' as ISOtech suggests.</p> */
|
394 |
public static final char _lArr='\u21D0';
|
395 |
/** <samp>⇑</samp> <code>&uArr; = &#8657;</code> -- upwards double arrow, U+21D1 ISOamsa. */
|
396 |
public static final char _uArr='\u21D1';
|
397 |
/** <samp>⇒</samp> <code>&amp;rArr; = &amp;#8658;</code> -- rightwards double arrow, U+21D2 ISOtech<br />(see <a href="#_rArr">comments</a>).<p>ISO 10646 does not say this is the 'implies' character but does not have another character with this function, so rArr can be used for 'implies' as ISOtech suggests.</p> */
|
398 |
public static final char _rArr='\u21D2';
|
399 |
/** <samp>⇓</samp> <code>&dArr; = &#8659;</code> -- downwards double arrow, U+21D3 ISOamsa. */
|
400 |
public static final char _dArr='\u21D3';
|
401 |
/** <samp>⇔</samp> <code>&hArr; = &#8660;</code> -- left right double arrow, U+21D4 ISOamsa. */
|
402 |
public static final char _hArr='\u21D4';
|
403 |
/** <samp>∀</samp> <code>&forall; = &#8704;</code> -- for all, U+2200 ISOtech. */
|
404 |
public static final char _forall='\u2200';
|
405 |
/** <samp>∂</samp> <code>&part; = &#8706;</code> -- partial differential, U+2202 ISOtech. */
|
406 |
public static final char _part='\u2202';
|
407 |
/** <samp>∃</samp> <code>&exist; = &#8707;</code> -- there exists, U+2203 ISOtech. */
|
408 |
public static final char _exist='\u2203';
|
409 |
/** <samp>∅</samp> <code>&empty; = &#8709;</code> -- empty set = null set = diameter, U+2205 ISOamso. */
|
410 |
public static final char _empty='\u2205';
|
411 |
/** <samp>∇</samp> <code>&nabla; = &#8711;</code> -- nabla = backward difference, U+2207 ISOtech. */
|
412 |
public static final char _nabla='\u2207';
|
413 |
/** <samp>∈</samp> <code>&isin; = &#8712;</code> -- element of, U+2208 ISOtech. */
|
414 |
public static final char _isin='\u2208';
|
415 |
/** <samp>∉</samp> <code>&notin; = &#8713;</code> -- not an element of, U+2209 ISOtech. */
|
416 |
public static final char _notin='\u2209';
|
417 |
/** <samp>∋</samp> <code>&ni; = &#8715;</code> -- contains as member, U+220B ISOtech<br />(see <a href="#_ni">comments</a>).<p>should there be a more memorable name than 'ni'?</p> */
|
418 |
public static final char _ni='\u220B';
|
419 |
/** <samp>∏</samp> <code>&prod; = &#8719;</code> -- n-ary product = product sign, U+220F ISOamsb<br />(see <a href="#_prod">comments</a>).<p>prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both</p> */
|
420 |
public static final char _prod='\u220F';
|
421 |
/** <samp>∑</samp> <code>&sum; = &#8721;</code> -- n-ary summation, U+2211 ISOamsb<br />(see <a href="#_sum">comments</a>).<p>sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both</p> */
|
422 |
public static final char _sum='\u2211';
|
423 |
/** <samp>−</samp> <code>&minus; = &#8722;</code> -- minus sign, U+2212 ISOtech. */
|
424 |
public static final char _minus='\u2212';
|
425 |
/** <samp>∗</samp> <code>&lowast; = &#8727;</code> -- asterisk operator, U+2217 ISOtech. */
|
426 |
public static final char _lowast='\u2217';
|
427 |
/** <samp>√</samp> <code>&radic; = &#8730;</code> -- square root = radical sign, U+221A ISOtech. */
|
428 |
public static final char _radic='\u221A';
|
429 |
/** <samp>∝</samp> <code>&prop; = &#8733;</code> -- proportional to, U+221D ISOtech. */
|
430 |
public static final char _prop='\u221D';
|
431 |
/** <samp>∞</samp> <code>&infin; = &#8734;</code> -- infinity, U+221E ISOtech. */
|
432 |
public static final char _infin='\u221E';
|
433 |
/** <samp>∠</samp> <code>&ang; = &#8736;</code> -- angle, U+2220 ISOamso. */
|
434 |
public static final char _ang='\u2220';
|
435 |
/** <samp>∧</samp> <code>&and; = &#8743;</code> -- logical and = wedge, U+2227 ISOtech. */
|
436 |
public static final char _and='\u2227';
|
437 |
/** <samp>∨</samp> <code>&or; = &#8744;</code> -- logical or = vee, U+2228 ISOtech. */
|
438 |
public static final char _or='\u2228';
|
439 |
/** <samp>∩</samp> <code>&cap; = &#8745;</code> -- intersection = cap, U+2229 ISOtech. */
|
440 |
public static final char _cap='\u2229';
|
441 |
/** <samp>∪</samp> <code>&cup; = &#8746;</code> -- union = cup, U+222A ISOtech. */
|
442 |
public static final char _cup='\u222A';
|
443 |
/** <samp>∫</samp> <code>&int; = &#8747;</code> -- integral, U+222B ISOtech. */
|
444 |
public static final char _int='\u222B';
|
445 |
/** <samp>∴</samp> <code>&there4; = &#8756;</code> -- therefore, U+2234 ISOtech. */
|
446 |
public static final char _there4='\u2234';
|
447 |
/** <samp>∼</samp> <code>&sim; = &#8764;</code> -- tilde operator = varies with = similar to, U+223C ISOtech<br />(see <a href="#_sim">comments</a>).<p>tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both</p> */
|
448 |
public static final char _sim='\u223C';
|
449 |
/** <samp>≅</samp> <code>&cong; = &#8773;</code> -- approximately equal to, U+2245 ISOtech. */
|
450 |
public static final char _cong='\u2245';
|
451 |
/** <samp>≈</samp> <code>&asymp; = &#8776;</code> -- almost equal to = asymptotic to, U+2248 ISOamsr. */
|
452 |
public static final char _asymp='\u2248';
|
453 |
/** <samp>≠</samp> <code>&ne; = &#8800;</code> -- not equal to, U+2260 ISOtech. */
|
454 |
public static final char _ne='\u2260';
|
455 |
/** <samp>≡</samp> <code>&equiv; = &#8801;</code> -- identical to, U+2261 ISOtech. */
|
456 |
public static final char _equiv='\u2261';
|
457 |
/** <samp>≤</samp> <code>&le; = &#8804;</code> -- less-than or equal to, U+2264 ISOtech. */
|
458 |
public static final char _le='\u2264';
|
459 |
/** <samp>≥</samp> <code>&ge; = &#8805;</code> -- greater-than or equal to, U+2265 ISOtech. */
|
460 |
public static final char _ge='\u2265';
|
461 |
/** <samp>⊂</samp> <code>&sub; = &#8834;</code> -- subset of, U+2282 ISOtech. */
|
462 |
public static final char _sub='\u2282';
|
463 |
/** <samp>⊃</samp> <code>&sup; = &#8835;</code> -- superset of, U+2283 ISOtech<br />(see <a href="#_sup">comments</a>).<p>note that nsup, 'not a superset of, U+2283' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry? It is in ISOamsn</p> */
|
464 |
public static final char _sup='\u2283';
|
465 |
/** <samp>⊄</samp> <code>&nsub; = &#8836;</code> -- not a subset of, U+2284 ISOamsn. */
|
466 |
public static final char _nsub='\u2284';
|
467 |
/** <samp>⊆</samp> <code>&sube; = &#8838;</code> -- subset of or equal to, U+2286 ISOtech. */
|
468 |
public static final char _sube='\u2286';
|
469 |
/** <samp>⊇</samp> <code>&supe; = &#8839;</code> -- superset of or equal to, U+2287 ISOtech. */
|
470 |
public static final char _supe='\u2287';
|
471 |
/** <samp>⊕</samp> <code>&oplus; = &#8853;</code> -- circled plus = direct sum, U+2295 ISOamsb. */
|
472 |
public static final char _oplus='\u2295';
|
473 |
/** <samp>⊗</samp> <code>&otimes; = &#8855;</code> -- circled times = vector product, U+2297 ISOamsb. */
|
474 |
public static final char _otimes='\u2297';
|
475 |
/** <samp>⊥</samp> <code>&perp; = &#8869;</code> -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech. */
|
476 |
public static final char _perp='\u22A5';
|
477 |
/** <samp>⋅</samp> <code>&sdot; = &#8901;</code> -- dot operator, U+22C5 ISOamsb<br />(see <a href="#_sdot">comments</a>).<p>dot operator is NOT the same character as U+00B7 middle dot</p> */
|
478 |
public static final char _sdot='\u22C5';
|
479 |
/** <samp>⌈</samp> <code>&lceil; = &#8968;</code> -- left ceiling = APL upstile, U+2308 ISOamsc. */
|
480 |
public static final char _lceil='\u2308';
|
481 |
/** <samp>⌉</samp> <code>&rceil; = &#8969;</code> -- right ceiling, U+2309 ISOamsc. */
|
482 |
public static final char _rceil='\u2309';
|
483 |
/** <samp>⌊</samp> <code>&lfloor; = &#8970;</code> -- left floor = APL downstile, U+230A ISOamsc. */
|
484 |
public static final char _lfloor='\u230A';
|
485 |
/** <samp>⌋</samp> <code>&rfloor; = &#8971;</code> -- right floor, U+230B ISOamsc. */
|
486 |
public static final char _rfloor='\u230B';
|
487 |
/** <samp>⟨</samp> <code>&lang; = &#9001;</code> -- left-pointing angle bracket = bra, U+2329 ISOtech<br />(see <a href="#_lang">comments</a>).<p>lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'</p> */
|
488 |
public static final char _lang='\u2329';
|
489 |
/** <samp>⟩</samp> <code>&rang; = &#9002;</code> -- right-pointing angle bracket = ket, U+232A ISOtech<br />(see <a href="#_rang">comments</a>).<p>rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'</p> */
|
490 |
public static final char _rang='\u232A';
|
491 |
/** <samp>◊</samp> <code>&loz; = &#9674;</code> -- lozenge, U+25CA ISOpub. */
|
492 |
public static final char _loz='\u25CA';
|
493 |
/** <samp>♠</samp> <code>&spades; = &#9824;</code> -- black spade suit, U+2660 ISOpub<br />(see <a href="#_spades">comments</a>).<p>black here seems to mean filled as opposed to hollow</p> */
|
494 |
public static final char _spades='\u2660';
|
495 |
/** <samp>♣</samp> <code>&clubs; = &#9827;</code> -- black club suit = shamrock, U+2663 ISOpub. */
|
496 |
public static final char _clubs='\u2663';
|
497 |
/** <samp>♥</samp> <code>&hearts; = &#9829;</code> -- black heart suit = valentine, U+2665 ISOpub. */
|
498 |
public static final char _hearts='\u2665';
|
499 |
/** <samp>♦</samp> <code>&diams; = &#9830;</code> -- black diamond suit, U+2666 ISOpub. */
|
500 |
public static final char _diams='\u2666';
|
501 |
/** <samp>"</samp> <code>&quot; = &#34;</code> -- quotation mark = APL quote, U+0022 ISOnum. */
|
502 |
public static final char _quot='\u0022';
|
503 |
/** <samp>&amp;</samp> <code>&amp;amp; = &amp;#38;</code> -- ampersand, U+0026 ISOnum. */
|
504 |
public static final char _amp='\u0026';
|
505 |
/** <samp>&lt;</samp> <code>&amp;lt; = &amp;#60;</code> -- less-than sign, U+003C ISOnum. */
|
506 |
public static final char _lt='\u003C';
|
507 |
/** <samp>&gt;</samp> <code>&amp;gt; = &amp;#62;</code> -- greater-than sign, U+003E ISOnum. */
|
508 |
public static final char _gt='\u003E';
|
509 |
/** <samp>Œ</samp> <code>&OElig; = &#338;</code> -- latin capital ligature OE, U+0152 ISOlat2. */
|
510 |
public static final char _OElig='\u0152';
|
511 |
/** <samp>œ</samp> <code>&oelig; = &#339;</code> -- latin small ligature oe, U+0153 ISOlat2<br />(see <a href="#_oelig">comments</a>).<p>ligature is a misnomer, this is a separate character in some languages</p> */
|
512 |
public static final char _oelig='\u0153';
|
513 |
/** <samp>Š</samp> <code>&Scaron; = &#352;</code> -- latin capital letter S with caron, U+0160 ISOlat2. */
|
514 |
public static final char _Scaron='\u0160';
|
515 |
/** <samp>š</samp> <code>&scaron; = &#353;</code> -- latin small letter s with caron, U+0161 ISOlat2. */
|
516 |
public static final char _scaron='\u0161';
|
517 |
/** <samp>Ÿ</samp> <code>&Yuml; = &#376;</code> -- latin capital letter Y with diaeresis, U+0178 ISOlat2. */
|
518 |
public static final char _Yuml='\u0178';
|
519 |
/** <samp>ˆ</samp> <code>&circ; = &#710;</code> -- modifier letter circumflex accent, U+02C6 ISOpub. */
|
520 |
public static final char _circ='\u02C6';
|
521 |
/** <samp>˜</samp> <code>&tilde; = &#732;</code> -- small tilde, U+02DC ISOdia. */
|
522 |
public static final char _tilde='\u02DC';
|
523 |
/** <samp> </samp> <code>&ensp; = &#8194;</code> -- en space, U+2002 ISOpub. */
|
524 |
public static final char _ensp='\u2002';
|
525 |
/** <samp> </samp> <code>&emsp; = &#8195;</code> -- em space, U+2003 ISOpub. */
|
526 |
public static final char _emsp='\u2003';
|
527 |
/** <samp> </samp> <code>&thinsp; = &#8201;</code> -- thin space, U+2009 ISOpub. */
|
528 |
public static final char _thinsp='\u2009';
|
529 |
/** <samp>‌</samp> <code>&zwnj; = &#8204;</code> -- zero width non-joiner, U+200C NEW RFC 2070. */
|
530 |
public static final char _zwnj='\u200C';
|
531 |
/** <samp>‍</samp> <code>&zwj; = &#8205;</code> -- zero width joiner, U+200D NEW RFC 2070. */
|
532 |
public static final char _zwj='\u200D';
|
533 |
/** <samp>‎</samp> <code>&lrm; = &#8206;</code> -- left-to-right mark, U+200E NEW RFC 2070. */
|
534 |
public static final char _lrm='\u200E';
|
535 |
/** <samp>‏</samp> <code>&rlm; = &#8207;</code> -- right-to-left mark, U+200F NEW RFC 2070. */
|
536 |
public static final char _rlm='\u200F';
|
537 |
/** <samp>–</samp> <code>&ndash; = &#8211;</code> -- en dash, U+2013 ISOpub. */
|
538 |
public static final char _ndash='\u2013';
|
539 |
/** <samp>—</samp> <code>&mdash; = &#8212;</code> -- em dash, U+2014 ISOpub. */
|
540 |
public static final char _mdash='\u2014';
|
541 |
/** <samp>‘</samp> <code>&lsquo; = &#8216;</code> -- left single quotation mark, U+2018 ISOnum. */
|
542 |
public static final char _lsquo='\u2018';
|
543 |
/** <samp>’</samp> <code>&rsquo; = &#8217;</code> -- right single quotation mark, U+2019 ISOnum. */
|
544 |
public static final char _rsquo='\u2019';
|
545 |
/** <samp>‚</samp> <code>&sbquo; = &#8218;</code> -- single low-9 quotation mark, U+201A NEW. */
|
546 |
public static final char _sbquo='\u201A';
|
547 |
/** <samp>“</samp> <code>&ldquo; = &#8220;</code> -- left double quotation mark, U+201C ISOnum. */
|
548 |
public static final char _ldquo='\u201C';
|
549 |
/** <samp>”</samp> <code>&rdquo; = &#8221;</code> -- right double quotation mark, U+201D ISOnum. */
|
550 |
public static final char _rdquo='\u201D';
|
551 |
/** <samp>„</samp> <code>&bdquo; = &#8222;</code> -- double low-9 quotation mark, U+201E NEW. */
|
552 |
public static final char _bdquo='\u201E';
|
553 |
/** <samp>†</samp> <code>&dagger; = &#8224;</code> -- dagger, U+2020 ISOpub. */
|
554 |
public static final char _dagger='\u2020';
|
555 |
/** <samp>‡</samp> <code>&Dagger; = &#8225;</code> -- double dagger, U+2021 ISOpub. */
|
556 |
public static final char _Dagger='\u2021';
|
557 |
/** <samp>‰</samp> <code>&permil; = &#8240;</code> -- per mille sign, U+2030 ISOtech. */
|
558 |
public static final char _permil='\u2030';
|
559 |
/** <samp>‹</samp> <code>&lsaquo; = &#8249;</code> -- single left-pointing angle quotation mark, U+2039 ISO proposed<br />(see <a href="#_lsaquo">comments</a>).<p>lsaquo is proposed but not yet ISO standardized</p> */
|
560 |
public static final char _lsaquo='\u2039';
|
561 |
/** <samp>›</samp> <code>&rsaquo; = &#8250;</code> -- single right-pointing angle quotation mark, U+203A ISO proposed<br />(see <a href="#_rsaquo">comments</a>).<p>rsaquo is proposed but not yet ISO standardized</p> */
|
562 |
public static final char _rsaquo='\u203A';
|
563 |
/** <samp>€</samp> <code>&euro; = &#8364;</code> -- euro sign, U+20AC NEW. */
|
564 |
public static final char _euro='\u20AC';
|
565 |
/**
|
566 |
* <samp>'</samp> <code>&apos; = &#39;</code> -- apostrophe = APL quote, U+0027 ISOnum<br />(see <a href="#_apos">comments</a>).<p>
|
567 |
* apos is only defined for use in XHTML
|
568 |
* (see the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>),
|
569 |
* but not in HTML.
|
570 |
* @see Config#IsApostropheEncoded
|
571 |
*/
|
572 |
public static final char _apos='\'';
|
573 |
|
574 |
// Lookup table from entity name (without the surrounding '&' and ';') to its unicode code point; filled by the static initialiser.
private static Map<String,Integer> NAME_TO_CODE_POINT_MAP=new HashMap<String,Integer>(512,1.0F); // 253 entities in total
|
575 |
// Reverse lookup table from unicode code point to entity name; built in the static initialiser from NAME_TO_CODE_POINT_MAP.
private static IntStringHashMap CODE_POINT_TO_NAME_MAP;
|
576 |
|
577 |
// Length of the longest entity name; computed in the static initialiser and used there to derive MAX_ENTITY_REFERENCE_LENGTH.
private static int MAX_NAME_LENGTH=0;
|
578 |
|
579 |
static {
|
580 |
NAME_TO_CODE_POINT_MAP.put("nbsp",new Integer(_nbsp));
|
581 |
NAME_TO_CODE_POINT_MAP.put("iexcl",new Integer(_iexcl));
|
582 |
NAME_TO_CODE_POINT_MAP.put("cent",new Integer(_cent));
|
583 |
NAME_TO_CODE_POINT_MAP.put("pound",new Integer(_pound));
|
584 |
NAME_TO_CODE_POINT_MAP.put("curren",new Integer(_curren));
|
585 |
NAME_TO_CODE_POINT_MAP.put("yen",new Integer(_yen));
|
586 |
NAME_TO_CODE_POINT_MAP.put("brvbar",new Integer(_brvbar));
|
587 |
NAME_TO_CODE_POINT_MAP.put("sect",new Integer(_sect));
|
588 |
NAME_TO_CODE_POINT_MAP.put("uml",new Integer(_uml));
|
589 |
NAME_TO_CODE_POINT_MAP.put("copy",new Integer(_copy));
|
590 |
NAME_TO_CODE_POINT_MAP.put("ordf",new Integer(_ordf));
|
591 |
NAME_TO_CODE_POINT_MAP.put("laquo",new Integer(_laquo));
|
592 |
NAME_TO_CODE_POINT_MAP.put("not",new Integer(_not));
|
593 |
NAME_TO_CODE_POINT_MAP.put("shy",new Integer(_shy));
|
594 |
NAME_TO_CODE_POINT_MAP.put("reg",new Integer(_reg));
|
595 |
NAME_TO_CODE_POINT_MAP.put("macr",new Integer(_macr));
|
596 |
NAME_TO_CODE_POINT_MAP.put("deg",new Integer(_deg));
|
597 |
NAME_TO_CODE_POINT_MAP.put("plusmn",new Integer(_plusmn));
|
598 |
NAME_TO_CODE_POINT_MAP.put("sup2",new Integer(_sup2));
|
599 |
NAME_TO_CODE_POINT_MAP.put("sup3",new Integer(_sup3));
|
600 |
NAME_TO_CODE_POINT_MAP.put("acute",new Integer(_acute));
|
601 |
NAME_TO_CODE_POINT_MAP.put("micro",new Integer(_micro));
|
602 |
NAME_TO_CODE_POINT_MAP.put("para",new Integer(_para));
|
603 |
NAME_TO_CODE_POINT_MAP.put("middot",new Integer(_middot));
|
604 |
NAME_TO_CODE_POINT_MAP.put("cedil",new Integer(_cedil));
|
605 |
NAME_TO_CODE_POINT_MAP.put("sup1",new Integer(_sup1));
|
606 |
NAME_TO_CODE_POINT_MAP.put("ordm",new Integer(_ordm));
|
607 |
NAME_TO_CODE_POINT_MAP.put("raquo",new Integer(_raquo));
|
608 |
NAME_TO_CODE_POINT_MAP.put("frac14",new Integer(_frac14));
|
609 |
NAME_TO_CODE_POINT_MAP.put("frac12",new Integer(_frac12));
|
610 |
NAME_TO_CODE_POINT_MAP.put("frac34",new Integer(_frac34));
|
611 |
NAME_TO_CODE_POINT_MAP.put("iquest",new Integer(_iquest));
|
612 |
NAME_TO_CODE_POINT_MAP.put("Agrave",new Integer(_Agrave));
|
613 |
NAME_TO_CODE_POINT_MAP.put("Aacute",new Integer(_Aacute));
|
614 |
NAME_TO_CODE_POINT_MAP.put("Acirc",new Integer(_Acirc));
|
615 |
NAME_TO_CODE_POINT_MAP.put("Atilde",new Integer(_Atilde));
|
616 |
NAME_TO_CODE_POINT_MAP.put("Auml",new Integer(_Auml));
|
617 |
NAME_TO_CODE_POINT_MAP.put("Aring",new Integer(_Aring));
|
618 |
NAME_TO_CODE_POINT_MAP.put("AElig",new Integer(_AElig));
|
619 |
NAME_TO_CODE_POINT_MAP.put("Ccedil",new Integer(_Ccedil));
|
620 |
NAME_TO_CODE_POINT_MAP.put("Egrave",new Integer(_Egrave));
|
621 |
NAME_TO_CODE_POINT_MAP.put("Eacute",new Integer(_Eacute));
|
622 |
NAME_TO_CODE_POINT_MAP.put("Ecirc",new Integer(_Ecirc));
|
623 |
NAME_TO_CODE_POINT_MAP.put("Euml",new Integer(_Euml));
|
624 |
NAME_TO_CODE_POINT_MAP.put("Igrave",new Integer(_Igrave));
|
625 |
NAME_TO_CODE_POINT_MAP.put("Iacute",new Integer(_Iacute));
|
626 |
NAME_TO_CODE_POINT_MAP.put("Icirc",new Integer(_Icirc));
|
627 |
NAME_TO_CODE_POINT_MAP.put("Iuml",new Integer(_Iuml));
|
628 |
NAME_TO_CODE_POINT_MAP.put("ETH",new Integer(_ETH));
|
629 |
NAME_TO_CODE_POINT_MAP.put("Ntilde",new Integer(_Ntilde));
|
630 |
NAME_TO_CODE_POINT_MAP.put("Ograve",new Integer(_Ograve));
|
631 |
NAME_TO_CODE_POINT_MAP.put("Oacute",new Integer(_Oacute));
|
632 |
NAME_TO_CODE_POINT_MAP.put("Ocirc",new Integer(_Ocirc));
|
633 |
NAME_TO_CODE_POINT_MAP.put("Otilde",new Integer(_Otilde));
|
634 |
NAME_TO_CODE_POINT_MAP.put("Ouml",new Integer(_Ouml));
|
635 |
NAME_TO_CODE_POINT_MAP.put("times",new Integer(_times));
|
636 |
NAME_TO_CODE_POINT_MAP.put("Oslash",new Integer(_Oslash));
|
637 |
NAME_TO_CODE_POINT_MAP.put("Ugrave",new Integer(_Ugrave));
|
638 |
NAME_TO_CODE_POINT_MAP.put("Uacute",new Integer(_Uacute));
|
639 |
NAME_TO_CODE_POINT_MAP.put("Ucirc",new Integer(_Ucirc));
|
640 |
NAME_TO_CODE_POINT_MAP.put("Uuml",new Integer(_Uuml));
|
641 |
NAME_TO_CODE_POINT_MAP.put("Yacute",new Integer(_Yacute));
|
642 |
NAME_TO_CODE_POINT_MAP.put("THORN",new Integer(_THORN));
|
643 |
NAME_TO_CODE_POINT_MAP.put("szlig",new Integer(_szlig));
|
644 |
NAME_TO_CODE_POINT_MAP.put("agrave",new Integer(_agrave));
|
645 |
NAME_TO_CODE_POINT_MAP.put("aacute",new Integer(_aacute));
|
646 |
NAME_TO_CODE_POINT_MAP.put("acirc",new Integer(_acirc));
|
647 |
NAME_TO_CODE_POINT_MAP.put("atilde",new Integer(_atilde));
|
648 |
NAME_TO_CODE_POINT_MAP.put("auml",new Integer(_auml));
|
649 |
NAME_TO_CODE_POINT_MAP.put("aring",new Integer(_aring));
|
650 |
NAME_TO_CODE_POINT_MAP.put("aelig",new Integer(_aelig));
|
651 |
NAME_TO_CODE_POINT_MAP.put("ccedil",new Integer(_ccedil));
|
652 |
NAME_TO_CODE_POINT_MAP.put("egrave",new Integer(_egrave));
|
653 |
NAME_TO_CODE_POINT_MAP.put("eacute",new Integer(_eacute));
|
654 |
NAME_TO_CODE_POINT_MAP.put("ecirc",new Integer(_ecirc));
|
655 |
NAME_TO_CODE_POINT_MAP.put("euml",new Integer(_euml));
|
656 |
NAME_TO_CODE_POINT_MAP.put("igrave",new Integer(_igrave));
|
657 |
NAME_TO_CODE_POINT_MAP.put("iacute",new Integer(_iacute));
|
658 |
NAME_TO_CODE_POINT_MAP.put("icirc",new Integer(_icirc));
|
659 |
NAME_TO_CODE_POINT_MAP.put("iuml",new Integer(_iuml));
|
660 |
NAME_TO_CODE_POINT_MAP.put("eth",new Integer(_eth));
|
661 |
NAME_TO_CODE_POINT_MAP.put("ntilde",new Integer(_ntilde));
|
662 |
NAME_TO_CODE_POINT_MAP.put("ograve",new Integer(_ograve));
|
663 |
NAME_TO_CODE_POINT_MAP.put("oacute",new Integer(_oacute));
|
664 |
NAME_TO_CODE_POINT_MAP.put("ocirc",new Integer(_ocirc));
|
665 |
NAME_TO_CODE_POINT_MAP.put("otilde",new Integer(_otilde));
|
666 |
NAME_TO_CODE_POINT_MAP.put("ouml",new Integer(_ouml));
|
667 |
NAME_TO_CODE_POINT_MAP.put("divide",new Integer(_divide));
|
668 |
NAME_TO_CODE_POINT_MAP.put("oslash",new Integer(_oslash));
|
669 |
NAME_TO_CODE_POINT_MAP.put("ugrave",new Integer(_ugrave));
|
670 |
NAME_TO_CODE_POINT_MAP.put("uacute",new Integer(_uacute));
|
671 |
NAME_TO_CODE_POINT_MAP.put("ucirc",new Integer(_ucirc));
|
672 |
NAME_TO_CODE_POINT_MAP.put("uuml",new Integer(_uuml));
|
673 |
NAME_TO_CODE_POINT_MAP.put("yacute",new Integer(_yacute));
|
674 |
NAME_TO_CODE_POINT_MAP.put("thorn",new Integer(_thorn));
|
675 |
NAME_TO_CODE_POINT_MAP.put("yuml",new Integer(_yuml));
|
676 |
NAME_TO_CODE_POINT_MAP.put("fnof",new Integer(_fnof));
|
677 |
NAME_TO_CODE_POINT_MAP.put("Alpha",new Integer(_Alpha));
|
678 |
NAME_TO_CODE_POINT_MAP.put("Beta",new Integer(_Beta));
|
679 |
NAME_TO_CODE_POINT_MAP.put("Gamma",new Integer(_Gamma));
|
680 |
NAME_TO_CODE_POINT_MAP.put("Delta",new Integer(_Delta));
|
681 |
NAME_TO_CODE_POINT_MAP.put("Epsilon",new Integer(_Epsilon));
|
682 |
NAME_TO_CODE_POINT_MAP.put("Zeta",new Integer(_Zeta));
|
683 |
NAME_TO_CODE_POINT_MAP.put("Eta",new Integer(_Eta));
|
684 |
NAME_TO_CODE_POINT_MAP.put("Theta",new Integer(_Theta));
|
685 |
NAME_TO_CODE_POINT_MAP.put("Iota",new Integer(_Iota));
|
686 |
NAME_TO_CODE_POINT_MAP.put("Kappa",new Integer(_Kappa));
|
687 |
NAME_TO_CODE_POINT_MAP.put("Lambda",new Integer(_Lambda));
|
688 |
NAME_TO_CODE_POINT_MAP.put("Mu",new Integer(_Mu));
|
689 |
NAME_TO_CODE_POINT_MAP.put("Nu",new Integer(_Nu));
|
690 |
NAME_TO_CODE_POINT_MAP.put("Xi",new Integer(_Xi));
|
691 |
NAME_TO_CODE_POINT_MAP.put("Omicron",new Integer(_Omicron));
|
692 |
NAME_TO_CODE_POINT_MAP.put("Pi",new Integer(_Pi));
|
693 |
NAME_TO_CODE_POINT_MAP.put("Rho",new Integer(_Rho));
|
694 |
NAME_TO_CODE_POINT_MAP.put("Sigma",new Integer(_Sigma));
|
695 |
NAME_TO_CODE_POINT_MAP.put("Tau",new Integer(_Tau));
|
696 |
NAME_TO_CODE_POINT_MAP.put("Upsilon",new Integer(_Upsilon));
|
697 |
NAME_TO_CODE_POINT_MAP.put("Phi",new Integer(_Phi));
|
698 |
NAME_TO_CODE_POINT_MAP.put("Chi",new Integer(_Chi));
|
699 |
NAME_TO_CODE_POINT_MAP.put("Psi",new Integer(_Psi));
|
700 |
NAME_TO_CODE_POINT_MAP.put("Omega",new Integer(_Omega));
|
701 |
NAME_TO_CODE_POINT_MAP.put("alpha",new Integer(_alpha));
|
702 |
NAME_TO_CODE_POINT_MAP.put("beta",new Integer(_beta));
|
703 |
NAME_TO_CODE_POINT_MAP.put("gamma",new Integer(_gamma));
|
704 |
NAME_TO_CODE_POINT_MAP.put("delta",new Integer(_delta));
|
705 |
NAME_TO_CODE_POINT_MAP.put("epsilon",new Integer(_epsilon));
|
706 |
NAME_TO_CODE_POINT_MAP.put("zeta",new Integer(_zeta));
|
707 |
NAME_TO_CODE_POINT_MAP.put("eta",new Integer(_eta));
|
708 |
NAME_TO_CODE_POINT_MAP.put("theta",new Integer(_theta));
|
709 |
NAME_TO_CODE_POINT_MAP.put("iota",new Integer(_iota));
|
710 |
NAME_TO_CODE_POINT_MAP.put("kappa",new Integer(_kappa));
|
711 |
NAME_TO_CODE_POINT_MAP.put("lambda",new Integer(_lambda));
|
712 |
NAME_TO_CODE_POINT_MAP.put("mu",new Integer(_mu));
|
713 |
NAME_TO_CODE_POINT_MAP.put("nu",new Integer(_nu));
|
714 |
NAME_TO_CODE_POINT_MAP.put("xi",new Integer(_xi));
|
715 |
NAME_TO_CODE_POINT_MAP.put("omicron",new Integer(_omicron));
|
716 |
NAME_TO_CODE_POINT_MAP.put("pi",new Integer(_pi));
|
717 |
NAME_TO_CODE_POINT_MAP.put("rho",new Integer(_rho));
|
718 |
NAME_TO_CODE_POINT_MAP.put("sigmaf",new Integer(_sigmaf));
|
719 |
NAME_TO_CODE_POINT_MAP.put("sigma",new Integer(_sigma));
|
720 |
NAME_TO_CODE_POINT_MAP.put("tau",new Integer(_tau));
|
721 |
NAME_TO_CODE_POINT_MAP.put("upsilon",new Integer(_upsilon));
|
722 |
NAME_TO_CODE_POINT_MAP.put("phi",new Integer(_phi));
|
723 |
NAME_TO_CODE_POINT_MAP.put("chi",new Integer(_chi));
|
724 |
NAME_TO_CODE_POINT_MAP.put("psi",new Integer(_psi));
|
725 |
NAME_TO_CODE_POINT_MAP.put("omega",new Integer(_omega));
|
726 |
NAME_TO_CODE_POINT_MAP.put("thetasym",new Integer(_thetasym));
|
727 |
NAME_TO_CODE_POINT_MAP.put("upsih",new Integer(_upsih));
|
728 |
NAME_TO_CODE_POINT_MAP.put("piv",new Integer(_piv));
|
729 |
NAME_TO_CODE_POINT_MAP.put("bull",new Integer(_bull));
|
730 |
NAME_TO_CODE_POINT_MAP.put("hellip",new Integer(_hellip));
|
731 |
NAME_TO_CODE_POINT_MAP.put("prime",new Integer(_prime));
|
732 |
NAME_TO_CODE_POINT_MAP.put("Prime",new Integer(_Prime));
|
733 |
NAME_TO_CODE_POINT_MAP.put("oline",new Integer(_oline));
|
734 |
NAME_TO_CODE_POINT_MAP.put("frasl",new Integer(_frasl));
|
735 |
NAME_TO_CODE_POINT_MAP.put("weierp",new Integer(_weierp));
|
736 |
NAME_TO_CODE_POINT_MAP.put("image",new Integer(_image));
|
737 |
NAME_TO_CODE_POINT_MAP.put("real",new Integer(_real));
|
738 |
NAME_TO_CODE_POINT_MAP.put("trade",new Integer(_trade));
|
739 |
NAME_TO_CODE_POINT_MAP.put("alefsym",new Integer(_alefsym));
|
740 |
NAME_TO_CODE_POINT_MAP.put("larr",new Integer(_larr));
|
741 |
NAME_TO_CODE_POINT_MAP.put("uarr",new Integer(_uarr));
|
742 |
NAME_TO_CODE_POINT_MAP.put("rarr",new Integer(_rarr));
|
743 |
NAME_TO_CODE_POINT_MAP.put("darr",new Integer(_darr));
|
744 |
NAME_TO_CODE_POINT_MAP.put("harr",new Integer(_harr));
|
745 |
NAME_TO_CODE_POINT_MAP.put("crarr",new Integer(_crarr));
|
746 |
NAME_TO_CODE_POINT_MAP.put("lArr",new Integer(_lArr));
|
747 |
NAME_TO_CODE_POINT_MAP.put("uArr",new Integer(_uArr));
|
748 |
NAME_TO_CODE_POINT_MAP.put("rArr",new Integer(_rArr));
|
749 |
NAME_TO_CODE_POINT_MAP.put("dArr",new Integer(_dArr));
|
750 |
NAME_TO_CODE_POINT_MAP.put("hArr",new Integer(_hArr));
|
751 |
NAME_TO_CODE_POINT_MAP.put("forall",new Integer(_forall));
|
752 |
NAME_TO_CODE_POINT_MAP.put("part",new Integer(_part));
|
753 |
NAME_TO_CODE_POINT_MAP.put("exist",new Integer(_exist));
|
754 |
NAME_TO_CODE_POINT_MAP.put("empty",new Integer(_empty));
|
755 |
NAME_TO_CODE_POINT_MAP.put("nabla",new Integer(_nabla));
|
756 |
NAME_TO_CODE_POINT_MAP.put("isin",new Integer(_isin));
|
757 |
NAME_TO_CODE_POINT_MAP.put("notin",new Integer(_notin));
|
758 |
NAME_TO_CODE_POINT_MAP.put("ni",new Integer(_ni));
|
759 |
NAME_TO_CODE_POINT_MAP.put("prod",new Integer(_prod));
|
760 |
NAME_TO_CODE_POINT_MAP.put("sum",new Integer(_sum));
|
761 |
NAME_TO_CODE_POINT_MAP.put("minus",new Integer(_minus));
|
762 |
NAME_TO_CODE_POINT_MAP.put("lowast",new Integer(_lowast));
|
763 |
NAME_TO_CODE_POINT_MAP.put("radic",new Integer(_radic));
|
764 |
NAME_TO_CODE_POINT_MAP.put("prop",new Integer(_prop));
|
765 |
NAME_TO_CODE_POINT_MAP.put("infin",new Integer(_infin));
|
766 |
NAME_TO_CODE_POINT_MAP.put("ang",new Integer(_ang));
|
767 |
NAME_TO_CODE_POINT_MAP.put("and",new Integer(_and));
|
768 |
NAME_TO_CODE_POINT_MAP.put("or",new Integer(_or));
|
769 |
NAME_TO_CODE_POINT_MAP.put("cap",new Integer(_cap));
|
770 |
NAME_TO_CODE_POINT_MAP.put("cup",new Integer(_cup));
|
771 |
NAME_TO_CODE_POINT_MAP.put("int",new Integer(_int));
|
772 |
NAME_TO_CODE_POINT_MAP.put("there4",new Integer(_there4));
|
773 |
NAME_TO_CODE_POINT_MAP.put("sim",new Integer(_sim));
|
774 |
NAME_TO_CODE_POINT_MAP.put("cong",new Integer(_cong));
|
775 |
NAME_TO_CODE_POINT_MAP.put("asymp",new Integer(_asymp));
|
776 |
NAME_TO_CODE_POINT_MAP.put("ne",new Integer(_ne));
|
777 |
NAME_TO_CODE_POINT_MAP.put("equiv",new Integer(_equiv));
|
778 |
NAME_TO_CODE_POINT_MAP.put("le",new Integer(_le));
|
779 |
NAME_TO_CODE_POINT_MAP.put("ge",new Integer(_ge));
|
780 |
NAME_TO_CODE_POINT_MAP.put("sub",new Integer(_sub));
|
781 |
NAME_TO_CODE_POINT_MAP.put("sup",new Integer(_sup));
|
782 |
NAME_TO_CODE_POINT_MAP.put("nsub",new Integer(_nsub));
|
783 |
NAME_TO_CODE_POINT_MAP.put("sube",new Integer(_sube));
|
784 |
NAME_TO_CODE_POINT_MAP.put("supe",new Integer(_supe));
|
785 |
NAME_TO_CODE_POINT_MAP.put("oplus",new Integer(_oplus));
|
786 |
NAME_TO_CODE_POINT_MAP.put("otimes",new Integer(_otimes));
|
787 |
NAME_TO_CODE_POINT_MAP.put("perp",new Integer(_perp));
|
788 |
NAME_TO_CODE_POINT_MAP.put("sdot",new Integer(_sdot));
|
789 |
NAME_TO_CODE_POINT_MAP.put("lceil",new Integer(_lceil));
|
790 |
NAME_TO_CODE_POINT_MAP.put("rceil",new Integer(_rceil));
|
791 |
NAME_TO_CODE_POINT_MAP.put("lfloor",new Integer(_lfloor));
|
792 |
NAME_TO_CODE_POINT_MAP.put("rfloor",new Integer(_rfloor));
|
793 |
NAME_TO_CODE_POINT_MAP.put("lang",new Integer(_lang));
|
794 |
NAME_TO_CODE_POINT_MAP.put("rang",new Integer(_rang));
|
795 |
NAME_TO_CODE_POINT_MAP.put("loz",new Integer(_loz));
|
796 |
NAME_TO_CODE_POINT_MAP.put("spades",new Integer(_spades));
|
797 |
NAME_TO_CODE_POINT_MAP.put("clubs",new Integer(_clubs));
|
798 |
NAME_TO_CODE_POINT_MAP.put("hearts",new Integer(_hearts));
|
799 |
NAME_TO_CODE_POINT_MAP.put("diams",new Integer(_diams));
|
800 |
NAME_TO_CODE_POINT_MAP.put("quot",new Integer(_quot));
|
801 |
NAME_TO_CODE_POINT_MAP.put("amp",new Integer(_amp));
|
802 |
NAME_TO_CODE_POINT_MAP.put("lt",new Integer(_lt));
|
803 |
NAME_TO_CODE_POINT_MAP.put("gt",new Integer(_gt));
|
804 |
NAME_TO_CODE_POINT_MAP.put("OElig",new Integer(_OElig));
|
805 |
NAME_TO_CODE_POINT_MAP.put("oelig",new Integer(_oelig));
|
806 |
NAME_TO_CODE_POINT_MAP.put("Scaron",new Integer(_Scaron));
|
807 |
NAME_TO_CODE_POINT_MAP.put("scaron",new Integer(_scaron));
|
808 |
NAME_TO_CODE_POINT_MAP.put("Yuml",new Integer(_Yuml));
|
809 |
NAME_TO_CODE_POINT_MAP.put("circ",new Integer(_circ));
|
810 |
NAME_TO_CODE_POINT_MAP.put("tilde",new Integer(_tilde));
|
811 |
NAME_TO_CODE_POINT_MAP.put("ensp",new Integer(_ensp));
|
812 |
NAME_TO_CODE_POINT_MAP.put("emsp",new Integer(_emsp));
|
813 |
NAME_TO_CODE_POINT_MAP.put("thinsp",new Integer(_thinsp));
|
814 |
NAME_TO_CODE_POINT_MAP.put("zwnj",new Integer(_zwnj));
|
815 |
NAME_TO_CODE_POINT_MAP.put("zwj",new Integer(_zwj));
|
816 |
NAME_TO_CODE_POINT_MAP.put("lrm",new Integer(_lrm));
|
817 |
NAME_TO_CODE_POINT_MAP.put("rlm",new Integer(_rlm));
|
818 |
NAME_TO_CODE_POINT_MAP.put("ndash",new Integer(_ndash));
|
819 |
NAME_TO_CODE_POINT_MAP.put("mdash",new Integer(_mdash));
|
820 |
NAME_TO_CODE_POINT_MAP.put("lsquo",new Integer(_lsquo));
|
821 |
NAME_TO_CODE_POINT_MAP.put("rsquo",new Integer(_rsquo));
|
822 |
NAME_TO_CODE_POINT_MAP.put("sbquo",new Integer(_sbquo));
|
823 |
NAME_TO_CODE_POINT_MAP.put("ldquo",new Integer(_ldquo));
|
824 |
NAME_TO_CODE_POINT_MAP.put("rdquo",new Integer(_rdquo));
|
825 |
NAME_TO_CODE_POINT_MAP.put("bdquo",new Integer(_bdquo));
|
826 |
NAME_TO_CODE_POINT_MAP.put("dagger",new Integer(_dagger));
|
827 |
NAME_TO_CODE_POINT_MAP.put("Dagger",new Integer(_Dagger));
|
828 |
NAME_TO_CODE_POINT_MAP.put("permil",new Integer(_permil));
|
829 |
NAME_TO_CODE_POINT_MAP.put("lsaquo",new Integer(_lsaquo));
|
830 |
NAME_TO_CODE_POINT_MAP.put("rsaquo",new Integer(_rsaquo));
|
831 |
NAME_TO_CODE_POINT_MAP.put("euro",new Integer(_euro));
|
832 |
NAME_TO_CODE_POINT_MAP.put("apos",new Integer(_apos));
|
833 |
|
834 |
CODE_POINT_TO_NAME_MAP=new IntStringHashMap((int)(NAME_TO_CODE_POINT_MAP.size()/0.75F),1.0F);
|
835 |
for (Map.Entry<String,Integer> entry : NAME_TO_CODE_POINT_MAP.entrySet()) {
|
836 |
String name=entry.getKey();
|
837 |
if (MAX_NAME_LENGTH<name.length()) MAX_NAME_LENGTH=name.length();
|
838 |
CODE_POINT_TO_NAME_MAP.put(entry.getValue().intValue(),name);
|
839 |
}
|
840 |
MAX_ENTITY_REFERENCE_LENGTH=MAX_NAME_LENGTH+2; // '&'+name+';'
|
841 |
}
|
842 |
|
843 |
/**
 * Constructs a new character entity reference.
 * <p>
 * The <code>source</code>, <code>begin</code>, <code>end</code> and <code>codePoint</code> arguments are passed
 * unchanged to the superclass constructor, and the name is looked up from the code point using {@link #getName(int codePoint)}.
 */
private CharacterEntityReference(final Source source, final int begin, final int end, final int codePoint) {
	super(source,begin,end,codePoint);
	this.name=getName(codePoint);
}
|
847 |
|
848 |
/**
|
849 |
* Returns the name of this character entity reference.
|
850 |
* <p>
|
851 |
* <dl>
|
852 |
* <dt>Example:</dt>
|
853 |
* <dd><code>((CharacterEntityReference)CharacterReference.parse("&gt;")).getName()</code> returns "<code>gt</code>"</dd>
|
854 |
* </dl>
|
855 |
* @return the name of this character entity reference.
|
856 |
* @see #getName(int codePoint)
|
857 |
*/
|
858 |
public String getName() {
|
859 |
return name;
|
860 |
}
|
861 |
|
862 |
/**
|
863 |
* Returns the character entity reference name of the specified character.
|
864 |
* <p>
|
865 |
* Since all character entity references represent unicode <a target="_blank" href="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
|
866 |
* the functionality of this method is identical to that of {@link #getName(int codePoint)}.
|
867 |
* <p>
|
868 |
* <dl>
|
869 |
* <dt>Example:</dt>
|
870 |
* <dd><code>CharacterEntityReference.getName('>')</code> returns "<code>gt</code>"</dd>
|
871 |
* </dl>
|
872 |
* @return the character entity reference name of the specified character, or <code>null</code> if none exists.
|
873 |
*/
|
874 |
public static String getName(final char ch) {
|
875 |
return getName((int)ch);
|
876 |
}
|
877 |
|
878 |
/**
|
879 |
* Returns the character entity reference name of the specified unicode code point.
|
880 |
* <p>
|
881 |
* Since all character entity references represent unicode <a target="_blank" href="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
|
882 |
* the functionality of this method is identical to that of {@link #getName(char ch)}.
|
883 |
* <p>
|
884 |
* <dl>
|
885 |
* <dt>Example:</dt>
|
886 |
* <dd><code>CharacterEntityReference.getName(62)</code> returns "<code>gt</code>"</dd>
|
887 |
* </dl>
|
888 |
* @return the character entity reference name of the specified unicode code point, or <code>null</code> if none exists.
|
889 |
*/
|
890 |
public static String getName(final int codePoint) {
|
891 |
return CODE_POINT_TO_NAME_MAP.get(codePoint);
|
892 |
}
|
893 |
|
894 |
/**
|
895 |
* Returns the unicode code point of the specified character entity reference name.
|
896 |
* <p>
|
897 |
* If the string does not represent a valid character entity reference name, this method returns {@link #INVALID_CODE_POINT INVALID_CODE_POINT}.
|
898 |
* <p>
|
899 |
* Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case,
|
900 |
* some browsers also recognise them in a case-insensitive way.
|
901 |
* For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.
|
902 |
* <p>
|
903 |
* <dl>
|
904 |
* <dt>Example:</dt>
|
905 |
* <dd><code>CharacterEntityReference.getCodePointFromName("gt")</code> returns <code>62</code></dd>
|
906 |
* </dl>
|
907 |
* @return the unicode code point of the specified character entity reference name, or {@link #INVALID_CODE_POINT INVALID_CODE_POINT} if the string does not represent a valid character entity reference name.
|
908 |
*/
|
909 |
public static int getCodePointFromName(final String name) {
|
910 |
Integer codePoint=NAME_TO_CODE_POINT_MAP.get(name);
|
911 |
if (codePoint==null) {
|
912 |
// Most browsers recognise character entity references even if they have the wrong case, so check for this as well:
|
913 |
final String lowerCaseName=name.toLowerCase();
|
914 |
if (lowerCaseName!=name) codePoint=NAME_TO_CODE_POINT_MAP.get(lowerCaseName);
|
915 |
}
|
916 |
return (codePoint!=null) ? codePoint.intValue() : INVALID_CODE_POINT;
|
917 |
}
|
918 |
|
919 |
/**
|
920 |
* Returns the correct encoded form of this character entity reference.
|
921 |
* <p>
|
922 |
* Note that the returned string is not necessarily the same as the original source text used to create this object.
|
923 |
* This library recognises certain invalid forms of character references, as detailed in the {@link #decode(CharSequence) decode(String encodedString)} method.
|
924 |
* <p>
|
925 |
* To retrieve the original source text, use the {@link #toString() toString()} method instead.
|
926 |
* <p>
|
927 |
* <dl>
|
928 |
* <dt>Example:</dt>
|
929 |
* <dd><code>CharacterReference.parse("&GT").getCharacterReferenceString()</code> returns "<code>&gt;</code>"</dd>
|
930 |
* </dl>
|
931 |
*
|
932 |
* @return the correct encoded form of this character entity reference.
|
933 |
* @see CharacterReference#getCharacterReferenceString(int codePoint)
|
934 |
*/
|
935 |
public String getCharacterReferenceString() {
|
936 |
return getCharacterReferenceString(name);
|
937 |
}
|
938 |
|
939 |
/**
|
940 |
* Returns the character entity reference encoded form of the specified unicode code point.
|
941 |
* <p>
|
942 |
* If the specified unicode code point does not have an equivalent character entity reference, this method returns <code>null</code>.
|
943 |
* To get either the entity or numeric reference encoded form, use the {@link CharacterReference#getCharacterReferenceString(int codePoint)} method instead.
|
944 |
* <p>
|
945 |
* <dl>
|
946 |
* <dt>Examples:</dt>
|
947 |
* <dd><code>CharacterEntityReference.getCharacterReferenceString(62)</code> returns "<code>&gt;</code>"</dd>
|
948 |
* <dd><code>CharacterEntityReference.getCharacterReferenceString(9786)</code> returns <code>null</code></dd>
|
949 |
* </dl>
|
950 |
*
|
951 |
* @return the character entity reference encoded form of the specified unicode code point, or <code>null</code> if none exists.
|
952 |
* @see CharacterReference#getCharacterReferenceString(int codePoint)
|
953 |
*/
|
954 |
public static String getCharacterReferenceString(final int codePoint) {
|
955 |
if (codePoint>Character.MAX_VALUE) return null;
|
956 |
final String name=getName(codePoint);
|
957 |
return name!=null ? getCharacterReferenceString(name) : null;
|
958 |
}
|
959 |
|
960 |
/**
|
961 |
* Returns a map of character entity reference names (<code>String</code>) to unicode code points (<code>Integer</code>).
|
962 |
* @return a map of character entity reference names to unicode code points.
|
963 |
*/
|
964 |
public static Map<String,Integer> getNameToCodePointMap() {
|
965 |
return NAME_TO_CODE_POINT_MAP;
|
966 |
}
|
967 |
|
968 |
/**
|
969 |
* Returns a string representation of this object useful for debugging purposes.
|
970 |
* @return a string representation of this object useful for debugging purposes.
|
971 |
*/
|
972 |
public String getDebugInfo() {
|
973 |
final StringBuilder sb=new StringBuilder();
|
974 |
sb.append('"');
|
975 |
try {
|
976 |
appendCharacterReferenceString(sb,name);
|
977 |
sb.append("\" ");
|
978 |
appendUnicodeText(sb,codePoint);
|
979 |
} catch (IOException ex) {throw new RuntimeException(ex);} // never happens
|
980 |
sb.append(' ').append(super.getDebugInfo());
|
981 |
return sb.toString();
|
982 |
}
|
983 |
|
984 |
private static String getCharacterReferenceString(final String name) {
|
985 |
try {
|
986 |
return appendCharacterReferenceString(new StringBuilder(),name).toString();
|
987 |
} catch (IOException ex) {throw new RuntimeException(ex);} // never happens
|
988 |
}
|
989 |
|
990 |
static final Appendable appendCharacterReferenceString(final Appendable appendable, final String name) throws IOException {
|
991 |
return appendable.append('&').append(name).append(';');
|
992 |
}
|
993 |
|
994 |
static CharacterReference construct(final Source source, final int begin, final int unterminatedMaxCodePoint) {
|
995 |
// only called from CharacterReference.construct(), so we can assume that first character is '&'
|
996 |
String name;
|
997 |
final int nameBegin=begin+1;
|
998 |
final int maxNameEnd=nameBegin+MAX_NAME_LENGTH;
|
999 |
final int maxSourcePos=source.end-1;
|
1000 |
int end;
|
1001 |
int x=nameBegin;
|
1002 |
boolean unterminated=false;
|
1003 |
while (true) {
|
1004 |
final char ch=source.charAt(x);
|
1005 |
if (ch==';') {
|
1006 |
end=x+1;
|
1007 |
name=source.subSequence(nameBegin,x).toString();
|
1008 |
break;
|
1009 |
}
|
1010 |
if (!isValidReferenceNameChar(ch)) {
|
1011 |
// At this point, ch is determined to be an invalid character, meaning the character reference is unterminated.
|
1012 |
unterminated=true;
|
1013 |
} else if (x==maxSourcePos) {
|
1014 |
// At this point, we have a valid name character but are at the last position in the source text without the terminating semicolon.
|
1015 |
unterminated=true;
|
1016 |
x++; // include this character in the name
|
1017 |
}
|
1018 |
if (unterminated) {
|
1019 |
// Different browsers react differently to unterminated character entity references.
|
1020 |
// The behaviour of this method is determined by the unterminatedMaxCodePoint parameter.
|
1021 |
if (unterminatedMaxCodePoint==INVALID_CODE_POINT) {
|
1022 |
// reject:
|
1023 |
return null;
|
1024 |
} else {
|
1025 |
// accept:
|
1026 |
end=x;
|
1027 |
name=source.subSequence(nameBegin,x).toString();
|
1028 |
break;
|
1029 |
}
|
1030 |
}
|
1031 |
if (++x>maxNameEnd) return null;
|
1032 |
}
|
1033 |
final int codePoint=getCodePointFromName(name);
|
1034 |
if (codePoint==INVALID_CODE_POINT || (unterminated && codePoint>unterminatedMaxCodePoint)) return null;
|
1035 |
return new CharacterEntityReference(source,begin,end,codePoint);
|
1036 |
}
|
1037 |
|
1038 |
private static final boolean isValidReferenceNameChar(final char ch) {
|
1039 |
return ch>='A' && ch<='z' && (ch<='Z' || ch>='a');
|
1040 |
}
|
1041 |
}
|
1042 |
|