/[aagtl_public1]/src/net/htmlparser/jericho/Config.java
aagtl

Contents of /src/net/htmlparser/jericho/Config.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 34866 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Encapsulates global configuration properties which determine the behaviour of various functions.
27 * <p>
28 * All of the properties in this class are static, affecting all objects and threads.
29 * Multiple concurrent configurations are not possible.
30 * <p>
31 * Properties that relate to <a target="_blank" href="http://www.w3.org/TR/html401/conform.html#didx-user_agent">user agent</a>
32 * compatibility issues are stored in instances of the {@link Config.CompatibilityMode} class.
33 * This allows all of the properties in the compatibility mode to be set as a block by setting the static
34 * {@link #CurrentCompatibilityMode} property to a different instance.
35 *
36 * @see Config.CompatibilityMode
37 */
38 public final class Config {
39 private Config() {}
40
41 /**
42 * Determines the string used to separate a single column's multiple values in the output of the {@link FormFields#getColumnValues(Map)} method.
43 * <p>
44 * The situation where a single column has multiple values only arises if {@link FormField#getUserValueCount()}<code>&gt;1</code>
45 * on the relevant form field, which usually indicates a poorly designed form.
46 * <p>
47 * The default value is "<code>,</code>" (a comma, not including the quotes).
48 * <p>
49 * Must not be <code>null</code>.
50 */
51 public static String ColumnMultipleValueSeparator=",";
52
53 /**
54 * Determines the string that represents the value <code>true</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
55 * <p>
56 * The default value is "<code>true</code>" (without the quotes).
57 * <p>
58 * Must not be <code>null</code>.
59 */
60 public static String ColumnValueTrue=Boolean.toString(true);
61
62 /**
63 * Determines the string that represents the value <code>false</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
64 * <p>
65 * The default value is <code>null</code>, which represents no output at all.
66 */
67 public static String ColumnValueFalse=null;
68
69 /**
70 * Determines whether the {@link CharacterReference#decode(CharSequence)} and similar methods convert non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character references to normal spaces.
71 * <p>
72 * The default value is <code>true</code>.
73 * <p>
74 * When this property is set to <code>false</code>, non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;})
75 * character references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020).
76 * <p>
77 * The default behaviour of the library reflects the fact that non-breaking space character references are almost always used in HTML documents
78 * as a <a target="_blank" href="http://en.wikipedia.org/wiki/Non-breaking_space#Use_as_non-collapsing_whitespace">non-collapsing white space</a> character.
79 * Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users
80 * who expected to see only normal spaces.
81 * The most common example of this is its visualisation as the character <b>&aacute;</b> in the MS-DOS <a target="_blank" href="http://en.wikipedia.org/wiki/Code_page_437">CP437</a> character set.
82 * <p>
83 * The functionality of the following methods is affected:
84 * <ul>
85 * <li>{@link CharacterReference#appendCharTo(Appendable)}
86 * <li>{@link CharacterReference#decode(CharSequence)}
87 * <li>{@link CharacterReference#decode(CharSequence, boolean insideAttributeValue)}
88 * <li>{@link CharacterReference#decodeCollapseWhiteSpace(CharSequence)}
89 * <li>{@link CharacterReference#reencode(CharSequence)}
90 * <li>{@link Attribute#getValue()}
91 * <li>{@link Attributes#getValue(String name)}
92 * <li>{@link Attributes#populateMap(Map, boolean convertNamesToLowerCase)}
93 * <li>{@link StartTag#getAttributeValue(String attributeName)}
94 * <li>{@link Element#getAttributeValue(String attributeName)}
95 * <li>{@link FormControl#getPredefinedValues()}
96 * <li>{@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)}
97 * <li>{@link Renderer#getConvertNonBreakingSpaces()}
98 * <li>{@link TextExtractor#getConvertNonBreakingSpaces()}
99 * </ul>
100 */
101 public static boolean ConvertNonBreakingSpaces=true;
102
103
104 /**
105 * Determines the currently active {@linkplain Config.CompatibilityMode compatibility mode}.
106 * <p>
107 * The default setting is {@link Config.CompatibilityMode#IE} (MS Internet Explorer 6.0).
108 * <p>
109 * Must not be <code>null</code>.
110 */
111 public static CompatibilityMode CurrentCompatibilityMode=CompatibilityMode.IE;
112
113 /**
114 * Determines whether apostrophes are encoded when calling the {@link CharacterReference#encode(CharSequence)} method.
115 * <p>
116 * A value of <code>false</code> means {@linkplain CharacterEntityReference#_apos apostrophe}
117 * (U+0027) characters are not encoded.
118 * The only time apostrophes need to be encoded is within an attribute value delimited by
119 * single quotes (apostrophes), so in most cases ignoring apostrophes is perfectly safe and
120 * enhances the readability of the source document.
121 * <p>
122 * Note that apostrophes are always encoded as a {@linkplain NumericCharacterReference numeric character reference}, never as the
123 * character entity reference {@link CharacterEntityReference#_apos &amp;apos;}.
124 * <p>
125 * The default value is <code>false</code>.
126 */
127 public static boolean IsApostropheEncoded=false;
128
129 /**
130 * Determines whether all {@linkplain StartTag#isEmptyElementTag() empty-element tags} are recognised.
131 * <p>
132 * The major browsers do not recognise empty-element tags (those having the characters "/&gt;" at the end of the start tag) if the element is defined by the
133 * HTML specification to have a {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or an {@linkplain HTMLElements#getEndTagOptionalElementNames() optional} end tag.
134 * This is the case even in <a target="_blank" href="http://www.w3.org/TR/xhtml1/">XHTML</a> documents, which can cause a lot of confusion.
135 * <p>
136 * Setting this property to <code>true</code> forces the parser to recognise all {@linkplain StartTag#isSyntacticalEmptyElementTag() syntactical empty-element tags},
137 * regardless of whether the element is defined by the HTML specification to have a required or optional end tag.
138 * <p>
139 * Use of this feature is however not recommended as it makes the parser behaviour inconsistent with that of most browsers.
140 * <p>
141 * The default value is <code>false</code>.
142 *
143 * @see StartTag#isEmptyElementTag()
144 */
145 public static boolean IsHTMLEmptyElementTagRecognised=false;
146
147 /**
148 * Determines the {@link LoggerProvider} that is used to create the default {@link Logger} object for each new {@link Source} object.
149 * <p>
150 * The {@link LoggerProvider} interface contains several predefined <code>LoggerProvider</code> instances which this property can be set to,
151 * mostly representing wrappers to common logging frameworks.
152 * <p>
153 * The default value is <code>null</code>, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm:
154 * <p>
155 * <ol>
156 * <li>If the class <code>org.slf4j.impl.StaticLoggerBinder</code> is detected:
157 * <ul>
158 * <li>If the class <code>org.slf4j.impl.JDK14LoggerFactory</code> is detected, use {@link LoggerProvider#JAVA}.
159 * <li>If the class <code>org.slf4j.impl.Log4jLoggerFactory</code> is detected, use {@link LoggerProvider#LOG4J}.
160 * <li>If the class <code>org.slf4j.impl.JCLLoggerFactory</code> is NOT detected, use {@link LoggerProvider#SLF4J}.
161 * </ul>
162 * <li>If the class <code>org.apache.commons.logging.Log</code> is detected:
163 * <blockquote>
164 * Create an instance of it using the commons-logging <code>LogFactory</code> class.
165 * <ul>
166 * <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Jdk14Logger</code>, use {@link LoggerProvider#JAVA}.
167 * <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Log4JLogger</code>, use {@link LoggerProvider#LOG4J}.
168 * <li>otherwise, use {@link LoggerProvider#JCL}.
169 * </ul>
170 * </blockquote>
171 * <li>If the class <code>org.apache.log4j.Logger</code> is detected, use {@link LoggerProvider#LOG4J}.
172 * <li>otherwise, use {@link LoggerProvider#JAVA}.
173 * </ol>
174 *
175 * @see Source#setLogger(Logger)
176 */
177 public static LoggerProvider LoggerProvider=null;
178
179 /**
180 * Determines the string used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in text output throughout the library.
181 * <p>
182 * The default value is the standard new line character sequence of the host platform, determined by <code>System.getProperty("line.separator")</code>.
183 */
184 public static String NewLine=System.getProperty("line.separator");
185
186 /**
187 * Used in Element.getChildElements.
188 * Server elements containing markup should be included in the hierarchy, so consider making this option public in future.
189 */
190 static final boolean IncludeServerTagsInElementHierarchy=false;
191
192 /**
193 * Represents a set of maximum unicode code points to be recognised for the three types of
194 * <a href="CharacterReference.html#Unterminated">unterminated</a> character reference in a given context.
195 * <p>
196 * The three types of character reference are:
197 * <ul>
198 * <li>{@linkplain CharacterEntityReference Character entity reference}
199 * <li><a href="NumericCharacterReference.html#DecimalCharacterReference">Decimal character reference</a>
200 * <li><a href="NumericCharacterReference.html#HexadecimalCharacterReference">Hexadecimal character reference</a>
201 * </ul>
202 * <p>
203 * The two types of contexts used in this library are:
204 * <ul>
205 * <li>Inside an attribute value
206 * <li>Outside an attribute value
207 * </ul>
208 */
209 static class UnterminatedCharacterReferenceSettings {
210 // volatile makes a limit changed by one thread visible to other threads; it does not make compound updates atomic
211 public volatile int characterEntityReferenceMaxCodePoint;
212 public volatile int decimalCharacterReferenceMaxCodePoint;
213 public volatile int hexadecimalCharacterReferenceMaxCodePoint;
214 // Preset in which all three limits are CODE_POINTS_ALL. The instance is mutable, so changing it affects every user of the preset.
215 public static UnterminatedCharacterReferenceSettings ACCEPT_ALL=new UnterminatedCharacterReferenceSettings(CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL);
216 // Creates settings in which all three limits are CODE_POINTS_NONE.
217 public UnterminatedCharacterReferenceSettings() {
218 this(CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE);
219 }
220 // Creates settings with an explicit maximum code point for each of the three types of character reference.
221 public UnterminatedCharacterReferenceSettings(final int characterEntityReferenceMaxCodePoint, final int decimalCharacterReferenceMaxCodePoint, final int hexadecimalCharacterReferenceMaxCodePoint) {
222 this.characterEntityReferenceMaxCodePoint=characterEntityReferenceMaxCodePoint;
223 this.decimalCharacterReferenceMaxCodePoint=decimalCharacterReferenceMaxCodePoint;
224 this.hexadecimalCharacterReferenceMaxCodePoint=hexadecimalCharacterReferenceMaxCodePoint;
225 }
226 // Describes the three limits, one per line, with each line preceded by Config.NewLine.
227 public String toString() {
228 return Config.NewLine+" Character entity reference: "+getDescription(characterEntityReferenceMaxCodePoint)
229 +Config.NewLine+" Decimal character reference: "+getDescription(decimalCharacterReferenceMaxCodePoint)
230 +Config.NewLine+" Hexadecimal character reference: "+getDescription(hexadecimalCharacterReferenceMaxCodePoint);
231 }
232 // Formats a limit as "None", "All", or otherwise as "0x" followed by its hexadecimal digits.
233 private String getDescription(final int codePoint) {
234 if (codePoint==CompatibilityMode.CODE_POINTS_NONE) return "None";
235 if (codePoint==CompatibilityMode.CODE_POINTS_ALL) return "All";
236 return "0x"+Integer.toString(codePoint,16);
237 }
238 }
239
240 /**
241 * Represents a set of configuration parameters that relate to
242 * <a target="_blank" href="http://www.w3.org/TR/html401/conform.html#didx-user_agent">user agent</a> compatibility issues.
243 * <p>
244 * The predefined compatibility modes {@link #IE}, {@link #MOZILLA}, {@link #OPERA} and {@link #XHTML} provide an easy means of
245 * ensuring the library interprets the markup in a way consistent with some of the most commonly used browsers,
246 * at least in relation to the behaviour described by the properties in this class.
247 * <p>
248 * The properties of any <code>CompatibilityMode</code> object can be modified individually, including those in
249 * the predefined instances as well as newly constructed instances.
250 * Take note however that modifying the properties of the predefined instances has a global effect.
251 * <p>
252 * The currently active compatibility mode is stored in the static {@link Config#CurrentCompatibilityMode} property.
253 * <p>
254 */
255 public static final class CompatibilityMode {
256 private String name;
257 private volatile boolean formFieldNameCaseInsensitive;
258 volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue;
259 volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue;
260
261 /**
262 * Indicates the recognition of all unicode code points.
263 * <p>
264 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
265 *
266 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
267 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
268 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
269 */
270 public static final int CODE_POINTS_ALL=Character.MAX_CODE_POINT; // 0x10FFFF (decimal 1114111)
271
272 /**
273 * Indicates the recognition of no unicode code points.
274 * <p>
275 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
276 *
277 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
278 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
279 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
280 */
281 public static final int CODE_POINTS_NONE=CharacterReference.INVALID_CODE_POINT;
282
283 /**
284 * <a target="_blank" href="http://www.microsoft.com/windows/ie/">Microsoft Internet Explorer</a> compatibility mode.
285 * <p>
286 * <code>{@link #getName() Name} = IE</code><br />
287 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br />
288 * <table cellspacing="0" cellpadding="0">
289 * <tr><th>Recognition of unterminated character references:<th><th align="center">&nbsp; (inside attribute) &nbsp;<th align="center">&nbsp; (outside attribute) &nbsp;
290 * <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">U+00FF<td align="center">U+00FF
291 * <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_ALL All}
292 * <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_NONE None}
293 * </table>
294 */
295 public static final CompatibilityMode IE=new CompatibilityMode("IE",true,
296 new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
297 new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_NONE) // outside attributes
298 );
299
300 /**
301 * <a target="_blank" href="http://www.mozilla.org/products/mozilla1.x/">Mozilla</a> /
302 * <a target="_blank" href="http://www.mozilla.org/products/firefox/">Firefox</a> /
303 * <a target="_blank" href="http://browser.netscape.com/">Netscape</a> compatibility mode.
304 * <p>
305 * <code>{@link #getName() Name} = Mozilla</code><br />
306 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br />
307 * <table cellspacing="0" cellpadding="0">
308 * <tr><th>Recognition of unterminated character references:<th><th align="center">&nbsp; (inside attribute) &nbsp;<th align="center">&nbsp; (outside attribute) &nbsp;
309 * <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">U+00FF<td align="center">{@linkplain #CODE_POINTS_ALL All}
310 * <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_ALL All}
311 * <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_ALL All}
312 * </table>
313 */
314 public static final CompatibilityMode MOZILLA=new CompatibilityMode("Mozilla",false,
315 new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
316 new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
317 );
318
319 /**
320 * Opera compatibility mode.
321 * <p>
322 * <code>{@link #getName() Name} = Opera</code><br />
323 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br />
324 * <table cellspacing="0" cellpadding="0">
325 * <tr><th>Recognition of unterminated character references:<th><th align="center">&nbsp; (inside attribute) &nbsp;<th align="center">&nbsp; (outside attribute) &nbsp;
326 * <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">U+003E<td align="center">{@linkplain #CODE_POINTS_ALL All}
327 * <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_ALL All}
328 * <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_ALL All}<td align="center">{@linkplain #CODE_POINTS_ALL All}
329 * </table>
330 */
331 public static final CompatibilityMode OPERA=new CompatibilityMode("Opera",true,
332 new UnterminatedCharacterReferenceSettings(0x3E, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
333 new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
334 );
335
336 /**
337 * <a target="_blank" href="http://www.w3.org/TR/xhtml1/#xhtml">XHTML</a> compatibility mode.
338 * <p>
339 * <code>{@link #getName() Name} = XHTML</code><br />
340 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br />
341 * <table cellspacing="0" cellpadding="0">
342 * <tr><th>Recognition of unterminated character references:<th><th align="center">&nbsp; (inside attribute) &nbsp;<th align="center">&nbsp; (outside attribute) &nbsp;
343 * <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_NONE None}<td align="center">{@linkplain #CODE_POINTS_NONE None}
344 * <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_NONE None}<td align="center">{@linkplain #CODE_POINTS_NONE None}
345 * <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}<td><code>&nbsp;=</code><td align="center">{@linkplain #CODE_POINTS_NONE None}<td align="center">{@linkplain #CODE_POINTS_NONE None}
346 * </table>
347 */
348 public static final CompatibilityMode XHTML=new CompatibilityMode("XHTML");
349
350 /**
351 * Constructs a new <code>CompatibilityMode</code> with the given {@linkplain #getName() name}.
352 * <p>
353 * All properties in the new instance are initially assigned their default values, which are the same as the strict
354 * rules of the {@link #XHTML} compatibility mode.
355 *
356 * @param name the {@linkplain #getName() name} of the new compatibility mode
357 */
358 public CompatibilityMode(final String name) {
359 this(name,false,new UnterminatedCharacterReferenceSettings(),new UnterminatedCharacterReferenceSettings());
360 }
361
362 private CompatibilityMode(final String name, final boolean formFieldNameCaseInsensitive, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue) {
363 this.name=name;
364 this.formFieldNameCaseInsensitive=formFieldNameCaseInsensitive;
365 this.unterminatedCharacterReferenceSettingsInsideAttributeValue=unterminatedCharacterReferenceSettingsInsideAttributeValue;
366 this.unterminatedCharacterReferenceSettingsOutsideAttributeValue=unterminatedCharacterReferenceSettingsOutsideAttributeValue;
367 }
368
369 /**
370 * Returns the name of this compatibility mode.
371 * @return the name of this compatibility mode.
372 */
373 public String getName() {
374 return name;
375 }
376
377 /**
378 * Indicates whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
379 * <p>
380 * Microsoft Internet Explorer treats field names as case insensitive,
381 * while Mozilla treats them as case sensitive.
382 * <p>
383 * The value of this property in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}
384 * affects all instances of the {@link FormFields} class.
385 * It should be set to the desired configuration before any instances of <code>FormFields</code> are created.
386 *
387 * @return <code>true</code> if {@linkplain FormField#getName() form field names} are treated as case insensitive, otherwise <code>false</code>.
388 * @see #setFormFieldNameCaseInsensitive(boolean)
389 */
390 public boolean isFormFieldNameCaseInsensitive() {
391 return formFieldNameCaseInsensitive;
392 }
393
394 /**
395 * Sets whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
396 * <p>
397 * See {@link #isFormFieldNameCaseInsensitive()} for the documentation of this property.
398 *
399 * @param value the new value of the property
400 */
401 public void setFormFieldNameCaseInsensitive(final boolean value) {
402 formFieldNameCaseInsensitive=value;
403 }
404
405 /**
406 * Returns the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
407 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
408 * <p>
409 * For example, if <code>getUnterminatedCharacterEntityReferenceMaxCodePoint(true)</code> has the value <code>0xFF</code> (U+00FF)
410 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
411 * <ul>
412 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;gt",true)}
413 * returns "<code>&gt;</code>".<br />
414 * The string is recognised as the character entity reference {@link CharacterEntityReference#_gt &amp;gt;}
415 * despite the fact that it is <a href="CharacterReference.html#Unterminated">unterminated</a>,
416 * because its unicode code point U+003E is below the maximum of U+00FF set by this property.
417 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;euro",true)}
418 * returns "<code>&amp;euro</code>".<br />
419 * The string is not recognised as the character entity reference {@link CharacterEntityReference#_euro &amp;euro;}
420 * because it is <a href="CharacterReference.html#Unterminated">unterminated</a>
421 * and its unicode code point U+20AC is above the maximum of U+00FF set by this property.
422 * </ul>
423 * <p>
424 * See the documentation of the {@link Attribute#getValue()} method for further discussion.
425 *
426 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
427 * @return the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a> {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
428 * @see #setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
429 */
430 public int getUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue) {
431 return getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint;
432 }
433
434 /**
435 * Sets the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
436 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
437 * <p>
438 * See {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
439 *
440 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
441 * @param maxCodePoint the maximum unicode code point.
442 */
443 public void setUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
444 getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint=maxCodePoint;
445 }
446
447 /**
448 * Returns the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
449 * <a href="NumericCharacterReference.html#DecimalCharacterReference">decimal character reference</a> which is to be recognised in the specified context.
450 * <p>
451 * For example, if <code>getUnterminatedDecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
452 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
453 * <ul>
454 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#62",true)}
455 * returns "<code>&gt;</code>".<br />
456 * The string is recognised as the numeric character reference <code>&amp;#62;</code>
457 * despite the fact that it is <a href="CharacterReference.html#Unterminated">unterminated</a>,
458 * because its unicode code point U+003E is below the maximum of U+00FF set by this property.
459 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#8364",true)}
460 * returns "<code>&amp;#8364</code>".<br />
461 * The string is not recognised as the numeric character reference <code>&amp;#8364;</code>
462 * because it is <a href="CharacterReference.html#Unterminated">unterminated</a>
463 * and its unicode code point U+20AC is above the maximum of U+00FF set by this property.
464 * </ul>
465 *
466 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
467 * @return the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a> <a href="NumericCharacterReference.html#DecimalCharacterReference">decimal character reference</a> which is to be recognised in the specified context.
468 * @see #setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
469 */
470 public int getUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
471 return getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint;
472 }
473
474 /**
475 * Sets the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
476 * <a href="NumericCharacterReference.html#DecimalCharacterReference">decimal character reference</a> which is to be recognised in the specified context.
477 * <p>
478 * See {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
479 *
480 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
481 * @param maxCodePoint the maximum unicode code point.
482 */
483 public void setUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
484 getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint=maxCodePoint;
485 }
486
487 /**
488 * Returns the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
489 * <a href="NumericCharacterReference.html#HexadecimalCharacterReference">hexadecimal character reference</a> which is to be recognised in the specified context.
490 * <p>
491 * For example, if <code>getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
492 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
493 * <ul>
494 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x3e",true)}
495 * returns "<code>&gt;</code>".<br />
496 * The string is recognised as the numeric character reference <code>&amp;#x3e;</code>
497 * despite the fact that it is <a href="CharacterReference.html#Unterminated">unterminated</a>,
498 * because its unicode code point U+003E is below the maximum of U+00FF set by this property.
499 * <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x20ac",true)}
500 * returns "<code>&amp;#x20ac</code>".<br />
501 * The string is not recognised as the numeric character reference <code>&amp;#x20ac;</code>
502 * because it is <a href="CharacterReference.html#Unterminated">unterminated</a>
503 * and its unicode code point U+20AC is above the maximum of U+00FF set by this property.
504 * </ul>
505 *
506 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
507 * @return the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a> <a href="NumericCharacterReference.html#HexadecimalCharacterReference">hexadecimal character reference</a> which is to be recognised in the specified context.
508 * @see #setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
509 */
510 public int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
511 return getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint;
512 }
513
514 /**
515 * Sets the maximum unicode code point of an <a href="CharacterReference.html#Unterminated">unterminated</a>
516 * <a href="NumericCharacterReference.html#HexadecimalCharacterReference">hexadecimal character reference</a> which is to be recognised in the specified context.
517 * <p>
518 * See {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
519 *
520 * @param insideAttributeValue the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
521 * @param maxCodePoint the maximum unicode code point.
522 */
523 public void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
524 getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint=maxCodePoint;
525 }
526
527 /**
528 * Returns a string representation of this object useful for debugging purposes.
529 * @return a string representation of this object useful for debugging purposes.
530 */
531 public String getDebugInfo() {
532 final StringBuilder info=new StringBuilder(); // the report is assembled piece by piece, in reading order
533 info.append("Form field name case insensitive: ").append(formFieldNameCaseInsensitive);
534 info.append(Config.NewLine).append("Maximum codepoints in unterminated character references:");
535 info.append(Config.NewLine).append(" Inside attribute values:").append(unterminatedCharacterReferenceSettingsInsideAttributeValue);
536 info.append(Config.NewLine).append(" Outside attribute values:").append(unterminatedCharacterReferenceSettingsOutsideAttributeValue);
537 return info.toString();
538 }
539
540 /**
541 * Returns the {@linkplain #getName() name} of this compatibility mode.
542 * @return the {@linkplain #getName() name} of this compatibility mode.
543 */
544 public String toString() {
545 return getName();
546 }
547
548 UnterminatedCharacterReferenceSettings getUnterminatedCharacterReferenceSettings(final boolean insideAttributeValue) {
549 return insideAttributeValue ? unterminatedCharacterReferenceSettingsInsideAttributeValue : unterminatedCharacterReferenceSettingsOutsideAttributeValue;
550 }
551 }
552 }

   
Visit the aagtl Website