1 |
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
|
2 |
// Version 3.2
|
3 |
// Copyright (C) 2004-2009 Martin Jericho
|
4 |
// http://jericho.htmlparser.net/
|
5 |
//
|
6 |
// This library is free software; you can redistribute it and/or
|
7 |
// modify it under the terms of either one of the following licences:
|
8 |
//
|
9 |
// 1. The Eclipse Public License (EPL) version 1.0,
|
10 |
// included in this distribution in the file licence-epl-1.0.html
|
11 |
// or available at http://www.eclipse.org/legal/epl-v10.html
|
12 |
//
|
13 |
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
|
14 |
// included in this distribution in the file licence-lgpl-2.1.txt
|
15 |
// or available at http://www.gnu.org/licenses/lgpl.txt
|
16 |
//
|
17 |
// This library is distributed on an "AS IS" basis,
|
18 |
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
19 |
// See the individual licence texts for more details.
|
20 |
|
21 |
package net.htmlparser.jericho;
|
22 |
|
23 |
import java.util.*;
|
24 |
|
25 |
/**
|
26 |
* Defines the syntax for a tag type that can be recognised by the parser.
|
27 |
* <p>
|
28 |
* This class is the root abstract class common to all tag types, and contains methods to {@linkplain #register() register}
|
29 |
* and {@linkplain #deregister() deregister} tag types as well as various methods to aid in their implementation.
|
30 |
* <p>
|
31 |
* Every tag type is represented by a singleton instance of a class that must be a subclass of either
|
32 |
* {@link StartTagType} or {@link EndTagType}. These two abstract classes, the only direct descendants of this class,
|
33 |
* represent the two major classifications under which every tag type exists.
|
34 |
* <p>
|
35 |
* Because all <code>TagType</code> instances must be singletons, the '<code>==</code>' operator can be used to test for a particular tag type
|
36 |
* instead of the <code>equals(Object)</code> method.
|
37 |
* <p>
|
38 |
* The term <i><a name="Predefined">predefined tag type</a></i> refers to any of the tag types defined in this library,
|
39 |
* including both <a href="#Standard">standard</a> and <a href="#Extended">extended</a> tag types.
|
40 |
* <p>
|
41 |
* The term <i><a name="Standard">standard tag type</a></i> refers to any of the tag types represented by instances
|
42 |
* in static fields of the {@link StartTagType} and {@link EndTagType} subclasses.
|
43 |
* Standard tag types are registered by default, and define the tags most commonly found in HTML documents.
|
44 |
* <p>
|
45 |
* The term <i><a name="Extended">extended tag type</a></i> refers to any <a href="#Predefined">predefined</a> tag type
|
46 |
* that is not a <a href="#Standard">standard</a> tag type.
|
47 |
* The {@link PHPTagTypes} and {@link MasonTagTypes} classes contain extended tag types related to their respective server platforms.
|
48 |
* The tag types defined within them must be registered by the user before they are recognised by the parser.
|
49 |
* <p>
|
50 |
* The term <i><a name="Custom">custom tag type</a></i> refers to any user-defined tag type, or any tag type that is
|
51 |
* not a <a href="#Predefined">predefined</a> tag type.
|
52 |
* <p>
|
53 |
* The tag recognition process of the parser gives each tag type a <i><a name="Precedence">precedence</a></i> level,
|
54 |
* which is primarily determined by the length of its {@linkplain #getStartDelimiter() start delimiter}.
|
55 |
* A tag type with a more specific start delimiter is chosen in preference to one with a less specific start delimiter,
|
56 |
* assuming they both share the same prefix. If two tag types have exactly the same start delimiter, the one which was
|
57 |
* {@linkplain #register() registered} later has the higher precedence.
|
58 |
* <p>
|
59 |
* The two special tag types {@link StartTagType#UNREGISTERED} and {@link EndTagType#UNREGISTERED} represent
|
60 |
* tags that do not match the syntax of any other tag type. They have the lowest <a href="#Precedence">precedence</a>
|
61 |
* of all the tag types. The {@link Tag#isUnregistered()} method provides a detailed explanation of unregistered tags.
|
62 |
* <p>
|
63 |
* See the documentation of the <a href="Tag.html#ParsingProcess">tag parsing process</a> for more information
|
64 |
* on how each tag is identified by the parser.
|
65 |
* <p>
|
66 |
* <a name="Normal"></a>Note that the standard {@linkplain HTMLElementName HTML element names} do not represent different
|
67 |
* tag <i>types</i>. All standard HTML tags have a tag type of {@link StartTagType#NORMAL} or {@link EndTagType#NORMAL},
|
68 |
* and are also referred to as <i>normal</i> tags.
|
69 |
* <p>
|
70 |
* Apart from the <a href="#RegistrationRelated">registration related</a> methods, all of the methods in this class and its
|
71 |
* subclasses relate to the implementation of <a href="#Custom">custom tag types</a> and are not relevant to the majority of users
|
72 |
* who just use the <a href="#Predefined">predefined tag types</a>.
|
73 |
* <p>
|
74 |
* For performance reasons, this library only allows tag types that {@linkplain #getStartDelimiter() start}
|
75 |
* with a '<code><</code>' character.
|
76 |
* The character following this defines the immediate subclass of the tag type.
|
77 |
* An {@link EndTagType} always has a slash ('<code>/</code>') as the second character, while a {@link StartTagType}
|
78 |
* has any character other than a slash as the second character.
|
79 |
* This definition means that tag types which are not intuitively classified as either start tag types or end tag types
|
80 |
* (such as an HTML {@linkplain StartTagType#COMMENT comment}) are mostly classified as start tag types.
|
81 |
* <p>
|
82 |
* Every method in this and the {@link StartTagType} and {@link EndTagType} abstract classes can be categorised
|
83 |
* as one of the following:
|
84 |
* <dl>
|
85 |
* <dt><a name="Property">Properties:</a>
|
86 |
* <dd>Simple properties (marked final) that were either specified as parameters
|
87 |
* during construction or are derived from those parameters.
|
88 |
* <dt><a name="AbstractImplementation">Abstract implementation methods:</a>
|
89 |
* <dd>Methods that must be implemented in a subclass.
|
90 |
* <dt><a name="DefaultImplementation">Default implementation methods:</a>
|
91 |
* <dd>Methods (not marked final) that implement common behaviour, but may be overridden in a subclass.
|
92 |
* <dt><a name="ImplementationAssistance">Implementation assistance methods:</a>
|
93 |
* <dd>Protected methods that provide low-level functionality and are only of use within other implementation methods.
|
94 |
* <dt><a name="RegistrationRelated">Registration related methods:</a>
|
95 |
* <dd>Utility methods (marked final) relating to the {@linkplain #register() registration} of tag type instances.
|
96 |
* </dl>
|
97 |
*/
|
98 |
public abstract class TagType {
|
99 |
private final String description;
|
100 |
private final String startDelimiter;
|
101 |
private final String closingDelimiter;
|
102 |
private final boolean isServerTag;
|
103 |
private final String namePrefix;
|
104 |
final String startDelimiterPrefix;
|
105 |
|
106 |
private static Logger logger=null;
|
107 |
|
108 |
TagType(final String description, final String startDelimiter, final String closingDelimiter, final boolean isServerTag, final String startDelimiterPrefix) {
|
109 |
// startDelimiterPrefix is either "<" or "</"
|
110 |
this.description=description;
|
111 |
this.startDelimiter=startDelimiter;
|
112 |
this.closingDelimiter=closingDelimiter;
|
113 |
this.isServerTag=isServerTag;
|
114 |
this.namePrefix=startDelimiter.substring(startDelimiterPrefix.length());
|
115 |
this.startDelimiterPrefix=startDelimiterPrefix;
|
116 |
}
|
117 |
|
118 |
/**
|
119 |
* Registers this tag type for recognition by the parser.
|
120 |
* <br />(<a href="TagType.html#RegistrationRelated">registration related</a> method)
|
121 |
* <p>
|
122 |
* The order of registration affects the <a href="TagType.html#Precedence">precedence</a> of the tag type when a potential tag is being parsed.
|
123 |
*
|
124 |
* @see #deregister()
|
125 |
*/
|
126 |
public final void register() {
|
127 |
getLogger().debug("Register tag type: "+this);
|
128 |
TagTypeRegister.add(this);
|
129 |
}
|
130 |
|
131 |
/**
|
132 |
* Deregisters this tag type.
|
133 |
* <br />(<a href="TagType.html#RegistrationRelated">registration related</a> method)
|
134 |
*
|
135 |
* @see #register()
|
136 |
*/
|
137 |
public final void deregister() {
|
138 |
getLogger().debug("Deregister tag type "+this);
|
139 |
TagTypeRegister.remove(this);
|
140 |
}
|
141 |
|
142 |
/**
|
143 |
* Returns a list of all the currently registered tag types in order of lowest to highest <a href="TagType.html#Precedence">precedence</a>.
|
144 |
* <br />(<a href="TagType.html#RegistrationRelated">registration related</a> method)
|
145 |
* @return a list of all the currently registered tag types in order of lowest to highest <a href="TagType.html#Precedence">precedence</a>.
|
146 |
*/
|
147 |
public static final List<TagType> getRegisteredTagTypes() {
|
148 |
return TagTypeRegister.getList();
|
149 |
}
|
150 |
|
151 |
/**
|
152 |
* Returns a description of this tag type useful for debugging purposes.
|
153 |
* <br />(<a href="TagType.html#Property">property</a> method)
|
154 |
*
|
155 |
* @return a description of this tag type useful for debugging purposes.
|
156 |
*/
|
157 |
public final String getDescription() {
|
158 |
return description;
|
159 |
}
|
160 |
|
161 |
/**
|
162 |
* Returns the character sequence that marks the start of the tag.
|
163 |
* <br />(<a href="TagType.html#Property">property</a> method)
|
164 |
* <p>
|
165 |
* The character sequence must be all in lower case.
|
166 |
* <p>
|
167 |
* The first character in this property <b>must</b> be '<code><</code>'.
|
168 |
* This is a deliberate limitation of the system which is necessary to retain reasonable performance.
|
169 |
* <p>
|
170 |
* The second character in this property must be '<code>/</code>' if the implementing class is an {@link EndTagType}.
|
171 |
* It must <b>not</b> be '<code>/</code>' if the implementing class is a {@link StartTagType}.
|
172 |
* <p>
|
173 |
* <dl>
|
174 |
* <dt>Standard Tag Type Values:</dt>
|
175 |
* <dd>
|
176 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
177 |
* <tr><th>Tag Type<th>Start Delimiter
|
178 |
* <tr><td>{@link StartTagType#UNREGISTERED}<td><code><</code>
|
179 |
* <tr><td>{@link StartTagType#NORMAL}<td><code><</code>
|
180 |
* <tr><td>{@link StartTagType#COMMENT}<td><code><!--</code>
|
181 |
* <tr><td>{@link StartTagType#XML_DECLARATION}<td><code><?xml</code>
|
182 |
* <tr><td>{@link StartTagType#XML_PROCESSING_INSTRUCTION}<td><code><?</code>
|
183 |
* <tr><td>{@link StartTagType#DOCTYPE_DECLARATION}<td><code><!doctype</code>
|
184 |
* <tr><td>{@link StartTagType#MARKUP_DECLARATION}<td><code><!</code>
|
185 |
* <tr><td>{@link StartTagType#CDATA_SECTION}<td><code><![cdata[</code>
|
186 |
* <tr><td>{@link StartTagType#SERVER_COMMON}<td><code><%</code>
|
187 |
* <tr><td>{@link EndTagType#UNREGISTERED}<td><code></</code>
|
188 |
* <tr><td>{@link EndTagType#NORMAL}<td><code></</code>
|
189 |
* </table>
|
190 |
* </dl>
|
191 |
* <dl>
|
192 |
* <dt>Extended Tag Type Values:</dt>
|
193 |
* <dd>
|
194 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
195 |
* <tr><th>Tag Type<th>Start Delimiter
|
196 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_IF}<td><code><!--[if</code>
|
197 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_ENDIF}<td><code><![endif]--></code>
|
198 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_IF}<td><code><![if</code>
|
199 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_ENDIF}<td><code><![endif]></code>
|
200 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_IF}<td><code><!--[if</code>
|
201 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_ENDIF}<td><code><!--<![endif]--></code>
|
202 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_SIMPLIFIED_IF}<td><code><!--[if</code>
|
203 |
* <tr><td>{@link PHPTagTypes#PHP_SCRIPT}<td><code><script</code>
|
204 |
* <tr><td>{@link PHPTagTypes#PHP_SHORT}<td><code><?</code>
|
205 |
* <tr><td>{@link PHPTagTypes#PHP_STANDARD}<td><code><?php</code>
|
206 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALL}<td><code><&</code>
|
207 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}<td><code><&|</code>
|
208 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}<td><code></&</code>
|
209 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK}<td><code><%</code>
|
210 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK_END}<td><code></%</code>
|
211 |
* </table>
|
212 |
* </dl>
|
213 |
*
|
214 |
* @return the character sequence that marks the start of the tag.
|
215 |
*/
|
216 |
public final String getStartDelimiter() {
|
217 |
return startDelimiter;
|
218 |
}
|
219 |
|
220 |
/**
|
221 |
* Returns the character sequence that marks the end of the tag.
|
222 |
* <br />(<a href="TagType.html#Property">property</a> method)
|
223 |
* <p>
|
224 |
* The character sequence must be all in lower case.
|
225 |
* <p>
|
226 |
* In a {@link StartTag} of a {@linkplain StartTagType type} that {@linkplain StartTagType#hasAttributes() has attributes},
|
227 |
* characters appearing inside a quoted attribute value are ignored when determining the location of the closing delimiter.
|
228 |
* <p>
|
229 |
* Note that the optional '<code>/</code>' character preceding the closing '<code>></code>' in an
|
230 |
* {@linkplain StartTag#isEmptyElementTag() empty-element tag} is not considered part of the end delimiter.
|
231 |
* This property must define the closing delimiter common to all instances of the tag type.
|
232 |
* <p>
|
233 |
* <dl>
|
234 |
* <dt>Standard Tag Type Values:</dt>
|
235 |
* <dd>
|
236 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
237 |
* <tr><th>Tag Type<th>Closing Delimiter
|
238 |
* <tr><td>{@link StartTagType#UNREGISTERED}<td><code>></code>
|
239 |
* <tr><td>{@link StartTagType#NORMAL}<td><code>></code>
|
240 |
* <tr><td>{@link StartTagType#COMMENT}<td><code>--></code>
|
241 |
* <tr><td>{@link StartTagType#XML_DECLARATION}<td><code>?></code>
|
242 |
* <tr><td>{@link StartTagType#XML_PROCESSING_INSTRUCTION}<td><code>?></code>
|
243 |
* <tr><td>{@link StartTagType#DOCTYPE_DECLARATION}<td><code>></code>
|
244 |
* <tr><td>{@link StartTagType#MARKUP_DECLARATION}<td><code>></code>
|
245 |
* <tr><td>{@link StartTagType#CDATA_SECTION}<td><code>]]></code>
|
246 |
* <tr><td>{@link StartTagType#SERVER_COMMON}<td><code>%></code>
|
247 |
* <tr><td>{@link EndTagType#UNREGISTERED}<td><code>></code>
|
248 |
* <tr><td>{@link EndTagType#NORMAL}<td><code>></code>
|
249 |
* </table>
|
250 |
* </dl>
|
251 |
* <dl>
|
252 |
* <dt>Extended Tag Type Values:</dt>
|
253 |
* <dd>
|
254 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
255 |
* <tr><th>Tag Type<th>Closing Delimiter
|
256 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_IF}<td><code>]></code>
|
257 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_ENDIF}<td><i>(empty string)</i>
|
258 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_IF}<td><code>]></code>
|
259 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_ENDIF}<td><i>(empty string)</i>
|
260 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_IF}<td><code>]><!--></code>
|
261 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_ENDIF}<td><i>(empty string)</i>
|
262 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_SIMPLIFIED_IF}<td><code>]>--></code>
|
263 |
* <tr><td>{@link PHPTagTypes#PHP_SCRIPT}<td><code>></code>
|
264 |
* <tr><td>{@link PHPTagTypes#PHP_SHORT}<td><code>?></code>
|
265 |
* <tr><td>{@link PHPTagTypes#PHP_STANDARD}<td><code>?></code>
|
266 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALL}<td><code>&></code>
|
267 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}<td><code>&></code>
|
268 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}<td><code>></code>
|
269 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK}<td><code>></code>
|
270 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK_END}<td><code>></code>
|
271 |
* </table>
|
272 |
* </dl>
|
273 |
*
|
274 |
* @return the character sequence that marks the end of the tag.
|
275 |
*/
|
276 |
public final String getClosingDelimiter() {
|
277 |
return closingDelimiter;
|
278 |
}
|
279 |
|
280 |
/**
|
281 |
* Indicates whether this tag type represents a server tag.
|
282 |
* <br />(<a href="TagType.html#Property">property</a> method)
|
283 |
* <p>
|
284 |
* Server tags are typically parsed by some process on the web server and substituted with other text or markup before delivery to the
|
285 |
* <a target="_blank" href="http://www.w3.org/TR/html401/conform.html#didx-user_agent">user agent</a>.
|
286 |
* This parser therefore handles them differently to non-server tags in that they can occur at any position in the document
|
287 |
* without regard for the HTML document structure.
|
288 |
* As a result they can occur anywhere inside any other tag, although a non-server tag cannot theoretically occur inside a server tag.
|
289 |
* <p>
|
290 |
* The documentation of the <a href="Tag.html#ParsingProcess">tag parsing process</a> explains in detail
|
291 |
* how the value of this property affects the recognition of server tags,
|
292 |
* as well as how the presence of server tags affects the recognition of non-server tags in and around them.
|
293 |
* <p>
|
294 |
* Most XML-style server tags can not be represented as a distinct tag type because they are generally indistinguishable from non-server XML tags.
|
295 |
* See the {@link Segment#ignoreWhenParsing()} method for information about how to prevent such server tags from interfering with the proper parsing
|
296 |
* of the rest of the document.
|
297 |
* <p>
|
298 |
* <dl>
|
299 |
* <dt>Standard Tag Type Values:</dt>
|
300 |
* <dd>
|
301 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
302 |
* <tr><th>Tag Type<th>Is Server Tag
|
303 |
* <tr><td>{@link StartTagType#UNREGISTERED}<td><code>false</code>
|
304 |
* <tr><td>{@link StartTagType#NORMAL}<td><code>false</code>
|
305 |
* <tr><td>{@link StartTagType#COMMENT}<td><code>false</code>
|
306 |
* <tr><td>{@link StartTagType#XML_DECLARATION}<td><code>false</code>
|
307 |
* <tr><td>{@link StartTagType#XML_PROCESSING_INSTRUCTION}<td><code>false</code>
|
308 |
* <tr><td>{@link StartTagType#DOCTYPE_DECLARATION}<td><code>false</code>
|
309 |
* <tr><td>{@link StartTagType#MARKUP_DECLARATION}<td><code>false</code>
|
310 |
* <tr><td>{@link StartTagType#CDATA_SECTION}<td><code>false</code>
|
311 |
* <tr><td>{@link StartTagType#SERVER_COMMON}<td><code>true</code>
|
312 |
* <tr><td>{@link EndTagType#UNREGISTERED}<td><code>false</code>
|
313 |
* <tr><td>{@link EndTagType#NORMAL}<td><code>false</code>
|
314 |
* </table>
|
315 |
* </dl>
|
316 |
* <dl>
|
317 |
* <dt>Extended Tag Type Values:</dt>
|
318 |
* <dd>
|
319 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
320 |
* <tr><th>Tag Type<th>Is Server Tag
|
321 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_IF}<td><code>false</code>
|
322 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_ENDIF}<td><code>false</code>
|
323 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_IF}<td><code>false</code>
|
324 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_ENDIF}<td><code>false</code>
|
325 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_IF}<td><code>false</code>
|
326 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_ENDIF}<td><code>false</code>
|
327 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_SIMPLIFIED_IF}<td><code>false</code>
|
328 |
* <tr><td>{@link PHPTagTypes#PHP_SCRIPT}<td><code>true</code>
|
329 |
* <tr><td>{@link PHPTagTypes#PHP_SHORT}<td><code>true</code>
|
330 |
* <tr><td>{@link PHPTagTypes#PHP_STANDARD}<td><code>true</code>
|
331 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALL}<td><code>true</code>
|
332 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}<td><code>true</code>
|
333 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}<td><code>true</code>
|
334 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK}<td><code>true</code>
|
335 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK_END}<td><code>true</code>
|
336 |
* </table>
|
337 |
* </dl>
|
338 |
*
|
339 |
* @return <code>true</code> if this tag type represents a server tag, otherwise <code>false</code>.
|
340 |
*/
|
341 |
public final boolean isServerTag() {
|
342 |
return isServerTag;
|
343 |
}
|
344 |
|
345 |
/**
|
346 |
* Returns the {@linkplain Tag#getName() name} prefix required by this tag type.
|
347 |
* <br />(<a href="TagType.html#Property">property</a> method)
|
348 |
* <p>
|
349 |
* This string is identical to the {@linkplain #getStartDelimiter() start delimiter}, except that it does not include the
|
350 |
* initial "<code><</code>" or "<code></</code>" characters that always prefix the start delimiter of a
|
351 |
* {@link StartTagType} or {@link EndTagType} respectively.
|
352 |
* <p>
|
353 |
* The {@linkplain Tag#getName() name} of a tag of this type may or may not include extra characters after the prefix.
|
354 |
* This is determined by properties such as {@link StartTagType#isNameAfterPrefixRequired()}
|
355 |
* or {@link EndTagTypeGenericImplementation#isStatic()}.
|
356 |
* <p>
|
357 |
* <dl>
|
358 |
* <dt>Standard Tag Type Values:</dt>
|
359 |
* <dd>
|
360 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
361 |
* <tr><th>Tag Type<th>Name Prefix
|
362 |
* <tr><td>{@link StartTagType#UNREGISTERED}<td><i>(empty string)</i>
|
363 |
* <tr><td>{@link StartTagType#NORMAL}<td><i>(empty string)</i>
|
364 |
* <tr><td>{@link StartTagType#COMMENT}<td><code>!--</code>
|
365 |
* <tr><td>{@link StartTagType#XML_DECLARATION}<td><code>?xml</code>
|
366 |
* <tr><td>{@link StartTagType#XML_PROCESSING_INSTRUCTION}<td><code>?</code>
|
367 |
* <tr><td>{@link StartTagType#DOCTYPE_DECLARATION}<td><code>!doctype</code>
|
368 |
* <tr><td>{@link StartTagType#MARKUP_DECLARATION}<td><code>!</code>
|
369 |
* <tr><td>{@link StartTagType#CDATA_SECTION}<td><code>![cdata[</code>
|
370 |
* <tr><td>{@link StartTagType#SERVER_COMMON}<td><code>%</code>
|
371 |
* <tr><td>{@link EndTagType#UNREGISTERED}<td><i>(empty string)</i>
|
372 |
* <tr><td>{@link EndTagType#NORMAL}<td><i>(empty string)</i>
|
373 |
* </table>
|
374 |
* </dl>
|
375 |
* <dl>
|
376 |
* <dt>Extended Tag Type Values:</dt>
|
377 |
* <dd>
|
378 |
* <table class="bordered" style="margin: 15px" cellspacing="0">
|
379 |
* <tr><th>Tag Type<th>Name Prefix
|
380 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_IF}<td><code>!--[if</code>
|
381 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_HIDDEN_ENDIF}<td><code>![endif]--></code>
|
382 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_IF}<td><code>![if</code>
|
383 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_ENDIF}<td><code>![endif]></code>
|
384 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_IF}<td><code>!--[if</code>
|
385 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_ENDIF}<td><code>!--<![endif]--></code>
|
386 |
* <tr><td>{@link MicrosoftConditionalCommentTagTypes#DOWNLEVEL_REVEALED_VALIDATING_SIMPLIFIED_IF}<td><code>!--[if</code>
|
387 |
* <tr><td>{@link PHPTagTypes#PHP_SCRIPT}<td><code>script</code>
|
388 |
* <tr><td>{@link PHPTagTypes#PHP_SHORT}<td><code>?</code>
|
389 |
* <tr><td>{@link PHPTagTypes#PHP_STANDARD}<td><code>?php</code>
|
390 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALL}<td><code>&</code>
|
391 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}<td><code>&|</code>
|
392 |
* <tr><td>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}<td><code>&</code>
|
393 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK}<td><code>%</code>
|
394 |
* <tr><td>{@link MasonTagTypes#MASON_NAMED_BLOCK_END}<td><code>%</code>
|
395 |
* </table>
|
396 |
* </dl>
|
397 |
*
|
398 |
* @return the {@linkplain Tag#getName() name} prefix required by this tag type.
|
399 |
* @see #getStartDelimiter()
|
400 |
*/
|
401 |
protected final String getNamePrefix() {
|
402 |
return namePrefix;
|
403 |
}
|
404 |
|
405 |
/**
|
406 |
* Indicates whether a tag of this type is valid in the specified position of the specified source document.
|
407 |
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
|
408 |
* <p>
|
409 |
* This method is called immediately before {@link #constructTagAt(Source, int pos)}
|
410 |
* to do a preliminary check on the validity of a tag of this type in the specified position.
|
411 |
* <p>
|
412 |
* This check is not performed as part of the {@link #constructTagAt(Source, int pos)} call because the same
|
413 |
* validation is used for all the <a href="TagType.html#Standard">standard</a> tag types, and is likely to be sufficient
|
414 |
* for all <a href="TagType.html#Custom">custom tag types</a>.
|
415 |
* Having this check separated into a different method helps to isolate common code from the code that is unique to each tag type.
|
416 |
* <p>
|
417 |
* A {@linkplain TagType#isServerTag() server tag} is valid in any position except inside a {@linkplain StartTagType#SERVER_COMMON_COMMENT server-side comment},
|
418 |
* but a non-server tag is not valid inside any other tag, nor inside elements with CDATA content such as
|
419 |
* {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements.
|
420 |
* <p>
|
421 |
* The common implementation of this method behaves differently depending upon whether or not a {@linkplain Source#fullSequentialParse() full sequential parse}
|
422 |
* is being performed.
|
423 |
* <p>
|
424 |
* For server tags it simply checks that the position is not enclosed by a {@linkplain StartTagType#SERVER_COMMON_COMMENT server-side comment} if a full sequential parse
|
425 |
* is not being performed. If a full sequential parse is being performed, it always returns <code>true</code> for server tags as the parser automatically skips over
|
426 |
* all positions enclosed by server-side comments, so this method is only called in positions where a server tag is always valid.
|
427 |
* <p>
|
428 |
* When this method is called for non-server tags during a full sequential parse, the <code>fullSequentialParseData</code> argument contains information
|
429 |
* allowing the exact theoretical check to be performed, rejecting a non-server tag if it is inside any other tag.
|
430 |
* See below for further information about the <code>fullSequentialParseData</code> parameter.
|
431 |
* <p>
|
432 |
* When this method is called in <a href="Source.html#ParseOnDemand">parse on demand</a> mode
|
433 |
* (not during a full sequential parse, <code>fullSequentialParseData==null</code>),
|
434 |
* practical constraints prevent the exact theoretical check from being carried out, and non-server tags are only rejected
|
435 |
* if they are found inside HTML {@linkplain StartTagType#COMMENT comments} or {@linkplain StartTagType#CDATA_SECTION CDATA sections}.
|
436 |
* <p>
|
437 |
* This behaviour is configurable by manipulating the static {@link TagType#getTagTypesIgnoringEnclosedMarkup() TagTypesIgnoringEnclosedMarkup} array
|
438 |
* to determine which tag types can not contain non-server tags in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
|
439 |
* The {@linkplain TagType#getTagTypesIgnoringEnclosedMarkup() documentation of this property} contains
|
440 |
* a more detailed analysis of the subject and explains why only the {@linkplain StartTagType#COMMENT comment} and
|
441 |
* {@linkplain StartTagType#CDATA_SECTION CDATA section} tag types are included by default.
|
442 |
* <p>
|
443 |
* See the documentation of the <a href="Tag.html#ParsingProcess">tag parsing process</a> for more information about how this method fits into the whole tag parsing process.
|
444 |
* <p>
|
445 |
* This method can be overridden in <a href="TagType.html#Custom">custom tag types</a> if the default implementation is unsuitable.
|
446 |
* <p>
|
447 |
* <b>The <code>fullSequentialParseData</code> parameter:</b>
|
448 |
* <p>
|
449 |
* This parameter is used to discard non-server tags that are found inside other tags or inside {@link HTMLElementName#SCRIPT SCRIPT} elements.
|
450 |
* <p>
|
451 |
* In the current version of this library, the <code>fullSequentialParseData</code> argument is either <code>null</code>
|
452 |
* (in <a href="Source.html#ParseOnDemand">parse on demand</a> mode) or an integer array containing only a single entry
|
453 |
* (if a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed).
|
454 |
* <p>
|
455 |
* The integer contained in the array is the maximum position in the document at which the end of a tag has been found,
|
456 |
* indicating that no non-server tags should be recognised before that position.
|
457 |
* If no tags have yet been encountered, the value of this integer is zero.
|
458 |
* <p>
|
459 |
* If the last tag encountered was the {@linkplain StartTag start tag} of a {@link HTMLElementName#SCRIPT SCRIPT} element,
|
460 |
* the value of this integer is <code>Integer.MAX_VALUE</code>, indicating that no other non-server elements should be recognised until the
|
461 |
* {@linkplain EndTag end tag} of the {@link HTMLElementName#SCRIPT SCRIPT} element is found.
|
462 |
* According to the <a target="_blank" href="http://www.w3.org/TR/html401/types.html#idx-CDATA-1">HTML 4.01 specification section 6.2</a>,
|
463 |
* the first occurrence of the character sequence "<code></</code>" terminates the special handling of CDATA within
|
464 |
* {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements.
|
465 |
* This library however only terminates the CDATA handling of {@link HTMLElementName#SCRIPT SCRIPT} element content
|
466 |
* when the character sequence "<code></script</code>" is detected, in line with the behaviour of the major browsers.
|
467 |
* <p>
|
468 |
* Note that the implicit treatment of {@link HTMLElementName#SCRIPT SCRIPT} element content as CDATA should theoretically also prevent the recognition of
|
469 |
* {@linkplain StartTagType#COMMENT comments} and explicit {@linkplain StartTagType#CDATA_SECTION CDATA sections} inside script elements.
|
470 |
* While this is true for explicit {@linkplain StartTagType#CDATA_SECTION CDATA sections}, the parser does still recognise
|
471 |
* {@linkplain StartTagType#COMMENT comments} inside {@link HTMLElementName#SCRIPT SCRIPT} elements in order to maintain compatibility with the major browsers.
|
472 |
* This prevents the character sequence "<code></script</code>" from terminating the {@link HTMLElementName#SCRIPT SCRIPT} element
|
473 |
* if it occurs inside a {@linkplain StartTagType#COMMENT comment}. The end of the {@linkplain StartTagType#COMMENT comment} however also
|
474 |
* ends the implicit treatment of the {@link HTMLElementName#SCRIPT SCRIPT} element content as CDATA.
|
475 |
* <p>
|
476 |
* Although {@link HTMLElementName#STYLE STYLE} elements should theoretically be treated in the same way as {@link HTMLElementName#SCRIPT SCRIPT} elements,
|
477 |
* the syntax of <a target="_blank" href="http://www.w3.org/Style/CSS/">Cascading Style Sheets</a> (CSS) does not contain any constructs that
|
478 |
* could be misinterpreted as HTML tags, so there is virtually no need to perform any special checks in this case.
|
479 |
* <p>
|
480 |
* IMPLEMENTATION NOTE: The rationale behind using an integer array to hold this value, rather than a scalar <code>int</code> value,
|
481 |
* is to emulate passing the parameter by reference.
|
482 |
* This value needs to be shared amongst several internal methods during the {@linkplain Source#fullSequentialParse() full sequential parse} process,
|
483 |
* and any one of those methods needs to be able to modify the value and pass it back to the calling method.
|
484 |
* This would normally be implemented by passing the parameter by reference, but because Java does not support this language construct, a container for a
|
485 |
* mutable integer must be passed instead.
|
486 |
* Because the standard Java library does not provide a class for holding a single mutable integer (the <code>java.lang.Integer</code> class is immutable),
|
487 |
* the easiest container to use, without creating a class especially for this purpose, is an integer array.
|
488 |
* The use of an array does not imply any intention to use more than a single array entry in subsequent versions.
|
489 |
*
|
490 |
* @param source the {@link Source} document.
|
491 |
* @param pos the character position in the source document to check.
|
492 |
* @param fullSequentialParseData an integer array containing data allowing this method to implement a better algorithm when a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed, or <code>null</code> in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
|
493 |
* @return <code>true</code> if a tag of this type is valid in the specified position of the specified source document, otherwise <code>false</code>.
|
494 |
*/
|
495 |
protected boolean isValidPosition(final Source source, final int pos, final int[] fullSequentialParseData) {
|
496 |
if (isServerTag()) {
|
497 |
// the only thing preventing inclusion of a server tag is if it is enclosed by a server comment.
|
498 |
if (fullSequentialParseData!=null) return true; // full sequential parse skips over segments enclosed by server comments so no need to check.
|
499 |
return !StartTagType.SERVER_COMMON_COMMENT.tagEncloses(source,pos);
|
500 |
}
|
501 |
if (fullSequentialParseData!=null) {
|
502 |
// use simplified check when doing full sequential parse. Normally we are only able to check whether a tag is inside specially cached
|
503 |
// tag types for efficiency reasons, but during a full sequential parse we can reject a tag if it is inside any other tag.
|
504 |
if (fullSequentialParseData[0]==Integer.MAX_VALUE) { // we are in a SCRIPT element
|
505 |
if (this==EndTagType.NORMAL && source.getParseText().containsAt("</script",pos)) {
|
506 |
// The character sequence "</script" terminates the implicit CDATA section inside the SCRIPT element
|
507 |
fullSequentialParseData[0]=pos;
|
508 |
return true;
|
509 |
}
|
510 |
if (this==StartTagType.COMMENT) {
|
511 |
// Although not technically correct, all major browsers also recognise comments inside SCRIPT elements.
|
512 |
// The end of the comment will however terminate the implicit CDATA section inside the SCRIPT element.
|
513 |
fullSequentialParseData[0]=pos;
|
514 |
return true;
|
515 |
}
|
516 |
return false; // reject any other tags inside SCRIPT element
|
517 |
}
|
518 |
return pos>=fullSequentialParseData[0]; // accept the non-server tag only if it is after the end of the last found non-server tag
|
519 |
}
|
520 |
// Use the normal method of checking whether the position is inside a tag of a tag type that ignores enclosed markup:
|
521 |
final TagType[] tagTypesIgnoringEnclosedMarkup=getTagTypesIgnoringEnclosedMarkup();
|
522 |
for (int i=0; i<tagTypesIgnoringEnclosedMarkup.length; i++) {
|
523 |
final TagType tagTypeIgnoringEnclosedMarkup=tagTypesIgnoringEnclosedMarkup[i];
|
524 |
// If this tag type is a comment, don't bother checking whether it is inside another comment.
|
525 |
// See javadocs for getTagTypesIgnoringEnclosedMarkup() for more explanation.
|
526 |
// Allowing it might result in multiple comments being recognised with the same end delimiter, but the risk of this occuring in a syntactically invalid document
|
527 |
// is outweighed by the benefit of not recursively checking all previous comments in a document, risking stack overflow.
|
528 |
if (this==StartTagType.COMMENT && tagTypeIgnoringEnclosedMarkup==StartTagType.COMMENT) continue;
|
529 |
if (tagTypeIgnoringEnclosedMarkup.tagEncloses(source,pos)) return false;
|
530 |
}
|
531 |
return true;
|
532 |
}
|
533 |
|
534 |
/**
|
535 |
* Returns an array of all the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags
|
536 |
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
|
537 |
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
|
538 |
* <p>
|
539 |
* The tag types returned by this property (referred to in the following paragraphs as the "listed types") default to
|
540 |
* {@link StartTagType#COMMENT} and {@link StartTagType#CDATA_SECTION}.
|
541 |
* <p>
|
542 |
* This property is used by the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method
|
543 |
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
|
544 |
* It is not used at all during a {@linkplain Source#fullSequentialParse() full sequential parse}.
|
545 |
* <p>
|
546 |
* In the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method,
|
547 |
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode,
|
548 |
* every new non-server tag found by the parser (referred to as a "new tag") undergoes a check to see whether it is enclosed
|
549 |
* by a tag of one of the listed types.
|
550 |
* This includes new tags of the listed types themselves if they are non-server tags.
|
551 |
* The recursive nature of this check means that <i>all</i> tags of the listed types occurring before the new tag must be found
|
552 |
* by the parser before it can determine whether the new tag should be ignored.
|
553 |
* To mitigate any performance issues arising from this process, the listed types are given special treatment in the tag cache.
|
554 |
* This dramatically decreases the time taken to search on these tag types, so adding a tag type to this array that
|
555 |
* is easily recognised and occurs infrequently only results in a small degradation in overall performance.
|
556 |
* <p>
|
557 |
* A special exception to the algorithm described above applies to {@link StartTagType#COMMENT COMMENT} tags.
|
558 |
* The default implementation of the {@link #isValidPosition(Source,int,int[]) isValidPosition} method
|
559 |
* does not check whether a {@link StartTagType#COMMENT COMMENT} tag is inside another {@link StartTagType#COMMENT COMMENT} tag,
|
560 |
* as this should never happen in a syntactically correct document (the characters '<code>--</code>' should not occur inside a comment).
|
561 |
* Skipping this check also avoids the need to recursively check every {@link StartTagType#COMMENT COMMENT} tag back to the start of the document,
|
562 |
* which has the potential to cause a stack overflow in a large document containing lots of comments.
|
563 |
* <p>
|
564 |
* Theoretically, non-server tags appearing inside any other tag should be ignored, which is how the parser behaves during a
|
565 |
* {@linkplain Source#fullSequentialParse() full sequential parse}.
|
566 |
* <p>
|
567 |
* Server tags in particular very often contain other "tags" that should not be recognised as tags by the parser.
|
568 |
* If this behaviour is required in <a href="Source.html#ParseOnDemand">parse on demand</a> mode, the tag type of each server tag that might be found
|
569 |
* in the source documents can be added to this property using the static {@link #setTagTypesIgnoringEnclosedMarkup(TagType[])} method.
|
570 |
* For example, the following command would prevent non-server tags from being recognised inside {@linkplain PHPTagTypes#PHP_STANDARD standard PHP} tags,
|
571 |
* as well as the default {@linkplain StartTagType#COMMENT comment} and {@linkplain StartTagType#CDATA_SECTION CDATA section} tags:
|
572 |
* <p>
|
573 |
* <blockquote><code>TagType.setTagTypesIgnoringEnclosedMarkup(new TagType[] {PHPTagTypes.PHP_STANDARD, StartTagType.COMMENT, StartTagType.CDATA_SECTION});</code></blockquote>
|
574 |
* <p>
|
575 |
* The only situation where a non-server tag can legitimately contain a sequence of characters that resembles a tag is within an attribute value.
|
576 |
* The <a target="_blank" href="http://www.w3.org/TR/html401/charset.html#h-5.3.2">HTML 4.01 specification section 5.3.2</a>
|
577 |
* specifically allows the presence of '<code>&lt;</code>' and '<code>&gt;</code>' characters within attribute values.
|
578 |
* A common occurrence of this is in <a target="_blank" href="http://www.w3.org/TR/html401/interact/scripts.html#events">event</a> attributes containing scripts,
|
579 |
* such as the <code><a target="_blank" href="http://www.w3.org/TR/html401/interact/scripts.html#adef-onclick">onclick</a></code> attribute.
|
580 |
* There is no way of preventing such "tags" from being recognised in <a href="Source.html#ParseOnDemand">parse on demand</a> mode, as adding
|
581 |
* {@link StartTagType#NORMAL} to this property as a listed type would be far too inefficient.
|
582 |
* Performing a {@linkplain Source#fullSequentialParse() full sequential parse} of the source document prevents these attribute values from being
|
583 |
* recognised as tags, but can be very expensive if only a few tags in the document need to be parsed.
|
584 |
* The penalty of not parsing every tag in the document is that the exactness of this check is compromised, but in practical terms the difference is inconsequential.
|
585 |
* The default listed types of {@linkplain StartTagType#COMMENT comments} and {@linkplain StartTagType#CDATA_SECTION CDATA sections} yield sensible results
|
586 |
* in the vast majority of practical applications with only a minor impact on performance.
|
587 |
* <p>
|
588 |
* In <a target="_blank" href="http://www.w3.org/TR/xhtml1/">XHTML</a>, '<code>&lt;</code>' and '<code>&gt;</code>' characters
|
589 |
* must be represented in attribute values as {@linkplain CharacterReference character references}
|
590 |
* (see the XML 1.0 specification section <a target="_blank" href="http://www.w3.org/TR/REC-xml#CleanAttrVals">3.1</a>),
|
591 |
* so the situation should never arise that a tag is found inside another tag unless one of them is a
|
592 |
* {@linkplain #isServerTag() server tag}.
|
593 |
*
|
594 |
* @return an array of all the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags.
|
595 |
*/
|
596 |
public static final TagType[] getTagTypesIgnoringEnclosedMarkup() {
|
597 |
return TagTypesIgnoringEnclosedMarkup.array;
|
598 |
}
|
599 |
|
600 |
/**
|
601 |
* Sets the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags.
|
602 |
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
|
603 |
* <p>
|
604 |
* See {@link #getTagTypesIgnoringEnclosedMarkup()} for the documentation of this property.
|
605 |
*
|
606 |
* @param tagTypes an array of tag types, which must not be <code>null</code>.
|
607 |
*/
|
608 |
public static final void setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes) {
|
609 |
if (tagTypes==null) throw new IllegalArgumentException();
|
610 |
TagTypesIgnoringEnclosedMarkup.array=tagTypes;
|
611 |
}
|
612 |
|
613 |
/**
|
614 |
* Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
|
615 |
* <br />(<a href="TagType.html#AbstractImplementation">abstract implementation</a> method)
|
616 |
* <p>
|
617 |
* The implementation of this method must check that the text at the specified position meets all of
|
618 |
* the criteria of this tag type, including such checks as the presence of the correct or well formed
|
619 |
* {@linkplain #getClosingDelimiter() closing delimiter}, {@linkplain Tag#getName() name}, {@linkplain Attributes attributes},
|
620 |
* {@linkplain EndTag end tag}, or any other distinguishing features.
|
621 |
* <p>
|
622 |
* It can be assumed that the specified position starts with the {@linkplain #getStartDelimiter() start delimiter} of this tag type,
|
623 |
* and that all other tag types with higher <a href="TagType.html#Precedence">precedence</a> (if any) have already been rejected as candidates.
|
624 |
* Tag types with lower precedence will be considered if this method returns <code>null</code>.
|
625 |
* <p>
|
626 |
* This method is only called after a successful check of the tag's position, i.e.
|
627 |
* {@link #isValidPosition(Source,int,int[]) isValidPosition(source,pos,fullSequentialParseData)}<code>==true</code>.
|
628 |
* <p>
|
629 |
* The {@link StartTagTypeGenericImplementation} and {@link EndTagTypeGenericImplementation} subclasses provide default
|
630 |
* implementations of this method that allow the use of much simpler <a href="TagType.html#Property">properties</a> and
|
631 |
* <a href="TagType.html#ImplementationAssistance">implementation assistance</a> methods to carry out the required functions.
|
632 |
*
|
633 |
* @param source the {@link Source} document.
|
634 |
* @param pos the position in the source document.
|
635 |
* @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
|
636 |
*/
|
637 |
protected abstract Tag constructTagAt(Source source, int pos); // a null result means "no match here": getTagAt then tries the next prospective tag type
|
638 |
|
639 |
/**
|
640 |
* Indicates whether a tag of this type encloses the specified position of the specified source document.
|
641 |
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
|
642 |
* <p>
|
643 |
* This is logically equivalent to <code>source.</code>{@link Source#getEnclosingTag(int,TagType) getEnclosingTag(pos,this)}<code>!=null</code>,
|
644 |
* but is safe to use within other implementation methods without the risk of causing an infinite recursion.
|
645 |
* <p>
|
646 |
* This method is called from the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData)} method.
|
647 |
*
|
648 |
* @param source the {@link Source} document.
|
649 |
* @param pos the character position in the source document to check.
|
650 |
* @return <code>true</code> if a tag of this type encloses the specified position of the specified source document, otherwise <code>false</code>.
|
651 |
*/
|
652 |
protected final boolean tagEncloses(final Source source, final int pos) {
|
653 |
if (pos==0) return false;
|
654 |
final Tag enclosingTag=source.getEnclosingTag(pos-1,this); // use pos-1 otherwise a tag at pos could cause infinite recursion when this is called from constructTagAt
|
655 |
return enclosingTag!=null && pos!=enclosingTag.getEnd(); // make sure pos!=enclosingTag.getEnd() to compensate for using pos-1 above (important if the tag in question immediately follows an end tag delimiter)
|
656 |
}
|
657 |
|
658 |
/**
|
659 |
* Returns a string representation of this object useful for debugging purposes.
|
660 |
* @return a string representation of this object useful for debugging purposes.
|
661 |
*/
|
662 |
public String toString() {
|
663 |
return getDescription();
|
664 |
}
|
665 |
|
666 |
static final Tag getTagAt(final Source source, final int pos, final boolean serverTagOnly, final boolean assumeNoNestedTags) {
|
667 |
final TagTypeRegister.ProspectiveTagTypeIterator prospectiveTagTypeIterator=new TagTypeRegister.ProspectiveTagTypeIterator(source,pos);
|
668 |
// prospectiveTagTypeIterator is empty if pos is out of range.
|
669 |
while (prospectiveTagTypeIterator.hasNext()) {
|
670 |
final TagType tagType=prospectiveTagTypeIterator.next();
|
671 |
if (serverTagOnly && !tagType.isServerTag()) continue;
|
672 |
if (!assumeNoNestedTags && !tagType.isValidPosition(source,pos,source.fullSequentialParseData)) continue;
|
673 |
try {
|
674 |
final Tag tag=tagType.constructTagAt(source,pos);
|
675 |
if (tag!=null) return tag;
|
676 |
} catch (IndexOutOfBoundsException ex) {
|
677 |
if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("Tag at ")).append(" not recognised as type '").append(tagType.getDescription()).append("' because it has no end delimiter").toString());
|
678 |
}
|
679 |
}
|
680 |
return null;
|
681 |
}
|
682 |
|
683 |
private static Logger getLogger() {
|
684 |
if (logger==null) logger=Source.newLogger();
|
685 |
return logger;
|
686 |
}
|
687 |
|
688 |
private static final class TagTypesIgnoringEnclosedMarkup {
|
689 |
// This internal class is used to contain the array because its static initialisation can occur after
|
690 |
// the StartTagType.COMMENT and StartTagType.CDATA_SECTION members have been created.
|
691 |
public static TagType[] array=new TagType[] {
|
692 |
StartTagType.COMMENT,
|
693 |
StartTagType.CDATA_SECTION
|
694 |
};
|
695 |
}
|
696 |
}
|
697 |
|