/[aagtl_public1]/src/net/htmlparser/jericho/Element.java
aagtl

Contents of /src/net/htmlparser/jericho/Element.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 8 months ago) by zoffadmin
File size: 27394 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Represents an <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#h-3.2.1">element</a>
27 * in a specific {@linkplain Source source} document, which encompasses a {@linkplain #getStartTag() start tag},
28 * an optional {@linkplain #getEndTag() end tag} and all {@linkplain #getContent() content} in between.
29 * <p>
30 * Take the following HTML segment as an example:
31 * <p>
32 * <code>&lt;p&gt;This is a sample paragraph.&lt;/p&gt;</code>
33 * <p>
34 * The whole segment is represented by an <code>Element</code> object. This is comprised of the {@link StartTag} "<code>&lt;p&gt;</code>",
35 * the {@link EndTag} "<code>&lt;/p&gt;</code>", as well as the text in between.
36 * An element may also contain other elements between its start and end tags.
37 * <p>
38 * The term <i><a name="Normal">normal element</a></i> refers to an element having a {@linkplain #getStartTag() start tag}
39 * with a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}.
40 * This comprises all {@linkplain HTMLElements HTML elements} and <a href="HTMLElements.html#NonHTMLElement">non-HTML elements</a>.
41 * <p>
42 * <code>Element</code> instances are obtained using one of the following methods:
43 * <ul>
44 * <li>{@link StartTag#getElement()}
45 * <li>{@link EndTag#getElement()}
46 * <li>{@link Segment#getAllElements()}
47 * <li>{@link Segment#getAllElements(String name)}
48 * <li>{@link Segment#getAllElements(StartTagType)}
49 * </ul>
50 * See also the {@link HTMLElements} class, and the
51 * <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-element">XML 1.0 specification for elements</a>.
52 * <h3><a name="Structure">Element Structure</a></h3>
53 * <p>
54 * The three possible structures of an element are listed below:
55 * <dl class="Separated">
56 * <dt><a name="SingleTag">Single Tag Element</a>:
57 * <dd>
58 * Example:<br />
59 * <code>&lt;img src="mypicture.jpg"&gt;</code>
60 * <p>
61 * The element consists only of a single {@linkplain #getStartTag() start tag} and has no {@linkplain #getContent() element content}
62 * (although the start tag itself may have {@linkplain StartTag#getTagContent() tag content}).
63 * <br />{@link #getEndTag()}<code>==null</code>
64 * <br />{@link #isEmpty()}<code>==true</code>
65 * <br />{@link #getEnd() getEnd()}<code>==</code>{@link #getStartTag()}<code>.</code>{@link #getEnd() getEnd()}
66 * <p>
67 * This occurs in the following situations:
68 * <ul class="Unseparated">
69 * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagForbiddenElementNames() end tag is forbidden}.
70 * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required},
71 * but the end tag is not present in the source document.
72 * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional},
73 * where the <a href="#ImplicitlyTerminated">implicitly terminating</a> tag is situated immediately after the element's
74 * {@linkplain #getStartTag() start tag}.
75 * <li>An {@linkplain #isEmptyElementTag() empty element tag}
76 * <li>A <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a> that is not an {@linkplain #isEmptyElementTag() empty element tag} but is missing its end tag.
77 * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does not define a
78 * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
79 * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does define a
80 * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type} but is missing its end tag.
81 * </ul>
82 * <dt><a name="ExplicitlyTerminated">Explicitly Terminated Element</a>:
83 * <dd>
84 * Example:<br />
85 * <code>&lt;p&gt;This is a sample paragraph.&lt;/p&gt;</code>
86 * <p>
87 * The element consists of a {@linkplain #getStartTag() start tag}, {@linkplain #getContent() content},
88 * and an {@linkplain #getEndTag() end tag}.
89 * <br />{@link #getEndTag()}<code>!=null</code>.
90 * <br />{@link #isEmpty()}<code>==false</code> (provided the end tag doesn't immediately follow the start tag)
91 * <br />{@link #getEnd() getEnd()}<code>==</code>{@link #getEndTag()}<code>.</code>{@link #getEnd() getEnd()}.
92 * <p>
93 * This occurs in the following situations, assuming the start tag's matching end tag is present in the source document:
94 * <ul class="Unseparated">
95 * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the end tag is either
96 * {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}.
97 * <li>A <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a> that is not an {@linkplain #isEmptyElementTag() empty element tag}.
98 * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that defines a
99 * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
100 * </ul>
101 * <dt><a name="ImplicitlyTerminated">Implicitly Terminated Element</a>:
102 * <dd>
103 * Example:<br />
104 * <code>&lt;p&gt;This text is included in the paragraph element even though no end tag is present.</code><br />
105 * <code>&lt;p&gt;This is the next paragraph.</code>
106 * <p>
107 * The element consists of a {@linkplain #getStartTag() start tag} and {@linkplain #getContent() content},
108 * but no {@linkplain #getEndTag() end tag}.
109 * <br />{@link #getEndTag()}<code>==null</code>.
110 * <br />{@link #isEmpty()}<code>==false</code>
111 * <br />{@link #getEnd() getEnd()}<code>!=</code>{@link #getStartTag()}<code>.</code>{@link #getEnd() getEnd()}.
112 * <p>
113 * This only occurs in an <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the
114 * {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}.
115 * <p>
116 * The element ends at the start of a tag which implies the termination of the element, called the <i>implicitly terminating tag</i>.
117 * If the implicitly terminating tag is situated immediately after the element's {@linkplain #getStartTag() start tag},
118 * the element is classed as a <a href="#SingleTag">single tag element</a>.
119 * <p>
120 * See the <a href="Element.html#ParsingRulesHTMLEndTagOptional">element parsing rules for HTML elements with optional end tags</a>
121 * for details on which tags can implicitly terminate a given element.
122 * <p>
123 * See also the documentation of the {@link HTMLElements#getEndTagOptionalElementNames()} method.
124 * </dl>
125 * <h3><a name="ParsingRules">Element Parsing Rules</a></h3>
126 * The following rules describe the algorithm used in the {@link StartTag#getElement()} method to construct an element.
127 * The detection of the start tag's matching end tag or other terminating tags always takes into account the possible nesting of elements.
128 * <p>
129 * <ul class="Separated">
130 * <li>
131 * If the start tag has a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}:
132 * <ul>
133 * <li>
134 * If the {@linkplain StartTag#getName() name} of the start tag matches one of the
135 * recognised {@linkplain HTMLElementName HTML element names} (indicating an <a href="HTMLElements.html#HTMLElement">HTML element</a>):
136 * <ul>
137 * <li>
138 * <a name="ParsingRulesHTMLEndTagForbidden"></a>
139 * If the end tag for an element of this {@linkplain StartTag#getName() name} is
140 * {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden},
141 * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
142 * <li>
143 * <a name="ParsingRulesHTMLEndTagRequired"></a>
144 * If the end tag for an element of this {@linkplain StartTag#getName() name} is
145 * {@linkplain HTMLElements#getEndTagRequiredElementNames() required}, the parser searches for the start tag's matching end tag.
146 * <ul class="Unseparated">
147 * <li>
148 * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
149 * <li>
150 * If no matching end tag is found, the source document is not valid HTML and the incident is
151 * {@linkplain Source#getLogger() logged} as a missing required end tag.
152 * In this situation a <a href="#SingleTag">single tag element</a> is created.
153 * </ul>
154 * <li>
155 * <a name="ParsingRulesHTMLEndTagOptional"></a>
156 * If the end tag for an element of this {@linkplain StartTag#getName() name} is
157 * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}, the parser searches not only for the start tag's matching end tag,
158 * but also for any other tag that <a href="#ImplicitlyTerminated">implicitly terminates</a> the element.
159 * <br />For each tag (<i>T2</i>) following the start tag (<i>ST1</i>) of this element (<i>E1</i>):
160 * <ul class="Unseparated">
161 * <li>
162 * If <i>T2</i> is a start tag:
163 * <ul>
164 * <li>
165 * If the {@linkplain StartTag#getName() name} of <i>T2</i> is in the list of
166 * {@linkplain HTMLElements#getNonterminatingElementNames(String) non-terminating element names} for <i>E1</i>,
167 * then continue evaluating tags from the {@linkplain Element#getEnd() end} of <i>T2</i>'s corresponding
168 * {@linkplain StartTag#getElement() element}.
169 * <li>
170 * If the {@linkplain StartTag#getName() name} of <i>T2</i> is in the list of
171 * {@linkplain HTMLElements#getTerminatingStartTagNames(String) terminating start tag names} for <i>E1</i>,
172 * then <i>E1</i> ends at the {@linkplain StartTag#getBegin() beginning} of <i>T2</i>.
173 * If <i>T2</i> follows immediately after <i>ST1</i>, a <a href="#SingleTag">single tag element</a> is created,
174 * otherwise an <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
175 * </ul>
176 * <li>
177 * If <i>T2</i> is an end tag:
178 * <ul>
179 * <li>
180 * If the {@linkplain EndTag#getName() name} of <i>T2</i> is the same as that of <i>ST1</i>,
181 * an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
182 * <li>
183 * If the {@linkplain EndTag#getName() name} of <i>T2</i> is in the list of
184 * {@linkplain HTMLElements#getTerminatingEndTagNames(String) terminating end tag names} for <i>E1</i>,
185 * then <i>E1</i> ends at the {@linkplain EndTag#getBegin() beginning} of <i>T2</i>.
186 * If <i>T2</i> follows immediately after <i>ST1</i>, a <a href="#SingleTag">single tag element</a> is created,
187 * otherwise an <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
188 * </ul>
189 * <li>
190 * If no more tags are present in the source document, then <i>E1</i> ends at the end of the file, and an
191 * <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
192 * </ul>
193 * </ul>
194 * Note that the syntactical indication of an {@linkplain StartTag#isSyntacticalEmptyElementTag() empty-element tag} in the start tag
195 * is ignored when determining the end of <a href="HTMLElements.html#HTMLElement">HTML elements</a>.
196 * See the documentation of the {@link #isEmptyElementTag()} method for more information.
197 * <li>
198 * If the {@linkplain StartTag#getName() name} of the start tag does not match one of the
199 * recognised {@linkplain HTMLElementName HTML element names} (indicating a <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a>):
200 * <ul>
201 * <li>
202 * If the start tag is {@linkplain StartTag#isSyntacticalEmptyElementTag() syntactically an empty-element tag},
203 * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
204 * <li>
205 * Otherwise, section <a target="_blank" href="http://www.w3.org/TR/REC-xml#CleanAttrVals">3.1</a>
206 * of the XML 1.0 specification states that a matching end tag MUST be present, and
207 * the parser searches for the start tag's matching end tag.
208 * <ul class="Unseparated">
209 * <li>
210 * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
211 * <li>
212 * If no matching end tag is found, the source document is not valid XML and the incident is
213 * {@linkplain Source#getLogger() logged} as a missing required end tag.
214 * In this situation a <a href="#SingleTag">single tag element</a> is created.
215 * </ul>
216 * </ul>
217 * </ul>
218 * <li>
219 * If the start tag has any {@linkplain StartTag#getStartTagType() type} other than {@link StartTagType#NORMAL}:
220 * <ul>
221 * <li>
222 * If the start tag's type does not define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type},
223 * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
224 * <li>
225 * If the start tag's type does define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type},
226 * the parser assumes that a matching end tag is required and searches for it.
227 * <ul class="Unseparated">
228 * <li>
229 * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
230 * <li>
231 * If no matching end tag is found, the missing required end tag is {@linkplain Source#getLogger() logged}
232 * and a <a href="#SingleTag">single tag element</a> is created.
233 * </ul>
234 * </ul>
235 * </ul>
236 * @see HTMLElements
237 */
238 public final class Element extends Segment {
239 private final StartTag startTag;
240 private final EndTag endTag;
241 private Segment content=null;
242 Element parentElement=Element.NOT_CACHED;
243 private int depth=-1;
244 private List<Element> childElements=null;
245
246 static final Element NOT_CACHED=new Element();
247
248 private static final boolean INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHY=true;
249
250 Element(final Source source, final StartTag startTag, final EndTag endTag) {
251 super(source, startTag.begin, endTag==null ? startTag.end : endTag.end);
252 if (source.isStreamed()) throw new UnsupportedOperationException("Elements are not supported when using StreamedSource");
253 this.startTag=startTag;
254 this.endTag=(endTag==null || endTag.length()==0) ? null : endTag;
255 }
256
257 // used only to construct NOT_CACHED
258 private Element() {
259 startTag=null;
260 endTag=null;
261 }
262
263 /**
264 * Returns the parent of this element in the document element hierarchy.
265 * <p>
266 * The {@link Source#fullSequentialParse()} method must be called (either explicitly or implicitly) immediately after construction of the <code>Source</code> object if this method is to be used.
267 * An <code>IllegalStateException</code> is thrown if a full sequential parse has not been performed or if it was performed after this element was found.
268 * <p>
269 * This method returns <code>null</code> for a <a href="Source.html#TopLevelElement">top-level element</a>,
270 * as well as any element formed from a {@linkplain TagType#isServerTag() server tag}, regardless of whether it is nested inside a normal element.
271 * <p>
272 * See the {@link Source#getChildElements()} method for more details.
273 *
274 * @return the parent of this element in the document element hierarchy, or <code>null</code> if this element is a <a href="Source.html#TopLevelElement">top-level element</a>.
275 * @throws IllegalStateException if a {@linkplain Source#fullSequentialParse() full sequential parse} has not been performed or if it was performed after this element was found.
276 * @see #getChildElements()
277 */
278 public Element getParentElement() {
279 if (parentElement==Element.NOT_CACHED) {
280 if (!source.wasFullSequentialParseCalled()) throw new IllegalStateException("This operation is only possible after a full sequential parse has been performed");
281 if (startTag.isOrphaned()) throw new IllegalStateException("This operation is only possible if a full sequential parse was performed immediately after construction of the Source object");
282 source.getChildElements();
283 if (parentElement==Element.NOT_CACHED) parentElement=null;
284 }
285 return parentElement;
286 }
287
288 /**
289 * Returns a list of the immediate children of this element in the document element hierarchy.
290 * <p>
291 * The objects in the list are all of type {@link Element}.
292 * <p>
293 * See the {@link Source#getChildElements()} method for more details.
294 *
295 * @return a list of the immediate children of this element in the document element hierarchy, guaranteed not <code>null</code>.
296 * @see #getParentElement()
297 */
298 @Override public final List<Element> getChildElements() {
299 return childElements!=null ? childElements : getChildElements(-1);
300 }
301
302 final List<Element> getChildElements(int depth) {
303 if (depth!=-1) this.depth=depth;
304 if (childElements==null) {
305 if (!Config.IncludeServerTagsInElementHierarchy && end==startTag.end) {
306 childElements=Collections.emptyList();
307 } else {
308 final int childDepth=(depth==-1 ? -1 : depth+1);
309 childElements=new ArrayList<Element>();
310 int pos=Config.IncludeServerTagsInElementHierarchy ? begin+1 : startTag.end;
311 final int maxChildBegin=(Config.IncludeServerTagsInElementHierarchy || endTag==null) ? end : endTag.begin;
312 while (true) {
313 final StartTag childStartTag=source.getNextStartTag(pos);
314 if (childStartTag==null || childStartTag.begin>=maxChildBegin) break;
315 if (Config.IncludeServerTagsInElementHierarchy) {
316 if (childStartTag.begin<startTag.end && !childStartTag.getTagType().isServerTag() && !startTag.getTagType().isServerTag()) {
317 // A start tag is found within another start tag, but neither is a server tag.
318 // This only legitimately happens in very rare cases like entity definitions in doctype.
319 // We don't want to include the child elements in the hierarchy.
320 pos=childStartTag.end;
321 continue;
322 }
323 } else if (childStartTag.getTagType().isServerTag()) {
324 pos=childStartTag.end;
325 continue;
326 }
327 final Element childElement=childStartTag.getElement();
328 if (childElement.end>end) {
329 if (source.logger.isInfoEnabled()) source.logger.info("Child "+childElement.getDebugInfo()+" extends beyond end of parent "+getDebugInfo());
330 if (!INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHY) {
331 pos=childElement.end;
332 continue;
333 }
334 }
335 childElement.getChildElements(childDepth);
336 if (childElement.parentElement==Element.NOT_CACHED) { // make sure element was not added as a child of a descendent element (can happen with overlapping elements)
337 childElement.parentElement=this;
338 childElements.add(childElement);
339 }
340 pos=childElement.end;
341 }
342 }
343 }
344 return childElements;
345 }
346
347 /**
348 * Returns the nesting depth of this element in the document element hierarchy.
349 * <p>
350 * The {@link Source#fullSequentialParse()} method must be called (either explicitly or implicitly) after construction of the <code>Source</code> object if this method is to be used.
351 * An <code>IllegalStateException</code> is thrown if a full sequential parse has not been performed or if it was performed after this element was found.
352 * <p>
353 * A <a href="Source.html#TopLevelElement">top-level element</a> has a nesting depth of <code>0</code>.
354 * <p>
355 * An element formed from a {@linkplain TagType#isServerTag() server tag} always have a nesting depth of <code>0</code>,
356 * regardless of whether it is nested inside a normal element.
357 * <p>
358 * See the {@link Source#getChildElements()} method for more details.
359 *
360 * @return the nesting depth of this element in the document element hierarchy.
361 * @throws IllegalStateException if a {@linkplain Source#fullSequentialParse() full sequential parse} has not been performed or if it was performed after this element was found.
362 * @see #getParentElement()
363 */
364 public int getDepth() {
365 if (depth==-1) {
366 getParentElement();
367 if (depth==-1) depth=0;
368 }
369 return depth;
370 }
371
372 /**
373 * Returns the segment representing the <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-content">content</a> of the element.
374 * <p>
375 * This segment spans between the end of the start tag and the start of the end tag.
376 * If the end tag is not present, the content reaches to the end of the element.
377 * <p>
378 * A zero-length segment is returned if the element is {@linkplain #isEmpty() empty},
379 *
380 * @return the segment representing the content of the element, guaranteed not <code>null</code>.
381 */
382 public Segment getContent() {
383 if (content==null) content=new Segment(source,startTag.end,getContentEnd());
384 return content;
385 }
386
387 /**
388 * Returns the start tag of the element.
389 * @return the start tag of the element.
390 */
391 public StartTag getStartTag() {
392 return startTag;
393 }
394
395 /**
396 * Returns the end tag of the element.
397 * <p>
398 * If the element has no end tag this method returns <code>null</code>.
399 *
400 * @return the end tag of the element, or <code>null</code> if the element has no end tag.
401 */
402 public EndTag getEndTag() {
403 return endTag;
404 }
405
406 /**
407 * Returns the {@linkplain StartTag#getName() name} of the {@linkplain #getStartTag() start tag} of this element, always in lower case.
408 * <p>
409 * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getName() getName()}.
410 * <p>
411 * See the {@link Tag#getName()} method for more information.
412 *
413 * @return the name of the {@linkplain #getStartTag() start tag} of this element, always in lower case.
414 */
415 public String getName() {
416 return startTag.getName();
417 }
418
419 /**
420 * Indicates whether this element has zero-length {@linkplain #getContent() content}.
421 * <p>
422 * This is equivalent to {@link #getContent()}<code>.</code>{@link Segment#length() length()}<code>==0</code>.
423 * <p>
424 * Note that this is a broader definition than that of both the
425 * <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-element-4">HTML definition of an empty element</a>,
426 * which is only those elements whose end tag is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}, and the
427 * <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-empty">XML definition of an empty element</a>,
428 * which is "either a start-tag immediately followed by an end-tag, or an {@linkplain #isEmptyElementTag() empty-element tag}".
429 * The other possibility covered by this property is the case of an <a href="HTMLElements.html#HTMLElement">HTML element</a> with an
430 * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional} end tag that is immediately followed by another tag that implicitly
431 * terminates the element.
432 *
433 * @return <code>true</code> if this element has zero-length {@linkplain #getContent() content}, otherwise <code>false</code>.
434 * @see #isEmptyElementTag()
435 */
436 public boolean isEmpty() {
437 return startTag.end==getContentEnd();
438 }
439
440 /**
441 * Indicates whether this element is an <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>.
442 * <p>
443 * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#isEmptyElementTag() isEmptyElementTag()}.
444 *
445 * @return <code>true</code> if this element is an <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>, otherwise <code>false</code>.
446 */
447 public boolean isEmptyElementTag() {
448 return startTag.isEmptyElementTag();
449 }
450
451 /**
452 * Returns the attributes specified in this element's start tag.
453 * <p>
454 * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getAttributes() getAttributes()}.
455 *
456 * @return the attributes specified in this element's start tag.
457 * @see StartTag#getAttributes()
458 */
459 public Attributes getAttributes() {
460 return getStartTag().getAttributes();
461 }
462
463 /**
464 * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive).
465 * <p>
466 * Returns <code>null</code> if the {@linkplain #getStartTag() start tag of this element} does not
467 * {@linkplain StartTagType#hasAttributes() have attributes},
468 * no attribute with the specified name exists or the attribute {@linkplain Attribute#hasValue() has no value}.
469 * <p>
470 * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getAttributeValue(String) getAttributeValue(attributeName)}.
471 *
472 * @param attributeName the name of the attribute to get.
473 * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or <code>null</code> if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}.
474 */
475 public String getAttributeValue(final String attributeName) {
476 return getStartTag().getAttributeValue(attributeName);
477 }
478
479 /**
480 * Returns the {@link FormControl} defined by this element.
481 * @return the {@link FormControl} defined by this element, or <code>null</code> if it is not a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-controls">control</a>.
482 */
483 public FormControl getFormControl() {
484 return FormControl.construct(this);
485 }
486
487 public String getDebugInfo() {
488 if (this==NOT_CACHED) return "NOT_CACHED";
489 final StringBuilder sb=new StringBuilder();
490 sb.append("Element ");
491 startTag.appendDebugTag(sb);
492 if (!isEmpty()) sb.append('-');
493 if (endTag!=null) sb.append(endTag);
494 sb.append(' ');
495 startTag.appendDebugTagType(sb);
496 sb.append(super.getDebugInfo());
497 return sb.toString();
498 }
499
500 int getContentEnd() {
501 return endTag!=null ? endTag.begin : end;
502 }
503 }

   
Visit the aagtl Website