/[aagtl_public1]/src/net/htmlparser/jericho/EndTag.java
aagtl

Contents of /src/net/htmlparser/jericho/EndTag.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 10903 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Represents the <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-element-3">end tag</a> of an
27 * {@linkplain Element element} in a specific {@linkplain Source source} document.
28 * <p>
29 * An end tag always has a {@linkplain #getTagType() type} that is a subclass of {@link EndTagType}, meaning it
30 * always starts with the characters '<code>&lt;/</code>'.
31 * <p>
32 * <code>EndTag</code> instances are obtained using one of the following methods:
33 * <ul>
34 * <li>{@link Element#getEndTag()}
35 * <li>{@link Tag#getNextTag()}
36 * <li>{@link Tag#getPreviousTag()}
37 * <li>{@link Source#getPreviousEndTag(int pos)}
38 * <li>{@link Source#getPreviousEndTag(int pos, String name)}
39 * <li>{@link Source#getPreviousTag(int pos)}
40 * <li>{@link Source#getPreviousTag(int pos, TagType)}
41 * <li>{@link Source#getNextEndTag(int pos)}
42 * <li>{@link Source#getNextEndTag(int pos, String name)}
43 * <li>{@link Source#getNextEndTag(int pos, String name, EndTagType)}
44 * <li>{@link Source#getNextTag(int pos)}
45 * <li>{@link Source#getNextTag(int pos, TagType)}
46 * <li>{@link Source#getEnclosingTag(int pos)}
47 * <li>{@link Source#getEnclosingTag(int pos, TagType)}
48 * <li>{@link Source#getTagAt(int pos)}
49 * <li>{@link Segment#getAllTags()}
50 * <li>{@link Segment#getAllTags(TagType)}
51 * </ul>
52 * <p>
53 * The {@link Tag} superclass defines the {@link Tag#getName() getName()} method used to get the name of this end tag.
54 * <p>
55 * See also the XML 1.0 specification for <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-etag">end tags</a>.
56 *
57 * @see Tag
58 * @see StartTag
59 * @see Element
60 */
61 public final class EndTag extends Tag {
62 private final EndTagType endTagType;
63
64 /**
65 * Constructs a new <code>EndTag</code>.
66 *
67 * @param source the {@link Source} document.
68 * @param begin the character position in the source document where this tag {@linkplain Segment#getBegin() begins}.
69 * @param end the character position in the source document where this tag {@linkplain Segment#getEnd() ends}.
70 * @param endTagType the {@linkplain #getEndTagType() type} of the end tag.
71 * @param name the {@linkplain Tag#getName() name} of the tag.
72 */
73 EndTag(final Source source, final int begin, final int end, final EndTagType endTagType, final String name) {
74 super(source,begin,end,name);
75 this.endTagType=endTagType;
76 }
77
78 /**
79 * Returns the {@linkplain Element element} that is ended by this end tag.
80 * <p>
81 * Returns <code>null</code> if this end tag is not properly matched to any {@linkplain StartTag start tag} in the source document.
82 * <p>
83 * This method is much less efficient than the {@link StartTag#getElement()} method.
84 * <p>
85 * IMPLEMENTATION NOTE: The explanation for why this method is relatively inefficient lies in the fact that more than one
86 * {@linkplain StartTagType start tag type} can have the same
87 * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}, so it is not possible to know for certain
88 * which type of start tag this end tag is matched to (see {@link EndTagType#getCorrespondingStartTagType()} for more explanation).
89 * Because of this uncertainty, the implementation of this method must check every start tag preceding this end tag, calling its
90 * {@link StartTag#getElement()} method to see whether it is terminated by this end tag.
91 *
92 * @return the {@linkplain Element element} that is ended by this end tag.
93 */
94 public Element getElement() {
95 if (element!=Element.NOT_CACHED) return element;
96 int pos=begin;
97 while (pos!=0) {
98 StartTag startTag=source.getPreviousStartTag(pos-1);
99 if (startTag==null) break;
100 Element foundElement=startTag.getElement(); // this automatically sets foundElement.getEndTag().element cache
101 if (foundElement.getEndTag()==this) return foundElement; // no need to set element as it was already done in previous statement
102 pos=startTag.begin;
103 }
104 return element=null;
105 }
106
107 /**
108 * Returns the {@linkplain EndTagType type} of this end tag.
109 * <p>
110 * This is equivalent to <code>(EndTagType)</code>{@link #getTagType()}.
111 *
112 * @return the {@linkplain EndTagType type} of this end tag.
113 */
114 public EndTagType getEndTagType() {
115 return endTagType;
116 }
117
118 // Documentation inherited from Tag
119 public TagType getTagType() {
120 return endTagType;
121 }
122
123 // Documentation inherited from Tag
124 public boolean isUnregistered() {
125 return endTagType==EndTagType.UNREGISTERED;
126 }
127
128 /**
129 * Returns an XML representation of this end tag.
130 * <p>
131 * This method is included for symmetry with the {@link StartTag#tidy()} method and simply
132 * returns the {@linkplain Segment#toString() source text} of the tag.
133 *
134 * @return an XML representation of this end tag.
135 */
136 public String tidy() {
137 return toString();
138 }
139
140 /**
141 * Generates the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}.
142 * <p>
143 * <dl>
144 * <dt>Example:</dt>
145 * <dd>
146 * <p>
147 * The following method call:
148 * <blockquote class="code">
149 * <code>EndTag.generateHTML("INPUT")</code>
150 * </blockquote>
151 * returns the following output:
152 * <blockquote class="code">
153 * <code>&lt;/INPUT&gt;</code>
154 * </blockquote>
155 * </dd>
156 * </dl>
157 *
158 * @param tagName the {@linkplain #getName() name} of the end tag.
159 * @return the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}.
160 * @see StartTag#generateHTML(String tagName, Map attributesMap, boolean emptyElementTag)
161 */
162 public static String generateHTML(final String tagName) {
163 return EndTagType.NORMAL.generateHTML(tagName);
164 }
165
166 public String getDebugInfo() {
167 final StringBuilder sb=new StringBuilder();
168 sb.append(this).append(' ');
169 if (endTagType!=EndTagType.NORMAL) sb.append('(').append(endTagType.getDescription()).append(") ");
170 sb.append(super.getDebugInfo());
171 return sb.toString();
172 }
173
174 /**
175 * Returns the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position.
176 * <p>
177 * Called from {@link Source#getPreviousEndTag(int pos, String name)}.
178 *
179 * @param source the {@link Source} document.
180 * @param pos the position to search from.
181 * @param name the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null).
182 * @param endTagType the {@linkplain EndTagType type} of end tag to search for.
183 * @return the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found.
184 */
185 static EndTag getPrevious(final Source source, final int pos, final String name, final EndTagType endTagType) {
186 if (name==null) return (EndTag)Tag.getPreviousTag(source,pos,endTagType);
187 if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length");
188 final String searchString=endTagType.START_DELIMITER_PREFIX+name;
189 try {
190 final ParseText parseText=source.getParseText();
191 int begin=pos;
192 do {
193 begin=parseText.lastIndexOf(searchString,begin);
194 if (begin==-1) return null;
195 final EndTag endTag=(EndTag)source.getTagAt(begin);
196 if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag;
197 } while ((begin-=1)>=0);
198 } catch (IndexOutOfBoundsException ex) {
199 // this should never happen during a get previous operation so rethrow it:
200 throw ex;
201 }
202 return null;
203 }
204
205 /**
206 * Returns the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position.
207 * <p>
208 * Called from {@link Source#getNextEndTag(int pos, String name, EndTagType endTagType)}.
209 *
210 * @param source the {@link Source} document.
211 * @param pos the position to search from.
212 * @param name the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null).
213 * @param endTagType the {@linkplain EndTagType type} of end tag to search for.
214 * @return the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found.
215 */
216 static EndTag getNext(final Source source, final int pos, final String name, final EndTagType endTagType) {
217 if (name==null) return (EndTag)Tag.getNextTag(source,pos,endTagType);
218 if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length");
219 final String searchString=endTagType.START_DELIMITER_PREFIX+name;
220 try {
221 final ParseText parseText=source.getParseText();
222 int begin=pos;
223 do {
224 begin=parseText.indexOf(searchString,begin);
225 if (begin==-1) return null;
226 final EndTag endTag=(EndTag)source.getTagAt(begin);
227 if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag;
228 } while ((begin+=1)<source.end);
229 } catch (IndexOutOfBoundsException ex) {
230 // this should only happen when the end of file is reached in the middle of a tag.
231 // we don't have to do anything to handle it as there will be no more tags anyway.
232 }
233 return null;
234 }
235
236 static EndTag getPrevious(final Source source, int pos) {
237 while (true) {
238 final Tag tag=Tag.getPreviousTag(source,pos);
239 if (tag==null) return null;
240 if (tag instanceof EndTag) return (EndTag)tag;
241 pos-=1;
242 }
243 }
244
245 static EndTag getNext(final Source source, int pos) {
246 while (true) {
247 final Tag tag=Tag.getNextTag(source,pos);
248 if (tag==null) return null;
249 if (tag instanceof EndTag) return (EndTag)tag;
250 pos+=1;
251 }
252 }
253 }
254

   
Visit the aagtl Website