/[aagtl_public1]/src/net/htmlparser/jericho/StartTagTypeGenericImplementation.java
aagtl

Contents of /src/net/htmlparser/jericho/StartTagTypeGenericImplementation.java

Parent Directory | Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 8 months ago) by zoffadmin
File size: 10223 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Provides a generic implementation of the abstract {@link StartTagType} class based on the most common start tag behaviour.
27 * <p>
28 * This class is only of interest to users who wish to create <a href="TagType.html#Custom">custom tag types</a>.
29 * <p>
30 * The only external difference between this class and its abstract superclass {@link StartTagType} is that it provides a default
31 * implementation of the {@link #constructTagAt(Source, int pos)} method.
32 * <p>
33 * Most of the <a href="Tag.html#Predefined">predefined</a> start tag types are implemented using this class or a subclass of it.
34 *
35 * @see EndTagTypeGenericImplementation
36 */
37 public class StartTagTypeGenericImplementation extends StartTagType {
38 final boolean nameCharAfterPrefixAllowed;
39
40 /**
41 * Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
42 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
43 * <p>
44 * This is equivalent to calling
45 * <br /><code>new&nbsp;</code>{@link #StartTagTypeGenericImplementation(String,String,String,EndTagType,boolean,boolean,boolean) StartTagTypeGenericImplementation}<code>(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false)</code>.
46 *
47 * @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
48 * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
49 * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
50 * @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
51 * @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
52 */
53 protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag) {
54 this(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false);
55 }
56
57 /**
58 * Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
59 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
60 *
61 * @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
62 * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
63 * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
64 * @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
65 * @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
66 * @param hasAttributes indicates whether the new start tag type {@linkplain #hasAttributes() has attributes}.
67 * @param isNameAfterPrefixRequired indicates whether a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}.
68 */
69 protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag, final boolean hasAttributes, final boolean isNameAfterPrefixRequired) {
70 super(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,hasAttributes,isNameAfterPrefixRequired);
71 nameCharAfterPrefixAllowed=(getNamePrefix().length()==0 || !Character.isLetter(getNamePrefix().charAt(getNamePrefix().length()-1)));
72 }
73
74 /**
75 * Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
76 * <br />(<a href="TagType.html#DefaultImplementation">default implementation</a> method)
77 * <p>
78 * This default implementation performs the following steps:
79 * <ol class="Separated">
80 * <li>
81 * If a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}, search for a valid
82 * {@linkplain Tag#isXMLName(CharSequence) XML tag name} directly after the
83 * {@linkplain #getNamePrefix() name prefix} using the {@link Source#getNameEnd(int pos)} method.
84 * If one is found, set the {@linkplain Tag#getName() name} to include it, otherwise return <code>null</code>.
85 * <li>
86 * If the last character of the {@linkplain #getNamePrefix() name prefix} is a letter
87 * (indicating that the prefix includes the full {@linkplain Tag#getName() name} of the tag),
88 * and the character following the prefix in the source text is also a letter
89 * or any other valid {@linkplain Tag#isXMLNameChar(char) XML name character},
90 * return <code>null</code>.
91 * <br />Example: the source text "<code>&lt;?xmlt ?&gt;</code>" should not be recognised as an
92 * {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction}, which has the prefix "<code>&lt;?xml</code>".
93 * <li>
94 * If the tag type {@linkplain #hasAttributes() has attributes}, call
95 * {@link #parseAttributes(Source,int,String) parseAttributes(source,pos,name)} to parse them.
96 * Return <code>null</code> if too many errors occur while parsing the attributes.
97 * <li>
98 * Find the {@linkplain Tag#getEnd() end} of the tag using the {@link #getEnd(Source, int pos)} method,
99 * where <code>pos</code> is either the end of the {@linkplain StartTag#getAttributes() attributes} segment or the end of the
100 * {@linkplain Tag#getName() name} depending on whether the tag type {@linkplain #hasAttributes() has attributes}.
101 * Return <code>null</code> if the end of the tag can not be found.
102 * <li>
103 * Construct the {@link StartTag} object using the
104 * {@link #constructStartTag(Source,int,int,String,Attributes) constructStartTag(Source, int pos, int end, String name, Attributes)}
105 * method with the argument values collected over the previous steps.
106 * </ol>
107 * <p>
108 * See {@link TagType#constructTagAt(Source, int pos)} for more important information about this method.
109 *
110 * @param source the {@link Source} document.
111 * @param pos the position in the source document.
112 * @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
113 */
114 protected Tag constructTagAt(final Source source, final int pos) {
115 final ParseText parseText=source.getParseText();
116 final int nameBegin=pos+1;
117 String name=getNamePrefix();
118 int nameEnd=nameBegin+getNamePrefix().length();
119 if (isNameAfterPrefixRequired()) {
120 final int extendedNameEnd=source.getNameEnd(nameEnd);
121 if (extendedNameEnd==-1) return null;
122 name=source.getName(nameBegin,extendedNameEnd);
123 nameEnd=extendedNameEnd;
124 } else if (!nameCharAfterPrefixAllowed && Tag.isXMLNameChar(parseText.charAt(nameEnd))) {
125 return null;
126 }
127 int end;
128 Attributes attributes=null;
129 if (hasAttributes()) {
130 // it is necessary to get the attributes so that we can be sure that the search on the closing delimiter doesn't pick up
131 // anything from the attribute values, which can legally contain ">" characters.
132 attributes=parseAttributes(source,pos,name);
133 if (attributes==null) return null; // happens if attributes not properly formed
134 end=getEnd(source,attributes.getEnd()); // should always return a valid end
135 } else {
136 end=getEnd(source,nameEnd);
137 if (end<0) {
138 if (end==-1 && source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("StartTag ").append(name).append(" at ")).append(" not recognised as type '").append(getDescription()).append("' because it has no closing delimiter").toString());
139 return null;
140 }
141 }
142 return constructStartTag(source,pos,end,name,attributes);
143 }
144
145 /**
146 * Returns the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document.
147 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
148 * <p>
149 * This default implementation simply searches for the first occurrence of the
150 * {@linkplain #getClosingDelimiter() closing delimiter} after the specified position, and returns the position immediately
151 * after the end of it.
152 * <p>
153 * If the closing delimiter is not found, the value <code>-1</code> is returned.
154 *
155 * @param source the {@link Source} document.
156 * @param pos the position in the source document.
157 * @return the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document, or <code>-1</code> if the end of the tag can not be found.
158 */
159 protected int getEnd(final Source source, final int pos) {
160 final int delimiterBegin=source.getParseText().indexOf(getClosingDelimiter(),pos);
161 return (delimiterBegin==-1 ? -1 : delimiterBegin+getClosingDelimiter().length());
162 }
163 }

   
Visit the aagtl Website