/[aagtl_public1]/src/net/htmlparser/jericho/ParseText.java
aagtl

Contents of /src/net/htmlparser/jericho/ParseText.java

Parent Directory | Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 10331 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
/**
 * Represents the text from the {@linkplain Source source} document that is to be parsed.
 * <p>
 * This interface is normally only of interest to users who wish to create
 * <a href="TagType.html#Custom">custom tag types</a>.
 * <p>
 * The parse text is defined as the entire text of the source document in lower case, with all
 * {@linkplain Segment#ignoreWhenParsing() ignored} segments replaced by space characters.
 * <p>
 * The text is stored in lower case to make case insensitive parsing as efficient as possible.
 * <p>
 * Many of the methods declared here mirror those of {@code java.lang.String}, but the various
 * {@code indexOf} and {@code lastIndexOf} methods accept an extra {@code breakAtIndex} parameter.
 * That parameter restricts a search to a specified segment of the text, which is not possible
 * using the normal {@code String} class.
 * <p>
 * {@code ParseText} instances are obtained using the {@link Source#getParseText()} method.
 */
public interface ParseText extends CharSequence {
    /**
     * A value to use as the {@code breakAtIndex} argument in certain methods to indicate
     * that the search should continue to the start or end of the parse text.
     */
    int NO_BREAK = -1;

    /**
     * Returns the character at the specified index.
     *
     * @param index  the index of the character.
     * @return the character at the specified index, which is always in lower case.
     */
    char charAt(int index);

    /**
     * Indicates whether this parse text contains the specified string at the specified position.
     * <p>
     * This method is analogous to the
     * {@code java.lang.String.startsWith(String prefix, int toffset)} method.
     *
     * @param str  a string.
     * @param pos  the position (index) in this parse text at which to check for the specified string.
     * @return {@code true} if this parse text contains the specified string at the specified
     *         position, otherwise {@code false}.
     */
    boolean containsAt(String str, int pos);

    /**
     * Returns the index within this parse text of the first occurrence of the specified character,
     * starting the search at the position specified by {@code fromIndex}.
     * <p>
     * If the specified character is not found then -1 is returned.
     *
     * @param searchChar  a character.
     * @param fromIndex  the index to start the search from.
     * @return the index within this parse text of the first occurrence of the specified character
     *         within the specified range, or -1 if the character is not found.
     */
    int indexOf(char searchChar, int fromIndex);

    /**
     * Returns the index within this parse text of the first occurrence of the specified character,
     * starting the search at the position specified by {@code fromIndex},
     * and breaking the search at the index specified by {@code breakAtIndex}.
     * <p>
     * The position specified by {@code breakAtIndex} is not included in the search.
     * <p>
     * If the search is to continue to the end of the text,
     * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the {@code breakAtIndex}.
     * <p>
     * If the specified character is not found then -1 is returned.
     *
     * @param searchChar  a character.
     * @param fromIndex  the index to start the search from.
     * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK}
     *        if the search is to continue to the end of the text.
     * @return the index within this parse text of the first occurrence of the specified character
     *         within the specified range, or -1 if the character is not found.
     */
    int indexOf(char searchChar, int fromIndex, int breakAtIndex);

    /**
     * Returns the index within this parse text of the first occurrence of the specified string,
     * starting the search at the position specified by {@code fromIndex}.
     * <p>
     * If the specified string is not found then -1 is returned.
     *
     * @param searchString  a string.
     * @param fromIndex  the index to start the search from.
     * @return the index within this parse text of the first occurrence of the specified string
     *         within the specified range, or -1 if the string is not found.
     */
    int indexOf(String searchString, int fromIndex);

    /**
     * Returns the index within this parse text of the first occurrence of the specified string,
     * starting the search at the position specified by {@code fromIndex},
     * and breaking the search at the index specified by {@code breakAtIndex}.
     * <p>
     * The position specified by {@code breakAtIndex} is not included in the search.
     * <p>
     * If the search is to continue to the end of the text,
     * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the {@code breakAtIndex}.
     * <p>
     * If the specified string is not found then -1 is returned.
     *
     * @param searchString  a string.
     * @param fromIndex  the index to start the search from.
     * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK}
     *        if the search is to continue to the end of the text.
     * @return the index within this parse text of the first occurrence of the specified string
     *         within the specified range, or -1 if the string is not found.
     */
    int indexOf(String searchString, int fromIndex, int breakAtIndex);

    /**
     * Returns the index within this parse text of the last occurrence of the specified character,
     * searching backwards starting at the position specified by {@code fromIndex}.
     * <p>
     * If the specified character is not found then -1 is returned.
     *
     * @param searchChar  a character.
     * @param fromIndex  the index to start the search from.
     * @return the index within this parse text of the last occurrence of the specified character
     *         within the specified range, or -1 if the character is not found.
     */
    int lastIndexOf(char searchChar, int fromIndex);

    /**
     * Returns the index within this parse text of the last occurrence of the specified character,
     * searching backwards starting at the position specified by {@code fromIndex},
     * and breaking the search at the index specified by {@code breakAtIndex}.
     * <p>
     * The position specified by {@code breakAtIndex} is not included in the search.
     * <p>
     * If the search is to continue to the start of the text,
     * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the {@code breakAtIndex}.
     * <p>
     * If the specified character is not found then -1 is returned.
     *
     * @param searchChar  a character.
     * @param fromIndex  the index to start the search from.
     * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK}
     *        if the search is to continue to the start of the text.
     * @return the index within this parse text of the last occurrence of the specified character
     *         within the specified range, or -1 if the character is not found.
     */
    int lastIndexOf(char searchChar, int fromIndex, int breakAtIndex);

    /**
     * Returns the index within this parse text of the last occurrence of the specified string,
     * searching backwards starting at the position specified by {@code fromIndex}.
     * <p>
     * If the specified string is not found then -1 is returned.
     *
     * @param searchString  a string.
     * @param fromIndex  the index to start the search from.
     * @return the index within this parse text of the last occurrence of the specified string
     *         within the specified range, or -1 if the string is not found.
     */
    int lastIndexOf(String searchString, int fromIndex);

    /**
     * Returns the index within this parse text of the last occurrence of the specified string,
     * searching backwards starting at the position specified by {@code fromIndex},
     * and breaking the search at the index specified by {@code breakAtIndex}.
     * <p>
     * The position specified by {@code breakAtIndex} is not included in the search.
     * <p>
     * If the search is to continue to the start of the text,
     * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the {@code breakAtIndex}.
     * <p>
     * If the specified string is not found then -1 is returned.
     *
     * @param searchString  a string.
     * @param fromIndex  the index to start the search from.
     * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK}
     *        if the search is to continue to the start of the text.
     * @return the index within this parse text of the last occurrence of the specified string
     *         within the specified range, or -1 if the string is not found.
     */
    int lastIndexOf(String searchString, int fromIndex, int breakAtIndex);

    /**
     * Returns the length of the parse text.
     *
     * @return the length of the parse text.
     */
    int length();

    /**
     * Returns a new character sequence that is a subsequence of this sequence.
     *
     * @param begin  the begin position, inclusive.
     * @param end  the end position, exclusive.
     * @return a new character sequence that is a subsequence of this sequence.
     */
    CharSequence subSequence(int begin, int end);

    /**
     * Returns the content of the parse text as a {@code String}.
     *
     * @return the content of the parse text as a {@code String}.
     */
    String toString();
}

   
Visit the aagtl Website