1 |
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
|
2 |
// Version 3.2
|
3 |
// Copyright (C) 2004-2009 Martin Jericho
|
4 |
// http://jericho.htmlparser.net/
|
5 |
//
|
6 |
// This library is free software; you can redistribute it and/or
|
7 |
// modify it under the terms of either one of the following licences:
|
8 |
//
|
9 |
// 1. The Eclipse Public License (EPL) version 1.0,
|
10 |
// included in this distribution in the file licence-epl-1.0.html
|
11 |
// or available at http://www.eclipse.org/legal/epl-v10.html
|
12 |
//
|
13 |
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
|
14 |
// included in this distribution in the file licence-lgpl-2.1.txt
|
15 |
// or available at http://www.gnu.org/licenses/lgpl.txt
|
16 |
//
|
17 |
// This library is distributed on an "AS IS" basis,
|
18 |
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
19 |
// See the individual licence texts for more details.
|
20 |
|
21 |
package net.htmlparser.jericho;
|
22 |
|
23 |
/**
|
24 |
* Represents the text from the {@linkplain Source source} document that is to be parsed.
|
25 |
* <p>
|
26 |
* This interface is normally only of interest to users who wish to create <a href="TagType.html#Custom">custom tag types</a>.
|
27 |
* <p>
|
28 |
* The parse text is defined as the entire text of the source document in lower case, with all
|
29 |
* {@linkplain Segment#ignoreWhenParsing() ignored} segments replaced by space characters.
|
30 |
* <p>
|
31 |
* The text is stored in lower case to make case insensitive parsing as efficient as possible.
|
32 |
* <p>
|
33 |
* This interface provides many methods which are also provided by the <code>java.lang.String</code> class,
|
34 |
* but adds an extra parameter called <code>breakAtIndex</code> to the various <code>indexOf</code> methods.
|
35 |
* This parameter allows a search on only a specified segment of the text, which is not possible using the normal <code>String</code> class.
|
36 |
* <p>
|
37 |
* <code>ParseText</code> instances are obtained using the {@link Source#getParseText()} method.
|
38 |
*/
|
39 |
public interface ParseText extends CharSequence {

    /**
     * Sentinel for the <code>breakAtIndex</code> argument of the search methods, meaning the search is not cut short
     * and runs through to the end of the parse text (forward searches) or back to its start (backward searches).
     */
    int NO_BREAK = -1;

    // ---------------------------------------------------------------------
    // Basic character sequence access
    // ---------------------------------------------------------------------

    /**
     * Gets the character located at the given index.
     *
     * @param index the index of the character.
     * @return the character at that index; it is always in lower case.
     */
    char charAt(int index);

    /**
     * Gets the length of the parse text.
     *
     * @return the length of the parse text.
     */
    int length();

    /**
     * Creates a new character sequence covering part of this sequence.
     *
     * @param begin the begin position, inclusive.
     * @param end the end position, exclusive.
     * @return a new character sequence that is a subsequence of this sequence.
     */
    CharSequence subSequence(int begin, int end);

    /**
     * Gets the content of the parse text as a <code>String</code>.
     *
     * @return the content of the parse text as a <code>String</code>.
     */
    String toString();

    // ---------------------------------------------------------------------
    // Positional matching
    // ---------------------------------------------------------------------

    /**
     * Tests whether the given string occurs in this parse text at exactly the given position.
     * <p>
     * The equivalent operation on a string is <code>java.lang.String.startsWith(String prefix, int toffset)</code>.
     *
     * @param str a string.
     * @param pos the position (index) in this parse text at which to look for the string.
     * @return <code>true</code> if the string is present at that position, otherwise <code>false</code>.
     */
    boolean containsAt(String str, int pos);

    // ---------------------------------------------------------------------
    // Forward searches
    // ---------------------------------------------------------------------

    /**
     * Finds the first occurrence of a character, searching forwards from <code>fromIndex</code>.
     *
     * @param searchChar a character.
     * @param fromIndex the index to start the search from.
     * @return the index of the first occurrence of the character within the searched range, or -1 if it is not found.
     */
    int indexOf(char searchChar, int fromIndex);

    /**
     * Finds the first occurrence of a character, searching forwards from <code>fromIndex</code>
     * and giving up when <code>breakAtIndex</code> is reached.
     * <p>
     * The position <code>breakAtIndex</code> itself is excluded from the search.
     * <p>
     * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the <code>breakAtIndex</code> to search through to the end of the text.
     *
     * @param searchChar a character.
     * @param fromIndex the index to start the search from.
     * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} to continue to the end of the text.
     * @return the index of the first occurrence of the character within the searched range, or -1 if it is not found.
     */
    int indexOf(char searchChar, int fromIndex, int breakAtIndex);

    /**
     * Finds the first occurrence of a string, searching forwards from <code>fromIndex</code>.
     *
     * @param searchString a string.
     * @param fromIndex the index to start the search from.
     * @return the index of the first occurrence of the string within the searched range, or -1 if it is not found.
     */
    int indexOf(String searchString, int fromIndex);

    /**
     * Finds the first occurrence of a string, searching forwards from <code>fromIndex</code>
     * and giving up when <code>breakAtIndex</code> is reached.
     * <p>
     * The position <code>breakAtIndex</code> itself is excluded from the search.
     * <p>
     * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the <code>breakAtIndex</code> to search through to the end of the text.
     *
     * @param searchString a string.
     * @param fromIndex the index to start the search from.
     * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} to continue to the end of the text.
     * @return the index of the first occurrence of the string within the searched range, or -1 if it is not found.
     */
    int indexOf(String searchString, int fromIndex, int breakAtIndex);

    // ---------------------------------------------------------------------
    // Backward searches
    // ---------------------------------------------------------------------

    /**
     * Finds the last occurrence of a character, searching backwards from <code>fromIndex</code>.
     *
     * @param searchChar a character.
     * @param fromIndex the index to start the search from.
     * @return the index of the last occurrence of the character within the searched range, or -1 if it is not found.
     */
    int lastIndexOf(char searchChar, int fromIndex);

    /**
     * Finds the last occurrence of a character, searching backwards from <code>fromIndex</code>
     * and giving up when <code>breakAtIndex</code> is reached.
     * <p>
     * The position <code>breakAtIndex</code> itself is excluded from the search.
     * <p>
     * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the <code>breakAtIndex</code> to search back to the start of the text.
     *
     * @param searchChar a character.
     * @param fromIndex the index to start the search from.
     * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} to continue to the start of the text.
     * @return the index of the last occurrence of the character within the searched range, or -1 if it is not found.
     */
    int lastIndexOf(char searchChar, int fromIndex, int breakAtIndex);

    /**
     * Finds the last occurrence of a string, searching backwards from <code>fromIndex</code>.
     *
     * @param searchString a string.
     * @param fromIndex the index to start the search from.
     * @return the index of the last occurrence of the string within the searched range, or -1 if it is not found.
     */
    int lastIndexOf(String searchString, int fromIndex);

    /**
     * Finds the last occurrence of a string, searching backwards from <code>fromIndex</code>
     * and giving up when <code>breakAtIndex</code> is reached.
     * <p>
     * The position <code>breakAtIndex</code> itself is excluded from the search.
     * <p>
     * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the <code>breakAtIndex</code> to search back to the start of the text.
     *
     * @param searchString a string.
     * @param fromIndex the index to start the search from.
     * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} to continue to the start of the text.
     * @return the index of the last occurrence of the string within the searched range, or -1 if it is not found.
     */
    int lastIndexOf(String searchString, int fromIndex, int breakAtIndex);
}
|