// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

class CharSequenceParseText implements ParseText {
|
24 |
private final CharSequence charSequence;
|
25 |
|
26 |
CharSequenceParseText(final CharSequence charSequence) {
|
27 |
this.charSequence=charSequence;
|
28 |
}
|
29 |
|
30 |
public final char charAt(final int index) {
|
31 |
final char ch=charSequence.charAt(index);
|
32 |
return (ch>='A' && ch<='Z') ? ((char)(ch ^ 0x20)) : ch;
|
33 |
}
|
34 |
|
35 |
public final boolean containsAt(final String str, final int pos) {
|
36 |
for (int i=0; i<str.length(); i++)
|
37 |
if (str.charAt(i)!=charAt(pos+i)) return false;
|
38 |
return true;
|
39 |
}
|
40 |
|
41 |
public final int indexOf(final char searchChar, final int fromIndex) {
|
42 |
return indexOf(searchChar,fromIndex,NO_BREAK);
|
43 |
}
|
44 |
|
45 |
public final int indexOf(final char searchChar, final int fromIndex, final int breakAtIndex) {
|
46 |
final int actualBreakAtIndex=(breakAtIndex==NO_BREAK || breakAtIndex>getEnd() ? getEnd() : breakAtIndex);
|
47 |
try {
|
48 |
for (int i=(fromIndex<0 ? 0 : fromIndex); i<actualBreakAtIndex; i++)
|
49 |
if (charAt(i)==searchChar) return i;
|
50 |
} catch (IndexOutOfBoundsException ex) {} // only happens in StreamedParseText subclass - this is the normal way to catch end of stream
|
51 |
return -1;
|
52 |
}
|
53 |
|
54 |
public final int indexOf(final String searchString, final int fromIndex) {
|
55 |
return indexOf(searchString,fromIndex,NO_BREAK);
|
56 |
}
|
57 |
|
58 |
public final int indexOf(final String searchString, final int fromIndex, final int breakAtIndex) {
|
59 |
if (searchString.length()==1) return indexOf(searchString.charAt(0),fromIndex,breakAtIndex);
|
60 |
if (searchString.length()==0) return fromIndex;
|
61 |
final char firstChar=searchString.charAt(0);
|
62 |
final int lastPossibleBreakAtIndex=getEnd()-searchString.length()+1;
|
63 |
final int actualBreakAtIndex=(breakAtIndex==NO_BREAK || breakAtIndex>lastPossibleBreakAtIndex) ? lastPossibleBreakAtIndex : breakAtIndex;
|
64 |
outerLoop: for (int i=(fromIndex<0 ? 0 : fromIndex); i<actualBreakAtIndex; i++) {
|
65 |
if (charAt(i)==firstChar) {
|
66 |
for (int j=1; j<searchString.length(); j++)
|
67 |
if (searchString.charAt(j)!=charAt(j+i)) continue outerLoop;
|
68 |
return i;
|
69 |
}
|
70 |
}
|
71 |
return -1;
|
72 |
}
|
73 |
|
74 |
public final int lastIndexOf(final char searchChar, final int fromIndex) {
|
75 |
return lastIndexOf(searchChar,fromIndex,NO_BREAK);
|
76 |
}
|
77 |
|
78 |
public final int lastIndexOf(final char searchChar, final int fromIndex, final int breakAtIndex) {
|
79 |
for (int i=(fromIndex>getEnd() ? getEnd() : fromIndex); i>breakAtIndex; i--)
|
80 |
if (charAt(i)==searchChar) return i;
|
81 |
return -1;
|
82 |
}
|
83 |
|
84 |
public final int lastIndexOf(final String searchString, final int fromIndex) {
|
85 |
return lastIndexOf(searchString,fromIndex,NO_BREAK);
|
86 |
}
|
87 |
|
88 |
public final int lastIndexOf(final String searchString, int fromIndex, final int breakAtIndex) {
|
89 |
if (searchString.length()==1) return lastIndexOf(searchString.charAt(0),fromIndex,breakAtIndex);
|
90 |
if (searchString.length()==0) return fromIndex;
|
91 |
final int rightIndex=getEnd()-searchString.length();
|
92 |
if (breakAtIndex>rightIndex) return -1;
|
93 |
if (fromIndex>rightIndex) fromIndex=rightIndex;
|
94 |
final int lastCharIndex=searchString.length()-1;
|
95 |
final char lastChar=searchString.charAt(lastCharIndex);
|
96 |
final int actualBreakAtPos=breakAtIndex+lastCharIndex;
|
97 |
outerLoop: for (int i=fromIndex+lastCharIndex; i>actualBreakAtPos; i--) {
|
98 |
if (charAt(i)==lastChar) {
|
99 |
final int startIndex=i-lastCharIndex;
|
100 |
for (int j=lastCharIndex-1; j>=0; j--)
|
101 |
if (searchString.charAt(j)!=charAt(j+startIndex)) continue outerLoop;
|
102 |
return startIndex;
|
103 |
}
|
104 |
}
|
105 |
return -1;
|
106 |
}
|
107 |
|
108 |
public final int length() {
|
109 |
return charSequence.length();
|
110 |
}
|
111 |
|
112 |
public final CharSequence subSequence(final int begin, final int end) {
|
113 |
// doesn't have to be efficient because it is not actually used anywhere internally.
|
114 |
return substring(begin,end);
|
115 |
}
|
116 |
|
117 |
public final String toString() {
|
118 |
return charSequence.toString();
|
119 |
}
|
120 |
|
121 |
protected int getEnd() {
|
122 |
return charSequence.length();
|
123 |
}
|
124 |
|
125 |
protected String substring(final int begin, final int end) {
|
126 |
return charSequence.subSequence(begin,end).toString().toLowerCase();
|
127 |
}
|
128 |
}
|