/**
 * aagtl Advanced Geocaching Tool for Android
 * loosely based on agtl by Daniel Fett <fett@danielfett.de>
 * Copyright (C) 2010 - 2012 Zoff <aagtl@work.zoff.cc>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
20 |
|
21 |
package com.zoffcc.applications.aagtl;
|
22 |
|
23 |
import java.util.StringTokenizer;
|
24 |
|
25 |
/**
 * Static string helpers: HTML entity unescaping, whitespace stripping and
 * collapsing, tokenizing, and simple word wrapping.
 *
 * The class is not instantiable; all members are static.
 */
public class StringUtils
{
	/**
	 * Characters treated as white space by {@link #strip(String)},
	 * {@link #collapseWhitespace(String)} and
	 * {@link #fixedWidth(String[], int)}.
	 *
	 * Besides space, CR, LF and TAB this contains:
	 * U+3000 (ideographic / full-width space),
	 * U+00A0 (non-breaking space, &amp;nbsp;),
	 * U+2007 (figure space) and
	 * U+202F (narrow non-breaking space).
	 */
	public static final String WHITE_SPACES = " \r\n\t\u3000\u00A0\u2007\u202F";

	/**
	 * Entity table used by {@link #unescapeHTML(String, int)}: each row is
	 * { entity including the leading ampersand and trailing semicolon, replacement }.
	 * Non-ASCII replacements are written as Unicode escapes so the table does
	 * not depend on the encoding the source file is compiled with.
	 */
	private static final String[][] HTML_ESCAPE = {
		{ "&lt;", "<" }, { "&gt;", ">" }, { "&amp;", "&" }, { "&quot;", "\"" },
		{ "&agrave;", "\u00e0" }, { "&Agrave;", "\u00c0" }, { "&acirc;", "\u00e2" },
		{ "&auml;", "\u00e4" }, { "&Auml;", "\u00c4" }, { "&Acirc;", "\u00c2" },
		{ "&aring;", "\u00e5" }, { "&Aring;", "\u00c5" }, { "&aelig;", "\u00e6" },
		{ "&AElig;", "\u00c6" }, { "&ccedil;", "\u00e7" }, { "&Ccedil;", "\u00c7" },
		{ "&eacute;", "\u00e9" }, { "&Eacute;", "\u00c9" }, { "&egrave;", "\u00e8" },
		{ "&Egrave;", "\u00c8" }, { "&ecirc;", "\u00ea" }, { "&Ecirc;", "\u00ca" },
		{ "&euml;", "\u00eb" }, { "&Euml;", "\u00cb" }, { "&iuml;", "\u00ef" },
		{ "&Iuml;", "\u00cf" }, { "&ocirc;", "\u00f4" }, { "&Ocirc;", "\u00d4" },
		{ "&ouml;", "\u00f6" }, { "&Ouml;", "\u00d6" }, { "&oslash;", "\u00f8" },
		{ "&Oslash;", "\u00d8" }, { "&szlig;", "\u00df" }, { "&ugrave;", "\u00f9" },
		{ "&Ugrave;", "\u00d9" }, { "&ucirc;", "\u00fb" }, { "&Ucirc;", "\u00db" },
		{ "&uuml;", "\u00fc" }, { "&Uuml;", "\u00dc" }, { "&nbsp;", " " },
		{ "&copy;", "\u00a9" }, { "&reg;", "\u00ae" },
		// U+20AC is the euro sign; the table previously held U+20A0
		// (the obsolete "euro-currency" sign), which is a different character.
		{ "&euro;", "\u20ac" } };

	private StringUtils()
	{
	}

	/**
	 * Replaces the named HTML entities listed in the internal table with the
	 * characters they stand for.
	 *
	 * Example: {@code unescapeHTML("&copy; 2000 R&eacute;al &lt;x&gt;", 0)}
	 * yields the copyright sign, "2000", "R", e-acute, "al" and {@code <x>}.
	 *
	 * Every entity is decoded exactly once: the ampersand produced by
	 * {@code &amp;} is never rescanned, so {@code &amp;lt;} becomes
	 * {@code &lt;} and not {@code <}. Ampersands that do not start a known
	 * entity are copied through unchanged and scanning continues after them.
	 *
	 * @param s
	 *            text to unescape; may be null
	 * @param start
	 *            index at which scanning begins; everything before it is
	 *            copied unchanged. Negative values behave like 0, values past
	 *            the end leave the text untouched.
	 * @return the unescaped text, the same instance when nothing was
	 *         replaced, or null when {@code s} is null
	 */
	public static final String unescapeHTML(String s, int start)
	{
		if (s == null)
		{
			return null;
		}

		int amp = s.indexOf('&', start);
		StringBuilder out = null; // allocated lazily, only when something is replaced
		int copiedUpTo = 0; // everything before this index is already in "out"

		while (amp >= 0)
		{
			int semi = s.indexOf(';', amp);
			if (semi < 0)
			{
				// no terminator left in the text, so no further entity can match
				break;
			}

			String replacement = lookupEntity(s, amp, semi + 1);
			if (replacement == null)
			{
				// unknown or stray '&': keep it and look for the next candidate
				amp = s.indexOf('&', amp + 1);
				continue;
			}

			if (out == null)
			{
				out = new StringBuilder(s.length());
			}
			out.append(s, copiedUpTo, amp).append(replacement);
			copiedUpTo = semi + 1;
			// resume after the entity so the replacement text is not re-examined
			amp = s.indexOf('&', copiedUpTo);
		}

		if (out == null)
		{
			return s;
		}
		return out.append(s, copiedUpTo, s.length()).toString();
	}

	/**
	 * Looks up the region {@code s[from, to)} in the entity table.
	 *
	 * @return the replacement text, or null when the region is not a known entity
	 */
	private static String lookupEntity(String s, int from, int to)
	{
		int len = to - from;
		for (String[] entry : HTML_ESCAPE)
		{
			String entity = entry[0];
			if (entity.length() == len && s.regionMatches(from, entity, 0, len))
			{
				return entry[1];
			}
		}
		return null;
	}

	/**
	 * Strips leading and trailing white space and collapses every inner run
	 * of white space to a single space.
	 *
	 * @return the normalized text, or null when {@code str} is null
	 */
	public static String stripAndCollapse(String str)
	{
		return collapseWhitespace(strip(str));
	}

	/**
	 * Removes leading and trailing {@link #WHITE_SPACES} characters.
	 *
	 * @return the stripped text, or null when {@code str} is null
	 */
	public static String strip(String str)
	{
		return megastrip(str, true, true, WHITE_SPACES);
	}

	/**
	 * Removes characters contained in {@code what} from the chosen end(s) of
	 * {@code str}.
	 *
	 * @param str
	 *            text to strip; may be null
	 * @param left
	 *            strip from the beginning
	 * @param right
	 *            strip from the end
	 * @param what
	 *            set of characters to remove
	 * @return the stripped text (possibly empty), or null when {@code str} is null
	 */
	public static String megastrip(String str, boolean left, boolean right, String what)
	{
		if (str == null)
		{
			return null;
		}

		int begin = 0; // first kept index
		int end = str.length(); // one past the last kept index

		if (left)
		{
			while (begin < end && what.indexOf(str.charAt(begin)) >= 0)
			{
				begin++;
			}
		}
		if (right)
		{
			while (end > begin && what.indexOf(str.charAt(end - 1)) >= 0)
			{
				end--;
			}
		}

		return str.substring(begin, end);
	}

	/**
	 * Replaces every run of {@link #WHITE_SPACES} characters with one space.
	 *
	 * @return the collapsed text, or null when {@code str} is null
	 */
	public static String collapseWhitespace(String str)
	{
		return collapse(str, WHITE_SPACES, " ");
	}

	/**
	 * Replaces every run of consecutive characters taken from {@code chars}
	 * with a single occurrence of {@code replacement}.
	 *
	 * @param str
	 *            text to process; may be null
	 * @param chars
	 *            set of characters that form a run
	 * @param replacement
	 *            text emitted once per run
	 * @return the collapsed text, or null when {@code str} is null
	 */
	public static String collapse(String str, String chars, String replacement)
	{
		if (str == null)
		{
			return null;
		}

		StringBuilder result = new StringBuilder();
		boolean insideRun = false;
		for (int pos = 0; pos < str.length(); pos++)
		{
			char current = str.charAt(pos);
			if (chars.indexOf(current) == -1)
			{
				insideRun = false;
				result.append(current);
			}
			else if (!insideRun)
			{
				// first character of a run: emit the replacement once
				insideRun = true;
				result.append(replacement);
			}
			// further characters of the same run are dropped
		}

		return result.toString();
	}

	/**
	 * Word-wraps {@code str} to {@code width} columns. The text is first cut
	 * at line feeds; because the tokenizer skips empty tokens, blank lines in
	 * the input are dropped.
	 *
	 * @see #fixedWidth(String[], int)
	 */
	public static String fixedWidth(String str, int width)
	{
		return fixedWidth(split(str, "\n"), width);
	}

	/**
	 * Splits {@code str} at any of the delimiter characters and trims each token.
	 *
	 * @see #split(String, String, boolean)
	 */
	public static String[] splitAndTrim(String str, String delims)
	{
		return split(str, delims, true);
	}

	/**
	 * Splits {@code str} at any of the delimiter characters without trimming.
	 *
	 * @see #split(String, String, boolean)
	 */
	public static String[] split(String str, String delims)
	{
		return split(str, delims, false);
	}

	/**
	 * Split "str" into tokens by delimiters and optionally remove white spaces
	 * from the split tokens. Consecutive delimiters do not produce empty
	 * tokens.
	 *
	 * @param str
	 *            text to split
	 * @param delims
	 *            each character of this string is a delimiter
	 * @param trimTokens
	 *            if true, then trim the tokens
	 * @return the tokens in order of appearance
	 */
	public static String[] split(String str, String delims, boolean trimTokens)
	{
		StringTokenizer tokens = new StringTokenizer(str, delims);
		String[] result = new String[tokens.countTokens()];
		for (int idx = 0; idx < result.length; idx++)
		{
			String token = tokens.nextToken();
			result[idx] = trimTokens ? token.trim() : token;
		}
		return result;
	}

	/**
	 * Word-wraps each entry of {@code lines} to {@code width} columns and
	 * joins the results with line feeds.
	 *
	 * Lines that already fit are copied verbatim. Longer lines are broken at
	 * {@link #WHITE_SPACES}; words are re-joined with single spaces and a
	 * word longer than {@code width} is placed on a line of its own.
	 *
	 * @param lines
	 *            the lines to wrap
	 * @param width
	 *            maximum line length in characters
	 * @return the wrapped text
	 */
	public static String fixedWidth(String[] lines, int width)
	{
		StringBuilder wrapped = new StringBuilder();

		for (int lineNo = 0; lineNo < lines.length; lineNo++)
		{
			String line = lines[lineNo];
			if (lineNo != 0)
			{
				wrapped.append("\n");
			}

			// a small optimization: short lines need no re-flowing
			if (line.length() <= width)
			{
				wrapped.append(line);
				continue;
			}

			int used = 0; // characters already on the current output line
			for (String word : splitAndTrim(line, WHITE_SPACES))
			{
				boolean atLineStart = (used == 0);
				// "<" rather than "<=" leaves room for the separating space
				if (atLineStart || used + word.length() < width)
				{
					if (!atLineStart)
					{
						wrapped.append(" ");
						used += 1;
					}
					used += word.length();
				}
				else
				{
					wrapped.append("\n");
					used = word.length();
				}
				wrapped.append(word);
			}
		}

		return wrapped.toString();
	}
}
|