/[aagtl_public1]/src/com/zoffcc/applications/aagtl/StringUtils.java
aagtl

Contents of /src/com/zoffcc/applications/aagtl/StringUtils.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (show annotations) (download)
Sun Aug 5 14:00:28 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 6986 byte(s)
license text correction
1 /**
2 * aagtl Advanced Geocaching Tool for Android
3 * loosely based on agtl by Daniel Fett <fett@danielfett.de>
4 * Copyright (C) 2010 - 2012 Zoff <aagtl@work.zoff.cc>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20
21 package com.zoffcc.applications.aagtl;
22
23 import java.util.StringTokenizer;
24
/**
 * Static helpers for HTML entity decoding, whitespace stripping/collapsing,
 * tokenizing and simple word wrapping.
 *
 * This class only has static members and cannot be instantiated.
 */
public class StringUtils
{
    /**
     * Characters treated as white space by {@link #strip(String)},
     * {@link #collapseWhitespace(String)} and {@link #fixedWidth(String[], int)}:
     * space, CR, LF, TAB and
     * <ul>
     * <li>U+3000 ideographic (full-width) space</li>
     * <li>U+00A0 non-breaking space (nbsp)</li>
     * <li>U+2007 figure space</li>
     * <li>U+202F narrow non-breaking space</li>
     * </ul>
     */
    public static final String WHITE_SPACES = " \r\n\t\u3000\u00A0\u2007\u202F";

    private StringUtils()
    {
    }

    /**
     * Entity table: column 0 is the entity as it appears in the text
     * (including the leading ampersand and trailing semicolon), column 1 is
     * its replacement. Replacements are written as Unicode escapes so the
     * values do not depend on the encoding used to compile this file.
     * Note that the nbsp entity is mapped to an ordinary space.
     */
    private static final String[][] htmlEscape = { { "&lt;", "<" }, { "&gt;", ">" }, { "&amp;", "&" }, { "&quot;", "\"" },
            { "&agrave;", "\u00E0" }, { "&Agrave;", "\u00C0" }, { "&acirc;", "\u00E2" }, { "&auml;", "\u00E4" }, { "&Auml;", "\u00C4" },
            { "&Acirc;", "\u00C2" }, { "&aring;", "\u00E5" }, { "&Aring;", "\u00C5" }, { "&aelig;", "\u00E6" }, { "&AElig;", "\u00C6" },
            { "&ccedil;", "\u00E7" }, { "&Ccedil;", "\u00C7" }, { "&eacute;", "\u00E9" }, { "&Eacute;", "\u00C9" }, { "&egrave;", "\u00E8" },
            { "&Egrave;", "\u00C8" }, { "&ecirc;", "\u00EA" }, { "&Ecirc;", "\u00CA" }, { "&euml;", "\u00EB" }, { "&Euml;", "\u00CB" },
            { "&iuml;", "\u00EF" }, { "&Iuml;", "\u00CF" }, { "&ocirc;", "\u00F4" }, { "&Ocirc;", "\u00D4" }, { "&ouml;", "\u00F6" },
            { "&Ouml;", "\u00D6" }, { "&oslash;", "\u00F8" }, { "&Oslash;", "\u00D8" }, { "&szlig;", "\u00DF" }, { "&ugrave;", "\u00F9" },
            { "&Ugrave;", "\u00D9" }, { "&ucirc;", "\u00FB" }, { "&Ucirc;", "\u00DB" }, { "&uuml;", "\u00FC" }, { "&Uuml;", "\u00DC" },
            // the euro entity is U+20AC (EURO SIGN); U+20A0 is a different character
            { "&nbsp;", " " }, { "&copy;", "\u00A9" }, { "&reg;", "\u00AE" }, { "&euro;", "\u20AC" } };

    /**
     * Replaces the HTML entities listed in the entity table by their
     * characters, scanning from index {@code start} to the end of the string.
     * Text before {@code start} is copied unchanged.
     *
     * The string is decoded in a single left-to-right pass: text produced by
     * a replacement is never scanned again (so the ampersand produced by the
     * amp entity does not start a new entity), and an ampersand that does not
     * begin a known entity is kept literally while scanning continues behind
     * it.
     *
     * Example: {@code "&copy; 2000 R&eacute;al &lt;x&gt;"} becomes the
     * copyright sign (U+00A9), {@code " 2000 R"}, e-acute (U+00E9) and
     * {@code "al <x>"}.
     *
     * @param s
     *            text to decode, may be null
     * @param start
     *            index at which scanning begins; a negative value is treated
     *            as 0
     * @return the decoded text, {@code s} itself if nothing was replaced, or
     *         null if {@code s} is null
     */
    public static final String unescapeHTML(String s, int start)
    {
        if (s == null)
        {
            return null;
        }

        StringBuilder out = null; // created lazily on the first replacement
        int copiedUpTo = 0; // everything before this index is already in "out"
        int semi = -1; // cached position of the next ';' at or after "amp"
        int amp = s.indexOf('&', start);

        while (amp >= 0)
        {
            if (semi < amp)
            {
                semi = s.indexOf(';', amp);
                if (semi < 0)
                {
                    // no terminator left, so no further entity can match
                    break;
                }
            }

            String replacement = lookupEntity(s, amp, semi + 1);
            if (replacement == null)
            {
                // not a known entity: keep the '&' and look for the next one
                amp = s.indexOf('&', amp + 1);
            }
            else
            {
                if (out == null)
                {
                    out = new StringBuilder(s.length());
                }
                out.append(s, copiedUpTo, amp).append(replacement);
                copiedUpTo = semi + 1;
                amp = s.indexOf('&', copiedUpTo);
            }
        }

        if (out == null)
        {
            return s;
        }
        return out.append(s, copiedUpTo, s.length()).toString();
    }

    /**
     * Looks up the region {@code [from, to)} of {@code s} in the entity table.
     *
     * @return the replacement text, or null if the region is not a known entity
     */
    private static String lookupEntity(String s, int from, int to)
    {
        int len = to - from;
        for (int k = 0; k < htmlEscape.length; k++)
        {
            String entity = htmlEscape[k][0];
            if (entity.length() == len && s.regionMatches(from, entity, 0, len))
            {
                return htmlEscape[k][1];
            }
        }
        return null;
    }

    /**
     * Strips leading and trailing white space, then collapses every remaining
     * run of white space into a single space.
     *
     * @return the normalized string, or null if {@code str} is null
     */
    public static String stripAndCollapse(String str)
    {
        return collapseWhitespace(strip(str));
    }

    /**
     * Removes all leading and trailing {@link #WHITE_SPACES} characters.
     *
     * @return the stripped string, or null if {@code str} is null
     */
    public static String strip(String str)
    {
        return megastrip(str, true, true, WHITE_SPACES);
    }

    /**
     * Removes characters contained in {@code what} from the ends of
     * {@code str}.
     *
     * @param str
     *            string to strip, may be null
     * @param left
     *            if true, strip at the beginning
     * @param right
     *            if true, strip at the end
     * @param what
     *            the set of characters to remove
     * @return the stripped string (possibly empty), or null if {@code str}
     *         is null
     */
    public static String megastrip(String str, boolean left, boolean right, String what)
    {
        if (str == null)
        {
            return null;
        }

        int limitLeft = 0;
        int limitRight = str.length() - 1;

        while (left && limitLeft <= limitRight && what.indexOf(str.charAt(limitLeft)) >= 0)
        {
            limitLeft++;
        }
        while (right && limitRight >= limitLeft && what.indexOf(str.charAt(limitRight)) >= 0)
        {
            limitRight--;
        }

        return str.substring(limitLeft, limitRight + 1);
    }

    /**
     * Replaces every run of {@link #WHITE_SPACES} characters by one space.
     *
     * @return the collapsed string, or null if {@code str} is null
     */
    public static String collapseWhitespace(String str)
    {
        return collapse(str, WHITE_SPACES, " ");
    }

    /**
     * Replaces every run of consecutive characters contained in
     * {@code chars} by a single {@code replacement}.
     *
     * @param str
     *            input string, may be null
     * @param chars
     *            the set of characters to collapse
     * @param replacement
     *            text emitted once per run
     * @return the collapsed string, or null if {@code str} is null
     */
    public static String collapse(String str, String chars, String replacement)
    {
        if (str == null)
        {
            return null;
        }
        StringBuilder newStr = new StringBuilder(str.length());

        boolean prevCharMatched = false;
        for (int i = 0; i < str.length(); i++)
        {
            char c = str.charAt(i);
            boolean matched = chars.indexOf(c) != -1;
            if (!matched)
            {
                newStr.append(c);
            }
            else if (!prevCharMatched)
            {
                // first character of a run: emit the replacement once
                newStr.append(replacement);
            }
            prevCharMatched = matched;
        }

        return newStr.toString();
    }

    /**
     * Word-wraps {@code str} to {@code width} columns. The text is first cut
     * into lines at line feeds (empty lines are dropped by the tokenizer) and
     * then wrapped by {@link #fixedWidth(String[], int)}.
     */
    public static String fixedWidth(String str, int width)
    {
        String[] lines = split(str, "\n");
        return fixedWidth(lines, width);
    }

    /**
     * Splits {@code str} at any of the {@code delims} characters and trims
     * each token.
     */
    public static String[] splitAndTrim(String str, String delims)
    {
        return split(str, delims, true);
    }

    /**
     * Splits {@code str} at any of the {@code delims} characters without
     * trimming the tokens.
     */
    public static String[] split(String str, String delims)
    {
        return split(str, delims, false);
    }

    /**
     * Split "str" into tokens by delimiters and optionally remove white spaces
     * from the split tokens. Tokens are produced by a
     * {@link StringTokenizer}, so empty tokens are never returned.
     *
     * @param str
     *            the string to split
     * @param delims
     *            the delimiter characters
     * @param trimTokens
     *            if true, then trim the tokens
     * @return the tokens in order of appearance
     */
    public static String[] split(String str, String delims, boolean trimTokens)
    {
        StringTokenizer tokenizer = new StringTokenizer(str, delims);
        int n = tokenizer.countTokens();
        String[] list = new String[n];
        for (int i = 0; i < n; i++)
        {
            String token = tokenizer.nextToken();
            list[i] = trimTokens ? token.trim() : token;
        }
        return list;
    }

    /**
     * Word-wraps each of the given lines to {@code width} columns and joins
     * the result with line feeds. Lines that already fit are copied
     * unchanged; longer lines are split into words at
     * {@link #WHITE_SPACES} and re-joined with single spaces. A single word
     * longer than {@code width} is kept whole on its own line.
     *
     * @param lines
     *            the input lines
     * @param width
     *            maximum line width in characters
     * @return the wrapped text
     */
    public static String fixedWidth(String[] lines, int width)
    {
        StringBuilder formatStr = new StringBuilder();

        for (int i = 0; i < lines.length; i++)
        {
            if (i != 0)
            {
                formatStr.append("\n");
            }
            // a small optimization: short lines need no wrapping
            if (lines[i].length() <= width)
            {
                formatStr.append(lines[i]);
                continue;
            }

            int curWidth = 0;
            String[] words = splitAndTrim(lines[i], WHITE_SPACES);
            for (int j = 0; j < words.length; j++)
            {
                int wordLength = words[j].length();
                if (curWidth == 0)
                {
                    // first word of the line: no separator needed
                    curWidth = wordLength;
                }
                else if (curWidth + wordLength < width)
                {
                    // word plus separating space still fits on this line
                    formatStr.append(" ");
                    curWidth += 1 + wordLength;
                }
                else
                {
                    formatStr.append("\n");
                    curWidth = wordLength;
                }
                formatStr.append(words[j]);
            }
        }

        return formatStr.toString();
    }
}

   
Visit the aagtl Website