/[aagtl_public1]/src/com/zoffcc/applications/aagtl/HtmlToText.java
aagtl

Contents of /src/com/zoffcc/applications/aagtl/HtmlToText.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (show annotations) (download)
Sun Aug 5 14:00:28 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 3821 byte(s)
license text correction
1 /**
2 * aagtl Advanced Geocaching Tool for Android
3 * loosely based on agtl by Daniel Fett <fett@danielfett.de>
4 * Copyright (C) 2010 - 2012 Zoff <aagtl@work.zoff.cc>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20
21 /** Copyright (c) 2008 Google Inc.
22 *
23 * Licensed under the Apache License, Version 2.0 (the "License");
24 * you may not use this file except in compliance with the License.
25 * You may obtain a copy of the License at
26 *
27 * http://www.apache.org/licenses/LICENSE-2.0
28 *
29 * Unless required by applicable law or agreed to in writing, software
30 * distributed under the License is distributed on an "AS IS" BASIS,
31 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
32 * See the License for the specific language governing permissions and
33 * limitations under the License.
34 */
35
36 package com.zoffcc.applications.aagtl;
37
38 import java.util.regex.Pattern;
39
40 /**
41 * Convert provided html formatted string to text format.
42 *
43 *
44 */
45 public final class HtmlToText
46 {
47
48 /**
49 * Regular expression to match html line breaks or paragraph tags
50 * and adjacent whitespace
51 */
52 private static final Pattern htmlNewlinePattern = Pattern.compile("\\s*<(br|/?p)>\\s*");
53
54 /** Regular expression to match list tags and adjacent whitespace */
55 private static final Pattern htmlListPattern = Pattern.compile("\\s*<li>\\s*");
56
57 /** Regular expression to match any remaining html tags */
58 private static final Pattern htmlTagPattern = Pattern.compile("</?([^<]*)>");
59
60 /** Maximum length of a line in email body (in characters) */
61 public static final int EMAIL_LINE_WIDTH_MAX = 72;
62
63 // This class should not be instantiated, hence the private constructor
64 private HtmlToText()
65 {
66 }
67
68 /**
69 * Convert provided html string to plain text preserving the formatting
70 * as much as possible. Ensure line wrapping to 72 chars as default.
71 * NOTE: add support for more HTML tags here.
72 * For the present, convert <br>
73 * to '\n'
74 * convert
75 * <p>
76 * and
77 * </p>
78 * to '\n'
79 * convert <li>to "\n- "
80 *
81 * @throws NullPointerException
82 */
83 public static String htmlToPlainText(String html)
84 {
85
86 if (html == null)
87 {
88 throw new NullPointerException("Html parameter may not be null.");
89 }
90
91 // Clear any html indentation and incidental whitespace
92 String text = StringUtils.stripAndCollapse(html);
93
94 /*
95 * Replace <br> and <p> tags with new line characters.
96 * Replace <li> tags (HTML bullets) with dashes.
97 * Remove any remaining HTML tags not supported yet.
98 * Finally replace any HTML escape string with appropriate character
99 */
100 text = htmlNewlinePattern.matcher(text).replaceAll("\n");
101 text = htmlListPattern.matcher(text).replaceAll("\n- ");
102 text = htmlTagPattern.matcher(text).replaceAll("");
103 text = StringUtils.unescapeHTML(text, 0).trim();
104
105 /*
106 * Ensure no line of plain text is longer than default (72 chars)
107 * NOTE: Use String.split, NOT StringUtil.split, in order to preserve
108 * consecutive newline characters originating from <br> and <p> tags
109 */
110 return StringUtils.fixedWidth(text.split("\n"), EMAIL_LINE_WIDTH_MAX);
111 }
112 }

   
Visit the aagtl Website