// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
20 |
|
21 |
package net.htmlparser.jericho;
|
22 |
|
23 |
import java.util.*;
|
24 |
import java.io.*;
|
25 |
|
26 |
/**
|
27 |
* Contains miscellaneous utility methods not directly associated with the HTML Parser library.
|
28 |
*/
|
29 |
public final class Util {
|
30 |
private static final int BUFFER_SIZE=2048;
|
31 |
private static final String CSVNewLine=System.getProperty("line.separator");
|
32 |
|
33 |
private Util() {}
|
34 |
|
35 |
/**
|
36 |
* Returns the text loaded from the specified <code>Reader</code> as a string.
|
37 |
* <p>
|
38 |
* If a <code>null</code> argument is supplied to this method, an empty string is returned.
|
39 |
* <p>
|
40 |
* To load text from an <code>InputStream</code>, use <code>getString(new InputStreamReader(inputStream,encoding))</code>.
|
41 |
*
|
42 |
* @param reader the <code>java.io.Reader</code> from which to load the text.
|
43 |
* @return the text loaded from the specified <code>java.io.Reader</code> as a string.
|
44 |
* @throws java.io.IOException if an I/O error occurs.
|
45 |
*/
|
46 |
public static String getString(final Reader reader) throws IOException {
|
47 |
if (reader==null) return "";
|
48 |
try {
|
49 |
int charsRead;
|
50 |
final char[] copyBuffer=new char[BUFFER_SIZE];
|
51 |
final StringBuilder sb=new StringBuilder();
|
52 |
while ((charsRead=reader.read(copyBuffer,0,BUFFER_SIZE))!=-1)
|
53 |
sb.append(copyBuffer,0,charsRead);
|
54 |
return sb.toString();
|
55 |
} finally {
|
56 |
reader.close();
|
57 |
}
|
58 |
}
|
59 |
|
60 |
/**
|
61 |
* Outputs the specified array of strings to the specified <code>Writer</code> in the format of a line for a CSV file.
|
62 |
* <p>
|
63 |
* "CSV" stands for <i>Comma Separated Values</i>.
|
64 |
* There is no formal specification for a CSV file, so there is significant variation in
|
65 |
* the way different applications handle issues like the encoding of different data types and special characters.
|
66 |
* <p>
|
67 |
* Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of
|
68 |
* field values separated by commas.
|
69 |
* Each record in the file should contain the same number of field values, with the values at each position representing the same
|
70 |
* type of data in all the records. In this way the file can also be divided into columns, often with the first line of the
|
71 |
* file containing the column labels.
|
72 |
* <p>
|
73 |
* Columns can have different data types such as text, numeric, date / time and boolean.
|
74 |
* A text value is often delimited with single (<code>'</code>) or double-quotes (<code>"</code>),
|
75 |
* especially if the value contains a comma, line feed, or other special character that is significant to the syntax.
|
76 |
* Encoding techniques for including quote characters themselves in text values vary widely.
|
77 |
* Values of other types are generally unquoted to distinguish them from text values.
|
78 |
* <p>
|
79 |
* This method produces output that is readable by MS-Excel, conforming to the following rules:
|
80 |
* <p>
|
81 |
* <ul>
|
82 |
* <li>All values are considered to be of type text, except for the static constants {@link Config#ColumnValueTrue}
|
83 |
* and {@link Config#ColumnValueFalse}, representing the boolean values <code>true</code> and <code>false</code> respectively.
|
84 |
* <li>All text values are enclosed in double-quotes.
|
85 |
* <li>Double-quote characters contained in text values are encoded using two consecutive double-quotes (<code>""</code>).
|
86 |
* <li><code>null</code> values are represented as empty fields.
|
87 |
* <li>The end of each record is represented by a carriage-return / line-feed (CR/LF) pair.
|
88 |
* <li>Line breaks inside text values are represented by a single line feed (LF) character.
|
89 |
* </ul>
|
90 |
*
|
91 |
* @param writer the destination <code>java.io.Writer</code> for the output.
|
92 |
* @throws java.io.IOException if an I/O error occurs.
|
93 |
* @see FormFields#getColumnLabels()
|
94 |
* @see FormFields#getColumnValues(Map)
|
95 |
*/
|
96 |
public static void outputCSVLine(final Writer writer, final String[] values) throws IOException {
|
97 |
for (int i=0; i<values.length;) {
|
98 |
final String value=values[i];
|
99 |
if (value!=null) {
|
100 |
if (value==Config.ColumnValueTrue || value==Config.ColumnValueFalse) {
|
101 |
writer.write(value); // assumes neither ColumnTrue or ColumnFalse contain double quotes.
|
102 |
} else {
|
103 |
writer.write('"');
|
104 |
outputValueEscapeQuotes(writer,value);
|
105 |
writer.write('"');
|
106 |
}
|
107 |
}
|
108 |
if (++i!=values.length) writer.write(',');
|
109 |
}
|
110 |
writer.write(CSVNewLine);
|
111 |
}
|
112 |
|
113 |
private static void outputValueEscapeQuotes(final Writer writer, final String text) throws IOException {
|
114 |
for (int i=0; i<text.length(); i++) {
|
115 |
final char ch=text.charAt(i);
|
116 |
writer.write(ch);
|
117 |
if (ch=='"') writer.write(ch);
|
118 |
}
|
119 |
}
|
120 |
|
121 |
static char[] getConcatenatedCharArray(final String string1, final String string2) {
|
122 |
final char[] charArray=new char[string1.length()+string2.length()];
|
123 |
string1.getChars(0,string1.length(),charArray,0);
|
124 |
string2.getChars(0,string2.length(),charArray,string1.length());
|
125 |
return charArray;
|
126 |
}
|
127 |
}
|