// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
20 |
|
21 |
package net.htmlparser.jericho;
|
22 |
|
23 |
import java.util.*;
|
24 |
import java.io.*;
|
25 |
import java.net.*;
|
26 |
|
/**
 * Compacts HTML source by removing all unnecessary white space.
 * <p>
 * Use one of the following methods to obtain the output:
 * <ul>
 *  <li>{@link #writeTo(Writer)}</li>
 *  <li>{@link #appendTo(Appendable)}</li>
 *  <li>{@link #toString()}</li>
 *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
 * </ul>
 * <p>
 * The output text is functionally equivalent to the original source and should be rendered identically.
 * <p>
 * Compacting an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
 */
public final class SourceCompactor implements CharStreamSource {
|
43 |
private final Segment segment;
|
44 |
private String newLine=null;
|
45 |
|
46 |
/**
|
47 |
* Constructs a new <code>SourceCompactor</code> based on the specified {@link Segment}.
|
48 |
* @param segment the segment containing the HTML to be compacted.
|
49 |
*/
|
50 |
public SourceCompactor(final Segment segment) {
|
51 |
this.segment=segment;
|
52 |
}
|
53 |
|
54 |
// Documentation inherited from CharStreamSource
|
55 |
public void writeTo(final Writer writer) throws IOException {
|
56 |
appendTo(writer);
|
57 |
writer.flush();
|
58 |
}
|
59 |
|
60 |
// Documentation inherited from CharStreamSource
|
61 |
public void appendTo(final Appendable appendable) throws IOException {
|
62 |
new SourceFormatter(segment).setTidyTags(true).setNewLine(newLine).setRemoveLineBreaks(true).appendTo(appendable);
|
63 |
}
|
64 |
|
65 |
// Documentation inherited from CharStreamSource
|
66 |
public long getEstimatedMaximumOutputLength() {
|
67 |
return segment.length();
|
68 |
}
|
69 |
|
70 |
// Documentation inherited from CharStreamSource
|
71 |
public String toString() {
|
72 |
return CharStreamSourceUtil.toString(this);
|
73 |
}
|
74 |
|
75 |
/**
|
76 |
* Sets the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
|
77 |
* <p>
|
78 |
* The default is to use the same new line string as is used in the source document, which is determined via the {@link Source#getNewLine()} method.
|
79 |
* If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
|
80 |
* or using the value from the static {@link Config#NewLine} property.
|
81 |
* <p>
|
82 |
* Specifying a <code>null</code> argument resets the property to its default value, which is to use the same new line string as is used in the source document.
|
83 |
*
|
84 |
* @param newLine the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output, may be <code>null</code>.
|
85 |
* @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement.
|
86 |
* @see #getNewLine()
|
87 |
*/
|
88 |
public SourceCompactor setNewLine(final String newLine) {
|
89 |
this.newLine=newLine;
|
90 |
return this;
|
91 |
}
|
92 |
|
93 |
/**
|
94 |
* Returns the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
|
95 |
* <p>
|
96 |
* See the {@link #setNewLine(String)} method for a full description of this property.
|
97 |
*
|
98 |
* @return the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
|
99 |
*/
|
100 |
public String getNewLine() {
|
101 |
if (newLine==null) newLine=segment.source.getBestGuessNewLine();
|
102 |
return newLine;
|
103 |
}
|
104 |
|
105 |
}
|