/[aagtl_public1]/src/net/htmlparser/jericho/SourceCompactor.java
aagtl

Contents of /src/net/htmlparser/jericho/SourceCompactor.java

Parent Directory | Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 8 months ago) by zoffadmin
File size: 4363 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24 import java.io.*;
25 import java.net.*;
26
27 /**
28 * Compacts HTML source by removing all unnecessary white space.
29 * <p>
30 * Use one of the following methods to obtain the output:
31 * <ul>
32 * <li>{@link #writeTo(Writer)}</li>
33 * <li>{@link #appendTo(Appendable)}</li>
34 * <li>{@link #toString()}</li>
35 * <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
36 * </ul>
37 * <p>
38 * The output text is functionally equivalent to the original source and should be rendered identically.
39 * <p>
40 * Compacting an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
41 */
42 public final class SourceCompactor implements CharStreamSource {
43 private final Segment segment;
44 private String newLine=null;
45
46 /**
47 * Constructs a new <code>SourceCompactor</code> based on the specified {@link Segment}.
48 * @param segment the segment containing the HTML to be compacted.
49 */
50 public SourceCompactor(final Segment segment) {
51 this.segment=segment;
52 }
53
54 // Documentation inherited from CharStreamSource
55 public void writeTo(final Writer writer) throws IOException {
56 appendTo(writer);
57 writer.flush();
58 }
59
60 // Documentation inherited from CharStreamSource
61 public void appendTo(final Appendable appendable) throws IOException {
62 new SourceFormatter(segment).setTidyTags(true).setNewLine(newLine).setRemoveLineBreaks(true).appendTo(appendable);
63 }
64
65 // Documentation inherited from CharStreamSource
66 public long getEstimatedMaximumOutputLength() {
67 return segment.length();
68 }
69
70 // Documentation inherited from CharStreamSource
71 public String toString() {
72 return CharStreamSourceUtil.toString(this);
73 }
74
75 /**
76 * Sets the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
77 * <p>
78 * The default is to use the same new line string as is used in the source document, which is determined via the {@link Source#getNewLine()} method.
79 * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
80 * or using the value from the static {@link Config#NewLine} property.
81 * <p>
82 * Specifying a <code>null</code> argument resets the property to its default value, which is to use the same new line string as is used in the source document.
83 *
84 * @param newLine the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output, may be <code>null</code>.
85 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement.
86 * @see #getNewLine()
87 */
88 public SourceCompactor setNewLine(final String newLine) {
89 this.newLine=newLine;
90 return this;
91 }
92
93 /**
94 * Returns the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
95 * <p>
96 * See the {@link #setNewLine(String)} method for a full description of this property.
97 *
98 * @return the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
99 */
100 public String getNewLine() {
101 if (newLine==null) newLine=segment.source.getBestGuessNewLine();
102 return newLine;
103 }
104
105 }

   
Visit the aagtl Website