/[aagtl_public1]/src/net/htmlparser/jericho/NodeIterator.java
aagtl

Contents of /src/net/htmlparser/jericho/NodeIterator.java

Parent Directory | Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 8 months ago) by zoffadmin
File size: 3865 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Iterates over the "nodes" in a segment.
27 * <p>
28 * Every object returned is a Segment. All tags found with the Segment.getAllTags() method are included, as well as segments representing the plain text in between them,
29 * and character references within the plain text are also included as separate nodes.
30 */
31 class NodeIterator implements Iterator<Segment> {
32 private final Segment segment;
33 private final Source source;
34 private int pos;
35 private Tag nextTag;
36 private CharacterReference characterReferenceAtCurrentPosition=null;
37
38 private final boolean legacyIteratorCompatabilityMode=Source.LegacyIteratorCompatabilityMode;
39
40 public NodeIterator(final Segment segment) {
41 this.segment=segment;
42 source=segment.source;
43 if (segment==source) source.fullSequentialParse();
44 pos=segment.begin;
45 nextTag=source.getNextTag(pos);
46 if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
47 }
48
49 public boolean hasNext() {
50 return pos<segment.end || nextTag!=null;
51 }
52
53 public Segment next() {
54 final int oldPos=pos;
55 if (nextTag!=null) {
56 if (oldPos<nextTag.begin) return nextNonTagSegment(oldPos,nextTag.begin);
57 final Tag tag=nextTag;
58 nextTag=nextTag.getNextTag();
59 if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
60 if (pos<tag.end) pos=tag.end;
61 return tag;
62 } else {
63 if (!hasNext()) throw new NoSuchElementException();
64 return nextNonTagSegment(oldPos,segment.end);
65 }
66 }
67
68 private Segment nextNonTagSegment(final int begin, final int end) {
69 if (!legacyIteratorCompatabilityMode) {
70 final CharacterReference characterReference=characterReferenceAtCurrentPosition;
71 if (characterReference!=null) {
72 characterReferenceAtCurrentPosition=null;
73 pos=characterReference.end;
74 return characterReference;
75 }
76 final ParseText parseText=source.getParseText();
77 int potentialCharacterReferenceBegin=parseText.indexOf('&',begin,end);
78 while (potentialCharacterReferenceBegin!=-1) {
79 final CharacterReference nextCharacterReference=CharacterReference.construct(source,potentialCharacterReferenceBegin,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
80 if (nextCharacterReference!=null) {
81 if (potentialCharacterReferenceBegin==begin) {
82 pos=nextCharacterReference.end;
83 return nextCharacterReference;
84 } else {
85 pos=nextCharacterReference.begin;
86 characterReferenceAtCurrentPosition=nextCharacterReference;
87 return new Segment(source,begin,pos);
88 }
89 }
90 potentialCharacterReferenceBegin=parseText.indexOf('&',potentialCharacterReferenceBegin+1,end);
91 }
92 }
93 return new Segment(source,begin,pos=end);
94 }
95
96 public void skipToPos(final int pos) {
97 if (pos<this.pos) return; // can't go backwards
98 this.pos=pos;
99 nextTag=source.getNextTag(pos);
100 }
101
102 public void remove() {
103 throw new UnsupportedOperationException();
104 }
105 }

   
Visit the aagtl Website