/[aagtl_public1]/src/net/htmlparser/jericho/FormField.java
aagtl

Contents of /src/net/htmlparser/jericho/FormField.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations) (download)
Sun Aug 5 13:48:36 2012 UTC (11 years, 7 months ago) by zoffadmin
File size: 23107 byte(s)
initial import of aagtl source code
1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20
21 package net.htmlparser.jericho;
22
23 import java.util.*;
24
25 /**
26 * Represents a <em>field</em> in an HTML <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html">form</a>,
27 * a <em>field</em> being defined as the group of all {@linkplain FormControl form controls}
28 * having the same {@linkplain FormControl#getName() name}.
29 * <p>
30 * The {@link #getFormControls()} method can be used to obtain the collection of this field's constituent
31 * {@link FormControl} objects.
32 * <p>
33 * The {@link FormFields} class, which represents a collection of <code>FormField</code> objects, provides the highest level
34 * interface for dealing with form fields and controls. For the most common tasks it can be used directly without
35 * the need to work with its constituent <code>FormField</code> or {@link FormControl} objects.
36 * <p>
37 * The <code>FormField</code> class serves two main purposes:
38 * <ol>
39 * <li style="margin-bottom: 1.5em">
40 * Provide methods for the modification and retrieval of form control <a href="FormControl.html#SubmissionValue">submission values</a>
41 * while ensuring that the states of all the field's constituent form controls remain consistent with each other.
42 * <p>
43 * The methods available for this purpose are:<br />
44 * {@link #getValues() List getValues()}<br />
45 * {@link #clearValues() void clearValues()}<br />
46 * {@link #setValues(Collection) void setValues(Collection)}<br />
47 * {@link #setValue(String) boolean setValue(String)}<br />
48 * {@link #addValue(String) boolean addValue(String)}<br />
49 * <p>
50 * Although the {@link FormControl} class provides methods for directly modifying the submission values
51 * of individual form controls, it is generally recommended to use the interface provided by the {@link FormFields} class
52 * unless there is a specific requirement for the lower level functionality.
53 * The {@link FormFields} class contains convenience methods providing most of the functionality of the above methods,
54 * as well as some higher level functionality such as the ability to set the form
55 * <a href="FormControl.html#SubmissionValue">submission values</a> as a complete <a href="FormFields.html#FieldDataSet">field data set</a>
56 * using the {@link FormFields#setDataSet(Map)} method.
57 * <li><a name="DataStructureProperties"></a>
58 * Provide a means of determining the data structure of the field, allowing a server receiving a
59 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#submit-format">submitted</a>
60 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
61 * to interpret and store the data in an appropriate way.
62 * <p>
63 * The properties available for this purpose are:<br />
64 * {@link #allowsMultipleValues() boolean allowsMultipleValues()}<br />
65 * {@link #getUserValueCount() int getUserValueCount()}<br />
66 * {@link #getPredefinedValues() Collection getPredefinedValues()}<br />
67 * <p>
68 * The {@link FormFields#getColumnLabels()} and {@link FormFields#getColumnValues(Map)} methods utilise these properties
69 * to convert data from a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
70 * (represented as a <a href="FormFields.html#FieldDataSet">field data set</a>) into a simple array format,
71 * suitable for storage in a tabular format such as a database table or <code>.CSV</code> file.
72 * <p>
73 * The properties need only be utilised directly in the event that a
74 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> is to be converted
75 * from its <a href="FormFields.html#FieldDataSet">normal format</a> into some other type of data structure.
76 * </ol>
77 * A form field which allows user values normally consists of a single
78 * <a href="FormControl.html#UserValueControl">user value control</a>,
79 * such as a {@link FormControlType#TEXT TEXT} control.
80 * <p>
81 * When a form field consists of more than one control, these controls are normally all
82 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a> of the same
83 * {@linkplain FormControlType type}, such as {@link FormControlType#CHECKBOX CHECKBOX} controls.
84 * <p>
85 * Form fields consisting of more than one control do not necessarily return {@linkplain #allowsMultipleValues() multiple values}.
86 * A form field consisting of {@link FormControlType#CHECKBOX CHECKBOX} controls can return multiple values, whereas
87 * a form field consisting of {@link FormControlType#RADIO RADIO} controls returns at most one value.
88 * <p>
89 * The HTML author can disregard convention and mix all types of controls with the same name in the same form,
90 * or include multiple <a href="FormControl.html#UserValueControl">user value controls</a> of the same name.
91 * The evidence that such an unusual combination is present is when {@link #getUserValueCount()}<code>&gt;1</code>.
92 * <p>
93 * <code>FormField</code> instances are created automatically with the creation of a {@link FormFields} collection.
94 * <p>
95 * The case sensitivity of form field names is determined by the static
96 * {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property.
97 *
98 * @see FormFields
99 * @see FormControl
100 * @see FormControlType
101 */
102 public final class FormField {
103 private final String name;
104 private int userValueCount=0;
105 private boolean allowsMultipleValues=false;
106 private LinkedHashSet<String> predefinedValues=null; // String objects, null if none
107 private final LinkedHashSet<FormControl> formControls=new LinkedHashSet<FormControl>();
108 private transient FormControl firstFormControl=null; // this field is simply a cache for the getFirstFormControl() method
109 int columnIndex; // see FormFields.initColumns()
110
111 /** Constructor called from FormFields class. */
112 FormField(final String name) {
113 this.name=name;
114 }
115
116 /**
117 * Returns the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
118 * <p>
119 * If the static {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() isFormFieldNameCaseInsensitive()}
120 * property is set to <code>true</code>, the grouping of the controls by name is case insensitive
121 * and this method always returns the name in lower case.
122 * <p>
123 * Since a form field is simply a group of controls with the same name, the terms <i>control name</i> and
124 * <i>field name</i> are for the most part synonymous, with only a possible difference in case differentiating them.
125 *
126 * @return the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
127 * @see FormControl#getName()
128 */
129 public String getName() {
130 return name;
131 }
132
133 /**
134 * Returns a collection of all the constituent {@linkplain FormControl form controls} in this field.
135 * <p>
136 * An iterator over this collection returns the controls in the order of appearance in the source.
137 *
138 * @return a collection of all the constituent {@linkplain FormControl form controls} in this field.
139 * @see #getFormControl()
140 * @see #getFormControl(String predefinedValue)
141 */
142 public Collection<FormControl> getFormControls() {
143 return formControls;
144 }
145
146 /**
147 * Returns the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}.
148 * <p>
149 * Specifying a predefined value of <code>null</code> returns the first control without a predefined value.
150 *
151 * @param predefinedValue the predefined value of the control to be returned, or <code>null</code> to return the first control without a predefined value.
152 * @return the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}, or <code>null</code> if none exists.
153 * @see #getFormControl()
154 * @see #getFormControls()
155 */
156 public FormControl getFormControl(final String predefinedValue) {
157 if (predefinedValue==null) {
158 for (FormControl formControl : formControls) {
159 if (!formControl.getFormControlType().hasPredefinedValue()) return formControl;
160 if (formControl.getFormControlType().getElementName()!=HTMLElementName.SELECT && formControl.getPredefinedValue()==null) return formControl;
161 }
162 } else {
163 for (FormControl formControl : formControls) {
164 if (formControl.getFormControlType().getElementName()==HTMLElementName.SELECT) {
165 if (formControl.getPredefinedValues().contains(predefinedValue)) return formControl;
166 } else {
167 if (predefinedValue.equals(formControl.getPredefinedValue())) return formControl;
168 }
169 }
170 }
171 return null;
172 }
173
174 /**
175 * Returns the first {@link FormControl} from this field.
176 * @return the first {@link FormControl} from this field, guaranteed not <code>null</code>.
177 * @see #getFormControl(String predefinedValue)
178 * @see #getFormControls()
179 */
180 public FormControl getFormControl() {
181 return formControls.iterator().next();
182 }
183
184 /**
185 * Indicates whether the field allows multiple values.
186 * <p>
187 * Returns <code>false</code> in any one of the following circumstances:
188 * <ul>
189 * <li>The field consists of only one control (unless it is a
190 * {@linkplain FormControlType#SELECT_MULTIPLE multiple select} with more than one option)
191 * <li>The field consists entirely of {@linkplain FormControlType#RADIO radio buttons}
192 * <li>The field consists entirely of {@linkplain FormControlType#isSubmit() submit} buttons
193 * </ul>
194 * If none of these three conditions are met, the method returns <code>true</code>.
195 *
196 * @return <code>true</code> if the field allows multiple values, otherwise <code>false</code>.
197 */
198 public boolean allowsMultipleValues() {
199 return allowsMultipleValues;
200 }
201
202 /**
203 * Returns the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
204 * This should in most cases be either <code>0</code> or <code>1</code>.
205 * <p>
206 * A value of <code>0</code> indicates the field values consist only of
207 * {@linkplain #getPredefinedValues() predefined values}, which is the case when the field consists only of
208 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>.
209 * <p>
210 * A value of <code>1</code> indicates the field values consist of at most one value set by the user.
211 * It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed
212 * controls of different types with the same name, but any other values would consist only of
213 * {@linkplain #getPredefinedValues() predefined values}.
214 * <p>
215 * A value greater than <code>1</code> indicates that the HTML author has included more than one
216 * <a href="FormControl.html#UserValueControl">user value control</a> with the same name.
217 * This would nearly always indicate an unintentional error in the HTML source document,
218 * in which case your application can either log a warning that a poorly designed form has been encountered,
219 * or take special action to try to interpret the multiple user values that might be submitted.
220 *
221 * @return the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
222 */
223 public int getUserValueCount() {
224 return userValueCount;
225 }
226
227 /**
228 * Returns a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field.
229 * <p>
230 * All objects in the returned collection are of type <code>String</code>, with no <code>null</code> entries.
231 * <p>
232 * An interator over this collection returns the values in the order of appearance in the source document.
233 *
234 * @return a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field, or <code>null</code> if none.
235 * @see FormControl#getPredefinedValues()
236 */
237 public Collection<String> getPredefinedValues() {
238 if (predefinedValues==null) return Collections.emptySet();
239 return predefinedValues;
240 }
241
242 /**
243 * Returns a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance.
244 * <p>
245 * The term <i><a name="FieldSubmissionValues">field submission values</a></i> is used in this library to refer to the aggregate of all the
246 * <a href="FormControl.html#SubmissionValue">submission values</a> of a field's constituent {@linkplain #getFormControls() form controls}.
247 * <p>
248 * All objects in the returned list are of type <code>String</code>, with no <code>null</code> entries.
249 * <p>
250 * The list may contain duplicates if the this field has multiple controls with the same value.
251 *
252 * @return a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance, guaranteed not <code>null</code>.
253 */
254 public List<String> getValues() {
255 final List<String> values=new ArrayList<String>();
256 for (FormControl formControl : formControls) formControl.addValuesTo(values);
257 return values;
258 }
259
260 /**
261 * Clears the <a href="FormControl.html#SubmissionValue">submission values</a> of all the constituent {@linkplain #getFormControls() form controls} in this field.
262 * @see FormControl#clearValues()
263 */
264 public void clearValues() {
265 for (FormControl formControl : formControls) formControl.clearValues();
266 }
267
268 /**
269 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the specified values.
270 * <p>
271 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)} for each
272 * value in the specified collection.
273 * <p>
274 * The specified collection must not contain any <code>null</code> values.
275 *
276 * @param values the new <a href="#FieldSubmissionValues">field submission values</a> of this field.
277 * @see #addValue(String value)
278 */
279 public void setValues(final Collection<String> values) {
280 clearValues();
281 addValues(values);
282 }
283
284 /**
285 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the single specified value.
286 * <p>
287 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)}.
288 * <p>
289 * The return value indicates whether any of the constituent form controls "accepted" the value.
290 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
291 * <p>
292 * Specifying a <code>null</code> value is equivalent to calling {@link #clearValues()} alone, and always returns <code>true</code>.
293 * <p>
294 * See the {@link #addValue(String value)} method for more information.
295 *
296 * @param value the new <a href="#FieldSubmissionValues">field submission value</a> of this field, or <code>null</code> to {@linkplain #clearValues() clear} the field of all submission values.
297 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
298 * @see FormFields#setValue(String fieldName, String value)
299 */
300 public boolean setValue(final String value) {
301 clearValues();
302 return value!=null ? addValue(value) : true;
303 }
304
305 /**
306 * Adds the specified value to the <a href="#FieldSubmissionValues">field submission values</a> of this field.
307 * <p>
308 * This is achieved internally by attempting to {@linkplain FormControl#addValue(String) add the value} to every constituent
309 * {@linkplain #getFormControls() form control} until one "accepts" it.
310 * <p>
311 * The return value indicates whether any of the constituent form controls accepted the value.
312 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
313 * <p>
314 * In the unusual case that this field consists of multiple form controls, but not all of them are
315 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>, priority is given to the predefined value controls
316 * before attempting to add the value to the <a href="FormControl.html#UserValueControl">user value controls</a>.
317 *
318 * @param value the new <a href="#FieldSubmissionValues">field submission value</a> to add to this field, must not be <code>null</code>.
319 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
320 */
321 public boolean addValue(final String value) {
322 if (value==null) throw new IllegalArgumentException("value argument must not be null");
323 if (formControls.size()==1) return getFirstFormControl().addValue(value);
324 List<FormControl> userValueControls=null;
325 for (FormControl formControl : formControls) {
326 if (!formControl.getFormControlType().hasPredefinedValue()) {
327 // A user value control has been found, but is not the only control with this name.
328 // This shouldn't normally happen in a well designed form, but we will save the user value control
329 // for later and give all predefined value controls first opportunity to take the value.
330 if (userValueControls==null) userValueControls=new LinkedList<FormControl>();
331 userValueControls.add(formControl);
332 continue;
333 }
334 if (formControl.addValue(value)) return true; // return value of true from formControl.addValue(value) means the value was taken by the control
335 }
336 if (userValueControls==null) return false;
337 for (FormControl userFormControl : userValueControls) {
338 if (userFormControl.addValue(value)) return true;
339 }
340 return false;
341 }
342
343 /**
344 * Returns a string representation of this object useful for debugging purposes.
345 * @return a string representation of this object useful for debugging purposes.
346 */
347 public String getDebugInfo() {
348 final StringBuilder sb=new StringBuilder();
349 sb.append("Field: ").append(name).append(", UserValueCount=").append(userValueCount).append(", AllowsMultipleValues=").append(allowsMultipleValues);
350 if (predefinedValues!=null) {
351 for (String predefinedValue : predefinedValues) sb.append(Config.NewLine).append("PredefinedValue: ").append(predefinedValue);
352 }
353 for (FormControl formControl : formControls) sb.append(Config.NewLine).append("FormControl: ").append(formControl.getDebugInfo());
354 sb.append(Config.NewLine).append(Config.NewLine);
355 return sb.toString();
356 }
357
358 /**
359 * Returns a string representation of this object useful for debugging purposes.
360 * <p>
361 * This is equivalent to {@link #getDebugInfo()}.
362 *
363 * @return a string representation of this object useful for debugging purposes.
364 */
365 public String toString() {
366 return getDebugInfo();
367 }
368
369 void addValues(final Collection<String> values) {
370 if (values!=null) for (String value : values) addValue(value);
371 }
372
373 void addValues(final String[] values) {
374 if (values!=null) for (String value : values) addValue(value);
375 }
376
377 void addFormControl(final FormControl formControl, final String predefinedValue) {
378 // predefinedValue==null if we are adding a user value
379 if (predefinedValue==null) {
380 userValueCount++;
381 } else {
382 if (predefinedValues==null) predefinedValues=new LinkedHashSet<String>();
383 predefinedValues.add(predefinedValue);
384 }
385 formControls.add(formControl);
386 allowsMultipleValues=calculateAllowsMultipleValues(formControl);
387 }
388
389 private boolean calculateAllowsMultipleValues(final FormControl newFormControl) {
390 // false if only one control (unless it is a multiple select with more than one option),
391 // or all of the controls are radio buttons, or all of the controls are submit buttons
392 if (allowsMultipleValues || userValueCount>1) return true;
393 if (userValueCount==1) return predefinedValues!=null;
394 // at this stage we know userValueCount==0 && predefinedValues.size()>=1
395 if (predefinedValues.size()==1) return false;
396 final FormControlType newFormControlType=newFormControl.getFormControlType();
397 if (formControls.size()==1) return newFormControlType==FormControlType.SELECT_MULTIPLE;
398 // at this stage we know there are multiple predefined values in multiple controls.
399 // if all of the controls are radio buttons or all are submit buttons, allowsMultipleValues is false, otherwise true.
400 // checking only the first control and the new control is equivalent to checking them all because if they weren't all
401 // the same allowsMultipleValues would already be true.
402 final FormControlType firstFormControlType=getFirstFormControl().getFormControlType();
403 if (newFormControlType==FormControlType.RADIO && firstFormControlType==FormControlType.RADIO) return false;
404 if (newFormControlType.isSubmit() && firstFormControlType.isSubmit()) return false;
405 return true;
406 }
407
408 FormControl getFirstFormControl() {
409 // formControls must be ordered collection for this method to work.
410 // It has to return the first FormControl entered into the collection
411 // for the algorithm in calculateAllowsMultipleValues() to work.
412 if (firstFormControl==null) firstFormControl=formControls.iterator().next();
413 return firstFormControl;
414 }
415
416 /** only called from FormFields class */
417 void merge(final FormField formField) {
418 if (formField.userValueCount>userValueCount) userValueCount=formField.userValueCount;
419 allowsMultipleValues=allowsMultipleValues || formField.allowsMultipleValues;
420 if (predefinedValues==null) {
421 predefinedValues=formField.predefinedValues;
422 } else if (formField.predefinedValues!=null) {
423 for (String predefinedValue : predefinedValues) predefinedValues.add(predefinedValue);
424 }
425 for (FormControl formControl : formField.getFormControls()) formControls.add(formControl);
426 }
427 }
428

   
Visit the aagtl Website