/[aagtl_public1]/src/com/zoffcc/applications/aagtl/HTMLDownloader.java
aagtl

Diff of /src/com/zoffcc/applications/aagtl/HTMLDownloader.java

Parent Directory | Revision Log | View Patch

Revision 3 Revision 4
113 private static final Pattern patternLogged2In = Pattern.compile("<strong>\\W*Hello,[^<]*<a[^>]+>([^<]+)</a>[^<]*</strong>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); 113 private static final Pattern patternLogged2In = Pattern.compile("<strong>\\W*Hello,[^<]*<a[^>]+>([^<]+)</a>[^<]*</strong>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
114 private static final Pattern patternViewstateFieldCount = Pattern.compile("id=\"__VIEWSTATEFIELDCOUNT\"[^(value)]+value=\"(\\d+)\"[^>]+>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); 114 private static final Pattern patternViewstateFieldCount = Pattern.compile("id=\"__VIEWSTATEFIELDCOUNT\"[^(value)]+value=\"(\\d+)\"[^>]+>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
115 private static final Pattern patternViewstates = Pattern.compile("id=\"__VIEWSTATE(\\d*)\"[^(value)]+value=\"([^\"]+)\"[^>]+>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); 115 private static final Pattern patternViewstates = Pattern.compile("id=\"__VIEWSTATE(\\d*)\"[^(value)]+value=\"([^\"]+)\"[^>]+>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
116 private static final Pattern patternUserToken = Pattern.compile("userToken\\s*=\\s*'([^']+)'"); 116 private static final Pattern patternUserToken = Pattern.compile("userToken\\s*=\\s*'([^']+)'");
117 117
118 private static Segment GC_DOWNLOAD__s_;
119 private static List<GeocacheCoordinate> GC_DOWNLOAD__gc_list = new ArrayList<GeocacheCoordinate>();
120 private static int GC_DOWNLOAD__count_p = 0;
121 private static final int GC_DOWNLOAD__max_threads_concurrent = 5;
122
118 // patterns from c:geo opensource 123 // patterns from c:geo opensource
119 124
120 public HTMLDownloader(aagtl main) 125 public HTMLDownloader(aagtl main)
121 { 126 {
122 this.main_aagtl = main; 127 this.main_aagtl = main;
336 return ret; 341 return ret;
337 } 342 }
338 343
339 public get_geocaches_ret get_geocaches(Coordinate[] location, int count_p, int max_p, int rec_depth, Handler h, int zoom_level) 344 public get_geocaches_ret get_geocaches(Coordinate[] location, int count_p, int max_p, int rec_depth, Handler h, int zoom_level)
340 { 345 {
346 get_geocaches_ret r = null;
347
348 try
349 {
341 return get_geocaches_v3(location, count_p, max_p, rec_depth, h, zoom_level); 350 r = get_geocaches_v3(location, count_p, max_p, rec_depth, h, zoom_level);
351 }
352 catch (Exception e)
353 {
354 r = new get_geocaches_ret();
355 r.points = null;
356 r.count_p = 0;
357 }
358 return r;
342 } 359 }
343 360
344 public get_geocaches_ret get_geocaches_v3(Coordinate[] location, int count_p, int max_p, int rec_depth, Handler h, int zoom_level) 361 public get_geocaches_ret get_geocaches_v3(Coordinate[] location, int count_p, int max_p, int rec_depth, Handler h, int zoom_level)
345 { 362 {
346 this.main_aagtl.set_bar_slow(h, "get geocaches", "downloading ...", count_p, max_p, true); 363 this.main_aagtl.set_bar_slow(h, "get geocaches", "downloading ...", count_p, max_p, true);
386 r2.points = null; 403 r2.points = null;
387 404
388 Boolean cont = true; 405 Boolean cont = true;
389 Source source = null; 406 Source source = null;
390 407
391 List<GeocacheCoordinate> gc_list = new ArrayList<GeocacheCoordinate>(); 408 GC_DOWNLOAD__gc_list = new ArrayList<GeocacheCoordinate>();
392 count_p = 0; 409 count_p = 0;
393 max_p = 0; 410 max_p = 0;
394 while (cont) 411 while (cont)
395 { 412 {
396 source = new Source(the_page); 413 source = new Source(the_page);
412 Boolean disabled = false; 429 Boolean disabled = false;
413 List<? extends Segment> segments2 = (source.getFirstElement("class", "SearchResultsTable Table", false).getContent().getAllElements(HTMLElementName.TR)); 430 List<? extends Segment> segments2 = (source.getFirstElement("class", "SearchResultsTable Table", false).getContent().getAllElements(HTMLElementName.TR));
414 // displaySegments(segments2); 431 // displaySegments(segments2);
415 try 432 try
416 { 433 {
434 int count_list_elements = 0;
417 for (Segment s_ : segments2) 435 for (Segment s_ : segments2)
418 { 436 {
437 count_list_elements++;
438 }
439 //System.out.println("AAGTL:count=" + count_list_elements);
440 Thread tmp_thread_array[] = new Thread[count_list_elements];
441 int threads_running = 0;
442 int cur_thread = 0;
443 GC_DOWNLOAD__count_p = count_p;
444 for (Segment s_ : segments2)
445 {
446 GC_DOWNLOAD__s_ = s_;
447 // --------------------------------------------------------
448 // ---------------- download a single cache ---------------
449 // --------------------------------------------------------
450 final class MyRunnable implements Runnable
451 {
452 public Handler h_2;
453 public int max_p_2;
454
455 MyRunnable(List<GeocacheCoordinate> gc_list, Handler h, int max_p)
456 {
457 this.h_2 = h;
458 this.max_p_2 = max_p;
459 }
460
461 public void run()
462 {
463
464 String guid2 = "";
465 String gccode2 = "";
466 Boolean disabled2 = false;
467
419 guid = ""; 468 guid2 = "";
420 disabled = false; 469 disabled2 = false;
421 gccode = null; 470 gccode2 = null;
422 try 471 try
423 { 472 {
424 List<? extends Segment> segments3 = s_.getAllElements("class", "Merge", false); 473 List<? extends Segment> segments3 = GC_DOWNLOAD__s_.getAllElements("class", "Merge", false);
425 // displaySegments(segments2); 474 // displaySegments(segments2);
426 guid = segments3.get(0).getFirstElement(HTMLElementName.A).getAttributeValue("href"); 475 guid2 = segments3.get(0).getFirstElement(HTMLElementName.A).getAttributeValue("href");
427 guid = guid.split("guid=", 3)[1]; 476 guid2 = guid2.split("guid=", 3)[1];
428 //System.out.println("guid=:" + guid); 477 //System.out.println("guid=:" + guid2);
429 478
479 try
480 {
481 // <a href="/seek/cache_details.aspx?guid=d9dbf39a-e2e6-4640-b951-d1d6307b16bd" class="lnk Strike"><span>Cineasten sehen mehr</span></a>
482 if (segments3.get(1).getFirstElement(HTMLElementName.A).getAttributeValue("class").equalsIgnoreCase("lnk Strike"))
483 {
484 // System.out.println("disabled=:" + disabled);
485 disabled2 = true;
486 }
487 }
488 catch (Exception e3)
489 {
490 }
491
492 gccode2 = segments3.get(1).getFirstElement("class", "small", false).getTextExtractor().toString();
493 gccode2 = gccode2.split("\\|")[1].trim();
494 //System.out.println("gccode=:" + gccode2);
495 }
496 catch (Exception e2)
497 {
498 e2.printStackTrace();
499 }
500
501 if (gccode2 != null)
502 {
503 GeocacheCoordinate c__ = null;
504 c__ = new GeocacheCoordinate(0, 0, gccode2);
505 if (disabled2)
506 {
507 c__.status = GeocacheCoordinate.STATUS_DISABLED;
508 }
509
510 String url2 = "http://www.geocaching.com/seek/cdpf.aspx?guid=" + guid2;
511 //System.out.println("url=" + url2);
512
513 List<NameValuePair> values_list_2 = new ArrayList<NameValuePair>();
514 //values_list.add(new BasicNameValuePair("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)"));
515 //values_list.add(new BasicNameValuePair("Pragma", "no-cache"));
516 // bs = new ByteArrayOutputStream();
517 String the_page2 = get_reader_stream(url2, values_list_2, null, true);
518 c__ = CacheDownloader.__parse_cache_page_print(the_page2, c__);
519 if (c__ != null)
520 {
521 synchronized (GC_DOWNLOAD__gc_list)
522 {
523 GC_DOWNLOAD__gc_list.add(c__);
524 GC_DOWNLOAD__count_p = GC_DOWNLOAD__count_p + 1;
525 main_aagtl.set_bar_slow(h_2, "get geocaches", c__.title, GC_DOWNLOAD__count_p, max_p_2, true);
526 }
527 }
528 }
529
530 }
531 }
532 //
533 tmp_thread_array[cur_thread] = new Thread(new MyRunnable(GC_DOWNLOAD__gc_list, h, max_p));
534 tmp_thread_array[cur_thread].start();
535 System.out.println("++ start thread ++");
536 cur_thread++;
537 threads_running++;
538
539 if (threads_running > (GC_DOWNLOAD__max_threads_concurrent - 1))
540 {
430 try 541 try
431 { 542 {
432 // <a href="/seek/cache_details.aspx?guid=d9dbf39a-e2e6-4640-b951-d1d6307b16bd" class="lnk Strike"><span>Cineasten sehen mehr</span></a> 543 while (threads_running > 0)
433 if (segments3.get(1).getFirstElement(HTMLElementName.A).getAttributeValue("class").equalsIgnoreCase("lnk Strike"))
434 { 544 {
435 // System.out.println("disabled=:" + disabled); 545 System.out.println("** waiting for thread to finish **");
436 disabled = true; 546 tmp_thread_array[cur_thread - 1].join();
547 threads_running--;
437 } 548 }
438 } 549 }
439 catch (Exception e3) 550 catch (Exception e)
440 { 551 {
552 e.printStackTrace();
441 } 553 }
554 }
555 // --------------------------------------------------------
556 // ---------------- download a single cache ---------------
557 // --------------------------------------------------------
442 558
443 gccode = segments3.get(1).getFirstElement("class", "small", false).getTextExtractor().toString(); 559 } // -- end "for"-loop
444 gccode = gccode.split("\\|")[1].trim(); 560
445 //System.out.println("gccode=:" + gccode); 561 int i7 = 0;
562 for (i7 = 0; i7 < cur_thread; i7++)
563 {
564 try
446 } 565 {
566 tmp_thread_array[i7].join();
567 }
447 catch (Exception e2) 568 catch (Exception e)
448 { 569 {
449 e2.printStackTrace(); 570 e.printStackTrace();
450 }
451
452 if (gccode != null)
453 {
454 GeocacheCoordinate c__ = null;
455 c__ = new GeocacheCoordinate(0, 0, gccode);
456 if (disabled)
457 {
458 c__.status = GeocacheCoordinate.STATUS_DISABLED;
459 }
460
461 String url2 = "http://www.geocaching.com/seek/cdpf.aspx?guid=" + guid;
462 //System.out.println("url=" + url);
463
464 values_list = new ArrayList<NameValuePair>();
465 //values_list.add(new BasicNameValuePair("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)"));
466 //values_list.add(new BasicNameValuePair("Pragma", "no-cache"));
467 // bs = new ByteArrayOutputStream();
468 String the_page2 = get_reader_stream(url2, values_list, null, true);
469 c__ = CacheDownloader.__parse_cache_page_print(the_page2, c__);
470 if (c__ != null)
471 {
472 gc_list.add(c__);
473 count_p = count_p + 1;
474 this.main_aagtl.set_bar_slow(h, "get geocaches", c__.title, count_p, max_p, true);
475 }
476 } 571 }
477 } 572 }
573
574 count_p = GC_DOWNLOAD__count_p;
478 } 575 }
479 catch (Exception e) 576 catch (Exception e)
480 { 577 {
481 e.printStackTrace(); 578 e.printStackTrace();
482 } 579 }
523 // ----------- check for paging ---------------- 620 // ----------- check for paging ----------------
524 // ----------- and run through pages ----------- 621 // ----------- and run through pages -----------
525 } 622 }
526 623
527 int jk; 624 int jk;
528 r2.count_p = gc_list.size(); 625 r2.count_p = GC_DOWNLOAD__gc_list.size();
529 r2.points = new GeocacheCoordinate[gc_list.size()]; 626 r2.points = new GeocacheCoordinate[GC_DOWNLOAD__gc_list.size()];
530 627
531 for (jk = 0; jk < gc_list.size(); jk++) 628 for (jk = 0; jk < GC_DOWNLOAD__gc_list.size(); jk++)
532 { 629 {
533 r2.points[jk] = gc_list.get(jk); 630 r2.points[jk] = GC_DOWNLOAD__gc_list.get(jk);
534 } 631 }
535 // gc_list.clear(); 632 // gc_list.clear();
536 633
537 return r2; 634 return r2;
538 } 635 }
1145 } 1242 }
1146 1243
1147 public boolean login() 1244 public boolean login()
1148 { 1245 {
1149 // System.out.println("--L--- LOGIN START -----"); 1246 // System.out.println("--L--- LOGIN START -----");
1150 1247
1151 String login_url = "https://www.geocaching.com/login/default.aspx"; 1248 String login_url = "https://www.geocaching.com/login/default.aspx";
1152 1249
1153 DefaultHttpClient client2 = null; 1250 DefaultHttpClient client2 = null;
1154 HttpHost proxy = null; 1251 HttpHost proxy = null;
1155 1252
1657 this_cookie.setDomain(c_domain); 1754 this_cookie.setDomain(c_domain);
1658 this_cookie.setPath(c_path); 1755 this_cookie.setPath(c_path);
1659 // System.out.println("created cookie: ->" + 1756 // System.out.println("created cookie: ->" +
1660 // String.valueOf(this_cookie)); 1757 // String.valueOf(this_cookie));
1661 1758
1662 this.cookie_jar.addCookie(this_cookie); 1759 HTMLDownloader.cookie_jar.addCookie(this_cookie);
1663 1760
1664 } 1761 }
1665 } 1762 }
1666 // single cookie 1763 // single cookie
1667 else 1764 else
1706 this_cookie.setDomain(c_domain); 1803 this_cookie.setDomain(c_domain);
1707 this_cookie.setPath(c_path); 1804 this_cookie.setPath(c_path);
1708 // System.out.println("created cookie: ->" + 1805 // System.out.println("created cookie: ->" +
1709 // String.valueOf(this_cookie)); 1806 // String.valueOf(this_cookie));
1710 1807
1711 this.cookie_jar.addCookie(this_cookie); 1808 HTMLDownloader.cookie_jar.addCookie(this_cookie);
1712 } 1809 }
1713 } 1810 }
1714 1811
1715 return; 1812 return;
1716 } 1813 }

Legend:
Removed from v.3
Changed lines
Added in v.4

   
Visit the aagtl Website