 |
|
Home:
Perl Programming Help:
Beginner:
Re: [zhihe2] need help for perl web crawler:
Edit Log
|
|

7stud
Enthusiast
Oct 6, 2010, 11:40 AM
Views: 183
|
|
Re: [zhihe2] need help for perl web crawler
|
|
|
# Pull the "Mean Temperature" reading out of a fragment of weather-history
# HTML using a single regex with a named capture.
use strict;
use warnings;
use 5.010;

# Sample markup. The heredoc is single-quoted so nothing is interpolated.
my $string = <<'END_OF_HTML';
<p class="explanation"></p></div><div class="weatherhistory_results_datavalue temp"><h4>Mean Temperature</h4><p><span class="value">77.4</span>
<span class="units">°F</span>
END_OF_HTML

my $mean_temp_re = qr{
    <h4>Mean .+?            # the heading that labels the reading
    <span \s [^>]* >        # opening tag of the first span after it
    (?<reading> [^<]+ )     # the text up to the next tag
    <
}xms;

# Only touch the capture when the match succeeded; otherwise it would be
# undefined (or left over from an earlier match).
if ( $string =~ $mean_temp_re ) {
    say $+{reading};
}
else {
    die "Mean Temperature reading not found in the markup\n";
}

# --output:--
# 77.4
# Print every decimal number (digits, a period, digits) found in a fragment
# of weather-history HTML.
use strict;
use warnings;
use 5.010;

# Sample markup. The heredoc is single-quoted so nothing is interpolated.
my $string = <<'END_OF_HTML';
<p class="explanation"></p></div><div class="weatherhistory_results_datavalue temp"><h4>Mean Temperature</h4><p><span class="value">77.4</span>
<span class="units">°F</span>
<span class="value">89.4</span>
END_OF_HTML

# [.] is a one-character class holding a literal period. It is written as a
# class rather than \. so it stays readable under /x.
# /g in scalar context resumes after the previous match on each iteration.
while ( $string =~ / (\d+ [.] \d+) /xmsg ) {
    say $1;
}

# --output:--
# 77.4
# 89.4
# Fetch a weather-history page and print the text of every
# <span class="value"> element on it.
use strict;
use warnings;
use 5.010;

use LWP::Simple;
use HTML::TreeBuilder;

my $url = 'http://www.almanac.com/weather/history/zipcode/21218/2008-09-02';

# LWP::Simple's get() returns undef on failure, so stop here rather than
# hand undef to the parser.
my $html = get($url);
die "Could not fetch $url\n" if !defined $html;

my $tree = HTML::TreeBuilder->new_from_content($html);

# look_down returns, in list context, every element meeting all criteria.
my @spans = $tree->look_down(
    _tag  => 'span',
    class => 'value',
);

for my $span (@spans) {
    say $span->as_trimmed_text();
}

# Explicitly release the parse tree once we are done with it.
$tree->delete();

# --output:--
# 68.5
# 77.4
# 89.4
# 30.08
# 0.00
# 4.03
# 7.00
(This post was edited by 7stud on Oct 6, 2010, 2:36 PM)
|
|
|
Edit Log:
|
|
Post edited by 7stud
(Enthusiast) on Oct 6, 2010, 11:49 AM
|
|
Post edited by 7stud
(Enthusiast) on Oct 6, 2010, 11:54 AM
|
|
Post edited by 7stud
(Enthusiast) on Oct 6, 2010, 11:55 AM
|
|
Post edited by 7stud
(Enthusiast) on Oct 6, 2010, 2:32 PM
|
|
Post edited by 7stud
(Enthusiast) on Oct 6, 2010, 2:36 PM
|
|
|  |