#!/usr/bin/perl
# Butchered up from:
# http://search.cpan.org/~gaas/HTML-Parser-3.69/lib/HTML/LinkExtor.pm
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
# Links collected by callback() while the document is being parsed.
my @params = ();
# Page to scan: the first command-line argument if given, otherwise the
# sample forum URL below.
my $url = @ARGV ? $ARGV[0] : "http://perlguru.com/gforum.cgi?post=59493;sb=post_latest_reply;so=ASC;forum_view=forum_view_collapsed;;page=unread#unread"; # for instance
my $ua = LWP::UserAgent->new();
# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url)
my $p = HTML::LinkExtor->new(\&callback);
# Request the document and feed each chunk to the parser as it arrives
my $res = $ua->request(HTTP::Request->new(GET => $url),
    sub { $p->parse($_[0]) });
# Signal end of document so the parser flushes anything it still has buffered
$p->eof;
# A failed fetch would otherwise just print an empty line; report it instead
die "GET $url failed: " . $res->status_line . "\n" unless $res->is_success;
# Expand all collected link URLs to absolute ones, now that the base is known
my $base = $res->base;
@params = map { url($_, $base)->abs } @params;
# Print them out, one per line
print join("\n", @params), "\n";
# Set up a callback that collects links with parameters
sub callback {
    # Link callback registered with HTML::LinkExtor.  Receives the tag name
    # followed by attribute => URL pairs, and appends to @params the href of
    # every <a> tag whose URL contains a "?" later followed by an "=".
    # Anything else is ignored.
    my ($tag, %links) = @_;
    return if $tag ne "a";

    # Only the href attribute is of interest, so look it up directly
    # instead of walking every key of the attribute hash.
    my $href = $links{href};
    return unless defined $href;

    push @params, $href if $href =~ /\?.*=/;
    return;
}