# exnHandler - pull headline/story records out of a chunk of HTML text.
#
# Argument:
#   the HTML text to scan (taken with shift).
#
# Returns:
#   a list of hash references. Records built in the "graphic" branch
#   (keys: headline, graphic, link, date, blurb, alt) come first, followed
#   by the records built in the fallback branch (keys: headline, link,
#   blurb).
#
# The text is consumed destructively: every loop iteration removes one
# match ending in "</td>" from the working copy and classifies it.
#
# NOTE(review): several regex literals in this routine look truncated
# (the main loop pattern, the elsif test, the split pattern and the item
# match). They are flagged individually below; recover them from the
# original source before relying on this code.
sub exnHandler
{
# $html is not declared with "my" in this sub, so unless it is declared
# elsewhere in the file it is a package variable.
$html = shift;
# @rValue collects the records with a graphic, @rValue2 the link-only ones.
my( @rValue ) = ();
my( @rValue2 ) = ();
# $headline and $date are set by the "bgcolor" branch and reused by every
# record pushed afterwards, until the next "bgcolor" match overwrites them.
my ( $headline, $date, $blurb, $graphic, $link, $alt );
# Remove one match ending in "</td>" per iteration (case-insensitive, "."
# also matches newlines). The body uses $1 and $2, but only one capture
# group is visible in the pattern as shown.
# NOTE(review): the start of this pattern appears to be missing - confirm.
while( $html =~ s/\
(.*?)\<\/td\>//si )
{
# Like $html, $tableData is not declared with "my" here.
$tableData = $2;
# Case 1: the first capture mentions "bgcolor" -> headline/date text.
if( $1 =~ /bgcolor/i )
{
# Strip anything that looks like a tag, then line breaks.
$tableData =~ s/\<.+?\>//gs;
$tableData =~ s/(\n|\r)//gs;
# NOTE(review): as shown this removes the character following the
# backslash (a blank); presumably an HTML entity stood here - confirm.
$tableData =~ s/\ //gs;
#print "Possible Headline: $tableData\n";
# Headline is the leading run of characters up to the first ";" (or
# newline); the date is whatever follows, with all ";" removed.
if( $tableData =~ /([^;\n]+)(.*)$/ ) {
$headline = $1;
$date = $2;
$date =~ s/;//gs;
#print "\nHeadline: $headline\n";
#print "Date: $date\n";
}
}
# Case 2: story with link and image. $1, $2, $4 and $' used below come
# from this match when it succeeds.
# NOTE(review): as written this only tests for a literal backslash and has
# no capture groups; the real pattern appears to have been lost - confirm.
elsif ( $2 =~ /\\/i )
{
#print "Ref: $1\nImg: $2\n";
$link = $1;
$graphic = $2;
$alt = $4;
#print "alt: $alt\n";
# The blurb is the text after the match, with tags, line breaks, the
# character after the backslash (see note above) and the literal
# "(Full Story)" marker removed.
$blurb = $';
$blurb =~ s/\<.+?\>//gs;
$blurb =~ s/(\n|\r)//gs;
$blurb =~ s/\ //gs;
$blurb =~ s/\(Full Story\)//gs;
#print "Content: $blurb\n";
push @rValue, { 'headline' => $headline,
'graphic' => $graphic,
'link' => $link,
'date' => $date,
'blurb' => $blurb,
'alt' => $alt
}
}
# Case 3: anything else is split into items, each of which may yield a
# link-only record. Items are taken with pop, i.e. from the last piece to
# the first, and the loop stops at the first item that is false (for
# example an empty string).
# NOTE(review): the split pattern below has an escaped delimiter and no
# visible terminator on its line, and the item match further down is an
# empty pattern although $1 is read after it; both look truncated -
# confirm against the original source.
else
{
my @items = split /\/, $tableData;
my $item;
#print "\n";
while( $item = pop @items )
{
#print "item: $item\n";
if( $item =~ // )
{
$link = $1;
$item = $';
$item =~ s/\<.+?\>//gs;
$blurb = $item;
# Condition the hell out of the result, no newlines, no leading
# or trailing spaces, no semi colons, no  , no double spaces.
$blurb =~ s/(\n|\r)//gs;
$blurb =~ s/\ //gs;
$blurb =~ s/\;//gs;
$blurb =~ s/^\s*(.*?)\s*$/$1/;
$blurb =~ s/\s\s/ /g;
#print "Blurb: $blurb\n";
push @rValue2, { 'headline' => $headline,
'link' => $link,
'blurb' => $blurb
}
}
}
}
}
#print "\n\n";
}
# Link-only records are appended after the records that carry a graphic.
push @rValue, @rValue2;
return @rValue;
}
# Hand a true value back to whatever loads this file (require/do expect one).
return 1;
|