.*?<\/FONT><\/A>: )?
(.*?)<\/b> # title
.*?<\/table>.*?src=\" #
(.*?)\" # icon
.*? #
Posted\s+by\s+ #
(.*?)\s+ #poster
on\s+(.*?)<\/b> # date
.*?(.*?)<\/b>.*? # department
#
(.*?).*? # description
\(.*?(.*?)<\/a>.*?of\s+(.*?)<\/a> # comments
.*?<\/p> # end of this story
/gixs
)
{
$name = $2;
$icon = $3;
$poster = $4;
$date = $5;
$department = $6;
$description = $7;
$url = $8;
$cmntsGood = $9;
$cmntsAll = $10;
$comments = $cmntsGood . " of " . $cmntsAll;
# But, irritatingly, $name could contain a link. Let's remove it and
# stuff it into a different field in the return hash.
if ($name =~ /(.*?)<\/font>.*?<\/a>.*?:\s*(.*)/si;
# print STDERR "name = ($name)\n";
# print STDERR "cat = ($category)\n";
# print STDERR "url = ($categoryUrl)\n";
# exit;
}
# clean up the whacked out // crap at the front of each thing likely to
# have such foolishness.
$icon =~ s/\/\/[a-z]*.?slashdot\.org//si;
$url =~ s/\/\/[a-z]*.?slashdot\.org//si;
# jjmac: (2003.01.24) this is broken for categories like
# books.slashdot.org. Let's look at it later.
$categoryUrl =~ s/\/\/slashdot\.org//si;
# print STDERR "category = $category\n";
# print STDERR "catUrl = $categoryUrl\n";
# print STDERR "name = $name\n";
# print STDERR "icon = $icon\n";
# print STDERR "poster = $poster\n";
# print STDERR "date = $date\n";
# print STDERR "dept = $department\n";
# print STDERR "url = $url\n";
# print STDERR "comments = $comments\n";
push @rValue, {
'name' => $name,
'poster' => $poster,
'date' => $date,
'department' => $department,
'description' => $description,
'url' => $url,
'icon' => $icon,
'comments' => $comments,
'category' => $category,
'catUrl' => $categoryUrl
};
# clean up after ourselves.
undef($name);
undef($icon);
undef($poster);
undef($date);
undef($department);
undef($description);
undef($url);
undef($comments);
undef($cmntsAll);
undef($cmntsGood);
undef($category);
undef($categoryUrl);
$count++;
if( $count > 20 )
{
return @rValue;
}
}
return @rValue;
}
# Hand a true value back to whatever loaded this file.  A file-scope
# `return` is only legal while the file is being evaluated by
# `require`/`do FILE`; executed directly as a script, this statement would
# die with "Can't return outside a subroutine".
# NOTE(review): presumably this file is only ever pulled in via require/do
# -- confirm before running it standalone or replacing this with a bare `1;`.
return 1;
# vi: set ts=3 sw=3 nowrap expandtab: