# Contents of the file /eztv_scripts/eztv.pl (download eztv.pl)
#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use HTML::Entities;
use Date::Manip;
use URI::Escape;
# Hand the "TZ=EST" configuration string to Date::Manip before any of the
# ParseDate/UnixDate calls made further down.
# NOTE(review): presumably this pins date handling to US Eastern time; newer
# Date::Manip releases may treat the TZ setting differently -- TODO confirm
# against the installed version.
Date_Init("TZ=EST");
=for suggestions arnaud
gestion erreurs pour:
* LWP::UserAgent->new
* FCGI::Request();
* CGI::Simple->new();
* $browser->get
gestion d'erreurs = log + die/warn selon
Même en cas d'échec (pas de $reponse->success()), tu renseignes $data{$id}{content}
avec $content...
$data{$id} = {'content' => $content, 'date' => time()};
11/09/2011: il ne manque pas un ORDER BY (date_fetch) desc et AND LIMIT machin ici ?
my $sth = $dbi->prepare("select * from eztv where id= ? and now() - date_fetch < interval '30 minute'");
12/09/2011: tfe: non, cela ne correspond qu'à un seul résultat. À chaque insertion, on supprime l'élément antérieur s'il existe.
=cut
# --- HTTP client and script-wide settings ----------------------------------

# LWP::UserAgent is loaded explicitly: the file only imports LWP::Simple, and
# relying on that module to pull in LWP::UserAgent as a side effect is fragile.
use LWP::UserAgent;

# Shared HTTP client used for every request; each request times out after
# 30 seconds.
my $browser = LWP::UserAgent->new;
$browser->timeout(30);

# Number of show-list entries whose fetch failed so far. The main loop stops
# issuing requests once this reaches 100.
my $errors=0;

# Show list read by the main loop: one "id:name" entry per line.
my $cache="/home/tfe/public_html/cache/eztv/shows.txt";
#my $cache="/home/tfe/bin/shows.txt";

# Directory prefix (with trailing slash) under which the generated .html and
# .xml files are written.
my $file_path="/home/tfe/public_html/cache/eztv/";
#my $file_path="/home/tfe/bin/";

# Search endpoint; receives a POST with a "SearchString" field.
my $url = 'http://eztv.it/search/';
# Listing page template; "%num%" is replaced by the page number.
my $main_url = 'http://eztv.it/page_%num%';
# Site root, prefixed to news links in the generated HTML.
my $url_news = 'http://eztv.it/';
# News page, fetched for the "tvnews" entry and prefixed to news links in the
# generated RSS.
my $tvnews_url = 'http://eztv.it/tvnews/';
# Lookup table from English month name ("January" .. "December") to its
# zero-padded two-digit month number ("01" .. "12").
my %month;
@month{qw(
    January February March     April   May      June
    July    August   September October November December
)} = map { sprintf '%02d', $_ } 1 .. 12;
# Open the run log, truncating the previous run's content. The bareword
# handle MYCACHE is kept because the rest of the script prints to it.
# If the file cannot be opened, log lines are redirected to STDERR instead of
# being silently lost on an unopened handle.
my $log_path = '/home/tfe/bin/log';
unless (open(MYCACHE, '>', $log_path)) {
    warn "Cannot open log file $log_path: $!; logging to STDERR instead\n";
    open(MYCACHE, '>&', \*STDERR)
        or die "Cannot duplicate STDERR for logging: $!\n";
}
# Header of the run: two blank lines, then the local start time.
print MYCACHE "\n\n", scalar localtime(), "\n";
# ---------------------------------------------------------------------------
# Feed generation.
#
# Reads the show list ($cache, one "id:name" entry per line). For each entry
# it fetches the matching eztv.it page(s) and writes an HTML page and an RSS
# feed under $file_path, named after the id and, when usable, after the name
# and its lower-cased form. Progress goes to the MYCACHE log handle and to
# STDOUT.
#
# NOTE(review): $id and $name are interpolated unescaped into the generated
# HTML. Confirm the show list is trusted (or already entity-encoded) before
# adding escaping here, as blind escaping could double-encode names.
# ---------------------------------------------------------------------------

# Issue an HTTP request and retry exactly once if the first attempt fails.
#   $agent   - the LWP::UserAgent object to use
#   $method  - name of the user-agent method to call ('get' or 'post')
#   @request - arguments passed through unchanged to that method
# Returns the response of the last attempt (successful or not).
sub fetch_with_retry {
    my ($agent, $method, @request) = @_;
    my $response;
    for (1 .. 2) {
        $response = $agent->$method(@request);
        last if $response && $response->is_success;
    }
    return $response;
}

# Write $data to $path, replacing any previous content.
# Returns 1 on success; on failure logs the reason to MYCACHE and returns 0.
sub write_feed_file {
    my ($path, $data) = @_;
    my $out;
    if (!open($out, '>', $path)) {
        print MYCACHE "Cannot write $path: $!\n";
        return 0;
    }
    print {$out} $data;
    if (!close($out)) {
        # Buffered write errors only surface when the handle is closed.
        print MYCACHE "Error closing $path: $!\n";
        return 0;
    }
    return 1;
}

# Flush every log line as soon as it is printed, so the log stays current
# while the run is in progress (replaces the explicit per-entry flush calls).
{
    my $previous = select(MYCACHE);
    $| = 1;
    select($previous);
}

# Nothing can be generated without the show list, so fail loudly.
my $shows;
if (!open($shows, '<', $cache)) {
    print MYCACHE "Cannot open show list $cache: $!\n";
    die "Cannot open show list $cache: $!\n";
}

while (my $entry = <$shows>) {
    $entry =~ s/\r?\n\z//;

    # Entry format: "id:name", split on the first colon. Malformed lines are
    # skipped instead of being processed with stale or undefined captures.
    my ($id, $name) = $entry =~ /^(.*?):(.*)/;
    if (!defined $id || $id eq "") {
        print MYCACHE "Skipping malformed show entry: $entry\n";
        next;
    }
    print MYCACHE "Id: $id y $name\n";

    # Fixed head of the HTML page and of the RSS document.
    my $html = <<"END_HTML_HEAD";
<html>
<head>
<title>$name - Ezrss/Eztv RSS feed .</title>
<link rel="stylesheet" type="text/css" href="/eztv.css" />
</head>
<body>
<div class="page">
<h1>RSS Feed for eztv $name</h1>
<p>This is the html version of the feed. To get the RSS, please visit :<br /><a href="/cgi-bin/eztv.pl?id=$id&name=$name">http://eztv.ptain.info/cgi-bin/eztv.pl?id=$id&name=$name</a>.</p>
<div class="list">
<table border="1">
<tr>
<th>Date</th>
<th>Show</th>
</tr>
END_HTML_HEAD

    my $xml = <<'END_XML_HEAD';
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>Eztv/Ezrss Dynamic rss from eztv.it search</title>
<link>http://eztv.ptain.info</link>
<ttl>30</ttl>
<description>EZTV RSS feed for selected show/news</description>
END_XML_HEAD

    # --- Fetch -------------------------------------------------------------
    my $content = "";
    my $response;
    print "Fetching $id / errors occured: $errors\n";

    # After 100 failed entries no further request is attempted.
    if ($errors < 100) {
        if ($id eq "index") {
            # Front listing: concatenate pages 0 to 4.
            for my $page (0 .. 4) {
                (my $page_url = $main_url) =~ s/%num%/$page/;
                $response = fetch_with_retry($browser, 'get', $page_url);
                $content .= $response->content
                    if $response && $response->is_success;
            }
        }
        elsif ($id eq "tvnews") {
            $response = fetch_with_retry($browser, 'get', $tvnews_url);
            $content = $response->content
                if $response && $response->is_success;
        }
        else {
            # Any other id is submitted to the search form.
            $response = fetch_with_retry($browser, 'post', $url,
                [ 'SearchString' => $id ]);
            $content = $response->content
                if $response && $response->is_success;
        }
    }

    # Counts the entry as failed when the last request failed, or when no
    # request was made at all because the error limit was reached.
    if (!$response || !$response->is_success) {
        print "ERROR!\n";
        $errors++;
    }

    if ($content eq "") {
        print MYCACHE "Error fetching\n";
        next;
    }

    # --- Parse -------------------------------------------------------------
    if ($id eq "tvnews") {
        while ($content =~ /class="tvnews_header".*?href="(.*?)".*?<b>(.*?)<\/b>.*?tvnews_content" align="left">(.*?)<\/td>/sg) {
            # Copy the captures at once: later matches would overwrite them.
            my ($link, $title, $body) = ($1, $2, $3);

            # NOTE(review): the original ran s/</</g and s/>/>/g on the title
            # and the body here. As written those were no-ops (probably an
            # entity encode or decode mangled in transit), so they are left
            # out; restore the intended replacement once it is known.
            $html .= "<li class=\"news\"><a href=\"$url_news$link\">$title</a> <br /><span class=\"news_content\">$body</span></li>\n";

            # The RSS link is rebuilt on $tvnews_url, so the "/tvnews/" part
            # already present in the scraped link is removed first.
            $link = decode_entities($link);
            $link =~ s/\/tvnews\///;
            $xml .= join("\n",
                "<item>",
                "<title><![CDATA[$title]]></title>",
                "<link><![CDATA[$tvnews_url$link]]></link>",
                "<description><![CDATA[$body]]></description>",
                "</item>");
        }
    }
    else {
        # Date of the most recent "Added on" row; it applies to every episode
        # row that follows until the next such row.
        my $last_added_on      = "";
        my $last_added_on_html = "";

        while ($content =~ /<tr(.*?)>(.*?)<\/tr>/sg) {
            my ($tr_attr, $tr_data) = ($1, $2);

            # Only rows without name="hover" are checked for a date header.
            if ($tr_attr !~ /name="hover"/
                && $tr_data =~ /Added on: <b>(.*?), (.*?), (.*?)</)
            {
                my ($c_day, $c_month, $c_year) = ($1, $2, $3);

                # An unknown month name or an unparsable date leaves both
                # strings empty rather than undefined.
                my $parsed = "";
                if (exists $month{$c_month}) {
                    $parsed = ParseDate(sprintf "%4d-%02d-%02d 00:00:00",
                        $c_year, $month{$c_month}, $c_day);
                }
                my ($as_html, $as_rss);
                if (defined $parsed && $parsed ne "") {
                    $as_html = UnixDate($parsed, "%d/%m/%Y");
                    # NOTE(review): the value is labelled GMT although
                    # Date::Manip was configured with TZ=EST -- confirm.
                    $as_rss = UnixDate($parsed, "%a, %d %b %Y %H:%M:%S GMT");
                }
                $last_added_on_html = defined $as_html ? $as_html : "";
                $last_added_on      = defined $as_rss  ? $as_rss  : "";
            }

            # Episode row: title of the "epinfo" anchor and the first href
            # that follows it.
            if ($tr_data =~ /class="epinfo">(.*?)<\/a>.*?href="(.*?)"/s) {
                my ($title, $link) = ($1, $2);
                $link = uri_unescape($link);
                $html .= "<tr><td>$last_added_on_html</td><td><a href=\"$link\">$title</a></td></tr>";

                # For the RSS link the last path segment is unescaped once
                # more (kept as in the original).
                if ($link =~ /^(.*)\/([^?]+)$/) {
                    $link = $1 . "/" . uri_unescape($2);
                }
                $xml .= join("\n",
                    "<item>",
                    "<title><![CDATA[$title]]></title>",
                    "<pubDate>$last_added_on</pubDate>",
                    "<link><![CDATA[$link]]></link>",
                    "<description><![CDATA[$title - $link]]></description>",
                    "</item>");
            }
        }
    }

    $html .= '</table></div></div><p><a href="/cgi-bin/eztv_list.pl">Back to eztv rss feeds</a></p></body></html>' . "\n";
    $xml  .= "</channel></rss>\n";

    # --- Write -------------------------------------------------------------
    # Output base names, in order: the id, the name, the lower-cased name.
    # A base name containing "/" is never used, so an entry cannot write
    # outside $file_path; an empty name is skipped so that no bare ".html" or
    # ".xml" file is produced.
    my @basenames;
    if ($id !~ /\//) {
        push @basenames, $id;
    }
    else {
        print MYCACHE "Not writing files named after id '$id': it contains a slash\n";
    }
    if ($name ne "" && $name !~ /\//) {
        push @basenames, $name, lc($name);
    }

    my %written;
    for my $basename (@basenames) {
        next if $written{$basename}++;    # same name twice: write only once
        write_feed_file($file_path . $basename . ".html", $html);
        write_feed_file($file_path . $basename . ".xml",  $xml);
    }

    print MYCACHE "Write $id/$name : ".length($html)." y ".length($xml)."\n";
}

close($shows);
close MYCACHE;