tfeserver: Blog de tfe

html, scripts et tout un tas de trucs...

Download

Contenu du fichier de /eztv_scripts/eztv.pl (télécharger eztv.pl)
#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use HTML::Entities;
use Date::Manip;
use URI::Escape;


Date_Init("TZ=EST");

=for suggestions arnaud
gestion erreurs pour:
* LWP::UserAgent->new
* FCGI::Request();
* CGI::Simple->new();
* $browser->get

gestion d'erreurs = log + die/warn selon

Même quand $reponse->success() est faux, tu renseignes $data{$id}{content}
avec $content...

$data{$id} = {'content' => $content, 'date' => time()};

11/09/2011: il ne manque pas un ORDER BY (date_fetch) desc et AND LIMIT machin ici ?
	my $sth = $dbi->prepare("select * from eztv where id= ? and now() - date_fetch  < interval '30 minute'");
12/09/2011: tfe: non, cela ne matche qu'un résultat. À chaque insertion, on supprime l'élément antérieur s'il existe.

=cut

# HTTP client shared by every request made further down.
# LWP::UserAgent is loaded explicitly here: the file only says
# "use LWP::Simple", and relying on that module to pull in LWP::UserAgent
# as a side effect is fragile.
require LWP::UserAgent;
my $browser = LWP::UserAgent->new;
$browser->timeout(30);    # give up on a request after 30 seconds

# Number of failed fetches so far in this run; the main loop stops
# contacting the site once this reaches 100.
my $errors=0;

# --- Local cache layout ----------------------------------------------------
# $file_path : directory that receives the generated .html / .xml files.
# $cache     : show list, one "<id>:<name>" entry per line.
# The commented-out values are alternate locations under /home/tfe/bin/.
my $file_path = "/home/tfe/public_html/cache/eztv/";
#my $file_path="/home/tfe/bin/";
my $cache = "/home/tfe/public_html/cache/eztv/shows.txt";
#my $cache="/home/tfe/bin/shows.txt";

# --- Remote endpoints --------------------------------------------------------
my $url_news   = 'http://eztv.it/';             # prefix for news links in the HTML output
my $tvnews_url = 'http://eztv.it/tvnews/';      # news page, also prefix for RSS news links
my $main_url   = 'http://eztv.it/page_%num%';   # front-page listing; %num% is the page index
my $url        = 'http://eztv.it/search/';      # search form target (POST)

# --- English month name => two-digit month number ("01" .. "12") -------------
my %month;
{
	my $number = 0;
	for my $month_name (qw(
		January February March     April   May      June
		July    August   September October November December
	))
	{
		$number++;
		$month{$month_name} = sprintf("%02d", $number);
	}
}
        

# Run log.  The MYCACHE handle is written to by the rest of the script.
# Three-argument open keeps the mode separate from the path, and a failed
# open is reported instead of being silently ignored; the run still
# continues without a log in that case.
# NOTE(review): '>' truncates the log on every run, yet the first thing
# written is a blank-line separator -- append mode ('>>') may have been the
# intent.  Left unchanged; confirm.
open(MYCACHE, '>', '/home/tfe/bin/log')
	or warn "Cannot open log file /home/tfe/bin/log: $!\n";

# Header for this run: two blank lines, then the local start time.
print MYCACHE "\n\n", scalar localtime(), "\n";

# ---------------------------------------------------------------------------
# Cache refresh pass.
#
# Reads the show list ($cache), one "<id>:<name>" entry per line.  For each
# entry it fetches the matching eztv.it page(s), renders the result as an
# HTML page and as an RSS feed, and writes both under $file_path as
# <id>.html / <id>.xml (plus copies named after <name> and lc(<name>)).
# Progress is printed to STDOUT and logged to the MYCACHE handle.
#
# Special ids: "index" (front page, pages 0..4) and "tvnews" (news page).
# Any other id is sent to the site's search form as SearchString.
# ---------------------------------------------------------------------------

# Once this many fetches have failed, stop contacting the site for the
# remainder of the run (remaining entries are logged as fetch errors).
my $max_errors = 100;

# Flush the log after every write, so it can be followed while the run is in
# progress.  This replaces the explicit flush calls after each entry.
{
	my $previous_handle = select(MYCACHE);
	$| = 1;
	select($previous_handle);
}

# Perform one HTTP request and retry it exactly once if it fails.
#   $ua     - LWP::UserAgent object
#   $method - 'get' or 'post'
#   @args   - arguments handed unchanged to that method
# Returns the last response obtained, which may still be a failure.
sub _fetch_with_retry
{
	my ($ua, $method, @args) = @_;
	my $response = $ua->$method(@args);
	if (!$response || !$response->is_success)
	{
		$response = $ua->$method(@args);
	}
	return $response;
}

# Turn the pieces of an "Added on: <b>day, Month, year<" header into the two
# date strings used in the output.
#   $month_for - hash ref mapping English month names to "01".."12"
# Returns (html_date, rss_date); both are '' when the month name is unknown
# or Date::Manip cannot parse the date.
sub _format_added_on
{
	my ($month_for, $day, $month_name, $year) = @_;
	return ("", "") unless exists $month_for->{$month_name};

	my $stamp  = sprintf "%4d-%02d-%02d 00:00:00", $year, $month_for->{$month_name}, $day;
	my $parsed = ParseDate($stamp);    # parsed once, formatted twice

	my $html_date = UnixDate($parsed, "%d/%m/%Y");
	# NOTE(review): the literal "GMT" suffix is kept from the original; the
	# timestamp itself is midnight with no zone conversion visible here.
	my $rss_date  = UnixDate($parsed, "%a, %d %b %Y %H:%M:%S GMT");

	$html_date = "" unless defined $html_date;
	$rss_date  = "" unless defined $rss_date;
	return ($html_date, $rss_date);
}

# Write $data to $path, replacing any existing file.
# Failures are logged to MYCACHE and do not abort the run.
# Returns 1 on success, 0 on failure.
sub _write_cache_file
{
	my ($path, $data) = @_;
	my $out;
	if (!open($out, '>', $path))
	{
		print MYCACHE "Cannot write $path: $!\n";
		return 0;
	}
	print {$out} $data;
	if (!close($out))
	{
		# Buffered write errors only surface at close time.
		print MYCACHE "Error closing $path: $!\n";
		return 0;
	}
	return 1;
}

my $cache_fh;
if (!open($cache_fh, '<', $cache))
{
	print MYCACHE "Cannot read show list $cache: $!\n";
	warn "Cannot read show list $cache: $!\n";
}
else
{
	SHOW: while (my $line = <$cache_fh>)
	{
		# Split "<id>:<name>" at the first colon.  A line without a colon is
		# skipped instead of being processed with an undefined id.
		my ($id, $name) = $line =~ /(.*?):(.*)/;
		if (!defined $id)
		{
			print MYCACHE "Skipping malformed show-list line $.\n";
			next SHOW;
		}
		print MYCACHE "Id: $id y $name\n";

		# Page/feed preambles.  The whitespace inside these literals is part
		# of the generated output and is kept as-is.
		my $html = "<html>
		<head>
		<title>$name - Ezrss/Eztv  RSS feed .</title>
        <link rel=\"stylesheet\" type=\"text/css\" href=\"/eztv.css\" />
		</head>
		<body>
		<div class=\"page\">
		<h1>RSS Feed for eztv $name</h1>
		<p>This is the html version of the feed. To get the RSS, please visit :<br /><a href=\"/cgi-bin/eztv.pl?id=$id&name=$name\">http://eztv.ptain.info/cgi-bin/eztv.pl?id=$id&name=$name</a>.</p>
		<div class=\"list\">
        <table border=\"1\">
			<tr>
					<th>Date</th>
					<th>Show</th>
			</tr>
		";

		my $xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
		<rss version=\"2.0\">
		<channel>
		<title>Eztv/Ezrss Dynamic rss from eztv.it search</title>
		<link>http://eztv.ptain.info</link>
		<ttl>30</ttl>
		<description>EZTV RSS feed for selected show/news</description>
		";

		# --- Fetch -----------------------------------------------------------
		my $content = "";
		my $response;
		print "Fetching $id / errors occured: $errors\n";

		if ($errors < $max_errors)
		{
			if ($id eq "index")
			{
				# Front page: concatenate every page that could be fetched.
				for my $page (0 .. 4)
				{
					(my $page_url = $main_url) =~ s/%num%/$page/;
					$response = _fetch_with_retry($browser, 'get', $page_url);
					if ($response && $response->is_success)
					{
						$content .= $response->content;
					}
				}
			}
			elsif ($id eq "tvnews")
			{
				$response = _fetch_with_retry($browser, 'get', $tvnews_url);
				if ($response && $response->is_success)
				{
					$content = $response->content;
				}
			}
			else
			{
				$response = _fetch_with_retry($browser, 'post', $url,
					[ 'SearchString' => $id ]);
				if ($response && $response->is_success)
				{
					$content = $response->content;
				}
			}
		}

		# Counts as an error when the last request failed, or when no request
		# was attempted because the error limit has been reached.
		if (!$response || !$response->is_success)
		{
			print "ERROR!\n";
			$errors++;
		}

		if ($content eq "")
		{
			print MYCACHE "Error fetching\n";
			next SHOW;
		}

		# --- Render ----------------------------------------------------------
		if ($id eq "tvnews")
		{
			while ($content =~ /class="tvnews_header".*?href="(.*?)".*?<b>(.*?)<\/b>.*?tvnews_content" align="left">(.*?)<\/td>/sg)
			{
				# Copy all captures at once: any later successful match or
				# substitution resets $1..$3.  Previously $3 was re-read after
				# substitutions on the title, which lost the news body
				# whenever the title contained '<' or '>'.
				my ($link, $title, $desc) = ($1, $2, $3);

				# NOTE(review): the original ran s/</</g and s/>/>/g on the
				# title and body here.  As written those replace a character
				# with itself, so they are dropped; they look like
				# "&lt;" / "&gt;" unescapes damaged in transit -- confirm
				# against the deployed script.

				$html .= "<li class=\"news\"><a href=\"$url_news$link\">$title</a> <br /><span class=\"news_content\">$desc</span></li>\n";

				# The feed link is rebuilt from the decoded path without its
				# leading "/tvnews/" part, since $tvnews_url already ends in it.
				$link = decode_entities($link);
				$link =~ s/\/tvnews\///;
				$xml .= "<item>
					<title><![CDATA[$title]]></title>
					<link><![CDATA[$tvnews_url$link]]></link>
					<description><![CDATA[$desc]]></description>
					</item>";
			}
		}
		else
		{
			# Date of the most recent "Added on" header row; applies to every
			# episode row that follows it.
			my ($added_html, $added_rss) = ("", "");

			while ($content =~ /<tr(.*?)>(.*?)<\/tr>/sg)
			{
				my ($tr_attr, $tr_data) = ($1, $2);

				# Rows carrying name="hover" are never treated as date headers.
				if ($tr_attr !~ /name="hover"/)
				{
					if (my ($day, $month_name, $year) =
						$tr_data =~ /Added on: <b>(.*?), (.*?), (.*?)</)
					{
						($added_html, $added_rss) =
							_format_added_on(\%month, $day, $month_name, $year);
					}
				}

				if (my ($title, $link) =
					$tr_data =~ /class="epinfo">(.*?)<\/a>.*?href="(.*?)"/s)
				{
					$link = uri_unescape($link);
					$html .= "<tr><td>$added_html</td><td><a href=\"$link\">$title</a></td></tr>";

					# For the feed, the last path segment is unescaped once
					# more (only when it contains no '?').
					if ($link =~ /^(.*)\/([^?]+)$/)
					{
						my ($base, $leaf) = ($1, $2);
						$link = $base."/".uri_unescape($leaf);
					}

					$xml .= "<item>
							<title><![CDATA[$title]]></title>
							<pubDate>$added_rss</pubDate>
							<link><![CDATA[$link]]></link>
							<description><![CDATA[$title - $link]]></description>
							</item>";
				}
			}
		}
		$html .= "</table></div></div><p><a href=\"/cgi-bin/eztv_list.pl\">Back to eztv rss feeds</a></p></body></html>\n";
		$xml .= "</channel></rss>\n";

		# --- Write -----------------------------------------------------------
		# Always write under the id.  Also write under the name and its
		# lower-case form, unless the name is empty (that would create files
		# literally named ".html" / ".xml") or contains a '/'.
		my @basenames = ($id);
		if ($name ne "" && $name !~ /\//)
		{
			push @basenames, $name, lc($name);
		}
		my %already_written;
		for my $basename (@basenames)
		{
			next if $already_written{$basename}++;    # e.g. name already lower-case
			_write_cache_file("$file_path$basename.html", $html);
			_write_cache_file("$file_path$basename.xml",  $xml);
		}

		print MYCACHE "Write $id/$name : ".length($html)." y ".length($xml)."\n";
	}
	close($cache_fh);
}
close MYCACHE;