User:AW01545
From MythTV Official Wiki
- strictly a preliminary version
- attention Doug!
#!/usr/bin/env perl
# Date: $Date: 2010-06-21 23:12:19 -0400 (Mon, 21 Jun 2010) $
# Author: $Author: renickle $
# Revision: $Revision: 8 $
# Id: $Id: hir21.pl 8 2010-06-22 03:12:19Z renickle $
#
# MythNetvision Grabber Script for hirtv (Hungarian News)
#
################################################################################
use strict;
$|=1;
use LWP::Simple;
use HTML::TreeBuilder;
use HTML::Entities;
use Data::Dumper;
use Getopt::Long;
use Time::Local;
use File::stat;
use DateTime;
# ---- Grabber configuration -------------------------------------------------

# Channel title printed in the generated feed.
my $site = "HirTV";

# Site root; video paths scraped from the listing page are appended to it.
my $baseurl = cleantext("http://www.hirtv.hu");

# Listing page that is downloaded and scanned for video links.
my $url = "$baseurl/?tPath=/view/videoview/hirado&sp=Offer";

# Entity-encoded copy of the listing URL, used inside the XML output.
my $cleanurl = cleantext($url);

my $description = "HirTV Hungarian News";

# Publication date of the item currently being processed (set per link).
my $video_created = "";

# Local copy of the listing page and its maximum age in seconds.
my $cache_file = "/tmp/hirado.content";
my $cache_time = 7200;

# RSS envelope printed around the <channel> element in tree mode.
my $header = <<'_EOHEADER_';
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:cnettv="http://cnettv.com/mrss/"
xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule"
xmlns:media="http://search.yahoo.com/mrss/"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:amp="http://www.adobe.com/amp/1.0"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:mythtv="http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format">
_EOHEADER_
my $footer = "</rss>\n";

# Item title => video link, and item title => publication date.
my %dirurls;
my %dirdates;

# Command line switches: -T (tree), -S <text> (search), -v (version),
# -p <n> (page).  Only -T and -v are acted upon in this file.
# The result of GetOptions is kept but not inspected, so unknown options
# do not abort the script.
my %options;
my $opt_return = GetOptions(
    \%options,
    qw(T|Tree! S|Search=s v|version! p|page=i),
);
# Version mode (-v): print the <grabber> description document and exit.
#
# A one-line "HirTV|T" reply followed by an immediate exit used to sit in
# front of this document, which made the XML unreachable; the XML is now
# what -v emits.  The misspelled <thumbnnail> element name is corrected.
#
# NOTE(review): presumably only very old MythTV releases expect the
# "HirTV|T" one-liner - confirm against the MythNetvision version in use.
# NOTE(review): the icon is passed as a "src" attribute here; verify against
# the grabber format specification whether element text is expected instead.
if ( $options{"v"} ) {
    # Non-interpolating heredoc: "@" and "'" need no escaping.
    print <<'_EOXML_';
<grabber>
<name>Hir21</name>
<command>hir21.pl</command>
<author>ricknickle@hotmail.com</author>
<thumbnail src="/usr/share/mythtv/mythnetvision/icons/news.png"/>
<type>video</type>
<description>Hirado 21 is the 9 o'clock news hour on HirTV, the conservative television network in Hungary.</description>
<version>2</version>
<search>false</search>
<tree>true</tree>
</grabber>
_EOXML_
    exit 0;
}
# Tree mode (-T): print the RSS envelope with the generated <channel>
# element in between, then stop.
if ( $options{'T'} ) {
    print $header;
    hirado_channel();    # prints the <channel> element itself
    print $footer;
    exit 0;
}
# Print the <channel> element listing the news videos found on the HirTV
# listing page ($url).
#
# The page is taken from $cache_file when that file is non-empty and not
# older than $cache_time seconds; otherwise it is downloaded and the cache is
# refreshed.  Every anchor whose href carries a "videoview_src" parameter
# becomes one <item>; its title is the anchor text (or 'mai hirek' when the
# text is empty) and its date comes from the "videoview_date" parameter.
#
# Side effects: fills the file-level %dirurls and %dirdates, updates
# $video_created, writes $cache_file and prints to STDOUT.
# Dies when the page cannot be retrieved or parsed, or when it contains no
# video links.
sub hirado_channel {
    my $content = "";

    # Expire a stale cache file.
    # NOTE(review): the cache lives at a fixed, predictable path under /tmp;
    # on a multi-user machine that is open to symlink tricks - consider a
    # per-user cache directory.
    if ( -e $cache_file ) {
        my $cache_stat = stat($cache_file);
        if (   $cache_stat
            && $cache_stat->mtime
            && ( time() - $cache_stat->mtime > $cache_time ) )
        {
            unlink($cache_file);
        }
    }

    # Use the cached copy only when it actually holds data; an empty file is
    # treated like a missing one.
    if ( -s $cache_file ) {
        if ( open( my $in, '<', $cache_file ) ) {
            $content = do { local $/; <$in> };
            close($in);
        }
    }

    unless ( defined($content) && length($content) ) {
        $content = get($url);

        # Check before caching so that a failed download is retried on the
        # next run instead of being remembered for $cache_time seconds.
        unless ($content) {
            die "Could not retrieve $url";
        }

        # Caching is best effort: a write problem must not stop the listing.
        if ( open( my $out, '>', $cache_file ) ) {
            print {$out} $content;
            close($out) or unlink($cache_file);
        }
        else {
            warn "Could not write cache file $cache_file: $!\n";
        }
    }

    $content =~ s/<br \/>/ /g;

    # parse() must be followed by eof() so the tree is completed.
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); $tree->eof; 1 }
        or die "$url parse failed, $@";

    foreach my $anchor ( $tree->find_by_tag_name('a') ) {
        my $href = $anchor->attr('href');
        next unless defined $href;

        # Example href:
        # /?tPath=/view/videoview/hirado&sp=Video&videoview_src=/filmek/hirado21/hiradodate.wmv&videoview_date=20100612
        # which leads to the video
        # http://www.hirtv.hu/filmek/hirado21/hirado20100616.wmv
        my $video_src  = "";
        my $video_date = "";
        if ( $href =~ /videoview_src=(\S+)\&/ ) {
            $video_src = $1;
        }
        if ( $href =~ /videoview_date=(\d+)/ ) {
            $video_date = $1;
        }

        # Anchors without a video source (navigation links and the like)
        # are not videos and must not become items.
        next unless length $video_src;

        # The broadcast starts at 21:00 Budapest time; convert to UTC so the
        # "GMT" label in the printed date is true.  An impossible date in the
        # link falls back to the fixed default instead of aborting the run.
        my $dt;
        if ( $video_date =~ /(\d\d\d\d)(\d\d)(\d\d)/ ) {
            my ( $year, $month, $day ) = ( $1, $2, $3 );
            $dt = eval {
                DateTime->new(
                    year      => $year,
                    month     => $month,
                    day       => $day,
                    hour      => 21,
                    minute    => 0,
                    second    => 0,
                    time_zone => 'Europe/Budapest',
                );
            };
        }
        if ($dt) {
            $dt->set_time_zone('UTC');
            $video_created = sprintf(
                "%s, %02d %s %04d %02d:%02d:%02d GMT",
                $dt->day_abbr, $dt->day,    $dt->month_abbr, $dt->year,
                $dt->hour,     $dt->minute, $dt->second
            );
        }
        else {
            $video_created = "Thu, 17 Dec 2009 22:00:00 GMT";
        }

        # The path contains the literal placeholder "date"; replace it with
        # the date taken from the link.
        $video_src =~ s/date/$video_date/;
        my $video_link = cleantext( $baseurl . $video_src );

        # Strip the leading "Hirado 21 <digits> " prefix; the accented
        # letters are matched as their UTF-8 octets.
        my $dir = $anchor->as_trimmed_text();
        $dir =~ s/H\303\255rad\303\263\s+21\s+\d+\s//;

        my $title = ( $dir =~ /^\s*$/ ) ? 'mai hirek' : $dir;
        $dirdates{$title} = $video_created;
        $dirurls{$title}  = $video_link;
    }

    # Release the parse tree explicitly.
    $tree->delete;

    die "No urls found" unless keys %dirurls;

    my $nresults = scalar keys %dirurls;
    print <<"_EOCHANNEL_";
<channel>
<title>$site</title>
<link>$cleanurl</link>
<description>$description</description>
<numresults>$nresults</numresults>
<returned>$nresults</returned>
<startindex>1</startindex>
<directory name="Hirado 21 News" thumbnail="%SHAREDIR%/mythnetvision/icons/mythtv.png">
_EOCHANNEL_

    foreach my $dir ( sort keys %dirurls ) {
        # Anchor text is plain text; escape the XML-special characters only.
        my $title   = encode_entities( $dir, '<>&"' );
        my $link    = $dirurls{$dir};
        my $pubdate = $dirdates{$dir};

        # The heredoc starts with an empty line on purpose: each item is
        # preceded by a blank line in the output.
        print <<"_EOITEM_";

<item>
<title>$title</title>
<author>HirTV</author>
<pubDate>$pubdate</pubDate>
<description>Nightly News (debug URL) $link</description>
<link>$link</link>
<player>/usr/bin/cvlc</player>
<playerargs>-f %URL%</playerargs>
<media:group>
<media:content url="%URL%"/>
<media:thumbnail url="%SHAREDIR%/mythnetvision/icons/hirtv.png"/>
</media:group>
<rating>5.0</rating>
</item>
_EOITEM_
    }

    print " </directory>\n";
    print "</channel>\n";
}
# Prepare a string for embedding in the generated XML.
#
# Newlines become spaces, leading and trailing whitespace is removed and the
# XML-special characters < > & " ' are replaced by entities.  Only those
# characters are encoded: the default set of encode_entities() would also
# turn high-bit characters into HTML named entities, which an XML document
# does not define.
#
# Takes the text as its only argument.  Returns the cleaned text, or nothing
# (undef in scalar context) when the argument is undefined or empty.
sub cleantext {
    my $text = shift;

    # Test for "no text" explicitly so that the string "0" is kept.
    return unless defined($text) && length($text);

    $text =~ s/\n/ /g;
    $text =~ s/^\s+|\s+$//g;
    $text = encode_entities( $text, q{<>&"'} );
    return $text;
}