[mythtv-users] New Australian XMLTV grabber

Michael Cheshire (Mailing Lists) michael-mlists at cheshire.id.au
Wed Nov 17 21:20:01 UTC 2004


Actually there were several more spelling mistakes..

Woops :)

----- Original Message ----- 
From: "Eyal Lebedinsky" <eyal at eyal.emu.id.au>
To: "Discussion about mythtv" <mythtv-users at mythtv.org>
Sent: Wednesday, November 17, 2004 11:03 PM
Subject: Re: [mythtv-users] New Australian XMLTV grabber


> Michael Cheshire (Mailing Lists) wrote:
>> The great script, updated with foxtel channels.
>
> If I did not break anything else then this is the same thing, as an 
> attachment
> so that whitespace is not lost, and with one misspelling fixed...
>
> -- 
> Eyal Lebedinsky (eyal at eyal.emu.id.au) <http://samba.org/eyal/>
>


--------------------------------------------------------------------------------


> #!/usr/bin/perl -w
> # Australian TV Guide XMLTV grabber by Damon Searle
> # Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
> # Derived from original code by Justin Hawkins
> #
> # This program is free software; you can redistribute it and/or modify
> # it under the terms of the GNU General Public License as published by
> # the Free Software Foundation; either version 2 of the License, or
> # (at your option) any later version.
> #
> # This program is distributed in the hope that it will be useful,
> # but WITHOUT ANY WARRANTY; without even the implied warranty of
> # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> # GNU General Public License for more details.
> #
> # You should have received a copy of the GNU General Public License
> # along with this program; if not, write to the Free Software
> # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 
> USA
>
> # 30 Oct 2004
> #  Damon Searle <djsearle at netspace.net.au>
> #  - wrote first version
> #  - gets data from NineMSN as a backup. Its not that fancy,
> # 31 Oct 2004
> #  Fred Donelly <fdonelly at hotmail.com>
> #  - added an option so that the output file can be specified on the
> #    command line and from the quick test I gave it, it now works with
> #    mythfilldatabase.
> #  - $offset set to +1000 at the top and then had "+1000" set in a
> #  output string further down rather than the variable
> # 4 Nov 2004
> #  Paul Andreassen <paulx at andreassen.com.au>
> #  - learned some perl and now wants to go back to python
> #  - added and then reduced status info
> #  - retry on failure to getstore
> #  - changed cache to '/var/local/tv_grab_au'
> #  - added threading for each day
> # 5 Nov 2004
> #  - improved threading with use of queue
> #  Eyal Lebedinsky <eyal at eyal.emu.id.au>
> #  - easier location selection
> # 8 Nov 2004
> #  Paul
> #  - fixed pid=0 bug
> #  - did some merging, I hate merging
> # 9 Nov 2004
> #  Rob Hill <rob at dot.net.au>
> #  - added Sydney
> # 10 Nov 2004
> #  Mary Wright <mwright at taz-devil.dyndns.org>
> #  - digital info for Sydney
> #  Paul
> #  - more cleanup and improved error checking
> #  - used mirror instead of getstore to get any updates
> #  - mirror didn't work replaced with own smarts to check for updates to 
> times
> # 11 Nov 2004
> #  - added program name in check
> # 13 Nov 2004
> #  - added freesd for Brisbane
> # 14 Nov 2004
> #  - --configure to exit nicely
> #  - if no program data then skip program nicely, mainly for foxtel data
> #  - added foxtel channels
> # 17 Nov 2004
> #  - added remaining foxtel channels
> #  Eyal Lebedinsky <eyal at eyal.emu.id.au>
> #  - Fix misspelling Unknows -> Unknown
> #  - Note: is Sydney now is on summer time +1100?
>
> use strict;
> use Getopt::Long;
> use XMLTV;
> use LWP::Simple;
> use Date::Manip;
> use File::Path;
> use threads;
> use Thread::Queue;
>
> # Instructions:
> # Select your region and source.
> # If your location isn't listed below, go to
> # http://tvguide.ninemsn.com.au/guide/ select your area
> # look at the last number in the URL before ".asp" and set
> # the region variable below. Then put the channel names as listed
> # on the tv guide site into the variables below.
> # Then set your XMLTV ids from the database in the XMLTVID_URL variable.
> #
> # If it doesn't work with mythfilldatabase, try:
> # ./tv_grab_au
> # mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml
>
> # pick your region
> #
> my $location = "Canberra";
> #my $location = "Brisbane";
> #my $location = "Sydney";
> #my $location = "Australia";
>
> # pick your source
> #
> my $source = "free";
> #my $source = "freesd";
> #my $source = "freehd";
> #my $source = "foxtel";
>
> # choose the XMLID URL suffix that mythtv knows
> #
> my $XMLTVID_URL = "d1.com.au";
>
> # change to how you think it should work
> my $days_to_grab = 7;
> my $threads = 5;
> my $retrys = 3;
> my $secondsbeforeretry = 2;
>
> # Variables
> my $guide_url = "http://tvguide.ninemsn.com.au/guide/";
> my $details_url = 
> "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
> my $cache_dir = "/var/local/tv_grab_au";
>
> my $XMLTV_prefix = $source . "." . $location . ".";
> my $XMLTV_suffix = "." . $XMLTVID_URL;
>
> my $region; my $offset;
> my %channels;
>
> if ("Canberra" eq $location) {
> $region = "126";
> $offset = "+1100";
> if ("free" eq $source) {
> $channels{"ABC NSW"}="2";
> $channels{"Prime Southern"}="PrimS";
> $channels{"SBS Sydney"}="SBS";
> $channels{"Southern Cross TEN Capital"}="10Cap";
> $channels{"WIN Television NSW"}="WIN"
> } elsif ("freesd" eq $source or "freehd" eq $source) {
> $channels{"ABC NSW"}="2";
> $channels{"Prime Southern"}="7";
> $channels{"SBS Sydney"}="SBS";
> $channels{"Southern Cross TEN Capital"}="10";
> $channels{"WIN Television NSW"}="9"
> } else {
> print "Unknown source '$source' for $location\n";
> exit (1);
> }
> } elsif ("Brisbane" eq $location) {
> $region = "79";
> $offset = "+1000";
> if (("free" eq $source)||("freesd" eq $source)) {
> $channels{"ABC QLD"}="2";
> $channels{"Channel Seven Queensland"}="7";
> $channels{"SBS Queensland"}="SBS";
> $channels{"Southern Cross TEN Queensland"}="10";
> $channels{"WIN Television QLD"}="9";
> } else {
> print "Unknown source '$source' for $location\n";
> exit (1);
> }
> } elsif ("Sydney" eq $location) {
> $region = "73";
> $offset = "+1100";
> if (("free" eq $source)||("freesd" eq $source)) {
> $channels{"ABC NSW"}="2";
> $channels{"Channel Seven Sydney"}="7";
> $channels{"SBS Sydney"}="SBS";
> $channels{"Network TEN Sydney"}="10";
> $channels{"Channel Nine Sydney"}="9";
> } else {
> print "Unknown source '$source' for $location\n";
> exit (1);
> }
> } elsif ("Adelaide" eq $location) {
> $region = "81";
> $offset = "+0930";
> if (("free" eq $source)||("freesd" eq $source)) {
> $channels{"ABC SA"}="2";
> $channels{"Channel Seven Adelaide"}="7";
> $channels{"SBS"}="SBS";
> $channels{"Network TEN Adekaude"}="10";
> $channels{"Channel Nine Adekaude"}="9";
> } else {
> print "Unknown source '$source' for $location\n";
> exit (1);
> }
> } elsif ("Australia" eq $location) {
> $region = "123";
> $offset = "+0930";
> if ("foxtel" eq $source) {
> $channels{"Arena TV"}="Arena";
> $channels{"BBC World"}="BBC";
> $channels{"Cartoon Network"}="Cartoon";
> $channels{"Channel [V]"}="Red";
> $channels{"CNBC"}="CNBC";
> $channels{"CNN"}="CNN";
> $channels{"Discovery Channel"}="Disc";
> $channels{"FOX News"}="FoxFNC";
> $channels{"FOX8"}="FOX";
> $channels{"MAX"}="FoxMMX";
> $channels{"National Geographic Channel"}="NatGe";
> $channels{"Nickelodeon"}="Nick";
> $channels{"Showtime"}="Show";
> $channels{"Showtime 2"}="FoxSH2";
> $channels{"Sky News"}="SkyNews";
> $channels{"TV1"}="TV1";
> $channels{"UKTV"}="UKTV";
> $channels{"Showtime Greats"}="ShowGreats";
> $channels{"World Movies"}="wmov";
> $channels{"WCH"}="WCH";
> $channels{"TVSN"}="TVSN";
> $channels{"Sky Racing"}="SkyRa";
> $channels{"Ovation"}="Ovation";
> $channels{"Disney Channel"}="Disney";
> $channels{"Animal Planet"}="Animal";
> $channels{"The Comedy Channel"}="Com";
> $channels{"The LifeStyle Channel"}="Lifes";
> $channels{"FOX Sports 1"}="FoxFS1";
> $channels{"Movie One"}="Movie1";
> $channels{"TCM"}="TCM";
> $channels{"MTV"}="MTV";
> $channels{"FOX Sports 2"}="FoxSP2";
> $channels{"FOX Footy Channel"}="FFC";
> $channels{"Movie Extra"}="MovieEx";
> $channels{"Hallmark Channel"}="Hall";
> $channels{"The History Channel"}="FoxHST";
> $channels{"ESPN"}="ESPN";
> $channels{"FOX Classics"}="FoxCLA";
> $channels{"Movie Greats"}="MovieGr";
> } else {
> print "Unknown source '$source' for $location\n";
> exit (1);
> }
> } else {
> print "Unknown location '$location'\n";
> exit (1);
> }
>
> my $prog_ref;
> my $chan_ref;
>
> foreach my $channel (keys %channels)
> {
> $$chan_ref{$channel} =
>        {
> 'id' => $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix,
> 'display-name' => [ [ $channel, undef ]]
> };
> }
>
>
> # Options
> my $opt_days;
> my $opt_output;
> my $opt_configfile;
> my $opt_configure = 0;
>
> GetOptions('days=i'        => \$opt_days,
>    'output=s'      => \$opt_output,
>    'config-file=s' => \$opt_configfile,
>    'configure'     => \$opt_configure,
>    );
>
> if ($opt_days) {
> $days_to_grab = $opt_days
> }
>
> if (!($opt_output)) {
> $opt_output = $cache_dir . "/guide.xml";
> }
>
> # $opt_configfile should probably do something 
> ('/home/mythtv/.mythtv/tv_grab_au.xmltv')
>
> if ($opt_configure == 1)
> {
>    print "configuration must be done in this script $0\n";
>    exit (0);
> }
>
> print "grabing $days_to_grab days into $opt_output\n";
>
>
>
>
> print "starting $threads threads\n";
>
> my @thrlist;
> my $datepids = Thread::Queue->new;
>
> for (my $thread=0; $thread<$threads; $thread++)
> {
>        push @thrlist, threads->new(\&fetch_details);
> }
>
> print "loading queue\n";
>
> my $currentday = &ParseDate("today");
> my $day_counter = 1;
> while ($day_counter <= $days_to_grab)
> {
> my $date = &UnixDate($currentday, "%d%m%Y");
> my @day_lines = get_day($date,1);
> if (@day_lines == 0)
> {
> $currentday = &DateCalc($currentday, "+ 1 day");
> $day_counter++;
>         next;
> }
>
> my @pids;
> my @rowspans;
> my @names;
> foreach my $line (@day_lines)
> {
> foreach my $link (split /\n|tr|TR|TD|tr/, $line )
> {
> if ($link =~ /closeup\/default.asp/)
> {
> my $rowspan = $link;
> $rowspan =~ s/.+rowspan=//g;
> $rowspan =~ s/ .+//g;
>
> my $name = $link;
> $name =~ s/.+target=new>(<P>|)//g;
> $name =~ s/<\/a>.+//g;
>
> $link =~ s/.+pid=//g;
> $link =~ s/".+//g; #"
> if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/))
> {
> push @pids, $link;
> push @rowspans, $rowspan;
> push @names, $name;
> }
> }
> }
> }
>
> if (changed_guide($date, at pids, at rowspans, at names))
> {
> for (my $count=0; $count <= $#pids; $count++)
> {
> $datepids->enqueue($date . "-" . $pids[$count]);
> }
> }
>
> $day_counter++;
> $currentday = &DateCalc($currentday, "+ 1 day");
> }
>
> for (my $thread=0; $thread<$threads; $thread++)
> {
> $datepids->enqueue(0 . "-" . 0);
> }
>
> print "queue is complete\n";
>
> foreach my $thr (@thrlist)
> {
>    $thr->join;
> }
>
> print "all threads done\n";
> print "building xml structure\n";
>
> $currentday = &ParseDate("today");
> $day_counter = 1;
> while ($day_counter <= $days_to_grab)
> {
> my @pids;
> my $date = &UnixDate($currentday, "%d%m%Y");
>
> my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
> if (open(PRN, $guide_prn_file))
> {
> my @prn = split />/, <PRN>;
> close(PRN);
>
> if ($#prn > 1)
> {
> my $pidlast = ($#prn + 1)/3 - 1;
> @pids=@prn[0..$pidlast];
> }
> else
> {
> print "no pids in $guide_prn_file\n";
> @pids=();
> }
> }
> else
> {
> print "can't read $guide_prn_file\n";
> @pids=()
> }
>
> my $retry = 0;
> foreach my $pid (@pids)
> {
> my @details = get_details($date, $pid);
> if (@details == 0)
> {
>         next;
> }
>
> my $show_details_table = "";
> my $use_line = 0;
> my $close_html = 0;
> foreach my $line (@details)
> {
> if ($line =~ /bgColor=#f7f3e8/)
> {
> $use_line = 0;
> }
> if ($use_line == 1)
> {
> $show_details_table .= $line;
> }
> if ($line =~ /bgcolor=#ffffff/)
> {
> $use_line = 1;
> }
> if ($line =~ /<\/HTML>/)
> {
> $close_html = 1;
> }
>     }
>
> if ($close_html == 0)
> {
> my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
> if ($retry++ >= $retrys)
> {
> print "giving up on truncated $name\n";
> $retry=0;
> next;
> }
> unlink $name;
> push @pids, $pid;
> print "t"; # truncated
> sleep($secondsbeforeretry);
> next;
> }
>
> if ((length $show_details_table) == 0)
> {
> print "m"; # missing: can't do anything about this
> $retry=0;
> next;
> }
>
>     $show_details_table =~ s/<[^>]*>/\n/g;
> $show_details_table =~ s/\&nbsp\;//g;
> #$show_details_table =~ s/<BR>|<TR>|<TD><B><b><\/B><\/b>/\n/g;
> #$show_details_table =~ s/Genre://g;
> #$show_details_table =~ s/Rated:/\n/g;
> my $count = 0;
>
> my $channel = "";
> my $start_date = &UnixDate($currentday, "%Y-%m-%d");
> my $time;
> my $title1 = "";
> my $title2 = "";
> my $genre = "";
> my $descr = "";
> my $details = "";
> my $duration;
>
>
> #print $show_details_table. "\n\n\n";
> foreach my $line (split /\n/, $show_details_table)
> {
> if ($count == 4){
> #print "Time: " . $line . "\n";
> $time = $line;
> }
> elsif ($count == 7){
> $channel = $line;
> #print "Channel: " . $line . "\n";
> }
> elsif ($count == 19){
> $title1 = $line;
> #print "Program: " . $line . "\n";
> }
> elsif ($count == 20){
> $line =~ s/ - //g;
> $title2 = $line;
> #print "Subtitle: " . $line . "\n";
> }
> elsif ($count == 21){
> $line =~ s/\D//g;
> $duration = $line;
> #print "Run time: " . $line . "\n";
> }
> elsif ($count == 22){
> $line =~ s/[^A-Z]//g;
> $details = $line;
> #print "Rating: " . $line . "\n";
> }
> elsif ($count == 26){
> $line =~ s/ //g;
> $genre = $line;
> #print "Genre: " . $line . "\n";
> }
> elsif ($count == 28 && $line =~ /[a-zA-Z]/){
> $descr = $line;
> #print "Description: " . $line . "\n";
> }
> #elsif ($count == 26 && $line =~ /[a-zA-Z]/){
> # $descr = $line;
> # print "Description: " . $line . "\n";
> #}
> #print $count .": " . $line . "\n";
> ++$count;
> }
>
>
> my $start_time = &UnixDate($time, "%H:%M");
> # my $start_datetime = $start_date . " " . $start_time;
> if ($start_time =~ /00:|01:|02:|03:|04:|05:/)
> {
> $start_date = &DateCalc($start_date, "+ 1 day");
> }
> $start_date = &UnixDate($start_date, "%Y%m%d");
> my $end_time = &DateCalc($start_time, " + " . $duration . "minutes");
> $end_time = &UnixDate($end_time, "%H:%M");
>
> my $end_date;
> if (&Date_Cmp($start_time, $end_time) <= 0)
> {
> $end_date = $start_date;
> }
> else
> {
> my $err;
> my $edate = &DateCalc($start_date, "+ 1 day", \$err);
> $end_date = &UnixDate($edate, "%Y%m%d");
> }
>
> if (defined $channels{$channel})
> {
> $channel = $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix;
> }
> else
> {
> print "unknown channel $channel\n";
> $retry=0;
> next;
> }
>
> my $start;
> my $stop;
>
> $start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset;
> $stop = $end_date . &UnixDate($end_time,"%H%M") . "00 " . $offset;
>
> my $a_prog = {
> channel => $channel,
> start   => $start,
> stop    => $stop,
> title   => [ [ $title1, undef ] ]
> };
>
> $descr =~ s/^\s+//;
> $descr =~ s/\s+$//;
>
> if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; }
> if ($descr)  { $$a_prog{desc}        = [ [ $descr, undef ] ]; }
> if ($genre)  { $$a_prog{category}    = [ [ $genre, undef ] ]; }
>
> push @$prog_ref, $a_prog;
> $retry=0;
> }
>
> $currentday = &DateCalc($currentday, "+ 1 day");
> $day_counter++;
> }
>
> my $data = [
>    'ISO-8859-1',
>    {
>     'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
>     'generator-info-name' => 'NineMSN grabber',
>     'generator-info-url'  => '',
>     'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2"
>    },
>    $chan_ref,
>    $prog_ref
> ];
>
> my $hour=&UnixDate(&ParseDate("now"),"%H");
> if ($hour < 6)
> {
>    print "can't update between 0:00 and 6:00\n";
> # If we update between these hours we lose any data we had up to 6:00.
> # This is because the web site starts a day at 6:00 and ends at 6:00 the 
> next day
> # This could be fixed by read the previous days info and adding the needed 
> shows.
> # I did try adding the whole previous day but got lots of mythfilldatabase 
> errors.
>    exit(1);
> }
>
> print "writing file\n";
>
> my $fh = new IO::File ">$opt_output";
> XMLTV::write_data($data, OUTPUT=>$fh);
>
> print "done\n";
>
> # subroutines
> sub get_day
> {
> my $date = shift;
> my $force = shift;
> my $url = $guide_url . $date . "_" . $region . ".asp";
>
> my $guide_dir = $cache_dir . "/" . $date;
> my $guide_file = $guide_dir . "/guide.html";
> mkpath ($guide_dir);
>
> for (my $retry=0; (($force==1) || (!(-e $guide_file))) && 
> is_error(getstore($url, $guide_file)) && ($retry<$retrys); $retry++)
> {
> print ".";
> sleep($secondsbeforeretry);
> }
>
> my @guide_lines;
> if (open(GUIDE, $guide_file))
> {
> @guide_lines = <GUIDE>;
> close(GUIDE);
> }
> else
> {
> @guide_lines = ();
> print "giving up on $guide_file\n";
> }
> return @guide_lines;
> }
>
> sub get_details
> {
> my $date = shift;
> my $program_id = shift;
>
> my $url = $details_url . $program_id;
> my $guide_dir = $cache_dir . "/" . $date;
> my $details_file = $guide_dir . "/" . $program_id . ".html";
> mkpath ($guide_dir);
>
> for (my $retry=0; (!(-e $details_file)) && is_error(getstore($url, 
> $details_file)) && ($retry<$retrys); $retry++)
> {
> print ".";
> sleep($secondsbeforeretry);
> }
>
> my @details_lines;
> if (open(DETAILS, $details_file))
> {
> @details_lines = <DETAILS>;
> close(DETAILS);
> }
> else
> {
> @details_lines = ();
> print "giving up on $details_file\n";
> }
> return @details_lines;
> }
>
> sub fetch_details
> {
> my $datepid=$datepids->dequeue;
> my @datepidl=split /-/, $datepid;
> my $date = $datepidl[0];
> my $pid = $datepidl[1];
>
> while (($date!=0) and ($pid!=0))
> {
> my $guide_dir = $cache_dir . "/" . $date;
> mkpath ($guide_dir);
>
> my $url = $details_url . $pid;
> my $details_file = $guide_dir . "/" . $pid . ".html";
>
> for (my $retry=0; is_error(getstore($url, $details_file)) && 
> ($retry<$retrys); $retry++)
> {
> sleep($secondsbeforeretry);
> }
>
> $datepid=$datepids->dequeue;
> @datepidl=split /-/, $datepid;
> $date = $datepidl[0];
> $pid = $datepidl[1];
> }
> }
>
> sub changed_guide
> {
> my $date = shift;
> my @pidsrowspansnames = @_;
>
> my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
> if (open(PRN, $guide_prn_file))
> {
> my @prn = split />/, <PRN>;
> close(PRN);
>
> if (($#prn > 1) and ($#prn == $#pidsrowspansnames))
> {
> my $count;
> my $diff = ((($#prn+1)*2)/3)-1;
> for ($count=0; ($count <= $diff) && 
> ($prn[$count]==$pidsrowspansnames[$count]); $count++)
> { }
>
> if ($count==($diff+1))
> {
> for (; ($count <= $#prn) && ($prn[$count] eq $pidsrowspansnames[$count]); 
> $count++)
> { }
>
>     if ($count==($#prn+1))
> {
> print "$date unchanged\n";
> return 0;
> }
> }
> }
> }
>
> print "$date downloading\n";
>
> if (open(PRN, ">", $guide_prn_file))
> {
> for (my $count=0; $count<$#pidsrowspansnames; $count++)
> {
> print PRN "$pidsrowspansnames[$count]>";
> }
> print PRN "$pidsrowspansnames[$#pidsrowspansnames]";
> close(PRN);
> }
> else
> {
> print "can't open for writing $guide_prn_file\n";
> }
>
> return 1;
> }
>


--------------------------------------------------------------------------------


_______________________________________________
mythtv-users mailing list
mythtv-users at mythtv.org
http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-users
-------------- next part --------------
#!/usr/bin/perl -w
# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# 30 Oct 2004
#  Damon Searle <djsearle at netspace.net.au>
#  - wrote first version
#  - gets data from NineMSN as a backup. Its not that fancy,
# 31 Oct 2004
#  Fred Donelly <fdonelly at hotmail.com> 
#  - added an option so that the output file can be specified on the 
#    command line and from the quick test I gave it, it now works with 
#    mythfilldatabase.
#  - $offset set to +1000 at the top and then had "+1000" set in a 
#  output string further down rather than the variable
# 4 Nov 2004
#  Paul Andreassen <paulx at andreassen.com.au>
#  - learned some perl and now wants to go back to python
#  - added and then reduced status info
#  - retry on failure to getstore
#  - changed cache to '/var/local/tv_grab_au'
#  - added threading for each day
# 5 Nov 2004
#  - improved threading with use of queue
#  Eyal Lebedinsky <eyal at eyal.emu.id.au>
#  - easier location selection
# 8 Nov 2004
#  Paul
#  - fixed pid=0 bug
#  - did some merging, I hate merging
# 9 Nov 2004
#  Rob Hill <rob at dot.net.au>
#  - added Sydney
# 10 Nov 2004
#  Mary Wright <mwright at taz-devil.dyndns.org>
#  - digital info for Sydney 
#  Paul
#  - more cleanup and improved error checking
#  - used mirror instead of getstore to get any updates
#  - mirror didn't work replaced with own smarts to check for updates to times
# 11 Nov 2004
#  - added program name in check
# 13 Nov 2004
#  - added freesd for Brisbane
# 14 Nov 2004
#  - --configure to exit nicely
#  - if no program data then skip program nicely, mainly for foxtel data
#  - added foxtel channels
# 17 Nov 2004
#  - added remaining foxtel channels
#  Eyal Lebedinsky <eyal at eyal.emu.id.au>
#  - Fix misspelling Unknows -> Unknown
#  - Note: is Sydney now is on summer time +1100?

use strict;
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;
use File::Path;
use threads;
use Thread::Queue;

# Instructions:
# Select your region and source.
# If your location isn't listed below, go to
# http://tvguide.ninemsn.com.au/guide/ select your area
# look at the last number in the URL before ".asp" and set
# the region variable below. Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the XMLTVID_URL variable.
#
# If it doesn't work with mythfilldatabase, try:
# ./tv_grab_au
# mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml

# pick your region
#
 my $location = "Canberra";
#my $location = "Brisbane";
#my $location = "Sydney";
#my $location = "Australia";

# pick your source
#
 my $source = "free";
#my $source = "freesd";
#my $source = "freehd";
#my $source = "foxtel";

# choose the XMLID URL suffix that mythtv knows
#
my $XMLTVID_URL = "d1.com.au";

# change to how you think it should work
my $days_to_grab = 7;
my $threads = 5;
my $retrys = 3;
my $secondsbeforeretry = 2;

# Variables
my $guide_url = "http://tvguide.ninemsn.com.au/guide/";
my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
my $cache_dir = "/var/local/tv_grab_au";

my $XMLTV_prefix = $source . "." . $location . ".";
my $XMLTV_suffix = "." . $XMLTVID_URL;

my $region; my $offset;
my %channels;

if ("Canberra" eq $location) {
	$region = "126";
	$offset = "+1100";
	if ("free" eq $source) {
		$channels{"ABC NSW"}="2";
		$channels{"Prime Southern"}="PrimS";
		$channels{"SBS Sydney"}="SBS";
		$channels{"Southern Cross TEN Capital"}="10Cap";
		$channels{"WIN Television NSW"}="WIN"
	} elsif ("freesd" eq $source or "freehd" eq $source) {
		$channels{"ABC NSW"}="2";
		$channels{"Prime Southern"}="7";
		$channels{"SBS Sydney"}="SBS";
		$channels{"Southern Cross TEN Capital"}="10";
		$channels{"WIN Television NSW"}="9"
	} else {
		print "Unknown source '$source' for $location\n";
		exit (1);
	}
} elsif ("Brisbane" eq $location) {
	$region = "79";
	$offset = "+1000";
	if (("free" eq $source)||("freesd" eq $source)) {
		$channels{"ABC QLD"}="2";
		$channels{"Channel Seven Queensland"}="7";
		$channels{"SBS Queensland"}="SBS";
		$channels{"Southern Cross TEN Queensland"}="10";
		$channels{"WIN Television QLD"}="9";
	} else {
		print "Unknown source '$source' for $location\n";
		exit (1);
	}
} elsif ("Sydney" eq $location) {
	$region = "73";
	$offset = "+1100";
	if (("free" eq $source)||("freesd" eq $source)) {
		$channels{"ABC NSW"}="2";
		$channels{"Channel Seven Sydney"}="7";
		$channels{"SBS Sydney"}="SBS";
		$channels{"Network TEN Sydney"}="10";
		$channels{"Channel Nine Sydney"}="9";
	} else {
		print "Unknown source '$source' for $location\n";
		exit (1);
	}
} elsif ("Adelaide" eq $location) {
	$region = "81";
	$offset = "+0930";
	if (("free" eq $source)||("freesd" eq $source)) {
		$channels{"ABC SA"}="2";
		$channels{"Channel Seven Adelaide"}="7";
		$channels{"SBS SA"}="SBS";
		$channels{"Network TEN Adelaide"}="10";
		$channels{"Channel Nine Adelaide"}="9"; 
	} else {
		print "Unknown source '$source' for $location\n";
		exit (1);
	} 
} elsif ("Australia" eq $location) {
	$region = "123";
	$offset = "+0930";
	if ("foxtel" eq $source) {
		$channels{"Arena TV"}="Arena";
		$channels{"BBC World"}="BBC";
		$channels{"Cartoon Network"}="Cartoon";
		$channels{"Channel [V]"}="Red";
		$channels{"CNBC"}="CNBC";
		$channels{"CNN"}="CNN";
		$channels{"Discovery Channel"}="Disc";
		$channels{"FOX News"}="FoxFNC";
		$channels{"FOX8"}="FOX";
		$channels{"MAX"}="FoxMMX";
		$channels{"National Geographic Channel"}="NatGe";
		$channels{"Nickelodeon"}="Nick";
		$channels{"Showtime"}="Show";
		$channels{"Showtime 2"}="FoxSH2";
		$channels{"Sky News"}="SkyNews";
		$channels{"TV1"}="TV1";
		$channels{"UKTV"}="UKTV";
		$channels{"Showtime Greats"}="ShowGreats";
		$channels{"World Movies"}="wmov";
		$channels{"WCH"}="WCH";
		$channels{"TVSN"}="TVSN";
		$channels{"Sky Racing"}="SkyRa";
		$channels{"Ovation"}="Ovation";
		$channels{"Disney Channel"}="Disney";
		$channels{"Animal Planet"}="Animal";
		$channels{"The Comedy Channel"}="Com";
		$channels{"The LifeStyle Channel"}="Lifes";
		$channels{"FOX Sports 1"}="FoxFS1";
		$channels{"Movie One"}="Movie1";
		$channels{"TCM"}="TCM";
		$channels{"MTV"}="MTV";
		$channels{"FOX Sports 2"}="FoxSP2";
		$channels{"FOX Footy Channel"}="FFC";
		$channels{"Movie Extra"}="MovieEx";
		$channels{"Hallmark Channel"}="Hall";
		$channels{"The History Channel"}="FoxHST";
		$channels{"ESPN"}="ESPN";
		$channels{"FOX Classics"}="FoxCLA";
		$channels{"Movie Greats"}="MovieGr"; 
	} else {
		print "Unknown source '$source' for $location\n";
		exit (1);
	}
} else {
	print "Unknown location '$location'\n";
	exit (1);
}

my $prog_ref;
my $chan_ref;

foreach my $channel (keys %channels)
{
	$$chan_ref{$channel} = 
        {
		'id' => $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix,
		'display-name' => [ [ $channel, undef ]]
	};
}


# Options
my $opt_days;
my $opt_output;
my $opt_configfile;
my $opt_configure = 0;

GetOptions('days=i'        => \$opt_days,
	   'output=s'      => \$opt_output,
	   'config-file=s' => \$opt_configfile,
	   'configure'     => \$opt_configure,
	   );

if ($opt_days) {
	$days_to_grab = $opt_days
}

if (!($opt_output)) {
	$opt_output = $cache_dir . "/guide.xml";
}

# $opt_configfile should probably do something ('/home/mythtv/.mythtv/tv_grab_au.xmltv')

if ($opt_configure == 1)
{
    print "configuration must be done in this script $0\n";
    exit (0);
}

print "grabing $days_to_grab days into $opt_output\n";




print "starting $threads threads\n";

my @thrlist;
my $datepids = Thread::Queue->new;

for (my $thread=0; $thread<$threads; $thread++)
{
        push @thrlist, threads->new(\&fetch_details);
}

print "loading queue\n";

my $currentday = &ParseDate("today");
my $day_counter = 1;
while ($day_counter <= $days_to_grab)
{
	my $date = &UnixDate($currentday, "%d%m%Y");
	my @day_lines = get_day($date,1);
	if (@day_lines == 0)
	{
		$currentday = &DateCalc($currentday, "+ 1 day");	
		$day_counter++;
	        next;
	}

	my @pids;
	my @rowspans;
	my @names;
	foreach my $line (@day_lines)
	{
		foreach my $link (split /\n|tr|TR|TD|tr/, $line )
		{
			if ($link =~ /closeup\/default.asp/)
			{
				my $rowspan = $link;
				$rowspan =~ s/.+rowspan=//g;
				$rowspan =~ s/ .+//g;
				
				my $name = $link;
				$name =~ s/.+target=new>(<P>|)//g;
				$name =~ s/<\/a>.+//g;
			
				$link =~ s/.+pid=//g;
				$link =~ s/".+//g; #"
				if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/))
				{
					push @pids, $link;
					push @rowspans, $rowspan;
					push @names, $name;					
				}
			}
		}
	}

	if (changed_guide($date, at pids, at rowspans, at names))
	{
		for (my $count=0; $count <= $#pids; $count++)
		{
			$datepids->enqueue($date . "-" . $pids[$count]);
		}
	}

	$day_counter++;
	$currentday = &DateCalc($currentday, "+ 1 day");
}

for (my $thread=0; $thread<$threads; $thread++)
{
	$datepids->enqueue(0 . "-" . 0);
}

print "queue is complete\n";

foreach my $thr (@thrlist)
{
    $thr->join;
}

print "all threads done\n";
print "building xml structure\n";

$currentday = &ParseDate("today");
$day_counter = 1;
while ($day_counter <= $days_to_grab)
{
	my @pids;
	my $date = &UnixDate($currentday, "%d%m%Y");

	my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
	if (open(PRN, $guide_prn_file))  
	{
		my @prn = split />/, <PRN>;
		close(PRN);
		
		if ($#prn > 1)
		{
			my $pidlast = ($#prn + 1)/3 - 1;
			@pids=@prn[0..$pidlast];
		}
		else
		{
			print "no pids in $guide_prn_file\n";
			@pids=();
		}
	}
	else
	{
		print "can't read $guide_prn_file\n";
		@pids=()
	}
	
	my $retry = 0;		
	foreach my $pid (@pids)
	{
		my @details = get_details($date, $pid);
		if (@details == 0)
		{
		        next;
		}
		
		my $show_details_table = "";
		my $use_line = 0;
		my $close_html = 0;
		foreach my $line (@details)
		{
			if ($line =~ /bgColor=#f7f3e8/)
			{
				$use_line = 0;
			}
			if ($use_line == 1)
			{
				$show_details_table .= $line;
			}
			if ($line =~ /bgcolor=#ffffff/)
			{
				$use_line = 1;
			}
			if ($line =~ /<\/HTML>/)
			{
				$close_html = 1;
			}
	    	}

		if ($close_html == 0)
		{
			my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
			if ($retry++ >= $retrys)
			{
				print "giving up on truncated $name\n";
				$retry=0;
				next;
			}
			unlink $name;
			push @pids, $pid;
			print "t";	# truncated
			sleep($secondsbeforeretry);
			next;
		}

		if ((length $show_details_table) == 0)
		{
			print "m";	# missing: can't do anything about this
			$retry=0;
			next;
		}
	    	
	    	$show_details_table =~ s/<[^>]*>/\n/g;
		$show_details_table =~ s/\&nbsp\;//g;
		#$show_details_table =~ s/<BR>|<TR>|<TD><B><b><\/B><\/b>/\n/g;
		#$show_details_table =~ s/Genre://g;
		#$show_details_table =~ s/Rated:/\n/g;
		my $count = 0;

		my $channel = "";
		my $start_date = &UnixDate($currentday, "%Y-%m-%d");
		my $time;
		my $title1 = "";
		my $title2 = "";
		my $genre = "";
		my $descr = "";
		my $details = "";
		my $duration;


		#print $show_details_table. "\n\n\n";
		foreach my $line (split /\n/, $show_details_table)
		{
			if ($count == 4){
				#print "Time: " . $line . "\n";
				$time = $line;
			}
			elsif ($count == 7){
				$channel = $line;
				#print "Channel: " . $line . "\n";
			}
			elsif ($count == 19){
				$title1 = $line;
				#print "Program: " . $line . "\n";
			}
			elsif ($count == 20){
				$line =~ s/ - //g;
				$title2 = $line;
				#print "Subtitle: " . $line . "\n";
			}
			elsif ($count == 21){
				$line =~ s/\D//g;
				$duration = $line;
				#print "Run time: " . $line . "\n";
			}
			elsif ($count == 22){
				$line =~ s/[^A-Z]//g;
				$details = $line;
				#print "Rating: " . $line . "\n";
			}
			elsif ($count == 26){
				$line =~ s/ //g;
				$genre = $line;
				#print "Genre: " . $line . "\n";
			}
			elsif ($count == 28 && $line =~ /[a-zA-Z]/){
				$descr = $line;
				#print "Description: " . $line . "\n";
			}
			#elsif ($count == 26 && $line =~ /[a-zA-Z]/){
			#	$descr = $line;
			#	print "Description: " . $line . "\n";
			#}
			#print $count .": " . $line . "\n";
			++$count;
		}
		
		
		my $start_time = &UnixDate($time, "%H:%M");
#		my $start_datetime = $start_date . " " . $start_time;
		if ($start_time =~ /00:|01:|02:|03:|04:|05:/)
		{
			$start_date = &DateCalc($start_date, "+ 1 day");
		}
		$start_date = &UnixDate($start_date, "%Y%m%d");
		my $end_time = &DateCalc($start_time, " + " . $duration . "minutes");
		$end_time = &UnixDate($end_time, "%H:%M");
		
		my $end_date;
		if (&Date_Cmp($start_time, $end_time) <= 0)
		{
			$end_date = $start_date;
		}
		else
		{
			my $err;
			my $edate = &DateCalc($start_date, "+ 1 day", \$err);
			$end_date = &UnixDate($edate, "%Y%m%d");
		}

		if (defined $channels{$channel})
		{
			$channel = $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix;
		}
		else
		{
			print "unknown channel $channel\n";
			$retry=0;
			next;
		}
		
		my $start;
		my $stop;
		
		$start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset;
		$stop = $end_date . &UnixDate($end_time,"%H%M") . "00 " . $offset;
		
		my $a_prog = { 
			channel => $channel,
			start   => $start,
			stop    => $stop,
			title   => [ [ $title1, undef ] ]
		};
		
		$descr =~ s/^\s+//;
		$descr =~ s/\s+$//;

		if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; }
		if ($descr)  { $$a_prog{desc}        = [ [ $descr, undef ] ]; }
		if ($genre)  { $$a_prog{category}    = [ [ $genre, undef ] ]; }
		            
		push @$prog_ref, $a_prog;
		$retry=0;		
	}
	
	$currentday = &DateCalc($currentday, "+ 1 day");	
	$day_counter++;
}

my $data = [
    'ISO-8859-1',
    {
     'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
     'generator-info-name' => 'NineMSN grabber',
     'generator-info-url'  => '',
     'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2"
    },
    $chan_ref,
    $prog_ref
];

my $hour=&UnixDate(&ParseDate("now"),"%H");
if ($hour < 6)
{
    print "can't update between 0:00 and 6:00\n";
# If we update between these hours we lose any data we had up to 6:00.
# This is because the web site starts a day at 6:00 and ends at 6:00 the next day
# This could be fixed by read the previous days info and adding the needed shows.
# I did try adding the whole previous day but got lots of mythfilldatabase errors.
    exit(1);
}

print "writing file\n";

my $fh = new IO::File ">$opt_output";
XMLTV::write_data($data, OUTPUT=>$fh);

print "done\n";

# subroutines
sub get_day 
{
	my $date = shift;
	my $force = shift;
	my $url = $guide_url . $date . "_" . $region . ".asp";
	
	my $guide_dir = $cache_dir . "/" . $date;
	my $guide_file = $guide_dir . "/guide.html";
	mkpath ($guide_dir);

	for (my $retry=0; (($force==1) || (!(-e $guide_file))) && is_error(getstore($url, $guide_file)) && ($retry<$retrys); $retry++)
	{
		print ".";
		sleep($secondsbeforeretry);
	}
	
	my @guide_lines;
	if (open(GUIDE, $guide_file))
	{
		@guide_lines = <GUIDE>;
		close(GUIDE);
	}
	else
	{
		@guide_lines = ();
		print "giving up on $guide_file\n";
	}
	return @guide_lines;
}

sub get_details
{
	my $date = shift;
	my $program_id = shift;
	
	my $url = $details_url . $program_id;
	my $guide_dir = $cache_dir . "/" . $date;
	my $details_file = $guide_dir . "/" . $program_id . ".html";
	mkpath ($guide_dir);
	
	for (my $retry=0; (!(-e $details_file)) && is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++)
	{
		print ".";
		sleep($secondsbeforeretry);
	}

	my @details_lines;
	if (open(DETAILS, $details_file))
	{
		@details_lines = <DETAILS>;
		close(DETAILS);
	}
	else
	{
		@details_lines = ();
		print "giving up on $details_file\n";
	}
	return @details_lines;
}

sub fetch_details
{
	my $datepid=$datepids->dequeue;
	my @datepidl=split /-/, $datepid;
	my $date = $datepidl[0];
	my $pid = $datepidl[1];

	while (($date!=0) and ($pid!=0))
	{
		my $guide_dir = $cache_dir . "/" . $date;
		mkpath ($guide_dir);
	
		my $url = $details_url . $pid;
		my $details_file = $guide_dir . "/" . $pid . ".html";

		for (my $retry=0; is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++)
		{
			sleep($secondsbeforeretry);
		}

		$datepid=$datepids->dequeue;
		@datepidl=split /-/, $datepid;
		$date = $datepidl[0];
		$pid = $datepidl[1];
	}
}

sub changed_guide
{
	my $date = shift;
	my @pidsrowspansnames = @_;
	
	my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
	if (open(PRN, $guide_prn_file))  
	{
		my @prn = split />/, <PRN>;
		close(PRN);

		if (($#prn > 1) and ($#prn == $#pidsrowspansnames))
		{
			my $count;
			my $diff = ((($#prn+1)*2)/3)-1;
			for ($count=0; ($count <= $diff) && ($prn[$count]==$pidsrowspansnames[$count]); $count++)
			{ }
			
			if ($count==($diff+1))
			{
				for (; ($count <= $#prn) && ($prn[$count] eq $pidsrowspansnames[$count]); $count++)
				{ }
			
	    			if ($count==($#prn+1))
				{
					print "$date unchanged\n";
					return 0;
				}
			}
		}
	}

	print "$date downloading\n";

	if (open(PRN, ">", $guide_prn_file))
	{
		for (my $count=0; $count<$#pidsrowspansnames; $count++)
		{
			print PRN "$pidsrowspansnames[$count]>";
		}
		print PRN "$pidsrowspansnames[$#pidsrowspansnames]";
		close(PRN);
	}
	else
	{
		print "can't open for writing $guide_prn_file\n";
	}

	return 1;
}


More information about the mythtv-users mailing list