SOURCES: xmltv-grab_ee-20050412.diff (NEW) - tv_grab_ee from http:...

glen glen at pld-linux.org
Sat Sep 24 18:29:46 CEST 2005


Author: glen                         Date: Sat Sep 24 16:29:46 2005 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- tv_grab_ee from http://wiki.version6.net/MythTV

---- Files affected:
SOURCES:
   xmltv-grab_ee-20050412.diff (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/xmltv-grab_ee-20050412.diff
diff -u /dev/null SOURCES/xmltv-grab_ee-20050412.diff:1.1
--- /dev/null	Sat Sep 24 18:29:46 2005
+++ SOURCES/xmltv-grab_ee-20050412.diff	Sat Sep 24 18:29:41 2005
@@ -0,0 +1,541 @@
+Index: Makefile.PL
+===================================================================
+RCS file: /cvsroot/xmltv/xmltv/Makefile.PL,v
+retrieving revision 1.239
+diff -u -u -r1.239 Makefile.PL
+--- Makefile.PL	19 Mar 2005 16:37:29 -0000	1.239
++++ Makefile.PL	12 Apr 2005 09:11:57 -0000
+@@ -408,6 +408,12 @@
+        prereqs => { 'CGI' => 0 },
+        type => 'run',
+      },
++
++     { name => 'tv_grab_ee',
++       blurb => 'Grabber for Estonia',
++       exes => [ 'grab/ee/tv_grab_ee' ],
++       prereqs => { 'HTML::TreeBuilder' => 0 } },
++
+     );
+ 
+ # Now we need to prompt about each optional component.  The style of
+Index: grab/ee/tv_grab_ee
+===================================================================
+RCS file: grab/ee/tv_grab_ee
+diff -N dir/uusfail
+--- /dev/null	1 Jan 1970 00:00:00 -0000
++++ grab/ee/tv_grab_ee	12 Jan 2005 20:28:59 -0000
+@@ -0,0 +1,514 @@
++#!/usr/bin/perl -w
++
++=pod
++
++=head1 NAME
++
++tv_grab_fi - Grab TV listings for Estonia.
++
++=head1 SYNOPSIS
++
++tv_grab_ee --help
++
++tv_grab_ee [--config-file FILE] --configure [--gui OPTION]
++
++tv_grab_ee [--config-file FILE] [--output FILE] [--days N]
++           [--offset N] [--quiet]
++
++tv_grab_ee --list-channels
++
++=head1 DESCRIPTION
++
++Output TV listings for several channels available in Estonia.
++The data comes from http://ajaviide.delfi.ee/events/tv/.
++The grabber relies on parsing HTML so it might stop working at any time.
++
++First run B<tv_grab_ee --configure> to choose, which channels you want
++to download. Then running B<tv_grab_ee> with no arguments will output
++listings in XML format to standard output.
++
++B<--configure> Prompt for which channels,
++and write the configuration file.
++
++B<--config-file FILE> Set the name of the configuration file, the
++default is B<~/.xmltv/tv_grab_ee.conf>.  This is the file written by
++B<--configure> and read when grabbing.
++
++B<--gui OPTION> Use this option to enable a graphical interface to be used.
++OPTION may be 'Tk', or left blank for the best available choice.
++Additional allowed values of OPTION are 'Term' for normal terminal output
++(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.
++
++B<--output FILE> write to FILE rather than standard output.
++
++B<--days N> grab N days.  The default is ten.
++
++B<--offset N> start N days in the future.  The default is to start
++from today.
++
++B<--quiet> suppress the progress messages normally written to standard
++error.
++
++B<--list-channels> write output giving <channel> elements for every
++channel available (ignoring the config file), but no programmes.
++
++B<--help> print a help message and exit.
++
++=head1 SEE ALSO
++
++L<xmltv(5)>.
++
++=head1 AUTHOR
++
++Cougar < cougar at random.ee >. Based on tv_grab_fi by Matti Airas, mairas at iki.fi.
++
++=head1 BUGS
++
++The data source does not include full channels information and the
++channels are identified by short names rather than the RFC2838 form
++recommended by the XMLTV DTD.
++
++=cut
++
++######################################################################
++# initializations
++
++use strict;
++use XMLTV::Version '$Id$ ';
++use Getopt::Long;
++use Date::Manip;
++use HTML::TreeBuilder;
++use HTML::Entities; # parse entities
++use IO::File;
++
++use XMLTV;
++use XMLTV::Memoize;
++use XMLTV::ProgressBar;
++use XMLTV::Ask;
++use XMLTV::Config_file;
++use XMLTV::DST;
++use XMLTV::Get_nice;
++use XMLTV::Mode;
++use XMLTV::Date;
++# Todo: perhaps we should internationalize messages and docs?
++use XMLTV::Usage <<END
++$0: get Estonian television listings in XMLTV format
++To configure: $0 --configure [--config-file FILE]
++To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
++        [--offset N] [--quiet]
++To list channels: $0 --list-channels
++END
++  ;
++
++# Attributes of the root element in output.
++my $HEAD = { 'source-info-url'     => 'http://ajaviide.delfi.ee/events/tv/',
++	     'source-data-url'     => "http://ajaviide.delfi.ee/events/tv/",
++	     'generator-info-name' => 'XMLTV',
++	     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
++	   };
++
++# Whether zero-length programmes should be included in the output.
++my $WRITE_ZERO_LENGTH = 0;
++
++# The winter timezone in Estonia.  Summer time is one hour ahead of this.
++my $TZ="+0200";
++
++# default language
++my $LANG="et";
++
++# Global channel data.
++our @ch_all;
++
++
++######################################################################
++# get options
++
++# Get options, including undocumented --cache option.
++XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
++my ($opt_days, $opt_offset, $opt_help, $opt_output,
++    $opt_configure, $opt_config_file, $opt_gui,
++    $opt_quiet, $opt_list_channels);
++$opt_days  = 10; # default
++$opt_offset = 0; # default
++$opt_quiet  = 0; # default
++GetOptions('days=i'        => \$opt_days,
++	   'offset=i'      => \$opt_offset,
++	   'help'          => \$opt_help,
++	   'configure'     => \$opt_configure,
++	   'config-file=s' => \$opt_config_file,
++       'gui:s'         => \$opt_gui,
++	   'output=s'      => \$opt_output,
++	   'quiet'         => \$opt_quiet,
++	   'list-channels' => \$opt_list_channels,
++	  )
++  or usage(0);
++die 'number of days must not be negative'
++  if (defined $opt_days && $opt_days < 0);
++usage(1) if $opt_help;
++
++XMLTV::Ask::init($opt_gui);
++
++my $mode = XMLTV::Mode::mode('grab', # default
++			     $opt_configure => 'configure',
++			     $opt_list_channels => 'list-channels',
++			    );
++
++# File that stores which channels to download.
++my $config_file
++  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_ee', $opt_quiet);
++
++my @config_lines; # used only in grab mode
++if ($mode eq 'configure') {
++    XMLTV::Config_file::check_no_overwrite($config_file);
++}
++elsif ($mode eq 'grab') {
++    @config_lines = XMLTV::Config_file::read_lines($config_file);
++}
++elsif ($mode eq 'list-channels') {
++    # Config file not used.
++}
++else { die }
++
++# Whatever we are doing, we need the channels data.
++my %channels = get_channels(); # sets @ch_all
++my @channels;
++
++######################################################################
++# write configuration
++
++if ($mode eq 'configure') {
++    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
++
++    # Ask about each channel.
++    my @chs = sort keys %channels;
++    my @names = map { $channels{$_} } @chs;
++    my @qs = map { "add channel $_?" } @names;
++    my @want = ask_many_boolean(1, @qs);
++    foreach (@chs) {
++	my $w = shift @want;
++	warn("cannot read input, stopping channel questions"), last
++	  if not defined $w;
++	# No need to print to user - XMLTV::Ask is verbose enough.
++
++	# Print a config line, but comment it out if channel not wanted.
++	print CONF '#' if not $w;
++	my $name = shift @names;
++	print CONF "channel $_ $name\n";
++	# TODO don't store display-name in config file.
++    }
++
++    close CONF or warn "cannot close $config_file: $!";
++    say("Finished configuration.");
++
++    exit();
++}
++
++# Not configuration, we must be writing something, either full
++# listings or just channels.
++#
++die if $mode ne 'grab' and $mode ne 'list-channels';
++
++# Options to be used for XMLTV::Writer.
++my %w_args;
++if (defined $opt_output) {
++    my $fh = new IO::File(">$opt_output");
++    die "cannot write to $opt_output: $!" if not defined $fh;
++    $w_args{OUTPUT} = $fh;
++}
++$w_args{encoding} = 'ISO-8859-1';
++my $writer = new XMLTV::Writer(%w_args);
++$writer->start($HEAD);
++
++if ($mode eq 'list-channels') {
++    # Write channels mode.
++    $writer->write_channel($_) foreach @ch_all;
++    $writer->end();
++    exit();
++}
++
++######################################################################
++# We are producing full listings.
++die if $mode ne 'grab';
++
++# Read configuration.
++my $line_num = 1;
++foreach (@config_lines) {
++    ++ $line_num;
++    next if not defined;
++    if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
++	my $ch_did = $1;
++	my $ch_name = $2;
++	$ch_name =~ s/\s*$//;
++	push @channels, $ch_did;
++	$channels{$ch_did} = $ch_name;
++    }
++    else {
++	warn "$config_file:$line_num: bad line\n";
++    }
++}
++
++######################################################################
++# begin main program
++
++# Assume the listings source uses EET (see BUGS above).
++my $now = DateCalc(parse_date('now'), "$opt_offset days");
++die "No channels specified, run me with --configure\n"
++  if not keys %channels;
++my @to_get;
++
++# the order in which we fetch the channels matters
++foreach my $ch_did (@channels) {
++    my $ch_name=$channels{$ch_did};
++    my $ch_xid="$ch_did.tv.delfi.ee";
++    $writer->write_channel({ id => $ch_xid,
++			     'display-name' => [ [ $ch_name ] ] });
++    my $day=UnixDate($now,'%Q');
++    for (my $i=0;$i<$opt_days;$i++) {
++	push @to_get, [ $day, $ch_xid, $ch_did ];
++	#for each day
++	$day=nextday($day); die if not defined $day;
++    }
++}
++
++# This progress bar is for both downloading and parsing.  Maybe
++# they could be separate stages.
++#
++my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get)
++  if not $opt_quiet;
++foreach (@to_get) {
++    foreach (process_table($_->[0], $_->[1], $_->[2])) {
++	$writer->write_programme($_);
++    }
++    update $bar if not $opt_quiet;
++}
++$bar->finish() if not $opt_quiet;
++$writer->end();
++
++######################################################################
++# subroutine definitions
++
++# Use Log::TraceMessages if installed.
++BEGIN {
++    eval { require Log::TraceMessages };
++    if ($@) {
++	*t = sub {};
++	*d = sub { '' };
++    }
++    else {
++	*t = \&Log::TraceMessages::t;
++	*d = \&Log::TraceMessages::d;
++	Log::TraceMessages::check_argv();
++    }
++}
++
++my $warned_bad_chars;
++sub tidy( $ ) {
++    for (my $tmp = shift) {
++	tr/\t\205/ /d;
++	if (s/([^\012\015\040-\176\240-\377]+)//g) {
++	    warn "removing bad characters: '$1'"
++	      unless $warned_bad_chars++;
++	}
++	return $_;
++    }
++}
++
++####
++# process_table: fetch a URL and process it
++#
++# arguments:
++#    Date::Manip object giving the day to grab
++#    xmltv id of channel
++#    katso id of channel
++#
++# returns: list of programme hashes to write
++#
++sub process_table {
++    my ($date, $ch_xmltv_id, $ch_id) = @_;
++
++    my $today = UnixDate($date, '%Y-%m-%d');
++    my $url = "http://ajaviide.delfi.ee/events/tv/program.php?id=$ch_id&date=$today";
++    t "getting URL: $url";
++    my $data=tidy(get_nice($url));
++    if (not defined $data) {
++	die "could not fetch $url, aborting\n";
++    }
++    local $SIG{__WARN__} = sub {
++	warn "$url: $_[0]";
++    };
++
++    # parse the page to a document object
++    my $tree = HTML::TreeBuilder->new();
++    $tree->parse($data);
++    my @program_data = get_program_data($tree);
++    my $bump_start_day=0;
++
++    my @r;
++    while (@program_data) {
++	my $cur = shift @program_data;
++	my $next = shift @program_data;
++	unshift @program_data,$next if $next;
++	push @r, make_programme_hash($date, $ch_xmltv_id, $ch_id, $cur, $next);
++	if (!$bump_start_day && bump_start_day($cur,$next)) {
++	    $bump_start_day=1;
++	    $date = UnixDate(DateCalc($date,"+ 1 day"),'%Q');
++	}
++    }
++    return @r;
++}
++
++sub make_programme_hash {
++    my ($date, $ch_xmltv_id, $ch_id, $cur, $next) = @_;
++
++    my %prog;
++
++    $prog{channel}=$ch_xmltv_id;
++    $prog{title}=[ [ $cur->{title}, $LANG ] ];
++
++    my $cur_time = $cur->{time};
++    t 'raw time for programme: ' . d $cur_time;
++    t 'with base (winter) timezone: ' . d $TZ;
++    my $start=parse_local_date("$date $cur_time", $TZ);
++    t 'parse_local_date() returned: ' . d $start;
++    my ($start_base, $start_tz) = @{date_to_local($start, $TZ)};
++    t "date_to_local() returned time $start_base, timezone $start_tz";
++    $prog{start}=UnixDate($start_base, '%q') . " $start_tz";
++    t 'set programme start time to: ' . d $prog{start};
++
++    my $next_time = $next ? $next->{time} : undef;
++    if (defined $next_time) {
++	t '$cur_time=' . d $cur_time;
++	t '$next_time=' . d $next_time;
++	my $stop_date;
++	if ($next_time lt $cur_time) {
++	    # Must span midnight.  (Don't worry about start being
++	    # summer time and stop being winter: we assume the site is
++	    # sane enough to put them both in the same timezone and
++	    # avoid looking like stop < start.)
++	    #
++	    t '$next_time appears sooner, must be next day';
++	    $stop_date = nextday($date);
++	}
++	else {
++	    $stop_date = $date;
++	}
++	t '$stop_date set to: ' . d $stop_date;
++	my $stop = parse_local_date("$stop_date $next_time", $TZ);
++	t 'stop time in UTC: ' . d $stop;
++	my ($stop_base, $stop_tz) = @{date_to_local($stop, $TZ)};
++	t 'converted back to Estonian: ' . d [ $stop_base, $stop_tz ];
++	$prog{stop}=UnixDate($stop_base, '%q') . " $stop_tz";
++    }	
++
++    # FIXME: parse description field further
++    $prog{desc}=[ [ $cur->{desc}, $LANG ] ] if defined $cur->{desc};
++
++    return \%prog;
++}
++sub bump_start_day {
++    my ($cur,$next) = @_;
++    if (!defined($next)) {
++	return undef;
++    }
++    my $start = UnixDate($cur->{time},'%H:%M');
++    my $stop = UnixDate($next->{time},'%H:%M');
++    if (Date_Cmp($start,$stop)>0) {
++	return 1;
++    } else {
++	return 0;
++    }
++}
++
++# program data is split as follows:
++# - td with class=kanava-klo is the time
++# - its sibling td with class=kanava-ohj(-nyt)? (two to the right)
++#   is the program
++# - second td on the next row is the description
++sub get_program_data {
++    my $tree = shift;
++    t "get_program_data() ENTRY for tree: $tree";
++    my @data;
++
++    my $last_time = "";
++    my $last_title;
++
++    my @table_cont = $tree->look_down("_tag"=>"table", "cellspacing"=>"6");
++
++    foreach my $table (@table_cont) {
++        my @rows = $table->look_down("_tag"=>"tr");
++        foreach my $row (@rows) {
++            my @txts = $row->find_by_tag_name("_tag"=>"font", "class"=>"whatsonTVProgramTitle");
++            if (defined $txts[1]) {
++                $last_time = $txts[0]->as_text;
++                $last_title = $txts[1]->as_text;
++            } else {
++                my @txts = $row->find_by_tag_name("_tag"=>"font", "class"=>"whatsonTVProgramLead");
++                if ($last_time ne '') {
++                    my %h;
++                    $h{time} = $last_time;
++                    $h{title} = $last_title;
++                    $h{desc} = $last_title;
++                    $h{desc} = $txts[0]->as_text if ((defined $txts[0]) && ($txts[0]->as_text ne ''));
++                    push @data, \%h;
++                    $last_time = "";
++                }
++            }
++        }
++    }
++    t 'get_program_data() RETURNING ' . d \@data;
++    return @data;
++}
++
++# get channel listing
++sub get_channels {
++    my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
++      if not $opt_quiet;
++    my %channels;
++    my $url="http://ajaviide.delfi.ee/events/tv/";
++    my $local_data=get_nice($url);
++    die "could not get channel listing $url, aborting\n"
++      if not defined $local_data;
++
++    # FIXME commonize this
++    local $SIG{__WARN__} = sub {
++	warn "$url: $_[0]";
++    };
++    local $SIG{__DIE__} = sub {
++	die "$url: $_[0]";
++    };
++    
++    my $tree = HTML::TreeBuilder->new();
++    $tree->parse($local_data);
++
++    my @conts = map { [ $_->content_list() ] }
++      $tree->look_down('_tag' => 'select', 'name' => 'id');
++
++    foreach my $cont (@conts) {
++        my @children =@$cont;
++        if (scalar(@children) == 0) {
++            warn 'No stations are defined';
++            next;
++        }
++        foreach my $station_line (@children) {
++	    if ($station_line ne ' ') {
++                my $channel_id = $station_line->attr('value');
++	        my $channel_name = $station_line->as_text();
++	        $channels{$channel_id} = $channel_name;
++	        push @ch_all, { 'display-name' => [ [ $channel_name, $LANG ] ],
++			        'id' => $channel_id };
++            }
++        }
++    }
++    die "no channels could be found" if not keys %channels;
++    update $bar if not $opt_quiet;
++    $bar->finish() if not $opt_quiet;
++    return %channels;
++}
++
++# Bump a YYYYMMDD date by one.
++sub nextday {
++    my $d = shift;
++    my $p = parse_date($d);
++    my $n = DateCalc($p, '+ 1 day');
++    return UnixDate($n, '%Q');
++}
================================================================



More information about the pld-cvs-commit mailing list