SOURCES: xmltv-grab_ee-20050412.diff (NEW) - tv_grab_ee from http:...
glen
glen at pld-linux.org
Sat Sep 24 18:29:46 CEST 2005
Author: glen Date: Sat Sep 24 16:29:46 2005 GMT
Module: SOURCES Tag: HEAD
---- Log message:
- tv_grab_ee from http://wiki.version6.net/MythTV
---- Files affected:
SOURCES:
xmltv-grab_ee-20050412.diff (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/xmltv-grab_ee-20050412.diff
diff -u /dev/null SOURCES/xmltv-grab_ee-20050412.diff:1.1
--- /dev/null Sat Sep 24 18:29:46 2005
+++ SOURCES/xmltv-grab_ee-20050412.diff Sat Sep 24 18:29:41 2005
@@ -0,0 +1,541 @@
+Index: Makefile.PL
+===================================================================
+RCS file: /cvsroot/xmltv/xmltv/Makefile.PL,v
+retrieving revision 1.239
+diff -u -u -r1.239 Makefile.PL
+--- Makefile.PL 19 Mar 2005 16:37:29 -0000 1.239
++++ Makefile.PL 12 Apr 2005 09:11:57 -0000
+@@ -408,6 +408,12 @@
+ prereqs => { 'CGI' => 0 },
+ type => 'run',
+ },
++
++ { name => 'tv_grab_ee',
++ blurb => 'Grabber for Estonia',
++ exes => [ 'grab/ee/tv_grab_ee' ],
++ prereqs => { 'HTML::TreeBuilder' => 0 } },
++
+ );
+
+ # Now we need to prompt about each optional component. The style of
+Index: grab/ee/tv_grab_ee
+===================================================================
+RCS file: grab/ee/tv_grab_ee
+diff -N dir/uusfail
+--- /dev/null 1 Jan 1970 00:00:00 -0000
++++ grab/ee/tv_grab_ee 12 Jan 2005 20:28:59 -0000
+@@ -0,0 +1,514 @@
++#!/usr/bin/perl -w
++
++=pod
++
++=head1 NAME
++
++tv_grab_fi - Grab TV listings for Estonia.
++
++=head1 SYNOPSIS
++
++tv_grab_ee --help
++
++tv_grab_ee [--config-file FILE] --configure [--gui OPTION]
++
++tv_grab_ee [--config-file FILE] [--output FILE] [--days N]
++ [--offset N] [--quiet]
++
++tv_grab_ee --list-channels
++
++=head1 DESCRIPTION
++
++Output TV listings for several channels available in Estonia.
++The data comes from http://ajaviide.delfi.ee/events/tv/.
++The grabber relies on parsing HTML so it might stop working at any time.
++
++First run B<tv_grab_ee --configure> to choose, which channels you want
++to download. Then running B<tv_grab_ee> with no arguments will output
++listings in XML format to standard output.
++
++B<--configure> Prompt for which channels,
++and write the configuration file.
++
++B<--config-file FILE> Set the name of the configuration file, the
++default is B<~/.xmltv/tv_grab_ee.conf>. This is the file written by
++B<--configure> and read when grabbing.
++
++B<--gui OPTION> Use this option to enable a graphical interface to be used.
++OPTION may be 'Tk', or left blank for the best available choice.
++Additional allowed values of OPTION are 'Term' for normal terminal output
++(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.
++
++B<--output FILE> write to FILE rather than standard output.
++
++B<--days N> grab N days. The default is ten.
++
++B<--offset N> start N days in the future. The default is to start
++from today.
++
++B<--quiet> suppress the progress messages normally written to standard
++error.
++
++B<--list-channels> write output giving <channel> elements for every
++channel available (ignoring the config file), but no programmes.
++
++B<--help> print a help message and exit.
++
++=head1 SEE ALSO
++
++L<xmltv(5)>.
++
++=head1 AUTHOR
++
++Cougar < cougar at random.ee >. Based on tv_grab_fi by Matti Airas, mairas at iki.fi.
++
++=head1 BUGS
++
++The data source does not include full channels information and the
++channels are identified by short names rather than the RFC2838 form
++recommended by the XMLTV DTD.
++
++=cut
++
++######################################################################
++# initializations
++
++use strict;
++use XMLTV::Version '$Id$ ';
++use Getopt::Long;
++use Date::Manip;
++use HTML::TreeBuilder;
++use HTML::Entities; # parse entities
++use IO::File;
++
++use XMLTV;
++use XMLTV::Memoize;
++use XMLTV::ProgressBar;
++use XMLTV::Ask;
++use XMLTV::Config_file;
++use XMLTV::DST;
++use XMLTV::Get_nice;
++use XMLTV::Mode;
++use XMLTV::Date;
++# Todo: perhaps we should internationalize messages and docs?
++use XMLTV::Usage <<END
++$0: get Estonian television listings in XMLTV format
++To configure: $0 --configure [--config-file FILE]
++To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
++ [--offset N] [--quiet]
++To list channels: $0 --list-channels
++END
++ ;
++
++# Attributes of the root element in output.
++my $HEAD = { 'source-info-url' => 'http://ajaviide.delfi.ee/events/tv/',
++ 'source-data-url' => "http://ajaviide.delfi.ee/events/tv/",
++ 'generator-info-name' => 'XMLTV',
++ 'generator-info-url' => 'http://membled.com/work/apps/xmltv/',
++ };
++
++# Whether zero-length programmes should be included in the output.
++my $WRITE_ZERO_LENGTH = 0;
++
++# The winter timezone in Estonia. Summer time is one hour ahead of this.
++my $TZ="+0200";
++
++# default language
++my $LANG="et";
++
++# Global channel data.
++our @ch_all;
++
++
++######################################################################
++# get options
++
++# Get options, including undocumented --cache option.
++XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
++my ($opt_days, $opt_offset, $opt_help, $opt_output,
++ $opt_configure, $opt_config_file, $opt_gui,
++ $opt_quiet, $opt_list_channels);
++$opt_days = 10; # default
++$opt_offset = 0; # default
++$opt_quiet = 0; # default
++GetOptions('days=i' => \$opt_days,
++ 'offset=i' => \$opt_offset,
++ 'help' => \$opt_help,
++ 'configure' => \$opt_configure,
++ 'config-file=s' => \$opt_config_file,
++ 'gui:s' => \$opt_gui,
++ 'output=s' => \$opt_output,
++ 'quiet' => \$opt_quiet,
++ 'list-channels' => \$opt_list_channels,
++ )
++ or usage(0);
++die 'number of days must not be negative'
++ if (defined $opt_days && $opt_days < 0);
++usage(1) if $opt_help;
++
++XMLTV::Ask::init($opt_gui);
++
++my $mode = XMLTV::Mode::mode('grab', # default
++ $opt_configure => 'configure',
++ $opt_list_channels => 'list-channels',
++ );
++
++# File that stores which channels to download.
++my $config_file
++ = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_ee', $opt_quiet);
++
++my @config_lines; # used only in grab mode
++if ($mode eq 'configure') {
++ XMLTV::Config_file::check_no_overwrite($config_file);
++}
++elsif ($mode eq 'grab') {
++ @config_lines = XMLTV::Config_file::read_lines($config_file);
++}
++elsif ($mode eq 'list-channels') {
++ # Config file not used.
++}
++else { die }
++
++# Whatever we are doing, we need the channels data.
++my %channels = get_channels(); # sets @ch_all
++my @channels;
++
++######################################################################
++# write configuration
++
++if ($mode eq 'configure') {
++ open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
++
++ # Ask about each channel.
++ my @chs = sort keys %channels;
++ my @names = map { $channels{$_} } @chs;
++ my @qs = map { "add channel $_?" } @names;
++ my @want = ask_many_boolean(1, @qs);
++ foreach (@chs) {
++ my $w = shift @want;
++ warn("cannot read input, stopping channel questions"), last
++ if not defined $w;
++ # No need to print to user - XMLTV::Ask is verbose enough.
++
++ # Print a config line, but comment it out if channel not wanted.
++ print CONF '#' if not $w;
++ my $name = shift @names;
++ print CONF "channel $_ $name\n";
++ # TODO don't store display-name in config file.
++ }
++
++ close CONF or warn "cannot close $config_file: $!";
++ say("Finished configuration.");
++
++ exit();
++}
++
++# Not configuration, we must be writing something, either full
++# listings or just channels.
++#
++die if $mode ne 'grab' and $mode ne 'list-channels';
++
++# Options to be used for XMLTV::Writer.
++my %w_args;
++if (defined $opt_output) {
++ my $fh = new IO::File(">$opt_output");
++ die "cannot write to $opt_output: $!" if not defined $fh;
++ $w_args{OUTPUT} = $fh;
++}
++$w_args{encoding} = 'ISO-8859-1';
++my $writer = new XMLTV::Writer(%w_args);
++$writer->start($HEAD);
++
++if ($mode eq 'list-channels') {
++ # Write channels mode.
++ $writer->write_channel($_) foreach @ch_all;
++ $writer->end();
++ exit();
++}
++
++######################################################################
++# We are producing full listings.
++die if $mode ne 'grab';
++
++# Read configuration.
++my $line_num = 1;
++foreach (@config_lines) {
++ ++ $line_num;
++ next if not defined;
++ if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
++ my $ch_did = $1;
++ my $ch_name = $2;
++ $ch_name =~ s/\s*$//;
++ push @channels, $ch_did;
++ $channels{$ch_did} = $ch_name;
++ }
++ else {
++ warn "$config_file:$line_num: bad line\n";
++ }
++}
++
++######################################################################
++# begin main program
++
++# Assume the listings source uses EET (see BUGS above).
++my $now = DateCalc(parse_date('now'), "$opt_offset days");
++die "No channels specified, run me with --configure\n"
++ if not keys %channels;
++my @to_get;
++
++# the order in which we fetch the channels matters
++foreach my $ch_did (@channels) {
++ my $ch_name=$channels{$ch_did};
++ my $ch_xid="$ch_did.tv.delfi.ee";
++ $writer->write_channel({ id => $ch_xid,
++ 'display-name' => [ [ $ch_name ] ] });
++ my $day=UnixDate($now,'%Q');
++ for (my $i=0;$i<$opt_days;$i++) {
++ push @to_get, [ $day, $ch_xid, $ch_did ];
++ #for each day
++ $day=nextday($day); die if not defined $day;
++ }
++}
++
++# This progress bar is for both downloading and parsing. Maybe
++# they could be separate stages.
++#
++my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get)
++ if not $opt_quiet;
++foreach (@to_get) {
++ foreach (process_table($_->[0], $_->[1], $_->[2])) {
++ $writer->write_programme($_);
++ }
++ update $bar if not $opt_quiet;
++}
++$bar->finish() if not $opt_quiet;
++$writer->end();
++
++######################################################################
++# subroutine definitions
++
++# Use Log::TraceMessages if installed.
++BEGIN {
++ eval { require Log::TraceMessages };
++ if ($@) {
++ *t = sub {};
++ *d = sub { '' };
++ }
++ else {
++ *t = \&Log::TraceMessages::t;
++ *d = \&Log::TraceMessages::d;
++ Log::TraceMessages::check_argv();
++ }
++}
++
++my $warned_bad_chars;
++sub tidy( $ ) {
++ for (my $tmp = shift) {
++ tr/\t\205/ /d;
++ if (s/([^\012\015\040-\176\240-\377]+)//g) {
++ warn "removing bad characters: '$1'"
++ unless $warned_bad_chars++;
++ }
++ return $_;
++ }
++}
++
++####
++# process_table: fetch a URL and process it
++#
++# arguments:
++# Date::Manip object giving the day to grab
++# xmltv id of channel
++# katso id of channel
++#
++# returns: list of programme hashes to write
++#
++sub process_table {
++ my ($date, $ch_xmltv_id, $ch_id) = @_;
++
++ my $today = UnixDate($date, '%Y-%m-%d');
++ my $url = "http://ajaviide.delfi.ee/events/tv/program.php?id=$ch_id&date=$today";
++ t "getting URL: $url";
++ my $data=tidy(get_nice($url));
++ if (not defined $data) {
++ die "could not fetch $url, aborting\n";
++ }
++ local $SIG{__WARN__} = sub {
++ warn "$url: $_[0]";
++ };
++
++ # parse the page to a document object
++ my $tree = HTML::TreeBuilder->new();
++ $tree->parse($data);
++ my @program_data = get_program_data($tree);
++ my $bump_start_day=0;
++
++ my @r;
++ while (@program_data) {
++ my $cur = shift @program_data;
++ my $next = shift @program_data;
++ unshift @program_data,$next if $next;
++ push @r, make_programme_hash($date, $ch_xmltv_id, $ch_id, $cur, $next);
++ if (!$bump_start_day && bump_start_day($cur,$next)) {
++ $bump_start_day=1;
++ $date = UnixDate(DateCalc($date,"+ 1 day"),'%Q');
++ }
++ }
++ return @r;
++}
++
++sub make_programme_hash {
++ my ($date, $ch_xmltv_id, $ch_id, $cur, $next) = @_;
++
++ my %prog;
++
++ $prog{channel}=$ch_xmltv_id;
++ $prog{title}=[ [ $cur->{title}, $LANG ] ];
++
++ my $cur_time = $cur->{time};
++ t 'raw time for programme: ' . d $cur_time;
++ t 'with base (winter) timezone: ' . d $TZ;
++ my $start=parse_local_date("$date $cur_time", $TZ);
++ t 'parse_local_date() returned: ' . d $start;
++ my ($start_base, $start_tz) = @{date_to_local($start, $TZ)};
++ t "date_to_local() returned time $start_base, timezone $start_tz";
++ $prog{start}=UnixDate($start_base, '%q') . " $start_tz";
++ t 'set programme start time to: ' . d $prog{start};
++
++ my $next_time = $next ? $next->{time} : undef;
++ if (defined $next_time) {
++ t '$cur_time=' . d $cur_time;
++ t '$next_time=' . d $next_time;
++ my $stop_date;
++ if ($next_time lt $cur_time) {
++ # Must span midnight. (Don't worry about start being
++ # summer time and stop being winter: we assume the site is
++ # sane enough to put them both in the same timezone and
++ # avoid looking like stop < start.)
++ #
++ t '$next_time appears sooner, must be next day';
++ $stop_date = nextday($date);
++ }
++ else {
++ $stop_date = $date;
++ }
++ t '$stop_date set to: ' . d $stop_date;
++ my $stop = parse_local_date("$stop_date $next_time", $TZ);
++ t 'stop time in UTC: ' . d $stop;
++ my ($stop_base, $stop_tz) = @{date_to_local($stop, $TZ)};
++ t 'converted back to Estonian: ' . d [ $stop_base, $stop_tz ];
++ $prog{stop}=UnixDate($stop_base, '%q') . " $stop_tz";
++ }
++
++ # FIXME: parse description field further
++ $prog{desc}=[ [ $cur->{desc}, $LANG ] ] if defined $cur->{desc};
++
++ return \%prog;
++}
++sub bump_start_day {
++ my ($cur,$next) = @_;
++ if (!defined($next)) {
++ return undef;
++ }
++ my $start = UnixDate($cur->{time},'%H:%M');
++ my $stop = UnixDate($next->{time},'%H:%M');
++ if (Date_Cmp($start,$stop)>0) {
++ return 1;
++ } else {
++ return 0;
++ }
++}
++
++# program data is split as follows:
++# - td with class=kanava-klo is the time
++# - its sibling td with class=kanava-ohj(-nyt)? (two to the right)
++# is the program
++# - second td on the next row is the description
++sub get_program_data {
++ my $tree = shift;
++ t "get_program_data() ENTRY for tree: $tree";
++ my @data;
++
++ my $last_time = "";
++ my $last_title;
++
++ my @table_cont = $tree->look_down("_tag"=>"table", "cellspacing"=>"6");
++
++ foreach my $table (@table_cont) {
++ my @rows = $table->look_down("_tag"=>"tr");
++ foreach my $row (@rows) {
++ my @txts = $row->find_by_tag_name("_tag"=>"font", "class"=>"whatsonTVProgramTitle");
++ if (defined $txts[1]) {
++ $last_time = $txts[0]->as_text;
++ $last_title = $txts[1]->as_text;
++ } else {
++ my @txts = $row->find_by_tag_name("_tag"=>"font", "class"=>"whatsonTVProgramLead");
++ if ($last_time ne '') {
++ my %h;
++ $h{time} = $last_time;
++ $h{title} = $last_title;
++ $h{desc} = $last_title;
++ $h{desc} = $txts[0]->as_text if ((defined $txts[0]) && ($txts[0]->as_text ne ''));
++ push @data, \%h;
++ $last_time = "";
++ }
++ }
++ }
++ }
++ t 'get_program_data() RETURNING ' . d \@data;
++ return @data;
++}
++
++# get channel listing
++sub get_channels {
++ my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
++ if not $opt_quiet;
++ my %channels;
++ my $url="http://ajaviide.delfi.ee/events/tv/";
++ my $local_data=get_nice($url);
++ die "could not get channel listing $url, aborting\n"
++ if not defined $local_data;
++
++ # FIXME commonize this
++ local $SIG{__WARN__} = sub {
++ warn "$url: $_[0]";
++ };
++ local $SIG{__DIE__} = sub {
++ die "$url: $_[0]";
++ };
++
++ my $tree = HTML::TreeBuilder->new();
++ $tree->parse($local_data);
++
++ my @conts = map { [ $_->content_list() ] }
++ $tree->look_down('_tag' => 'select', 'name' => 'id');
++
++ foreach my $cont (@conts) {
++ my @children =@$cont;
++ if (scalar(@children) == 0) {
++ warn 'No stations are defined';
++ next;
++ }
++ foreach my $station_line (@children) {
++ if ($station_line ne ' ') {
++ my $channel_id = $station_line->attr('value');
++ my $channel_name = $station_line->as_text();
++ $channels{$channel_id} = $channel_name;
++ push @ch_all, { 'display-name' => [ [ $channel_name, $LANG ] ],
++ 'id' => $channel_id };
++ }
++ }
++ }
++ die "no channels could be found" if not keys %channels;
++ update $bar if not $opt_quiet;
++ $bar->finish() if not $opt_quiet;
++ return %channels;
++}
++
++# Bump a YYYYMMDD date by one.
++sub nextday {
++ my $d = shift;
++ my $p = parse_date($d);
++ my $n = DateCalc($p, '+ 1 day');
++ return UnixDate($n, '%Q');
++}
================================================================
More information about the pld-cvs-commit
mailing list