if you receive GermanTV via Satellite or Cable, you might find the attached perl script useful that will import the program listings from their web site into the mythtv database.
The program needs to find the internally assigned channel id, so it looks for a callsign like "germ" (not case sensitive). Adjust call sign if necessary.
If you are in a different time zone than PT, adjust the url in the script.
Have fun and if someone has time, add command line parameters for the above options!
br
/Martin
San Diego
#!/usr/local/bin/perl # (c) copyright written by Martin Renschler [EMAIL PROTECTED] # free for private use #enter your html proxy here or empty string if none #e.g. $proxy='http://www-proxy.xxx.de:80/'; $proxy='';
require 5.002;
use LWP;
require HTTP::Request;
require HTML::Parser;
use DBI;
# globals
# counter for program entries across all days, will be incremented before
# first use
$progs=-1;
# array of hashes that holds all program listings
my @listings;
# parser object
#*****************************************
{
package P;
@ISA=qw(HTML::Parser);
$COUNT=0; # prevent multiple instances
$timecheck=0; # should we check for a valid time
$timefound=0; # we found a time entry, the next fields are relevant
$tdcount=0; # how many table cells passed since time found
$textcount=0; # how many text fields passed since cell started
$ampm=0; # have we seen a pm time for this day yet
my @listing; # pointer to listing variable
my $prog; # pointer to program entry counter
my $daymidnight; # the midnight from the current day
sub new # the constuctor for this class
{
my $class = shift; # class always first
$prog=shift; # pointer to prog integer
$listing=shift; # pointer to listing
$daymidnight=shift; # the midnight time for that day
my $self = $class->SUPER::new; # construct from parser super class
die "Can only have one" if $COUNT++; # singleton enforcement
$self; # constructor returns itself
}
sub DESTROY # destructor
{
$COUNT--;
}
# this method is called from the parser for each tag start
sub start
{
my($self, $tag, $attr) = @_;
# if a table cell then we look at text hits
if ($tag eq td) {
$timecheck=1;
if ($timefound) { # if we found a time, we start counting cells
$tdcount++;
}
}
}
# called when tag ends
sub end
{
my($self, $tag) = @_;
if ($tag eq td) { # if cell ends
$timecheck=0; # we check for time only once per cell
if ($timefound && ($tdcount==3)) { # disable processing after 3 cells
$timefound=0;
$tdcount=0;
}
}
}
# this method is called from the parser when plain text is found
sub text
{
my($self, $text) = @_;
# translate special characters to windows char set
# not found to be needed, currently not used
# HTML::Entities::decode($text);
# remove fixed blanks (code A0)
$text =~ s/\xa0/ /g;
# replace new lines with blank
$text =~ s/\n/ /g;
# remove CR
$text =~ s/\r//g;
# use next 2 lines if you need to find special characters
# printf("%s\n",$text);
# printf("%s\n",ascii_to_hex($text));
if ($timecheck) {
# check if text is valid time field
if ($text =~ /([0-9][0-9]):([0-9][0-9]) ([ap]m)/) {
# remember what we found in the regexp
$h=$1;
$m=$2;
$a=$3;
if ($a eq "pm") { # we need to remember if we pass
# am to pm, since then subsequent
# am entries are on the next day
$ampm=1;
if ($h<12) { # we want 24h notation, 3pm is 15:00
$h+=12;
}
} else { # am
if ($h==12) { # 24h notation, 12 am is 00:00
$h=0;
}
if ($ampm==1) { # we have passed noon
$daymidnight+=24*3600; # am times are on next day
$ampm=0; # since we adjusted the base, do
# only once
}
}
# calculate the starttime of the entry
$starttime=$daymidnight+$h*3600+$m*60;
# format for mysql
($ssec,$smin,$shour,$smday,$smon, $syear,$swday,$syday,$sisdst)=localtime($starttime);
$starttimestr=sprintf("%04d-%02d-%02d %02d:%02d:%02d",$syear+1900,$smon+1,$smday,$shour,$smin,$ssec);
# the very last day, the very last show will have no end time
# set from the next starting time, so initialize field to
# to usual program start of next day
$wraptime=sprintf("%04d-%02d-%02d %02d:%02d:%02d",$syear+1900,$smon+1,$smday,3,0,0);
# increment program entry index, we start with 0
${$prog}++;
# remember the formated starttime
${$listing}[${$prog}]->{starttime}=$starttimestr;
# pre initialize endtime
${$listing}[${$prog}]->{endtime}=$wraptime;
# if we aren't the very first day, very first entry,
# we update the endtime from thre previous entry with our start time
if (${$prog} != 0) {
${$listing}[${$prog}-1]->{endtime}=$starttimestr;
}
# control the counters that detect the titel and subtitel
$tdcount=0;
$textcount=0;
$timefound=1;
}
}
if ($tdcount == 3) { # the titel / subtitel table cell
$text =~ s/'/\\'/g; # SQL doesn't like single quotes
$text =~ s/\xdf/ss/g; # german umlaute replaced with
# alternate spelling
$text =~ s/\xdc/Ue/g;
$text =~ s/\xfc/ue/g;
$text =~ s/\xf6/oe/g;
$text =~ s/\xe4/ae/g;
$text =~ s/\xd6/Oe/g;
$text =~ s/\xc4/Ae/g;
$textcount++;
if ($textcount == 1) { # title found
${$listing}[${$prog}]->{title}=$text;
}
if ($textcount==3) { # subtitle found
${$listing}[${$prog}]->{subtitle}=$text;
}
}
}
# for debug purposes only, convert string to it's hex codes
# Note: a function of the class, not global
sub ascii_to_hex ($)
{
## Convert each ASCII character to a two-digit hex number.
(my $str = shift) =~ s/(.|\n)/sprintf("%02lx ", ord $1)/eg;
return $str;
}
}
#*****************************************
# main routine
#*****************************************
use LWP::UserAgent;
$ua = LWP::UserAgent->new;
# fake Netscape 3.01
$ua->agent('Mozilla/3.01 (X11; I; SunOS 5.4 sun4m)');
# set proxy (if needed)
if (length($proxy)) {
$ua->proxy('http',$proxy);
}
# how many days can we get from GermanTV web site
$numdays=14;
# we try to get 14 days from now
$now=time;
# get midnight of today
($sec,$min,$hour,$mday,$mon, $year,$wday,$yday,$isdst)=localtime($now);
$midnight=$now-$hour*3600-$min*60-$sec;
($sec,$min,$hour,$mday,$mon, $year,$wday,$yday,$isdst)=localtime($midnight);
# get the days from the web
for $dayi (0..$numdays-1) {
# calculate the midnight for that day
$daymidnight=$midnight+$dayi*24*60*60;
($sec,$min,$hour,$mday,$mon, $year,$wday,$yday,$isdst)=localtime($daymidnight);
# the webpage only wants two digit years
$year=$year-100;
# perl months start from 0
$mon++;
# format date for web site
$todaystr=sprintf("%02d%02d%02d",$year,$mon,$mday);
print "Getting GermanTV Program for $todaystr\n";
#-----------------
# get the document
#-----------------
# Create a request
# change here if you live in a different time zone, pt is fetched here
$reqt = 'http://www.germantv.info/index.php?id=tvguide&suche=&daychooser=tag&timezone=pt&programmdatum='.$todaystr.'&genre=';
my $req = HTTP::Request->new(GET => $reqt);
# Pass request to the user agent and get a response back
my $hres = $ua->request($req);
# Check the outcome of the response
if (!$hres->is_success) {
print "no http answer\n";
exit(1);
}
# parse the answer
my $p = P->new(\$progs,[EMAIL PROTECTED],$daymidnight);
$p->parse($hres->content);
}
#***********************************************
# D A T A B A S E P A R T
#***********************************************
# connect to database
$dbh = DBI->connect('DBI:mysql:mythconverg','mythtv','mythtv') || die "could not connect to DB $DBI:errstr";
# we commit at the end, so no writing if anything failes in between
$dbh->{AutoCommit}=0;
# we need the internal channel id, get it from SQL
# this only works if your callsign contains germ or GERM
$sth = $dbh->prepare('SELECT chanid,channum,callsign,name from channel where callsign like "%germ%"');
$sth->execute();
$result=$sth->fetchrow_hashref() || die "Cannot find GermanTV channel, use callsign which contains germ";
$chanid=$result->{chanid};
print "Germantv has channel $result->{channum}, call sign $result->{callsign}, channel id $chanid\n";
$sth->finish();
# now we insert the program into the database
print "inserting into program database:";
for $i (0..$progs) {
$statement = "insert ignore into program (chanid,starttime,endtime,title,subtitle) values($chanid,'$listings[$i]->{starttime}','$listings[$i]->{endtime}',CONVERT(_utf8 '$listings[$i]->{title}' USING latin1),CONVERT(_utf8 '$listings[$i]->{subtitle}' USING latin1))";
print ".";
$sth = $dbh->prepare($statement);
$sth->execute();
}
print "\ndone\n";
$dbh->commit();
$dbh->disconnect();
exit;
_______________________________________________ mythtv-dev mailing list [email protected] http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-dev
