Ok, and a new xml_tv_be.py with: # - fixed parsing of "&" in xml stream # - fixed daylight saving in localtz # - added option --save to update config file # - fixed save option, only write one section to file # - added option --night: include last night # - added option for config file location # - made it possible to open config file read-only # - moved config file explanation to usage (--help) # - other fixes, mainly in main()
I understood from den_RDC that it will be replaced in the future, but in the meantime it can be useful to someone.
- Richard.
#!/usr/bin/env python
# changelog 2.1 - rvp
# - fixed parsing of "&" in xml stream
# - fixed daylight saving in localtz
# - added option --save to update config file
# - fixed save option, only write one section to file
# - added option --night: include last night
# - added option for config file location
# - made it possible to open config file read-only
# - moved config file explanation to usage (--help)
# - other fixes, mainly in main()
# changelog 2.0 - logo generation now works, 40 channels
# added, configuration is now saved in config file
# including the number of days to be fetched..
# more info see config file -- den_RDC
# changelog 1.31 - tvsite.be became teveblad.be -- den_RDC
# universal timezone generator included for tz generation
# changelog 1.3 (same as cvs version tag in freevo-cvs)
# added programmes that run past midnight
# according to the DTD the tag is desc, not description
import re
import urllib
import getopt, sys
import ConfigParser
from string import replace, lower
from time import time
from time import localtime
from time import strftime
from time import timezone
from time import altzone
from time import daylight
# Script version; shown in the banner printed by usage().
version = '2.1'
# Character-set name; not referenced anywhere in the visible code.
locale = 'Latin-1'
# Integer stand-ins for booleans, used for the flags in main() and cChannel.
TRUE = 1
FALSE = 0
def usage(days, night, configfile):
    """Print the command-line help text to standard output.

    days       -- current number of days to fetch, shown next to --days
    night      -- current night setting (0 or 1), shown next to --night
    configfile -- config file path, shown next to --config and --save
    """
    help_lines = (
        "",
        "xml_tv_be.py version %s -- generates tv programming guide xml listing"
        % version,
        "",
        "Usage: xml_tv_be.py [--days=n] [--night=b] [--config=s] [--save] [--help]",
        "",
        " --days=<number> Set number of days to fetch (%s)." % days,
        " --night=<0|1> Compensate for runs at night (%s)." % night,
        " --config=<file> File for configuration settings (%s)." % configfile,
        " --save Save options to config file (%s)." % configfile,
        " --help Display this message.",
        "",
        "Examples usage",
        " xml_tv_be.py --days=2",
        " xml_tv_be.py --config=/etc/xml_tv_be.conf > /tmp/TV.xml",
        " xml_tv_be.py --days=5 --night=1 --config=/etc/xml_tv_be.conf --save",
        "",
        "About the days & night options:",
        " Use 'days=1' to fetch today, 'days=7' to fetch a week, starting with today.",
        " Note that 00AM-06AM is 'yesterday': if you run the script between 00AM and",
        " 06AM (crontab.daily?) and want to include that night in the list then set",
        " '--night=1'. Use '--save' to store the options in the config file.",
        "",
        "About the xml_tv_be configuration file:",
        " The config file is used to determine which channels are to be fetched.",
        " Channels are identified (xmltv channel id) in freevo with their channel",
        " name completly in lowercase, so no numbers anymore. A channel=1 means",
        " the channel will be fetched, likewise channel=0 means the channel will",
        " be skipped when fetching. Note that channel names are case-sensitive!",
        "",
    )
    # One print per entry; an empty entry yields a blank separator line.
    for text in help_lines:
        print(text)
def inttochar(match):
    """Return the single character for a matched decimal character reference.

    ``match`` is a regex match whose matched text looks like ``&#233;``.
    The script declares its output as ISO-8859-1, which has no character
    above code 255, and ``chr()`` rejects such codes on Python 2; they are
    therefore replaced by ``?`` instead of raising ValueError.
    """
    digits = re.sub(r'&#(\d+);', r'\1', match.group())
    code = int(digits)
    if code > 255:
        return '?'
    return chr(code)


def escape(s):
    """Clean a scraped HTML fragment so it can be placed in the XML output.

    Steps, in order:
      1. curly apostrophes become a plain "'";
      2. decimal character references are decoded through inttochar();
      3. every "&" is written as the "&amp;" entity, padded with a space on
         each side (the padding matches the previous behaviour);
      4. "<br>" tags become a single space.

    Returns the cleaned string.
    """
    # NOTE(review): in the copy of the script this was taken from, the first
    # replacement target is a literal right single quotation mark -- most
    # likely an HTML entity that the mail archive rendered.  A non-ASCII
    # literal is also a syntax error on Python 2 without a coding line, so
    # the plausible spellings are listed in ASCII instead; confirm against
    # the markup the site really sends.
    for curly in ('&#146;', '&#8217;', '&rsquo;', '\x92', '\xe2\x80\x99'):
        s = s.replace(curly, '\x27')
    s = re.sub(r'&#(\d+);', inttochar, s)
    # Fold ampersands that are already escaped so they are not escaped twice.
    s = s.replace('&amp;', '&')
    # A bare "&" is not allowed in XML character data.
    s = s.replace('&', ' &amp; ')
    # <br> tags can show up inside the description field.
    s = s.replace('<br>', ' ')
    return s
def localtz(offset=None):
    """Return a UTC offset formatted as "+HHMM" or "-HHMM".

    offset -- seconds WEST of UTC, the convention used by time.timezone and
              time.altzone.  When omitted, the local zone is used: altzone
              if daylight saving time is currently in effect, timezone
              otherwise.

    Zones east of UTC (offset <= 0) get a "+" sign, zones west get "-".
    The result has minute precision, so half-hour zones come out right
    (for example -19800 gives "+0530").
    """
    if offset is None:
        if daylight != 0 and localtime()[8] == 1:
            # daylight saving time is in effect
            offset = altzone
        else:
            offset = timezone
    if offset <= 0:
        sign = "+"
    else:
        sign = "-"
    # Work on the magnitude: floor division of a negative offset would round
    # the hour away from zero, and the remainder must be minutes, not seconds.
    total_minutes = abs(offset) // 60
    return "%s%02d%02d" % (sign, total_minutes // 60, total_minutes % 60)
class cEvent:
    """A single programme scraped from one block of listing-page lines.

    The constructor walks the HTML lines of one programme block and picks
    up, in this order, the start time, the end time, the title, the
    category and the description.  xml() prints the programme as an XMLTV
    <programme> element.
    """

    # Class-level defaults, kept so existing attribute access keeps working.
    # __init__ gives every instance its own values, so the lists below are
    # never shared between events.
    start = ''
    end = ''
    title = ''
    subtitle = ''
    description = []
    images = []

    # Patterns for the five fields, compiled once for all events.
    _TIME_RE = re.compile(
        r"<td class='tvnucontent' valign='top'>(.+)\.(.+)</td>")
    _TITLE_RE = re.compile(r".+ class=tvnu>(.+)</a>")
    _CATEGORY_RE = re.compile(
        r"<td class='tvnuthema' align=right valign='top' nowrap>(.+)</td>")
    _DESC_RE = re.compile(
        r"<td width= '100%' valign='top' colspan=2 "
        r"class=programmabeschrijving>(.+)<br>")

    def __init__(self, block, line, today, tomorrow):
        """Parse one programme out of ``block``.

        block    -- list of HTML lines belonging to this programme
        line     -- accepted for compatibility with callers; not used
        today    -- date string (YYYYMMDD) of the listing day
        tomorrow -- date string (YYYYMMDD) of the day after
        """
        self.start_h = '00'
        self.start_m = '00'
        self.end_h = ''
        self.end_m = ''
        self.title = ''
        self.category = ''
        self.description = ''
        self.images = []
        self.today = today
        self.tomorrow = tomorrow
        # state: 0 = want start time, 1 = want end time, 2 = want title,
        #        3 = want category, 4 = want description
        state = 0
        for text in block:
            if state == 0:
                found = self._TIME_RE.search(text)
                if found is not None:
                    self.start_h = found.group(1)
                    self.start_m = found.group(2)
                    state = 1
            elif state == 1:
                found = self._TIME_RE.search(text)
                if found is not None:
                    self.end_h = found.group(1)
                    self.end_m = found.group(2)
                    state = 2
            elif state == 2:
                found = self._TITLE_RE.search(text)
                if found is not None:
                    self.title = escape(found.group(1))
                    state = 3
            elif state == 3:
                found = self._CATEGORY_RE.search(text)
                if found is not None:
                    self.category = escape(found.group(1))
                    state = 4
            elif state == 4:
                # The state is not advanced here, so a later matching line
                # replaces the description found earlier.
                found = self._DESC_RE.search(text)
                if found is not None:
                    self.description = escape(found.group(1))

    def xml(self, channel_id):
        """Print this programme as an XMLTV <programme> element.

        Nothing is printed when no title was parsed.  ``channel_id`` is
        written into the element's channel attribute.
        """
        if self.title == '':
            return
        if self.start_h < '06':
            # Changed back to six: some programmes on Ketnet start at 7.
            # Anything starting before 06:00 belongs to the next calendar day.
            start_day = self.tomorrow
            stop_day = self.tomorrow
        elif self.end_h < self.start_h:
            # Programmes that start today but end tomorrow, i.e. whose end
            # hour is smaller than their start hour.
            start_day = self.today
            stop_day = self.tomorrow
        else:
            start_day = self.today
            stop_day = self.today
        tz = localtz()
        print(" <programme start=\"%s%s%s %s\" stop=\"%s%s%s %s\" channel=\"%s\">"
              % (start_day, self.start_h, self.start_m, tz,
                 stop_day, self.end_h, self.end_m, tz, channel_id))
        print(" <title lang=\"nl\">%s</title>" % self.title)
        if self.category != '':
            print(" <category lang=\"nl\">%s</category>" % self.category)
        if self.description != '':
            print(" <desc lang=\"nl\">%s</desc>" % self.description)
        print(" </programme>")
class cChannel:
    """One TV channel together with the programmes fetched for it.

    Constructing an instance downloads the listing page of every requested
    day and turns it into cEvent objects; xml() prints the channel header
    followed by all of its programmes.
    """

    # Class-level defaults; __init__ replaces both on every instance.
    title = ''
    events = []

    _URL = "http://www.teveblad.be/ndl/zender.asp?move=full&channel=%s&dag=%s"
    # The first programme row of a page, and the row that opens each later
    # programme.
    _FIRST_ROW = r"<td class='tvnucontent' valign='top'>.+</td>"
    _NEXT_ROW = r"<td class='tvnucontent' valign='top' rowspan=2>.+"

    def __init__(self, id, title, days, night):
        """Fetch ``days`` days of listings for one channel.

        id    -- xmltv channel id used in the generated XML
        title -- channel name as used in the site's URLs
        days  -- number of consecutive days to fetch
        night -- when not FALSE and the local hour is before 6, fetching
                 starts with yesterday so last night's programmes are kept
        """
        self.id = id
        self.title = title
        self.events = []
        if (night != FALSE) and (localtime()[3] < 6):
            # start with yesterday to get the night programming
            back = 1
        else:
            # start with today
            back = 0
        # Sample the clock once so every date of this run is derived from
        # the same instant, even if the run crosses midnight.
        now = time()
        for x in range(days):
            base = now + (x - back) * 86400
            date = strftime("%m/%d/%Y", localtime(base))
            today = strftime("%Y%m%d", localtime(base))
            tomorrow = strftime("%Y%m%d", localtime(base + 86400))
            self._fetch_day(date, today, tomorrow)

    def _fetch_day(self, date, today, tomorrow):
        """Download one day's page and append its programmes to self.events."""
        f = urllib.urlopen(self._URL % (self.title, date))
        try:
            page = f.read()
        finally:
            # always release the connection, even if the read fails
            f.close()
        block = []
        started = 0
        row = None  # stays None when the page has no lines at all
        for row in page.splitlines():
            if not started:
                # looking for the first start time on the page
                if re.search(self._FIRST_ROW, row) is not None:
                    block.append(row)
                    started = 1
            else:
                # a new start time closes the programme collected so far
                if re.search(self._NEXT_ROW, row) is not None:
                    self.events.append(cEvent(block, row, today, tomorrow))
                    block = []
                block.append(row)
        # flush the last programme of the day
        self.events.append(cEvent(block, row, today, tomorrow))

    def xml(self, today=None, tomorrow=None):
        """Print the <channel> element and then every programme.

        ``today`` and ``tomorrow`` are accepted for compatibility with
        existing callers; the method does not use them.
        """
        print(" <channel id=\"%s\">" % self.id)
        print(" <display-name lang=\"nl\">%s</display-name>" % self.title)
        print(" <icon src=\"http://www.teveblad.be/gfx/logos/%s.gif\" />"
              % self.title)
        print(" </channel>")
        for event in self.events:
            event.xml(self.id)
def main():
    """Parse the command line, load the config file and run the request.

    Depending on the options this prints the help text, saves the given
    --days/--night values to the config file, or writes the XMLTV listing
    for every enabled channel to standard output.

    Exit status: 2 for invalid command-line options, 1 when the config
    file cannot be used, 0 otherwise.
    """
    config = ConfigParser.ConfigParser()
    configfile, cok = "xml_tv_be.config", FALSE
    dosave, dohelp = FALSE, FALSE
    days, dset = 3, FALSE
    night, nset = 0, FALSE
    bad_options = 'Invalid command line options. Try --help for more info.\n'

    # parse command line; -d, -n and -c take a value, -h and -s do not
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hd:n:c:s",
            ["help", "days=", "night=", "config=", "save"])
    except getopt.GetoptError:
        sys.stderr.write(bad_options)
        sys.exit(2)
    try:
        for o, a in opts:
            if o in ("-c", "--config"):
                configfile = a
            elif o in ("-s", "--save"):
                dosave = TRUE
            elif o in ("-h", "--help"):
                dohelp = TRUE
            elif o in ("-d", "--days"):
                days, dset = int(a), TRUE
            elif o in ("-n", "--night"):
                night, nset = int(a), TRUE
    except ValueError:
        # --days / --night were given something that is not a number
        sys.stderr.write(bad_options)
        sys.exit(2)

    fp = None
    try:
        # read config file
        try:
            if dosave:
                # open for updating (write permission needed)
                fp = open(configfile, 'r+')
            else:
                # open read-only (write permission not needed)
                fp = open(configfile, 'r')
        except IOError:
            sys.stderr.write('Cannot open %s, does this file exist?\n'
                             % configfile)
        else:
            try:
                config.readfp(fp)
            except (ConfigParser.Error, IOError):
                sys.stderr.write('Cannot read %s, does this file exist?\n'
                                 % configfile)
            else:
                # config file ok; values given on the command line win
                cok = TRUE
                if dset == FALSE:
                    try:
                        days = config.getint('settings', 'days')
                    except (ConfigParser.Error, ValueError):
                        sys.stderr.write(
                            "Value 'days' not known, using default (%s)\n"
                            % days)
                if nset == FALSE:
                    try:
                        night = config.getint('settings', 'night')
                    except (ConfigParser.Error, ValueError):
                        sys.stderr.write(
                            "Value 'night' not known, using default (%s)\n"
                            % night)

        # normalise the values: at least one day, night is 0 or 1
        if days <= 0:
            days = 1
        if night != 0:
            night = 1

        if dohelp:
            # print usage and exit; works even without a usable config file
            usage(days, night, configfile)
            sys.exit()

        if cok == FALSE:
            # config file not ok: report failure to the caller
            sys.exit(1)

        if dosave:
            # save config and exit
            if not config.has_section('settings'):
                config.add_section('settings')
            if dset == TRUE:
                config.set('settings', 'days', str(days))
                print("Writting to %s: days=%s" % (configfile, days))
            if nset == TRUE:
                config.set('settings', 'night', str(night))
                print("Writting to %s: night=%s" % (configfile, night))
            # rewrite the file in place and drop any leftover tail
            fp.seek(0)
            config.write(fp)
            fp.truncate()
            sys.exit()

        if not config.has_section('channels'):
            # check before any XML is written so the output is never partial
            sys.stderr.write('No [channels] section in %s\n' % configfile)
            sys.exit(1)

        # dump xml output to stdout
        print("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>")
        print("<tv generator-info-name=\"Script by Bart Heremans and den_RDC\">")
        for channel in config.options('channels'):
            if config.getint('channels', channel) == TRUE:
                cChannel(channel.lower(), channel, days, night).xml()
        print("</tv>")
    finally:
        # runs on every path, including sys.exit()
        if fp is not None:
            fp.close()
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
