Ok, and a new xml_tv_be.py with:
# - fixed parsing of "&" in xml stream
# - fixed daylight saving in localtz
# - added option --save to update config file
# - fixed save option, only write one section to file
# - added option --night: include last night
# - added option for config file location
# - made it possible to open config file read-only
# - moved config file explanation to usage (--help)
# - other fixes, mainly in main()

I understood from den_RDC that it will be replaced in the future, but in the meantime it may be useful to someone.

- Richard.

#!/usr/bin/env python

# changelog 2.1 - rvp
# - fixed parsing of "&" in xml stream
# - fixed daylight saving in localtz
# - added option --save to update config file
# - fixed save option, only write one section to file
# - added option --night: include last night
# - added option for config file location
# - made it possible to open config file read-only
# - moved config file explanation to usage (--help)
# - other fixes, mainly in main()

# changelog 2.0 - logo generation now works, 40 channels
# added, configuration is now saved in config file
# including the number of days to be fetched..
# more info see config file -- den_RDC

# changelog 1.31 - tvsite.be became teveblad.be -- den_RDC
# universal timezone generator included for tz generation

# changelog 1.3 (same as cvs version tag in freevo-cvs)
# midnight programmes added
# according to the DTD the tag is desc, not description

import re
import urllib
import getopt, sys
import ConfigParser
from string import replace, lower
from time import time
from time import localtime
from time import strftime
from time import timezone
from time import altzone
from time import daylight

# Script version; usage() prints it in the --help banner.
version = '2.1'
# Character-set name.  NOTE(review): not referenced by any code visible in
# this file -- confirm whether it is still needed.
locale = 'Latin-1'

# Integer stand-ins for boolean values, used for the option flags in main().
TRUE = 1
FALSE = 0

def usage(days, night, configfile):
        """Print the command-line help text to stdout.

        days       -- number of days currently in effect, shown next to --days
        night      -- night setting currently in effect, shown next to --night
        configfile -- config file path currently in effect, shown next to
                      --config and --save

        The values are only interpolated into the help text; nothing is
        returned.
        """
        # Each entry is one output line; an empty string gives a blank line.
        # Single-argument print(...) is used so the text is emitted the same
        # way by the print statement and the print function.
        help_lines = [
                "",
                "xml_tv_be.py version %s -- generates tv programming guide xml listing" % version,
                "",
                "Usage: xml_tv_be.py [--days=n] [--night=b] [--config=s] [--save] [--help]",
                "",
                "  --days=<number>  Set number of days to fetch (%s)." % days,
                "  --night=<0|1>    Compensate for runs at night (%s)." % night,
                "  --config=<file>  File for configuration settings (%s)." % configfile,
                "  --save           Save options to config file (%s)." % configfile,
                "  --help           Display this message.",
                "",
                "Examples usage",
                "    xml_tv_be.py --days=2",
                "    xml_tv_be.py --config=/etc/xml_tv_be.conf > /tmp/TV.xml",
                "    xml_tv_be.py --days=5 --night=1 --config=/etc/xml_tv_be.conf --save",
                "",
                "About the days & night options:",
                "  Use 'days=1' to fetch today, 'days=7' to fetch a week, starting with today.",
                "  Note that 00AM-06AM is 'yesterday': if you run the script between 00AM and",
                "  06AM (crontab.daily?) and want to include that night in the list then set",
                "  '--night=1'. Use '--save' to store the options in the config file.",
                "",
                "About the xml_tv_be configuration file:",
                "  The config file is used to determine which channels are to be fetched.",
                "  Channels are identified (xmltv channel id) in freevo with their channel",
                "  name completly in lowercase, so no numbers anymore. A channel=1 means",
                "  the channel will be fetched, likewise channel=0 means the channel will",
                "  be skipped when fetching. Note that channel names are case-sensitive!",
                "",
        ]
        for text in help_lines:
                print(text)


def inttochar( match ):
        """Return the character for a decimal numeric character reference.

        match -- regex match object whose matched text is a reference of the
                 form "&#<decimal>;" (escape() passes matches of r'&#(\d+);').

        Returns chr(<decimal>).  When chr() cannot represent the code
        (it raises ValueError or OverflowError), the matched text is
        returned unchanged instead of aborting the whole run.
        """
        # Strip the "&#" / ";" wrapper from the matched text, leaving the digits.
        digits = re.sub(r'&#(\d+);', r'\1', match.group())
        code = int(digits)
        try:
                return chr(code)
        except (ValueError, OverflowError):
                # code is outside chr()'s range: keep the reference as written
                return match.group()


def escape(s):
        """Clean up a scraped HTML fragment for use as XML text.

        Applied in this order:
          1. "&#146;" becomes an apostrophe,
          2. every remaining "&#<decimal>;" reference is converted by
             inttochar(),
          3. every "&" becomes " &#38; " (note the surrounding spaces),
          4. every "<br>" tag becomes a single space (such tags can occur
             in the description field).

        Returns the transformed string.
        """
        text = s.replace('&#146;', '\x27')
        text = re.sub(r'&#(\d+);', inttochar, text)
        text = text.replace('&', ' &#38; ')
        return text.replace('<br>', ' ')

def localtz():
        """Return the local UTC offset as "+hhmm" or "-hhmm".

        Uses time.altzone when the zone defines daylight saving time
        (daylight == 1) and localtime() reports it as currently active,
        otherwise time.timezone.  Both values are seconds WEST of UTC, so a
        value <= 0 yields a "+" sign.  Hours and minutes are each zero-padded
        to two digits.
        """
        if (daylight == 1) and (localtime()[8] == 1):
                # daylight saving time in effect
                seconds_west = altzone
        else:
                seconds_west = timezone

        if seconds_west <= 0:
                sign = "+"
        else:
                sign = "-"

        # Split the magnitude, not the signed value: flooring a negative offset
        # would round the hour the wrong way for half-hour zones, and the
        # remainder is in seconds, so it must be reduced to minutes.
        hours, remainder = divmod(abs(seconds_west), 3600)
        return "%s%02d%02d" % (sign, hours, remainder // 60)

class cEvent:
        """One programme entry parsed from the HTML lines of a channel page.

        The constructor walks the lines of one event block and extracts, in
        this fixed order: start time, end time, title, category and
        description.  xml() prints the entry as an XMLTV <programme> element.
        """

        # Class-level defaults, kept so these attributes exist on every
        # instance.  __init__ replaces title/description with strings and
        # gives each instance its own images list.
        start=''
        end=''
        title=''
        subtitle=''
        description=[]
        images=[]

        # Patterns for the cells of one event, compiled once for all events.
        _TIME_RE = re.compile(r"<td class='tvnucontent' valign='top'>(.+)\.(.+)</td>")
        _TITLE_RE = re.compile(r".+ class=tvnu>(.+)</a>")
        _CATEGORY_RE = re.compile(r"<td class='tvnuthema' align=right valign='top' nowrap>(.+)</td>")
        _DESCRIPTION_RE = re.compile(r"<td width= '100%' valign='top' colspan=2 class=programmabeschrijving>(.+)<br>")

        def __init__(self,block,line,today,tomorrow):
                """Parse one event out of block.

                block    -- list of HTML lines belonging to this event
                line     -- accepted for the caller's convenience; not used
                today    -- date string used by xml() for "today"
                tomorrow -- date string used by xml() for the following day
                """
                self.start_h='00'
                self.start_m='00'
                self.end_h=''
                self.end_m=''
                self.title=''
                self.category=''
                self.description=''
                # per-instance list, so events never share one mutable object
                self.images = []
                self.today = today
                self.tomorrow = tomorrow

                # state: 0 = want start time, 1 = want end time, 2 = want title,
                #        3 = want category, 4 = want description
                state = 0
                for l in block:
                        if state == 0:
                                r = self._TIME_RE.search(l)
                                if r is not None:
                                        self.start_h = r.group(1)
                                        self.start_m = r.group(2)
                                        state = 1

                        elif state == 1:
                                r = self._TIME_RE.search(l)
                                if r is not None:
                                        self.end_h = r.group(1)
                                        self.end_m = r.group(2)
                                        state = 2

                        elif state == 2:
                                r = self._TITLE_RE.search(l)
                                if r is not None:
                                        self.title = escape(r.group(1))
                                        state = 3

                        elif state == 3:
                                r = self._CATEGORY_RE.search(l)
                                if r is not None:
                                        self.category = escape(r.group(1))
                                        state = 4

                        elif state == 4:
                                # stays in this state: a later matching line
                                # replaces the description found so far
                                r = self._DESCRIPTION_RE.search(l)
                                if r is not None:
                                        self.description = escape(r.group(1))


        def xml(self,channel_id):
                """Print this event as an XMLTV <programme> element.

                channel_id -- value written to the channel="..." attribute

                Nothing is printed when no title was parsed.
                """
                if self.title == '':
                        return

                # Hours are compared as strings.
                # NOTE(review): this assumes the site delivers zero-padded
                # two-digit hours -- confirm.
                if self.start_h < '06':
                        # changed back to six: some programmes on Ketnet start
                        # at 7 o'clock.  Anything before 06 is dated tomorrow.
                        start_day = self.tomorrow
                        stop_day = self.tomorrow
                elif self.end_h < self.start_h:
                        # programmes that start today but end tomorrow: their
                        # end hour is smaller than their start hour
                        start_day = self.today
                        stop_day = self.tomorrow
                else:
                        start_day = self.today
                        stop_day = self.today

                tz = localtz()
                print("  <programme start=\"%s%s%s %s\" stop=\"%s%s%s %s\" channel=\"%s\">" % (
                        start_day, self.start_h, self.start_m, tz,
                        stop_day, self.end_h, self.end_m, tz, channel_id))
                print("    <title lang=\"nl\">%s</title>" % self.title)
                if self.category != '':
                        print("    <category lang=\"nl\">%s</category>" % self.category)
                if self.description != '':
                        print("    <desc lang=\"nl\">%s</desc>" % self.description)
                print("  </programme>")


class cChannel:
        """One TV channel together with the events fetched for it.

        The constructor downloads the teveblad.be listing page for each
        requested day and splits it into cEvent objects; xml() prints the
        channel and all its events in XMLTV form.
        """

        title = ''
        events = []

        # Listing page for one channel (first %s) on one date (second %s).
        _LISTING_URL = "http://www.teveblad.be/ndl/zender.asp?move=full&channel=%s&dag=%s"
        # Line that opens the first event of a page.
        _FIRST_EVENT_RE = re.compile(r"<td class='tvnucontent' valign='top'>.+</td>")
        # Line that opens every following event.
        _NEXT_EVENT_RE = re.compile(r"<td class='tvnucontent' valign='top' rowspan=2>.+")

        def __init__(self,id,title,days,night):
                """Fetch and parse the listings of one channel.

                id    -- value printed as the XMLTV channel id
                title -- channel name used in the listing and logo URLs
                days  -- number of consecutive days to fetch
                night -- when not FALSE and the local hour is before 6, the
                         first fetched day is yesterday instead of today
                """
                self.id=id
                self.title=title
                self.events = []

                if (night != FALSE) and (localtime()[3] < 6):
                        # start with yesterday to get night programming
                        n = 1
                else:
                        # start with today
                        n = 0

                for x in range(days):
                        # one timestamp per day, so all three date strings
                        # are derived from the same instant
                        day_start = time() + (x - n) * 86400
                        date = strftime("%m/%d/%Y", localtime(day_start))
                        today = strftime("%Y%m%d", localtime(day_start))
                        tomorrow = strftime("%Y%m%d", localtime(day_start + 86400))

                        f = urllib.urlopen(self._LISTING_URL % (title, date))
                        try:
                                page_lines = f.read().splitlines()
                        finally:
                                # always release the connection
                                f.close()

                        block = []
                        in_events = False
                        l = ''  # defined even when the page has no lines
                        for l in page_lines:
                                if not in_events:
                                        # looking for the first start-time cell
                                        if self._FIRST_EVENT_RE.search(l) is not None:
                                                block.append(l)
                                                in_events = True
                                else:
                                        # the next start-time cell closes the
                                        # event collected so far
                                        if self._NEXT_EVENT_RE.search(l) is not None:
                                                self.events.append(cEvent(block,l,today,tomorrow))
                                                block = []
                                        block.append(l)

                        # flush the last event of the page, if any was started
                        if block:
                                self.events.append(cEvent(block,l,today,tomorrow))


        def xml(self,today = None,tomorrow = None):
                """Print the <channel> element followed by all events.

                today, tomorrow -- accepted for backward compatibility only;
                                   neither value is used.
                """
                print("  <channel id=\"%s\">" % self.id)
                print(" <display-name lang=\"nl\">%s</display-name>" % self.title)
                # no ';' after the src attribute: it made the tag malformed XML
                print("    <icon src=\"http://www.teveblad.be/gfx/logos/%s.gif\" />" % self.title)
                print("  </channel>")
                for event in self.events:
                        event.xml(self.id)


def main():
        """Command-line entry point.

        Steps, in order:
          1. parse the command line (--help, --days, --night, --config,
             --save and their short forms),
          2. read the config file; 'days' and 'night' from its [settings]
             section are used unless given on the command line,
          3. with --help: print usage() showing the effective values, exit,
          4. with --save: write the command-line values back to the config
             file and exit,
          5. otherwise print an XMLTV document to stdout for every option in
             the [channels] section whose value is 1.

        Exits with status 2 on invalid command-line options and with status 1
        when the config file cannot be used.
        """
        config = ConfigParser.ConfigParser()
        configfile, cok = "xml_tv_be.config", FALSE
        dosave, dohelp = FALSE, FALSE
        days, dset = 3, FALSE
        night, nset = 0, FALSE
        fp = None

        # parse command line; -d, -n and -c take a value, -h and -s do not
        try:
                opts, args = getopt.getopt(sys.argv[1:], "hd:n:c:s",
                        ["help", "days=", "night=", "config=", "save"])
                for o, a in opts:
                        if o in ("-c", "--config"):
                                configfile = a
                        elif o in ("-s", "--save"):
                                dosave = TRUE
                        elif o in ("-h", "--help"):
                                dohelp = TRUE
                        elif o in ("-d", "--days"):
                                days, dset = int(a), TRUE
                        elif o in ("-n", "--night"):
                                night, nset = int(a), TRUE

        except (getopt.GetoptError, ValueError):
                # unknown option, missing value, or a non-numeric days/night
                sys.stderr.write('Invalid command line options. Try --help for more info.\n')
                sys.exit(2)

        # read config file:
        try:
                if dosave:
                        # open for updating (write permission needed)
                        fp = open(configfile, 'r+')
                else:
                        # open read-only (write permission not needed)
                        fp = open(configfile, 'r')

        except IOError:
                sys.stderr.write( 'Cannot open %s, does this file exist?\n' % configfile )

        else:
                try:
                        config.readfp(fp)
                except (ConfigParser.Error, IOError):
                        sys.stderr.write( 'Cannot read %s, does this file exist?\n' % configfile )
                else:
                        # config file ok, read values not given on the command line
                        cok = TRUE
                        if not dset:
                                try:
                                        days = config.getint('settings', 'days')
                                except (ConfigParser.Error, ValueError):
                                        sys.stderr.write( "Value 'days' not known, using default (%s)\n" % days )

                        if not nset:
                                try:
                                        night = config.getint('settings', 'night')
                                except (ConfigParser.Error, ValueError):
                                        sys.stderr.write( "Value 'night' not known, using default (%s)\n" % night )

        # only the save step below still needs the open file
        saving = dosave and cok and not dohelp
        if (fp is not None) and not saving:
                fp.close()

        if ( days <= 0 ):
                days = 1

        if ( night != 0 ):
                night = 1

        if dohelp:
                # print usage and exit
                usage(days, night, configfile)
                sys.exit()

        if not cok:
                # config file not ok: the reason was already written to stderr
                sys.exit(1)

        if dosave:
                # save config and exit
                try:
                        if not config.has_section('settings'):
                                config.add_section('settings')
                        if dset:
                                config.set('settings', 'days', str(days))
                                print("Writting to %s: days=%s" % (configfile, days))
                        if nset:
                                config.set('settings', 'night', str(night))
                                print("Writting to %s: night=%s" % (configfile, night))
                        # rewrite the file in place and cut off any old tail
                        fp.seek(0)
                        config.write(fp)
                        fp.truncate()
                finally:
                        fp.close()
                sys.exit()

        if not config.has_section('channels'):
                sys.stderr.write( 'No [channels] section in %s\n' % configfile )
                sys.exit(1)

        # dump xml output to stdout:
        print("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>")
        print("<tv generator-info-name=\"Script by Bart Heremans and den_RDC\">")

        for channel in config.options('channels'):
                if config.getint('channels', channel) == TRUE:
                        cChannel(channel.lower(), channel, days, night).xml()

        print("</tv>")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
        main()

Reply via email to