Author: dmeyer
Date: Sat Feb 24 20:33:45 2007
New Revision: 2519
Added:
trunk/epg/src/sources/config_epgdata.cxml
trunk/epg/src/sources/epgdata.py
Modified:
trunk/epg/AUTHORS
Log:
add epgdata.com parser from Tanja Kotthaus
Modified: trunk/epg/AUTHORS
==============================================================================
--- trunk/epg/AUTHORS (original)
+++ trunk/epg/AUTHORS Sat Feb 24 20:33:45 2007
@@ -5,3 +5,9 @@
Jason Tackaberry <[EMAIL PROTECTED]>
Dirk Meyer <[EMAIL PROTECTED]>
Rob Shortt <[EMAIL PROTECTED]>
+
+
+Contributions:
+
+Tanja Kotthaus <[EMAIL PROTECTED]>
+epgdata.com parser
Added: trunk/epg/src/sources/config_epgdata.cxml
==============================================================================
--- (empty file)
+++ trunk/epg/src/sources/config_epgdata.cxml Sat Feb 24 20:33:45 2007
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<config>
+ <desc lang="en">
+ epgdata settings
+
+ MORE DESCRIPTION!
+ </desc>
+ <var name="activate" default="False">
+ <desc lang="en">Get epg data from epgdata.com</desc>
+ </var>
+ <var name="pin" type="str" />
+ <var name="days" default="7">
+ <desc lang="en">How many days of epg data you want to fetch.</desc>
+ </var>
+</config>
Added: trunk/epg/src/sources/epgdata.py
==============================================================================
--- (empty file)
+++ trunk/epg/src/sources/epgdata.py Sat Feb 24 20:33:45 2007
@@ -0,0 +1,399 @@
+# -*- coding: iso-8859-1 -*-
+# -----------------------------------------------------------------------------
+# source_epgdata.py - get epg data from www.epgdata.com
+# -----------------------------------------------------------------------------
+# $Id:
+# -----------------------------------------------------------------------------
+# kaa.epg - EPG Database
+# Copyright (C) 2004-2006 Jason Tackaberry, Dirk Meyer, Rob Shortt
+#
+# First Edition: Tanja Kotthaus <[EMAIL PROTECTED]>
+#
+# Please see the file AUTHORS for a complete list of authors.
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version
+# 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+#
+# -----------------------------------------------------------------------------
+
+# python imports
+import sys
+import os
+import time
+import glob
+import logging
+
+# kaa imports
+from kaa import xml, TEMP
+from kaa.config import Var, Group
+from kaa.notifier import Timer, Thread
+
+from config_epgdata import config
+
+# get logging object
+log = logging.getLogger('epgdata')
+
+
+######
+# special functions for processing the data from epgdata.com
+######
+
+
+# the meaning of the tags that epgdata.com uses can be found in the qe.dtd file
+# which is included in the zip archive that contains also the epg data.
+PROG_MAPPING = {
+ 'd2':'channel_id',
+ 'd4':'start',
+ 'd5':'stop',
+ 'd10':'category',
+ 'd25':'genre',
+ 'd19':'title',
+ 'd20':'subtitle',
+ 'd21':'desc',
+ 'd32':'coutry',
+ 'd33':'date',
+ 'd34':'presenter',
+ 'd36':'director',
+ 'd37':'actor',
+ 'd40':'icon'
+}
+
+# the meaning of the tags that are used in the channel*.xml files can be found
+# in the header of each channe*.xml file.
+CH_MAPPING = {
+ 'ch0':'tvchannel_name',
+ 'ch1':'tvchannel_short',
+ 'ch4':'tvchannel_id',
+ 'ch11':'tvchannel_dvb'
+}
+
+META_MAPPING = {
+ 'g0':'id', # genre_id
+ 'g1':'name', # genre
+ 'ca0':'id', # category_id
+ 'ca2':'name' # category
+}
+
+def timestr2secs_utc(timestr):
+ """
+ Convert the timestring to UTC (=GMT) seconds.
+
+ The time format in the epddata is:
+ '2002-09-08 00:00:00'
+ The timezone is german localtime, which is CET or CEST.
+ """
+ secs = time.mktime(time.strptime(timestr, '%Y-%m-%d %H:%M:%S'))
+ return secs
+
+
+
+def parse_data(info):
+ """ parse the info from the xml
+
+ The current node can be either from a channel or a program.
+ Subelements of the form <ch?> are for channels whereas <d?> are for
programs.
+ See CH_MAPPING and PROG_MAPPING for a list of subelements that are
+ processed and their meaning.
+ First all subelements of the node are read to a dictionary called attr
+ and then the info in the dictionary is processed further depending on what
+ kind of node we have.
+ """
+
+ attr= {}
+ flag = None
+ # child is a <data> element and its children are containing the infos
+ for child in info.node.children:
+ if child.name in CH_MAPPING.keys():
+ # this is channel info
+ flag = 'channel'
+ # let's process it
+ attr[CH_MAPPING[child.name]] = child.content
+
+ if child.name in META_MAPPING.keys():
+ # this is meta info
+ flag = 'meta'
+ # let's process it
+ attr[META_MAPPING[child.name]] = child.content
+
+ if child.name in PROG_MAPPING.keys():
+ # this is program info
+ flag = 'programme'
+ if child.name=='d33': # date
+ date = child.content
+ # try to guess the format of the date
+ if len(date.split('/'))==2:
+ # if it is '1995/96', take the first year
+ date = date.split('/')[0]
+ elif len(date.split('-'))==2:
+ # if it is '1999-2004', take the first year
+ date = date.split('-')[0]
+ if not len(date)==4:
+ # format unknown, ignore
+ continue
+ else:
+ fmt = '%Y'
+ attr['date'] = int(time.mktime(time.strptime(date, fmt)))
+ elif child.name=='d10' or child.name=='d25': # genre and category
+ content = child.content
+ try:
+ content = info.meta_id_to_meta_name[content]
+ except KeyError:
+ pass
+ else:
+ attr[PROG_MAPPING[child.name]] = content
+ else:
+ # process all other known elements
+ attr[PROG_MAPPING[child.name]] = child.content
+
+ if flag =='channel':
+ # create db_id
+ db_id = info.epg.add_channel(tuner_id=attr['tvchannel_dvb'],
+ name=attr['tvchannel_short'],
+ long_name=attr['tvchannel_name'])
+ # and fill the channel_id_to_db_id dictionary
+ info.channel_id_to_db_id[attr['tvchannel_id']] = db_id
+
+ if flag == 'meta':
+ info.meta_id_to_meta_name[attr['id']]=attr['name']
+
+ if flag == 'programme':
+ # start and stop time must be converted according to our standards
+ start = timestr2secs_utc(attr.pop('start'))
+ stop = timestr2secs_utc(attr.pop('stop'))
+ # there of course must be a title
+ title = attr.pop('title')
+ # translate channel_id to db_id
+ db_id = info.channel_id_to_db_id[attr.pop('channel_id')]
+ # fill this program to the database
+ info.epg.add_program(db_id, start, stop, title, **attr)
+
+
+#####
+# this functions form the interface to freevo
+#####
+
+class UpdateInfo:
+ """
+ Simple class holding temporary information we need, will be filled later.
+ """
+ pass
+
+
+def _update_parse_xml_thread(epg, pin, days):
+ """
+ Thread to parse the xml file. It will also call the grabber if needed.
+ """
+
+ # create a tempdir as working area
+ tempdir = os.path.join(TEMP, 'epgdata')
+ if not os.path.isdir(tempdir):
+ os.mkdir(tempdir)
+ # and clear it if needed
+ for i in glob.glob(os.path.join(tempdir,'*')):
+ os.remove(i)
+
+ # temp file
+ tmpfile = os.path.join(tempdir,'temp.zip')
+ # logfile
+ logfile = os.path.join(TEMP,'epgdata.log')
+
+ # empty list for the xml docs
+ docs = []
+ # count of the nodes that have to be parsed
+ nodes = 0
+
+
+ # create download adresse for meta data
+ addresse = 'http://www.epgdata.com/index.php'
+ addresse+= '?action=sendInclude&iLang=de&iOEM=xml&iCountry=de'
+ addresse+= '&pin=%s' %pin
+ addresse+= '&dataType=xml'
+
+
+ # remove old file if needed
+ try:
+ os.remove(tmpfile)
+ except OSError:
+ pass
+ # download the meta data file
+ log.info ('Downloading meta data')
+ exit = os.system('wget -N -O %s "%s" >>%s 2>>%s'
+ %(tmpfile, addresse, logfile, logfile))
+ if not os.path.exists(tmpfile) or exit:
+ log.error('Cannot get file from epgdata.com, see %s' %logfile)
+ epg.guide_changed()
+ return
+ # and unzip the zip file
+ log.info('Unzipping data for meta data')
+ exit = os.system('unzip -uo -d %s %s >>%s 2>>%s'
+ %(tempdir, tmpfile, logfile, logfile))
+ if exit:
+ log.error('Cannot unzip the downloaded file, see %s' %logfile)
+ epg.guide_changed()
+ return
+
+ # list of channel info xml files
+ chfiles = glob.glob(os.path.join(tempdir,'channel*.xml'))
+ if len(chfiles)==0:
+ log.error('no channel xml files for parsing')
+ return
+
+ # parse this files
+ for xmlfile in chfiles:
+ try:
+ doc = xml.Document(xmlfile, 'channel')
+ except:
+ log.warning('error while parsing %s' %xmlfile)
+ continue
+ docs.append(doc)
+ nodes = nodes + len(doc.children)
+
+
+ #parse the meta files
+ try:
+ # the genre file
+ xmlfile = os.path.join(tempdir, 'genre.xml')
+ doc = xml.Document(xmlfile, 'genre')
+ except:
+ log.warning('error while parsing %s' %xmlfile)
+ else:
+ # add the files to the list
+ docs.append(doc)
+ nodes = nodes + len(doc.children)
+ try:
+ # the category file
+ xmlfile = os.path.join(tempdir, 'category.xml')
+ doc = xml.Document(xmlfile, 'category')
+ except:
+ log.warning('error while parsing %s' %xmlfile)
+ else:
+ # add the files to the list
+ docs.append(doc)
+ nodes = nodes + len(doc.children)
+
+
+ # create download adresse for programm files
+ addresse = 'http://www.epgdata.com/index.php'
+ addresse+= '?action=sendPackage&iLang=de&iOEM=xml&iCountry=de'
+ addresse+= '&pin=%s' %pin
+ addresse+= '&dayOffset=%s&dataType=xml'
+
+ # get the file for each day
+ for i in range(0, days):
+ # remove old file if needed
+ try:
+ os.remove(tmpfile)
+ except OSError:
+ pass
+ # download the zip file
+ log.info('Getting data for day %s' %(i+1))
+ exit = os.system('wget -N -O %s "%s" >>%s 2>>%s'
+ %(tmpfile, addresse %i, logfile, logfile))
+ if not os.path.exists(tmpfile) or exit:
+ log.error('Cannot get file from epgdata.com, see %s' %logfile)
+ epg.guide_changed()
+ return
+ # and unzip the zip file
+ log.info('Unzipping data for day %s' %(i+1))
+ exit = os.system('unzip -uo -d %s %s >>%s 2>>%s'
+ %(tempdir, tmpfile, logfile, logfile))
+ if exit:
+ log.error('Cannot unzip the downloaded file, see %s' %logfile)
+ epg.guide_changed()
+ return
+
+
+ # list of program xml files that must be parsed
+ progfiles = glob.glob(os.path.join(tempdir,'*de_q[a-z].xml'))
+ if len(progfiles)==0:
+ log.warning('no progam xml files for parsing')
+
+ # parse the progam xml files
+ for xmlfile in progfiles:
+ try:
+ doc = xml.Document(xmlfile, 'pack')
+ except:
+ log.warning('error while parsing %s' %xmlfile)
+ continue
+ # add the files to the list
+ docs.append(doc)
+ nodes = nodes + len(doc.children)
+
+ log.info('There are %s files to parse with in total %s nodes'
+ %(len(docs), nodes))
+
+
+ # put the informations in the UpdateInfo object.
+ info = UpdateInfo()
+ info.epg = epg
+ info.pin = pin
+ info.channel_id_to_db_id = {}
+ info.meta_id_to_meta_name = {}
+ info.docs =docs
+ info.doc = info.docs.pop(0)
+ info.node = info.doc.first
+ info.total = nodes
+ info.progress_step = info.total / 100
+
+
+ # start parser in main loop again, thread is done
+ timer = Timer(_update_process_step, info)
+ timer.start(0)
+
+
+def _update_process_step(info):
+ """
+ Step in main loop for the parsing of the epgdata xml files.
+ This function will be called in a Timer until everything is parsed.
+ """
+ t0 = time.time()
+ while info.node:
+ if info.node.name == "data":
+ # parse!
+ parse_data(info)
+ info.node = info.node.get_next()
+ if time.time() - t0 > 0.1:
+ # time to return to the main loop
+ break
+ if not info.node:
+ # check if there are more files to parse
+ if len(info.docs)>0:
+ # take the next one
+ info.doc = info.docs.pop(0)
+ # and start with its first node
+ info.node = info.doc.first
+ else:
+ # no more files to parse, mission completed!
+ info.epg.guide_changed()
+ log.info('epg grabbing finished!')
+ return False
+
+ return True
+
+
+def update(epg):
+ """
+ Interface to source_epgdata. This function will start the update process.
+ """
+ try:
+ pin = config.pin
+ except KeyError:
+ log.exception('PIN for epgdata.com is missing in tvserver.conf')
+ epg.guide_changed()
+ return False
+
+ thread = Thread(_update_parse_xml_thread, epg,
+ str(config.pin), int(config.days))
+ thread.start()
+ return True
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog