Author: simplet
Date: 2008-12-01 20:29:55 +0100 (Mon, 01 Dec 2008)
New Revision: 2967
Modified:
software_suite_v2/software/scripts/rss_monde/trunk/rss_monde.py
Log:
* switching to minidom to parse XML
* a little hack to prevent the duplicate bug of the newspaper
* better cleaning of the html
Modified: software_suite_v2/software/scripts/rss_monde/trunk/rss_monde.py
===================================================================
--- software_suite_v2/software/scripts/rss_monde/trunk/rss_monde.py	2008-12-01 14:14:42 UTC (rev 2966)
+++ software_suite_v2/software/scripts/rss_monde/trunk/rss_monde.py	2008-12-01 19:29:55 UTC (rev 2967)
@@ -7,17 +7,10 @@
Read RSS news from "le journal le Monde".
Simplet <simplet _at_ ptigeek _dot_ net>
-
-
- TODO:
-
- - Get "iLimite" from the command line (simplet)
-
+
======================================================================
"""
-
-
__author__ = 'Simplet <simplet _at_ ptigeek _dot_ net>'
__appname__ = 'Rss gadget'
__version__ = '0.0.1'
@@ -26,64 +19,58 @@
import re
import urllib2
-import xml.etree.ElementTree as ET
+import time
+from xml.dom import minidom
from tuxisalive.api import *
def tux_speak(text):
- #tux.mouth.open()
tux.tts.speak(text,"Julie",100)
- #tux.mouth.close()
# remove all html tags from the string
def strip_html_tags(value):
- return re.sub(r'<[^>]*?>', '', value)
+ return re.sub(r'<[^>]*?>', '', value.replace('&lt;','<').replace('&gt;', '>').replace('&quot;', '\"').replace('&amp;quot;', '\"'))
-tux = TuxAPI('127.0.0.1', 270)
+tux = TuxAPI('192.168.1.5', 270)
tux.server.autoConnect(CLIENT_LEVEL_RESTRICTED, 'RssMonde', 'NONE')
tux.server.waitConnected(10.0)
tux.dongle.waitConnected(10.0)
tux.radio.waitConnected(10.0)
if tux.access.waitAcquire(10.0, ACCESS_PRIORITY_NORMAL):
- the_url = 'http://www.lemonde.fr/rss/une.xml'
- iLimite = 5 # it will read x news
- # get the rss feed from the URL
- req = urllib2.Request(the_url)
- handle = urllib2.urlopen(req)
+ the_url = 'http://www.lemonde.fr/rss/une.xml'
+ iLimite = 10
- # create XML elements
- tree = ET.parse(handle)
- root = tree.getroot()
- iter = root.getiterator()
+ req = urllib2.Request(the_url)
+ handle = urllib2.urlopen(req)
- cpt = 0
- for element in iter:
- # did we reach the limit ?
- if cpt > iLimite:
- exit()
+ xmldoc = minidom.parse(handle)
- tag = element.tag
+ cpt = 0
+ old_title = ""
- # sometimes, items are blank : ignore them
- if element.text is None:
- continue
+ for item in xmldoc.getElementsByTagName('item'):
+ if cpt >= iLimite:
+ break
- # translating the feed to the good character set (for the TTS)
- texte = unicode(element.text)
- texte = texte.encode("latin1")
+ title = unicode(item.getElementsByTagName('title')[0].toxml()).encode('latin1')
+ description = unicode(item.getElementsByTagName('description')[0].toxml()).encode('latin1')
- # here we go !
- if tag == 'title':
- print "\n\nTitre: " + texte
- tux_speak(texte)
- if element.tag == 'description':
- print "Description: " + strip_html_tags(texte)
- tux_speak(strip_html_tags(texte))
- cpt += 1
+ # The newspaper "le monde"'rss feed have a bug : sometimes there are
+ # two identical news that are following each others...
+ # let's ignore them if they are identical
+ if old_title != title:
+ print "\n\n#" + str(cpt) + " title: " + strip_html_tags(title) + \
+ "\n\ndescription: " + strip_html_tags(description)
+ tux_speak(strip_html_tags(title))
+ time.sleep(1)
+ tux_speak(strip_html_tags(description))
+ time.sleep(1)
+ old_title = title
+ cpt = cpt + 1
tux.access.release()
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Tux-droid-svn mailing list
Tux-droid-svn@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tux-droid-svn