#!/usr/bin/env python
# Source: webservices/admin/planet/planetlib.py (svn.apache.org, rev 355462)
"""Planet RSS Aggregator Library.

You may modify the following variables;

    user_agent: User-Agent sent to remote sites
    cache_directory: Where to put cached feeds

Requires Python 2.2 and Mark Pilgrim's feedparser.py.
"""

__version__ = "0.2"
__authors__ = [ "Scott James Remnant <[EMAIL PROTECTED]>",
                "Jeff Waugh <[EMAIL PROTECTED]>" ]
__credits__ = "Originally based on spycyroll."
__license__ = "Python"


import os
import re
import time

import feedparser
import StringIO
import sgmllib

# logging only ships with newer Pythons; fall back to the bundled shim.
try:
    import logging
except ImportError:
    import compat_logging
    logging = compat_logging

# gzip is optional; without it compressed feeds are passed through as-is.
try:
    import gzip
except ImportError:
    gzip = None


# We might as well advertise ourself when we're off galavanting
user_agent = " ".join(("Planet/" + __version__,
                       "http://www.planetplanet.org/",
                       feedparser.USER_AGENT))

# Where to put cached rss feeds
cache_directory = 'cache'

# Things we don't want to see in our cache filenames
cache_invalid_stuff = re.compile(r'\W+')
cache_multiple_dots = re.compile(r'\.+')


def cache_filename(url):
    """Returns a cache filename for a URL.

    Sanitises the URL given, replacing unwelcome characters with periods,
    then prepends the configured cache directory.
    """
    filename = url
    filename = filename.replace("http://", "")
    filename = filename.replace("www.", "")
    filename = cache_invalid_stuff.sub('.', filename)
    filename = cache_multiple_dots.sub('.', filename)

    return os.path.join(cache_directory, filename)


def _read_file(path):
    """Return the whitespace-stripped contents of path.

    The file is closed even if reading raises.
    """
    c = open(path)
    try:
        return c.read().strip()
    finally:
        c.close()


def _write_file(path, data):
    """Write data to path (truncating it), closing the file even on error."""
    c = open(path, "w")
    try:
        c.write(data)
    finally:
        c.close()


def _remove_if_exists(path):
    """Best-effort removal of a stale cache side file."""
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            # Deliberately ignored: a leftover side file is harmless.
            pass


class Stripper(sgmllib.SGMLParser):
    """SGML parser that discards markup and keeps only the text data."""

    def __init__(self):
        sgmllib.SGMLParser.__init__(self)

    def strip(self, some_html):
        """Return the character data of some_html with the tags removed."""
        self.theString = ""
        self.feed(some_html)
        self.close()
        return self.theString

    def handle_data(self, data):
        self.theString += data


class Channel:
    """A collection of news items.

    Channel represents a feed of news from a website, or some other
    source.

    A channel is created with a URI where we can obtain the feed
    and an optional dictionary of additional properties you wish
    to set (probably from a config file).

    Special properties:
        offset:      The number of hours out the channel's times tend to be.

    Useful members:
        uri:         Where the feed can be downloaded from
        etag, modified: Used to determine whether the feed has changed

        title:       Title for the feed, often the author's name
        description: A description of the content of the feed
        link:        Link associated with the feed, generally the HTML version

        items:       List of current NewsItems
        props:       Dictionary of properties
    """
    def __init__(self, uri, props=None):
        self.uri = uri
        self.etag = None
        self.modified = None

        self.title = None
        self.description = None
        self.link = None

        self.items = []

        if props:
            self.props = props
        else:
            self.props = {}
        if self.props.has_key('offset'):
            self.offset = float(self.props['offset'])
        else:
            self.offset = None

        # Populate the channel from whatever was cached on a previous run.
        self.cache_read()

    def update(self, uri=None):
        """Update the channel.

        Read feed data from channel's URI (or an alternate one) and
        parse it using feedparser.  This, where possible, caches the
        data and tries not to request it again if it hasn't changed.

        Most of this is actually what feedparser.parse() does, but as
        we need the unparsed data (to cache), we have to do a bit of the
        work ourselves.

        The real work is done in _update().

        Returns None.  When uri is given, the channel's own URI is left
        unchanged even if the request was redirected.
        """
        if uri is None:
            save_uri = 1
            uri = self.uri
            logging.info("Updating feed <" + self.uri + ">")
        else:
            save_uri = 0
            logging.info("Updating feed <" + self.uri + "> from <" + uri + ">")

        # Open the resource and read the data
        f = feedparser.open_resource(uri, agent=user_agent,
                                     etag=self.etag, modified=self.modified)
        try:
            data = self._read_data(f)

            # Check for some obvious things
            if hasattr(f, 'status'):
                if f.status == 304:
                    logging.info("Feed has not changed")
                    return
                if f.status >= 400:
                    logging.error("Update failed for <%s> (Error: %d)"
                                  % (uri, f.status))
                    return

            # Update etag and modified
            new_etag = feedparser.get_etag(f)
            if new_etag:
                self.etag = new_etag
                logging.debug("E-Tag: " + self.etag)
            new_modified = feedparser.get_modified(f)
            if new_modified:
                self.modified = new_modified
                logging.debug("Modified: " + self.format_date(self.modified))

            # Update URI in case of redirect
            if hasattr(f, 'url') and save_uri:
                self.uri = f.url
                logging.debug("URI: <" + self.uri + ">")
            if hasattr(f, 'headers'):
                baseuri = f.headers.get('content-location', self.uri)
            else:
                baseuri = self.uri
        finally:
            # Close on every path, including the 304 / error early returns,
            # which previously leaked the open resource.
            f.close()

        # Parse the feed
        self._update(baseuri, data)

    def _read_data(self, f):
        """Read the data from the resource.

        Attempts to gunzip the data if the Content-Encoding header claimed
        to be gzip, but if that fails it doesn't overly worry about it.

        We then take the data and try to squeeze it into a UTF-8 string
        using Python's unicode module.  If it doesn't decode as UTF-8
        we try ISO-8859-1 before ruthlessly stripping the bad characters.

        Returns the feed data as a UTF-8 encoded byte string.
        """
        data = f.read()

        if hasattr(f, 'headers'):
            if gzip and f.headers.get('content-encoding', '') == 'gzip':
                try:
                    gzdata = gzip.GzipFile(fileobj=StringIO.StringIO(data))
                    data = gzdata.read()
                except:
                    # Best effort: keep the raw data and carry on.
                    logging.warn("Feed contained invalid gzipped data",
                                 exc_info=1)

        try:
            data = unicode(data, "utf8").encode("utf8")
            logging.debug("Encoding: UTF-8")
        except UnicodeError:
            try:
                data = unicode(data, "iso8859_1").encode("utf8")
                logging.debug("Encoding: ISO-8859-1")
            except UnicodeError:
                # NOTE(review): ISO-8859-1 maps every byte value, so this
                # fallback is presumably never reached; kept as a safety net.
                data = unicode(data, "ascii", "replace").encode("utf8")
                logging.warn("Feed wasn't in UTF-8 or ISO-8859-1, replaced " +
                             "all non-ASCII characters.")

        return data

    def cache_read(self):
        """Initialise the channel from the cache.

        The data is read from a file in the cache_directory and parsed.
        Any failure is logged and otherwise ignored.
        """
        cache_uri = cache_filename(self.uri)

        try:
            if os.path.exists(cache_uri):
                if os.path.exists(cache_uri + ",etag"):
                    self.etag = _read_file(cache_uri + ",etag")

                if os.path.exists(cache_uri + ",modified"):
                    self.modified = feedparser.parse_date(
                        _read_file(cache_uri + ",modified"))

                # Parse the cached copy; self.uri is not replaced because
                # an explicit URI is passed.
                self.update(cache_uri)
        except:
            logging.warn("Cache read failed <" + cache_uri + ">", exc_info=1)

    def cache_write(self, data):
        """Write the unparsed feed to the cache.

        The data is written as-is to a file in the cache_directory.
        If the channel has etag or modified information, those are written
        to files alongside; stale side files are removed otherwise.
        Any failure is logged and otherwise ignored.
        """
        cache_uri = cache_filename(self.uri)

        try:
            _write_file(cache_uri, data)

            if self.etag:
                _write_file(cache_uri + ",etag", self.etag + "\n")
            else:
                _remove_if_exists(cache_uri + ",etag")

            if self.modified:
                _write_file(cache_uri + ",modified",
                            feedparser.format_http_date(self.modified) + "\n")
            else:
                _remove_if_exists(cache_uri + ",modified")
        except:
            logging.warn("Cache write failed <" + cache_uri + ">", exc_info=1)

    def _update(self, baseuri, data):
        """Update the channel from a parsed feed.

        This is the real guts of update() and after all the fuss is
        actually pretty simple.  We parse the feed using feedparser
        and if we get the information, cache it.

        Returns the new item list, or None when the feed was empty or
        contained an implausible date (the channel is left untouched).
        """
        feed = feedparser.FeedParser(baseuri)
        feed.feed(data)

        if len(feed.items) < 1:
            logging.info("Empty feed, cowardly not updating %s" % (baseuri))
            return

        new_items = []
        for item in feed.items:
            new_items.append(NewsItem(item, self))
            # Reject the whole feed if any item is more than a year off.
            if abs(new_items[-1].date[0] - time.gmtime()[0]) > 1:
                logging.warning(("Obviously bogus year in feed (%d), " +
                                 "cowardly not updating")
                                % (new_items[-1].date[0],))
                return

        self.items = new_items
        self.title = feed.channel.get('title', '')
        self.description = feed.channel.get('description', '')
        self.link = feed.channel.get('link', '')

        self.cache_write(data)
        return self.items

    def utctime(self, date):
        """Return UTC time() for given date.

        Returns the equivalent of time() for the given date, but taking
        into account local timezone or any forced offset for the channel.

        This is suitable for using in a call to gmtime() only.
        """
        offset = time.timezone
        if self.offset is not None:
            # self.offset is the difference from UTC, so add timezone
            # NOTE(review): time.timezone ends up counted twice when an
            # offset is configured -- confirm this is intended.
            offset += self.offset * 3600 + time.timezone

        return time.mktime(date) - offset

    def format_date(self, date, fmt=None):
        """Formats a date for output.

        Outputs the UTC date, taking into account any forced offset for the
        channel.  fmt may be 'iso', 'rfc822', None (a human-readable
        default) or any strftime() format string.
        """
        if fmt == 'iso':
            fmt = "%Y-%m-%dT%H:%M:%S+00:00"
        elif fmt == 'rfc822':
            fmt = "%a, %d %b %Y %H:%M:%S +0000"
        elif fmt is None:
            fmt = "%B %d, %Y %I:%M %p"

        return time.strftime(fmt, time.gmtime(self.utctime(date)))


class NewsItem:
    """A single item of news.

    NewsItem represents a single item of news from a channel.  They
    are created and owned by the Channel and accessible through
    Channel().items.

    Useful members:
        id:          Unique identifier for the item (often a URI)
        date:        Date item was last modified

        title:       Title of the item
        summary:     Summary of the content for the first page
        content:     Content of the item
        link:        Link associated with the item, generally the HTML version
        creator:     Person who created the item

        channel:     Channel this NewsItem belongs to
    """
    def __init__(self, dict, channel):
        self.channel = channel

        self.link = dict.get('link', '')
        self.id = dict.get('id', self.link)

        self.title = dict.get('title', '')
        self.summary = dict.get('summary', '')
        if 'content' in dict and len(dict['content']):
            self.content = dict['content'][0]['value']
        elif 'description' in dict:
            self.content = dict['description']
        else:
            self.content = ''

        if type(self.content) != type(""):
            # Store content as a UTF-8 byte string.  Encoding directly
            # avoids re-decoding the bytes with the implicit ASCII codec,
            # which used to turn every non-ASCII character into '?'.
            try:
                self.content = self.content.encode("utf8")
            except UnicodeError:
                self.content = self.content.encode("ascii", "replace")
                logging.warn("Item wasn't in UTF-8 or ISO-8859-1, replaced " +
                             "all non-ASCII characters.")

        # self.id and self.date must be set before _cached_time() runs,
        # as it reads both.
        self.date = dict.get('modified_parsed')
        if self.date is None or self.date[3:6] == (0, 0, 0):
            self.date = self._cached_time()

        self.creator = dict.get('creator', '')

    def _cached_time(self):
        """Retrieve or save a cached time for this entry.

        Sometimes entries lack any date or time information, and
        sometimes they just lack time information.  The trouble is
        we need both to be able to put them in the right place in the
        output.

        This is the solution (for both).  When you find no date, or
        one that ends up at exactly midnight (as-if!) we grovel around
        inside a cache file to see whether we've recorded anything for
        it so far.  If we have, we use that, otherwise we'll use the
        current UTC time and save that (along with the rest) into the
        cache file for use next time.

        Truly midnight dates will sneak a bit forward, but that's not
        a great loss.
        """
        time_cache_uri = cache_filename(self.channel.uri) + ",times"
        time_cache = {}

        if os.path.exists(time_cache_uri):
            try:
                c = open(time_cache_uri)
                try:
                    for line in c.readlines():
                        id, timestr = line.strip().split(" = ")
                        time_cache[id] = feedparser.parse_date(timestr)
                finally:
                    c.close()

                if time_cache.has_key(self.id):
                    return time_cache[self.id]
            except:
                logging.warn("Time cache read failed <" + time_cache_uri + ">",
                             exc_info = 1)

        time_cache[self.id] = time.gmtime()
        fmt_time = self.channel.format_date(time_cache[self.id], 'iso')

        # Make sure we don't move the entry *too far*
        if self.date is not None:
            orig_time = self.channel.utctime(self.date)
            this_time = self.channel.utctime(time_cache[self.id])
            if abs(this_time - orig_time) > 86400:
                return self.date

        try:
            c = open(time_cache_uri, "a")
            try:
                c.write("%s = %s\n" % (self.id, fmt_time))
            finally:
                c.close()
        except:
            logging.warn("Time cache write failed <" + time_cache_uri + ">",
                         exc_info=1)

        return time_cache[self.id]


class Planet:
    """A collection of channels.

    Planet represents an aggregated set of channels, easing their
    management and allowing you to directly retrieve the items in
    descending date order.

    Once a planet is created you subscribe new Channels to it.  You
    can then obtain a list of subscribed channels through the
    channels() member function and a list of items through the items()
    function (bypassing the Channel level).

    What you do with a Planet is up to you.
    """
    def __init__(self):
        self._channels = []
        # Sorted item cache; None means "needs rebuilding".
        self._items = None

    def subscribe(self, channel):
        """Subscribe the Planet to a Channel."""
        self._channels.append(channel)
        self._items = None

    def unsubscribe(self, channel):
        """Unsubscribe the Planet from a Channel."""
        self._channels.remove(channel)
        self._items = None

    def channels(self):
        """Retrieve the currently subscribed channels.

        Returns a list of all the Channels this planet is subscribed to.
        """
        return list(self._channels)

    def items(self):
        """Retrieve the items in date order.

        Returns all items in descending date order (most recent first).
        A fresh list is returned on every call, so callers may modify it
        without corrupting the cached ordering.
        """
        if self._items is None:
            collected = []
            for channel in self._channels:
                for item in channel.items:
                    if item.date is not None:
                        collected.append(item)

            collected.sort(lambda x,y: cmp(y.channel.utctime(y.date),
                                           x.channel.utctime(x.date)))
            self._items = collected

        return list(self._items)
Added: webservices/admin/planet/planetlib.pyc URL: http://svn.apache.org/viewcvs/webservices/admin/planet/planetlib.pyc?rev=355462&view=auto ============================================================================== Binary file - no diff available. Propchange: webservices/admin/planet/planetlib.pyc ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: webservices/admin/planet/pyblosxom.css URL: http://svn.apache.org/viewcvs/webservices/admin/planet/pyblosxom.css?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/pyblosxom.css (added) +++ webservices/admin/planet/pyblosxom.css Fri Dec 9 04:21:26 2005 @@ -0,0 +1,110 @@ +/* Standard Blosxom Bits */ + +.blosxomDate { + color: white; + background-color: black; + font-weight: bold; + padding: 8px 5px 8px 5px; + clear: both; +} + +.blosxomDate a { + color: white; + font-weight: bold; + text-decoration: none; + margin-right: 0.5ex; +} + +.blosxomDateLink { + float: right; + border: 0px; +} + +.blosxomTitle { + clear: both; + color: black; + font-weight: bold; + text-decoration: none; + margin: 15px; + margin-bottom: 0px; +} + +.blosxomTitle a { + color: black; + text-decoration: none; +} + +.blosxomStory { + margin: 15px; + margin-bottom: 20px; + margin-top: 7px; + clear: both; +} + +.blosxomStory a.permalink { + color: #aaaaaa; + font-size: small; + text-decoration: none; + font-style: italic; +} + +.blosxomStory p { + margin-top: 7px; + margin-bottom: 7px; +} + +.blosxomStory img { + border: 0px; + padding: 8px 8px 8px 8px; + margin: 0px; + float: none; +} + +.blosxomStory img.left { + float: left; +} + +.blosxomStory img.right { + float: right; +} + +/* Calendar */ + +#blosxomCalendar { + padding: 0px 12px 12px 12px; +} + +.blosxomCalendar { + font-size: small; + margin-left: 0px; + margin-right: 0px; + width: 100%; +} + +.blosxomCalendarHead { + font-weight: bold; +} 
+ +.blosxomCalendarWeekHeader { + color: green; +} + +.blosxomCalendarCell { + color: #aaaaaa; +} + +.blosxomCalendarBlogged a { + color: #000000; +} + +.blosxomCalendarSpecificDay a { + color: blue; +} + +.blosxomCalendarToday { + color: green; +} + +.blosxomCalendarToday a { + color: green; +} Added: webservices/admin/planet/rss09.xml.tmpl URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss09.xml.tmpl?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/rss09.xml.tmpl (added) +++ webservices/admin/planet/rss09.xml.tmpl Fri Dec 9 04:21:26 2005 @@ -0,0 +1,21 @@ +<?xml version="1.0"?> +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns="http://my.netscape.com/rdf/simple/0.9/"> + +<channel> + <title><TMPL_VAR name></title> + <TMPL_IF channel_description> + <description><TMPL_VAR channel_description></description> + </TMPL_IF> + <link>http://www.planetapache.org/</link> +</channel> + +<TMPL_LOOP Items> +<item> + <title><TMPL_VAR channel_name><TMPL_IF title>: <TMPL_VAR title ESCAPE="HTML"></TMPL_IF></title> + <link><TMPL_VAR link ESCAPE="HTML"></link> +</item> +</TMPL_LOOP> + +</rdf:RDF> Added: webservices/admin/planet/rss09.xml.tmplc URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss09.xml.tmplc?rev=355462&view=auto ============================================================================== Binary file - no diff available. 
Propchange: webservices/admin/planet/rss09.xml.tmplc ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: webservices/admin/planet/rss10.xml.tmpl URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss10.xml.tmpl?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/rss10.xml.tmpl (added) +++ webservices/admin/planet/rss10.xml.tmpl Fri Dec 9 04:21:26 2005 @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:foaf="http://xmlns.com/foaf/0.1/" + xmlns:content="http://purl.org/rss/1.0/modules/content/" + xmlns="http://purl.org/rss/1.0/" +> +<channel rdf:about="http://www.planetapache.org/"> + <title><TMPL_VAR name></title> + <TMPL_IF channel_description> + <description><TMPL_VAR channel_description></description> + </TMPL_IF> + <link>http://www.planetapache.org/</link> + + <items> + <rdf:Seq> +<TMPL_LOOP Items> + <rdf:li rdf:resource="<TMPL_VAR link ESCAPE="HTML">" /> +</TMPL_LOOP> + </rdf:Seq> + </items> + +<TMPL_LOOP Items> +<item rdf:about="<TMPL_VAR link ESCAPE="HTML">"> + <title><TMPL_VAR channel_name><TMPL_IF title>: <TMPL_VAR title ESCAPE="HTML"></TMPL_IF></title> + <link><TMPL_VAR link ESCAPE="HTML"></link> + <TMPL_IF content> + <content:encoded><TMPL_VAR content ESCAPE="HTML"></content:encoded> + </TMPL_IF> + <dc:date><TMPL_VAR date_iso></dc:date> +</item> +</TMPL_LOOP> +</channel> +</rdf:RDF> Added: webservices/admin/planet/rss10.xml.tmplc URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss10.xml.tmplc?rev=355462&view=auto ============================================================================== Binary file - no diff available. 
Propchange: webservices/admin/planet/rss10.xml.tmplc ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: webservices/admin/planet/rss20.xml.tmpl URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss20.xml.tmpl?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/rss20.xml.tmpl (added) +++ webservices/admin/planet/rss20.xml.tmpl Fri Dec 9 04:21:26 2005 @@ -0,0 +1,23 @@ +<?xml version="1.0"?> +<rss version="2.0"> +<channel> + <title><TMPL_VAR name></title> + <link>http://www.planetapache.org/</link> + <TMPL_IF channel_description> + <description><TMPL_VAR channel_description></description> + </TMPL_IF> + <language>en</language> + +<TMPL_LOOP Items> +<item> + <title><TMPL_VAR channel_name><TMPL_IF title>: <TMPL_VAR title ESCAPE="HTML"></TMPL_IF></title> + <guid isPermaLink="true"><TMPL_VAR link ESCAPE="HTML"></guid> + <TMPL_IF content> + <description><TMPL_VAR content ESCAPE="HTML"></description> + </TMPL_IF> + <pubDate><TMPL_VAR date_822></pubDate> +</item> +</TMPL_LOOP> + +</channel> +</rss> Added: webservices/admin/planet/rss20.xml.tmplc URL: http://svn.apache.org/viewcvs/webservices/admin/planet/rss20.xml.tmplc?rev=355462&view=auto ============================================================================== Binary file - no diff available. 
Propchange: webservices/admin/planet/rss20.xml.tmplc ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: webservices/admin/planet/run.sh URL: http://svn.apache.org/viewcvs/webservices/admin/planet/run.sh?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/run.sh (added) +++ webservices/admin/planet/run.sh Fri Dec 9 04:21:26 2005 @@ -0,0 +1,2 @@ +cd /usr/home/serendib/blogs +/usr/local/bin/python planet.py config.ini > /dev/null 2>&1 Added: webservices/admin/planet/timeoutsocket.py URL: http://svn.apache.org/viewcvs/webservices/admin/planet/timeoutsocket.py?rev=355462&view=auto ============================================================================== --- webservices/admin/planet/timeoutsocket.py (added) +++ webservices/admin/planet/timeoutsocket.py Fri Dec 9 04:21:26 2005 @@ -0,0 +1,424 @@ + +#### +# Copyright 2000,2001 by Timothy O'Malley <[EMAIL PROTECTED]> +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. 
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####

"""Timeout Socket

This module enables a timeout mechanism on all TCP connections.  It
does this by inserting a shim into the socket module.  After this module
has been imported, all socket creation goes through this shim.  As a
result, every TCP connection will support a timeout.

The beauty of this method is that it immediately and transparently
enables the entire python library to support timeouts on TCP sockets.
As an example, if you wanted SMTP connections to have a 20 second
timeout:

    import timeoutsocket
    import smtplib
    timeoutsocket.setDefaultSocketTimeout(20)


The timeout applies to the socket functions that normally block on
execution:  read, write, connect, and accept.  If any of these
operations exceeds the specified timeout, the exception Timeout
will be raised.

The default timeout value is set to None.  As a result, importing
this module does not change the default behavior of a socket.  The
timeout mechanism only activates when the timeout has been set to
a numeric value.  (This behavior mimics the behavior of the
select.select() function.)

This module implements two classes: TimeoutSocket and TimeoutFile.

The TimeoutSocket class defines a socket-like object that attempts to
avoid the condition where a socket may block indefinitely.  The
TimeoutSocket class raises a Timeout exception whenever the
current operation delays too long.

The TimeoutFile class defines a file-like object that uses the TimeoutSocket
class.  When the makefile() method of TimeoutSocket is called, it returns
an instance of a TimeoutFile.

Each of these objects adds two methods to manage the timeout value:

    get_timeout()   -->  returns the timeout of the socket or file
    set_timeout()   -->  sets the timeout of the socket or file


As an example, one might use the timeout feature to create httplib
connections that will timeout after 30 seconds:

    import timeoutsocket
    import httplib
    H = httplib.HTTP("www.python.org")
    H.sock.set_timeout(30)

Note:  When used in this manner, the connect() routine may still
block because it happens before the timeout is set.  To avoid
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.

Good Luck!

"""

__version__ = "$Revision: 1.1.1.1 $"
__author__ = "Timothy O'Malley <[EMAIL PROTECTED]>"

#
# Imports
#
import select, string
import socket
# If the shim is already installed, socket.socket is our factory; the
# real constructor was stashed in socket._no_timeoutsocket.
if not hasattr(socket, "_no_timeoutsocket"):
    _socket = socket.socket
else:
    _socket = socket._no_timeoutsocket


#
# Set up constants to test for Connected and Blocking operations.
# We delete 'os' and 'errno' to keep our namespace clean(er).
# Thanks to Alex Martelli and G. Li for the Windows error codes.
#
import os
if os.name == "nt":
    _IsConnected = ( 10022, 10056 )
    _ConnectBusy = ( 10035, )
    _AcceptBusy = ( 10035, )
else:
    import errno
    _IsConnected = ( errno.EISCONN, )
    _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
    _AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK )
    del errno
del os


#
# Default timeout value for ALL TimeoutSockets
#
_DefaultTimeout = None
def setDefaultSocketTimeout(timeout):
    """Set the timeout given to every TimeoutSocket created from now on."""
    global _DefaultTimeout
    _DefaultTimeout = timeout
def getDefaultSocketTimeout():
    """Return the timeout currently given to newly created TimeoutSockets."""
    return _DefaultTimeout

#
# Exceptions for socket errors and timeouts
#
Error = socket.error
class Timeout(Exception):
    """Raised when a socket operation does not complete within the timeout."""
    pass


#
# Factory function
#
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Create a socket, wrapping AF_INET/SOCK_STREAM ones in TimeoutSocket.

    Any other family/type combination gets a plain socket from the
    original constructor.  Note that proto is only passed on for those
    plain sockets; it is not used when a TimeoutSocket is created.
    """
    if family != AF_INET or type != SOCK_STREAM:
        if proto:
            return _socket(family, type, proto)
        else:
            return _socket(family, type)
    return TimeoutSocket( _socket(family, type), _DefaultTimeout )
# end timeoutsocket

#
# The TimeoutSocket class definition
#
class TimeoutSocket:
    """TimeoutSocket object
    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.
    """

    # Number of TimeoutFile objects handed out by makefile() and not
    # yet closed; close() only closes the real socket when this is 0.
    _copies = 0
    # Blocking mode requested by the caller (see setblocking()).
    _blocking = 1

    def __init__(self, sock, timeout):
        self._sock = sock
        self._timeout = timeout
    # end __init__

    def __getattr__(self, key):
        # Anything not defined here is delegated to the wrapped socket.
        return getattr(self._sock, key)
    # end __getattr__

    def get_timeout(self):
        """Return this socket's timeout."""
        return self._timeout
    # end get_timeout

    def set_timeout(self, timeout=None):
        """Set this socket's timeout; it is passed to select.select()."""
        self._timeout = timeout
    # end set_timeout

    def setblocking(self, blocking):
        """Record the requested blocking mode and apply it to the socket."""
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end setblocking

    def connect_ex(self, addr):
        """Like connect(), but return the socket error code instead of
        raising it (0 on success).  Timeout is not caught here.
        """
        errcode = 0
        try:
            self.connect(addr)
        except Error, why:
            errcode = why[0]
        return errcode
    # end connect_ex

    def connect(self, addr, port=None, dumbhack=None):
        """Connect to addr, raising Timeout if it takes too long.

        dumbhack is internal: it marks the second pass, made after
        select() has reported the socket as writable.
        """
        # In case we were called as connect(host, port)
        if port != None: addr = (addr, port)

        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not blocking, re-raise
            if not blocking:
                raise

            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise

        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                return self.connect(addr, dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect

    def accept(self, dumbhack=None):
        """Accept a connection, raising Timeout if none arrives in time.

        Returns (TimeoutSocket, address).  dumbhack is internal: it marks
        the second pass, made after select() reported the socket readable.
        """
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to accept
        #  If we get a valid result, then convert the
        #  accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not supposed to block, then re-raise
            if not blocking:
                raise

            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if errcode not in _AcceptBusy:
                raise

        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept

    def send(self, data, flags=0):
        """Send data, raising Timeout if the socket does not become
        writable in time (only checked in blocking mode).
        """
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send

    def recv(self, bufsize, flags=0):
        """Receive up to bufsize bytes, raising Timeout if the socket does
        not become readable in time (only checked in blocking mode).
        """
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv

    def makefile(self, flags="r", bufsize=-1):
        """Return a TimeoutFile wrapping this socket."""
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile

    def close(self):
        """Close the real socket once no TimeoutFile copies remain;
        otherwise just drop one copy from the count.
        """
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close

# end TimeoutSocket


class TimeoutFile:
    """TimeoutFile object
    Implements a file-like object on top of TimeoutSocket.
    """

    def __init__(self, sock, mode="r", bufsize=4096):
        self._sock = sock
        self._bufsize = 4096
        if bufsize > 0: self._bufsize = bufsize
        # The read buffer lives on the socket object, not on this file.
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""

    # end __init__

    def __getattr__(self, key):
        # Anything not defined here (send, recv, ...) goes to the socket.
        return getattr(self._sock, key)
    # end __getattr__

    def close(self):
        self._sock.close()
        self._sock = None
    # end close

    def write(self, data):
        self.send(data)
    # end write

    def read(self, size=-1):
        """Read up to size bytes, or until recv() returns no data when
        size is negative.  Surplus data stays queued on the socket.
        """
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        if size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end read

    def readline(self, size=-1):
        """Read one line (including the newline), stopping early once
        size bytes are queued or recv() returns no data.
        """
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            idx = string.find(_sock._inqueue, "\n")
            if idx >= 0:
                break
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf

        data = _sock._inqueue
        _sock._inqueue = ""
        if idx >= 0:
            idx = idx + 1
            _sock._inqueue = data[idx:]
            data = data[:idx]
        elif size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end readline

    def readlines(self, sizehint=-1):
        """Read everything and return it split into lines; sizehint is
        not used.
        """
        result = []
        data = self.read()
        while data:
            idx = string.find(data, "\n")
            if idx >= 0:
                idx = idx + 1
                result.append( data[:idx] )
                data = data[idx:]
            else:
                result.append( data )
                data = ""
        return result
    # end readlines

    def flush(self):  pass

# end TimeoutFile


#
# Silently replace the socket() builtin function with
# our timeoutsocket() definition.
#
if not hasattr(socket, "_no_timeoutsocket"):
    socket._no_timeoutsocket = socket.socket
    socket.socket = timeoutsocket
del socket
socket = timeoutsocket
# Finis
