Hmm, thanks for the work on the patch; I'll take a look at it later. But I'm a little dubious: given that you yourself say it is of poor quality, and that the dreaded "ordinal not in range" error *may still* occur, I can't really see it as a proper fix.
Id really like to understand what the problem is with changing site.encoding which seems to me the most logical and easiest way to fix things 100% . It seems however you fix things in jabberpy the python expat bindings will still barf unless you change site.encoding . Its probably best to discuss this further ( if you want ) on the jabber.py list. -- mallum on Wed, Feb 06, 2002 at 08:35:05PM +0100, Jacek Konieczny wrote: > On Wed, Feb 06, 2002 at 06:14:17PM +0100, Igor Stroh wrote: > > > > it doesn't work this way, don't ask my why :) to switch to utf-8, edit > > > > your site.py and change the line that says "encoding = 'ascii'" to > > > > "encoding = 'UTF-8'" > > > It is not a good thing (one Python hacker told me this, with some > > > arguments, that convinced me). > > > jabber.py should be fixed, so it uses proper encoding. > > > > there's no way to do it other than to follow the instructions at > > http://www.python.org/cgi-bin/faqw.py?req=show&file=faq04.102.htp > > > ... > > > > again, this is a known issue, if you think there's a better way to handle > > this problem, please send a patch > Here is patch attached. It is not very good or pretty, but I wrote it > just to show you how I think it should look like. > > IMHO jabber.py module should work on Unicode and it should not depend in > any way on system or locale encoding. Applications are responsible for > encoding conversion and if they don't do it well it is OK, that they > crash. Sometimes it is the only way to convinve ascii-speaking developer > to fix this :-) > > The problem is, that the expat python module doesn't support Unicode > very well :-( Thats why the patch is so ugly (but I am sure there are > better ways to do this anyway). > > This patch makes the sample jabber client work for me, with > international characters. > > It could happen, that conversion error ("ordinal not in range") may > occur. If it is raised in jabber.py, it means something more has to be > fixed in the module. 
When in the application --- this means application > is broken. Making the module silently convert international characters > to "?" is bad. I left this behaviour for log and debug messages --- this > are the only places where it seems OK for me. > > > or a solution proposal to jabber.py > > mailing list > > or just post in here, i'll forward the message to the list... > Could you do this, please? > > Greets, > Jacek > > The ugly patch follow... > > diff -durN jabber.py-0.3-1.orig/examples/test_client.py >jabber.py-0.3-1/examples/test_client.py > --- jabber.py-0.3-1.orig/examples/test_client.py Thu Jan 17 13:05:40 2002 > +++ jabber.py-0.3-1/examples/test_client.py Wed Feb 6 20:13:48 2002 > @@ -1,4 +1,4 @@ > -#!/usr/bin/env python2 > +#!/usr/bin/python > > # $Id: test_client.py,v 1.9 2002/01/17 12:05:40 mallum Exp $ > > @@ -9,6 +9,7 @@ > from select import select > from string import split,strip,join > import sys,os > +import locale > > sys.path.insert(1, os.path.join(sys.path[0], '..')) > > @@ -24,6 +25,12 @@ > MyStatus = '' > MyShow = '' > > +loc = locale.getdefaultlocale() > +if loc[1]: > + LocalEncoding=loc[1] > +else: > + LocalEncoding=getdefaultencoding() > + > def usage(): > print "%s: a simple python jabber client " % sys.argv[0] > print "usage:" > @@ -107,7 +114,7 @@ > if Who != '': > msg = jabber.Message(Who, strip(txt)) > msg.setType('chat') > - print "<%s> %s" % (JID, msg.getBody()) > + print "<%s> %s" % (JID.encode(LocalEncoding,"replace"), >msg.getBody().encode(LocalEncoding,"replace")) > con.send(msg) > else: > print colorize('Nobody selected','red') > @@ -117,8 +124,8 @@ > """Called when a message is recieved""" > if msg.getBody(): ## Dont show blank messages ## > print colorize( > - '<' + str(msg.getFrom()) + '>', 'green' > - ) + ' ' + msg.getBody() > + '<' + str(msg.getFrom()).encode(LocalEncoding,"replace") + '>', 'green' > + ) + ' ' + msg.getBody().encode(LocalEncoding,"replace") > > def presenceCB(con, prs): > """Called when a presence is 
recieved""" > @@ -149,11 +156,23 @@ > print colorize("we are now unsubscribed to %s" % (who), 'blue') > > elif type == 'available': > + sh=prs.getShow() > + if sh: > + sh=sh.encode(LocalEncoding,"replace") > + st=prs.getStatus() > + if st: > + st=st.encode(LocalEncoding,"replace") > print colorize("%s is available (%s / %s)" % \ > - (who, prs.getShow(), prs.getStatus()),'blue') > + (who, sh, st),'blue') > elif type == 'unavailable': > + sh=prs.getShow() > + if sh: > + sh=sh.encode(LocalEncoding,"replace") > + st=prs.getStatus() > + if st: > + st=st.encode(LocalEncoding,"replace") > print colorize("%s is unavailable (%s / %s)" % \ > - (who, prs.getShow(), prs.getStatus()),'blue') > + (who, sh, st),'blue') > > > def iqCB(con,iq): > @@ -243,7 +262,7 @@ > inputs, outputs, errors = select([sys.stdin], [], [],1) > > if sys.stdin in inputs: > - doCmd(con,sys.stdin.readline()) > + doCmd(con,unicode(sys.stdin.readline(),LocalEncoding)) > else: > con.process(1) > > diff -durN jabber.py-0.3-1.orig/jabber.py jabber.py-0.3-1/jabber.py > --- jabber.py-0.3-1.orig/jabber.py Thu Jan 17 13:05:40 2002 > +++ jabber.py-0.3-1/jabber.py Wed Feb 6 20:18:05 2002 > @@ -155,7 +155,7 @@ > > def send(self, what): > """Sends a jabber protocol element (Node) to the server""" > - xmlstream.Client.write(self,str(what)) > + xmlstream.Client.write(self,what) > > def dispatch(self, root_node ): > """Called internally when a 'protocol element' is recieved. 
> @@ -364,7 +364,7 @@ > > def send(self, what): > """Sends a jabber protocol element (Node) to the server""" > - xmlstream.Client.write(self,str(what)) > + xmlstream.Client.write(self,what.unicode()) > > def sendInitPresence(self): > """Sends an empty presence protocol element to the > @@ -603,6 +603,9 @@ > """returns an xmlstreamnode representation of the protocol element""" > return self._node > > + def unicode(self): > + return self._node.unicode() > + > def __str__(self): > return self._node.__str__() > > diff -durN jabber.py-0.3-1.orig/xmlstream.py jabber.py-0.3-1/xmlstream.py > --- jabber.py-0.3-1.orig/xmlstream.py Thu Jan 17 13:05:40 2002 > +++ jabber.py-0.3-1/xmlstream.py Wed Feb 6 20:22:18 2002 > @@ -44,11 +44,6 @@ > STDIO = 0 > TCP_SSL = 2 > > -ENCODING = site.encoding ## fallback encoding to avoid random > - ## random UnicodeError: ASCII decoding error: > - ## ordinal not in range(128) > - ## type errors - being looked into. > - > BLOCK_SIZE = 1024 ## Number of bytes to get at at time via socket > ## transactions > > @@ -159,7 +154,28 @@ > return newnode > > def __str__(self): > - return self._xmlnode2str() > + return self.unicode() > + > + def unicode(self, parent=None): > + """Returns an xml ( Unicode ) representation of the node > + and it children""" > + s = u"<" + self.name > + if self.namespace: > + if parent and parent.namespace != self.namespace: > + s = s + u" xmlns = '%s' " % self.namespace > + for key in self.attrs.keys(): > + val = str(self.attrs[key]) > + s = s + u" %s='%s'" % ( key, XMLescape(val) ) > + s = s + u">" > + cnt = 0 > + if self.kids != None: > + for a in self.kids: > + if (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt]) > + s = s + a._xmlnode2str(parent=self) > + cnt=cnt+1 > + if (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt]) > + s = s + u"</" + self.name + u">" > + return s > > def _xmlnode2str(self, parent=None): > """Returns an xml ( string ) representation of the node > @@ -208,6 +224,7 @@ > method 
of Node""" > def __init__(self,data): > self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ') > + self._parser.returns_unicode = 1 > self._parser.StartElementHandler = self.unknown_starttag > self._parser.EndElementHandler = self.unknown_endtag > self._parser.CharacterDataHandler = self.handle_data > @@ -298,8 +315,10 @@ > self._logFH = None > > def DEBUG(self,txt): > + if type(txt) is type(u""): > + txt=txt.encode(sys.getdefaultencoding(),"replace") > if self._debug: > - sys.stderr.write("DEBUG: %s\n" % txt) > + sys.stderr.write("DEBUG: %s\n" % txt ) > > def getSocket(self): > return self._sock > @@ -368,45 +387,42 @@ > data_in = u'' > if self._connection == TCP: > data_in = data_in + \ > - unicode(self._sock.recv(BLOCK_SIZE),'utf-8').encode(ENCODING, > - 'replace') > + unicode(self._sock.recv(BLOCK_SIZE),'utf-8') > while data_in: > data = data + data_in > if len(data_in) != BLOCK_SIZE: > break > - data_in = unicode(self._sock.recv(BLOCK_SIZE),'utf-8').encode( > - ENCODING, 'replace') > - > + data_in = unicode(self._sock.recv(BLOCK_SIZE),'utf-8') > if self._connection == TCP_SSL: > data_in = data_in + \ > - >unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8').encode(ENCODING,'replace') > + unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8') > while data_in: > data = data + data_in > if len(data_in) != BLOCK_SIZE: > break > - data_in = >unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8').encode(ENCODING, 'replace') > + data_in = unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8') > > elif self._connection == STDIO: > ## Hope this dont buffer ! 
> - data_in = data_in + unicode(sys.stdin.read(1024),'utf-8').encode( > - ENCODING, 'replace') > - while data_in: > + data_in = data_in + unicode(sys.stdin.read(1024),'utf-8') > + while data_in: > data = data + data_in > if len(data_in) != 1024: > break > - data_in = unicode(sys.stdin.read(1024),'utf-8').encode( > - ENCODING, 'replace') > + data_in = unicode(sys.stdin.read(1024),'utf-8') > else: > pass # should never get here > > self.DEBUG("got data %s" % data ) > self.log(data, 'RECV:') > - self._parser.Parse(data) > + self._parser.Parse(data.encode("utf-8")) > return data > > def write(self,data_out=u''): > """Writes raw outgoing data. blocks""" > try: > + if type(data_out) is type(u''): > + data_out=data_out.encode("utf-8") > if self._connection == TCP: > self._sock.send (data_out) > elif self._connection == TCP_SSL: > @@ -418,6 +434,7 @@ > self.log(data_out, 'SENT:') > self.DEBUG("sent %s" % data_out) > except: > + raise > self.DEBUG("xmlstream write threw error") > self.disconnected() > > @@ -461,9 +478,13 @@ > def log(self, data, inout=''): > """Logs data to the specified filehandle. Data is time stamped > and prefixed with inout""" > + if type(data) is type(u""): > + data=data.encode(sys.getdefaultencoding(),"replace") > + if type(inout) is type(u""): > + inout=data.encode(sys.getdefaultencoding(),"replace") > if self._logFH is not None: > self._logFH.write("%s - %s - %s\n" % > - (time.asctime(time.localtime(time.time())), inout, data ) ) > + (time.asctime(time.localtime(time.time())), inout, data)) > > def getIncomingID(self): > """Returns the streams ID""" > _______________________________________________ > jdev mailing list > [EMAIL PROTECTED] > http://mailman.jabber.org/listinfo/jdev _______________________________________________ jdev mailing list [EMAIL PROTECTED] http://mailman.jabber.org/listinfo/jdev
