Author: jmorliaguet
Date: Sat Feb 18 15:46:34 2006
New Revision: 2415

Modified:
   cpsskins/branches/jmo-perspectives/minjson.py
Log:

- upgraded to the latest version of minjson.py



Modified: cpsskins/branches/jmo-perspectives/minjson.py
==============================================================================
--- cpsskins/branches/jmo-perspectives/minjson.py       (original)
+++ cpsskins/branches/jmo-perspectives/minjson.py       Sat Feb 18 15:46:34 2006
@@ -16,11 +16,24 @@
 # reads minimal javascript objects.
 # str's objects and fixes the text to write javascript.
 
+#UNICODE USAGE:  Minjson tries hard to accommodate naive usage in a 
+#"Do what I mean" manner.  Real applications should handle unicode separately.
+# The "right" way to use minjson in an application is to provide minjson a 
+# python unicode string for reading and accept a unicode output from minjson's
+# writing.  That way, the assumptions for unicode are yours and not minjson's.
+
+# That said, the minjson code has some (optional) unicode handling that you 
+# may look at as a model for the unicode handling your application may need.
+
 # Thanks to Patrick Logan for starting the json-py project and making so many
 # good test cases.
 
-# Jim Washington 10 Oct 2005.
+# Additional thanks to Balazs Ree for replacing the writing module.
 
+# Jim Washington 30 Dec 2005.
+
+# 2005-12-30 writing now traverses the object tree instead of relying on 
+#            str() or unicode()
 # 2005-10-10 on reading, looks for \\uxxxx and replaces with u'\uxxxx'
 # 2005-10-09 now tries hard to make all strings unicode when reading.
 # 2005-10-07 got rid of eval() completely, makes object as found by the
@@ -37,17 +50,15 @@
 from token import ENDMARKER, NAME, NUMBER, STRING, OP, ERRORTOKEN
 from tokenize import tokenize, TokenError, NL
 
-# set to true if transmission size is much more important than speed
-# only affects writing, and makes a minimal difference in output size.
-alwaysStripWhiteSpace = False
-
-# set this to True if you want chars above 128 always expressed as /uxxx
-# this is expensive.
-doUxxxx = False
-
 #Usually, utf-8 will work, set this to utf-16 if you dare.
 emergencyEncoding = 'utf-8'
 
+class ReadException(Exception):
+    pass
+
+class WriteException(Exception):
+    pass
+
 #################################
 #      read JSON object         #
 #################################
@@ -76,6 +87,7 @@
 class CommaToken:
     __slots__=[]
     pass
+
 class JSONReader(object):
     """raise SyntaxError if it is not JSON, and make the object available"""
     def __init__(self,data):
@@ -85,7 +97,7 @@
         self.lastOp = None
         self.objects = []
         self.tokenize()
-        
+
     def tokenize(self):
         try:
             tokenize(self._data.next,self.readTokens)
@@ -143,7 +155,7 @@
             if not isinstance(key,basestring):
                 raise SyntaxError
         except IndexError:
-            
+
             raise SyntaxError
         #salt the while
         comma = value
@@ -241,8 +253,8 @@
 
 def safeRead(aString, encoding=None):
     """read the js, first sanitizing a bit and removing any c-style comments
-    If the input is a unicode string, that's OK.  If the input is a byte string, 
-    strings in the object will be produced as unicode anyway.
+    If the input is a unicode string, great.  That's preferred.  If the input 
+    is a byte string, strings in the object will be produced as unicode anyway.
     """
     # get rid of trailing null. Konqueror appends this.
     CHR0 = chr(0)
@@ -264,7 +276,7 @@
         s = aString
     else:
         if encoding:
-            # note: no "try" here.  the encoding provided must work for the 
+            # note: no "try" here.  the encoding provided must work for the
             # incoming byte string.  UnicodeDecode error will be raised
             # in that case.  Often, it will be best not to provide the encoding
             # and allow the default
@@ -294,213 +306,131 @@
 #   write object as JSON        #
 #################################
 
-#alwaysStripWhiteSpace is defined at the top of the module
+import re, codecs
+from cStringIO import StringIO
 
-tfnTuple = (('True','true'),('False','false'),('None','null'),)
+### Codec error handler
 
-def _replaceTrueFalseNone(aString):
-    """replace True, False, and None with javascript counterparts"""
-    for k in tfnTuple:
-        if k[0] in aString:
-            aString = aString.replace(k[0],k[1])
-    return aString
-
-def _handleCode(subStr,stripWhiteSpace):
-    """replace True, False, and None with javascript counterparts if
-       appropriate, remove unicode u's, fix long L's, make tuples
-       lists, and strip white space if requested
-    """
-    if 'e' in subStr:
-        #True, False, and None have 'e' in them. :)
-        subStr = (_replaceTrueFalseNone(subStr))
-    if stripWhiteSpace:
-        # re.sub might do a better job, but takes longer.
-        # Spaces are the majority of the whitespace, anyway...
-        subStr = subStr.replace(' ','')
-    if subStr[-1] in "uU":
-        #remove unicode u's
-        subStr = subStr[:-1]
-    if "L" in subStr:
-        #remove Ls from long ints
-        subStr = subStr.replace("L",'')
-    #do tuples as lists
-    if "(" in subStr:
-        subStr = subStr.replace("(",'[')
-    if ")" in subStr:
-        subStr = subStr.replace(")",']')
-    return subStr
-
-# re for a double-quoted string that has a single-quote in it
-# but no double-quotes and python punctuation after:
-redoublequotedstring = compile(r'"[^"]*\'[^"]*"[,\]\}:\)]')
-escapedSingleQuote = r"\'"
-escapedDoubleQuote = r'\"'
-
-def _doQuotesSwapping(aString):
-    """rewrite doublequoted strings with single quotes as singlequoted strings with
-    escaped single quotes"""
-    s = []
-    foundlocs = redoublequotedstring.finditer(aString)
-    prevend = 0
-    for loc in foundlocs:
-        start,end = loc.span()
-        s.append(aString[prevend:start])
-        tempstr = aString[start:end]
-        endchar = tempstr[-1]
-        ts1 = tempstr[1:-2]
-        ts1 = ts1.replace("'",escapedSingleQuote)
-        ts1 = "'%s'%s" % (ts1,endchar)
-        s.append(ts1)
-        prevend = end
-    s.append(aString[prevend:])
-    return ''.join(s)
-
-strEscapes = (('\n',r'\n'),('\b',r'\b'),
-    ('\f',r'\f'),('\t',r'\t'),('\r',r'\r'),('\u',r'\u') )
-
-unicodeRE = compile(u"([\u0080-\uffff])")
-unicodeREfunction = lambda(x): r"\u%04x" % ord(x.group(1))
-    
-slashxRX = compile(r"\\x[0-9a-fA-F]{2,2}")
+def jsonreplace_handler(exc):
+    '''Error handler for json
 
-xmlcharRX = compile(r"&#[0-9a-fA-F]{2,4};")
+    If encoding fails, \\uxxxx must be emitted. This
+    is similar to the "backshashreplace" handler, only
+    that we never emit \\xnn since this is not legal
+    according to the JSON syntax specs.
+    '''
+    if isinstance(exc, UnicodeEncodeError):
+        part = exc.object[exc.start]
+        # repr(part) will convert u'\unnnn' to u'u\\nnnn'
+        return u'\\u%04x' % ord(part), exc.start+1
+    else:
+        raise exc
 
-def slashxRXReplace(match):
-    return unichr(int(match.group()[2:],16))
+# register the error handler
+codecs.register_error('jsonreplace', jsonreplace_handler)
 
-def uxreplace(match):
-    c = match.group()[2:-1]
-    l = len(c)
-    if l == 2:
-        return "\\u00%s" % c
-    elif l == 3:
-        return "\\u0%s" % c
-    elif l == 4:
-        return "\\u%s" % c
-
-def _escapeSomeStringChars(aString):
-    """replace single-character chars that have an escaped
-    representation with their literals"""
-    if doUxxxx:
-        # escape anything above 128 as \uxxxx
-        if unicodeRE.search(aString):
-            aString = unicodeRE.sub(unicodeREfunction, aString)
-    
-    for character,replacement in strEscapes:
-        if character in aString:
-            aString = aString.replace(character,replacement)
-    return aString
-
-def _pyexpr2jsexpr(aString, stripWhiteSpace):
-    """Take advantage of python's formatting of string representations of
-    objects.  Python always uses "'" to delimit strings.  Except it doesn't when
-    there is ' in the string.  Fix that, then, if we split
-    on that delimiter, we have a list that alternates non-string text with
-    string text.  Since string text is already properly escaped, we
-    only need to replace True, False, and None in non-string text and
-    remove any unicode 'u's preceding string values.
+### Writer
 
-    if stripWhiteSpace is True, remove spaces, etc from the non-string
-    text.
-    """
-    #do some escaping first
-    aString = _escapeSomeStringChars(aString)
-    #python will quote with " when there is a ' in the string,
-    #so fix that first
-    if redoublequotedstring.search(aString):
-        aString = _doQuotesSwapping(aString)
-    marker = None
-    if escapedSingleQuote in aString:
-        #replace escaped single quotes with a marker
-        marker = markerBase = '|'
-        markerCount = 1
-        while marker in aString:
-            #if the marker is already there, make it different
-            markerCount += 1
-            marker = markerBase * markerCount
-        aString = aString.replace(escapedSingleQuote,marker)
-    #escape double-quotes
-    aString = aString.replace('"',escapedDoubleQuote)
-    #split the string on the real single-quotes
-    splitStr = aString.split("'")
-    outList = []
-    alt = True
-    for subStr in splitStr:
-        if alt:
-            #if alt is True, non-string; do replacements
-            subStr = _handleCode(subStr,stripWhiteSpace)
-        outList.append(subStr)
-        alt = not alt
-    result = '"'.join(outList)
-    if marker:
-        #put the escaped single-quotes back as "'"
-        result = result.replace(marker,"'")
-    return result
-
-def write(obj, encoding='utf-8', stripWhiteSpace=alwaysStripWhiteSpace,\
-    encodeOutput=None):
-    """Represent the object as a byte string in JSON notation.
-    
-    JSON specification says that the output is unicode, and what we really 
-    usually want is an encoded byte string for output. Since this method cannot
-    know whether it is "at the boundary", it will by default output a python 
-    unicode object that you can encode at the boundary.  
-    
-    If there are bytestrings in the object that cannot be decoded in the system
-    default encoding, this will decode them to unicode by an encoding provided.
-    The default, utf-8, will work in most cases.  
-    
-    This method includes an experimental encodeOutput parameter, which may 
-    suffice to make a byte string is that's what you want.  The encodeOutput 
-    parameter needs to be the python identifier for the desired charset 
-    encoding.  By default, a python unicode string will be output.
-        
-    """
-    #  first, we get a representation of the object in unicode
-    #  we will encode to an encoding later if desired.
-    if not isinstance(obj,unicode):
-        #get representation of object as unicode.
-        aString = str(obj)
-        if encoding:
-            aString = unicode(aString, encoding)
+def write(input, encoding='utf-8', outputEncoding=None):
+    writer = JsonWriter(input_encoding=encoding, output_encoding=outputEncoding)
+    writer.write(input)
+    return writer.getvalue()
+
+re_strmangle = re.compile('"|\b|\f|\n|\r|\t|\\\\')
+
+def func_strmangle(match):
+    return {
+        '"': '\\"',
+        '\b': '\\b',
+        '\f': '\\f',
+        '\n': '\\n',
+        '\r': '\\r',
+        '\t': '\\t',
+        '\\': '\\\\',
+        }[match.group(0)]
+
+def strmangle(text):
+    return re_strmangle.sub(func_strmangle, text)
+
+class JsonStream(object):
+
+    def __init__(self):
+        self.buf = []
+
+    def write(self, text):
+        self.buf.append(text)
+
+    def getvalue(self):
+        return ''.join(self.buf)
+
+class JsonWriter(object):
+
+    def __init__(self, stream=None, input_encoding='utf-8', output_encoding=None):
+        '''
+        - stream is optional, if specified must also give output_encoding
+        - The input strings can be unicode or in input_encoding
+        - output_encoding is optional, if omitted, result will be unicode
+        '''
+        if stream is not None:
+            if output_encoding is None:
+                raise WriteException, 'If a stream is given, output encoding must also be provided'
         else:
-            aString = unicode(aString, 'utf-8')
-        #if \xnn converted to \\xnn, put those chars back
-        if slashxRX.search(aString):
-            aString = slashxRX.sub(slashxRXReplace, aString)
-    else:
-        # it's already a unicode string, no need to convert to unicode
-        aString = obj
-    
-    if isinstance(obj,basestring):
-        aString = aString.replace('\\','\\\\')
-        if '"' in aString:
-            aString = '"%s"' % aString.replace('"',escapedDoubleQuote)
+            stream = JsonStream()
+        self.stream = stream
+        self.input_encoding = input_encoding
+        self.output_encoding = output_encoding
+
+    def write(self, obj):
+        if isinstance(obj, (list, tuple)):
+            self.stream.write('[')
+            first = True
+            for elem in obj:
+                if first:
+                    first = False
+                else:
+                    self.stream.write(',')
+                self.write(elem)
+            self.stream.write(']'),
+        elif isinstance(obj, dict):
+            self.stream.write('{')
+            first = True
+            for key, value in obj.iteritems():
+                if first:
+                    first = False
+                else:
+                    self.stream.write(',')
+                self.write(key)
+                self.stream.write(':')
+                self.write(value)
+            self.stream.write('}')
+        elif obj is True:
+            self.stream.write('true')
+        elif obj is False:
+            self.stream.write('false')
+        elif obj is None:
+            self.stream.write('null')
+        elif not isinstance(obj, basestring):
+            # if we are not baseobj, convert to it
+            try:
+                obj = str(obj)
+            except Exception, exc:
+                raise WriteException, 'Cannot write object (%s: %s)' % (exc.__class__, exc)
+            self.stream.write(obj)
         else:
-            aString = '"%s"' % aString
-        result = _escapeSomeStringChars(aString)
-    else:
-        result = _pyexpr2jsexpr(aString,stripWhiteSpace)
-        
-    #assert isinstance(result,unicode)
-    
-    if encodeOutput:
-        # we have a choice here; xmlcharrefreplace or backslashreplace.
-        # since it is likely a web thing, we choose xmlcharrefreplace and 
-        # convert that back to \\unnn representation.  This is experimental
-        # and may not do the right thing.
-        result = result.encode(encodeOutput,"xmlcharrefreplace")
-        if xmlcharRX.search(result):
-            #if \xnn converted to &#nn;, put those chars back as \\u00nn
-            #print "result before conversion: %s" % result
-            result = xmlcharRX.sub(uxreplace, result)
-            #print "result after conversion: %s" % result
-            # Waah! it replaces unprintable chars in the charset with printable
-            # but different chars.  It should be rare.
-    return result
-
-class ReadException(Exception):
-    pass
+            # convert to unicode first
+            if not isinstance(obj, unicode):
+                try:
+                    obj = unicode(obj, self.input_encoding)
+                except (UnicodeDecodeError, UnicodeTranslateError):
+                    obj = unicode(obj, 'utf-8', 'replace')
+            # do the mangling
+            obj = strmangle(obj)
+            # make the encoding
+            if self.output_encoding is not None:
+                obj = obj.encode(self.output_encoding, 'jsonreplace')
+            self.stream.write('"')
+            self.stream.write(obj)
+            self.stream.write('"')
 
-class WriteException(Exception):
-    pass
+    def getvalue(self):
+        return self.stream.getvalue()
-- 
http://lists.nuxeo.com/mailman/listinfo/z3lab-checkins

Reply via email to