Index: Objects/unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.231
diff -u -r2.231 unicodeobject.c
--- Objects/unicodeobject.c	30 Aug 2005 10:23:14 -0000	2.231
+++ Objects/unicodeobject.c	5 Oct 2005 14:39:34 -0000
@@ -3606,6 +3606,146 @@
     return NULL;
 }
 
+/* --- Fast Mapping Codec ------------------------------------------------- */
+
+PyObject *PyUnicode_DecodeFastmap(const char *s,
+				  int size,
+				  const Py_UNICODE *table,
+				  const char *errors)
+{
+    /* Map each input byte through the 256-entry table.  An entry equal to
+       Py_UNICODE_REPLACEMENT_CHARACTER marks an undefined byte, which is
+       passed to the error handler selected by `errors`.  Returns a new
+       unicode object, or NULL on failure. */
+    const char *starts = s;
+    const char *end;
+    int startinpos, endinpos, outpos;
+    PyUnicodeObject *result;
+    Py_UNICODE *out;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+
+    result = _PyUnicode_New(size);
+    if (result == NULL)
+	goto onError;
+    if (size == 0)
+	return (PyObject *)result;
+    out = PyUnicode_AS_UNICODE(result);
+    end = s + size;
+    while (s < end) {
+	Py_UNICODE value = table[(unsigned char)*s];
+
+	if (value != Py_UNICODE_REPLACEMENT_CHARACTER) {
+	    /* defined mapping: store it and consume one input byte */
+	    *out++ = value;
+	    ++s;
+	    continue;
+	}
+	/* undefined mapping: &s, &result and &out go to the handler */
+	outpos = out - PyUnicode_AS_UNICODE(result);
+	startinpos = s - starts;
+	endinpos = startinpos + 1;
+	if (unicode_decode_call_errorhandler(
+		 errors, &errorHandler,
+		 "fastmap", "fastmap to <undefined>",
+		 starts, size, &startinpos, &endinpos, &exc, &s,
+		 (PyObject **)&result, &outpos, &out)) {
+	    goto onError;
+	}
+    }
+    /* trim the result when fewer characters were written than allocated */
+    outpos = (int)(out - PyUnicode_AS_UNICODE(result));
+    if (outpos < PyUnicode_GET_SIZE(result) &&
+	_PyUnicode_Resize(&result, outpos) < 0)
+	goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return (PyObject *)result;
+
+ onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(result);
+    return NULL;
+}
+
+PyObject *PyUnicode_EncodeFastmap(const Py_UNICODE *p,
+				  int size,
+				  const unsigned char *table,
+				  const PyFastmap_EncodingIndex *index,
+				  int indexsize,
+				  const char *errors)
+{
+    /* Encode via index[0] (code points < 0x100) or the index[1..] entry
+       whose `high` matches.  `errors` is not honoured yet: an unencodable
+       character raises NotImplementedError.  Returns NULL on failure. */
+    const Py_UNICODE *e;
+    char *op;
+    PyObject *v;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+
+    v = PyString_FromStringAndSize(NULL, size);
+    if (v == NULL)
+	goto onError;
+    if (size == 0)
+	return (PyObject *)v;
+    op = PyString_AS_STRING(v);
+    e = p + size;
+    while (p < e) {
+	Py_UNICODE ch = *p;
+	unsigned long high = (unsigned long)ch >> 8; /* > 0xff on UCS4 only */
+	unsigned char low = (unsigned char)(ch & 0xff), coded = 0;
+	const PyFastmap_EncodingIndex *segment = NULL;
+
+	if (high == 0 && indexsize > 0) /* fast path for latin-1 area */
+	    segment = index;
+	else if (high <= 0xff) {
+	    /* XXX: this may be improved using binary search */
+	    int i;
+	    for (i = 1; i < indexsize; i++)
+		if (index[i].high == high) {
+		    segment = &index[i];
+		    break;
+		}
+	}
+
+	if (segment == NULL)
+	    /* segment not found */;
+	else if (segment->lowfirst == 0xff && segment->lowlast == 0x00)
+	    /* special case for the full mapping to one segment */
+	    coded = table[segment->mapindex + low];
+	else if (low < segment->lowfirst || segment->lowlast < low)
+	    segment = NULL; /* out of the segment */
+	else {
+	    coded = table[segment->mapindex + low - segment->lowfirst];
+	    if (coded == segment->undefmark)
+		segment = NULL; /* marked as undefined */
+	}
+
+	if (segment != NULL)
+	    *op++ = (char)coded;
+	else {
+	    PyErr_SetString(PyExc_NotImplementedError,
+			    "error handling is not implemented yet. :-)");
+	    goto onError;
+	}
+	++p;
+    }
+    if (op - PyString_AS_STRING(v) < PyString_GET_SIZE(v))
+	if (_PyString_Resize(&v, (int)(op - PyString_AS_STRING(v))) < 0)
+	    goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return (PyObject *)v;
+
+ onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
+}
+
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int PyUnicode_EncodeDecimal(Py_UNICODE *s,
Index: Include/unicodeobject.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/unicodeobject.h,v
retrieving revision 2.49
diff -u -r2.49 unicodeobject.h
--- Include/unicodeobject.h	30 Aug 2005 10:23:13 -0000	2.49
+++ Include/unicodeobject.h	5 Oct 2005 14:39:34 -0000
@@ -157,6 +157,7 @@
 # define PyUnicode_Decode PyUnicodeUCS2_Decode
 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
+# define PyUnicode_DecodeFastmap PyUnicodeUCS2_DecodeFastmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
@@ -168,6 +169,7 @@
 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
+# define PyUnicode_EncodeFastmap PyUnicodeUCS2_EncodeFastmap
 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
@@ -232,6 +234,7 @@
 # define PyUnicode_Decode PyUnicodeUCS4_Decode
 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
+# define PyUnicode_DecodeFastmap PyUnicodeUCS4_DecodeFastmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
@@ -243,6 +246,7 @@
 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
+# define PyUnicode_EncodeFastmap PyUnicodeUCS4_EncodeFastmap
 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
@@ -915,6 +919,33 @@
     const char *errors		/* error handling */
     );
 
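+/* --- Fast Map Codecs ----------------------------------------------------
+Table-driven 8-bit codecs: decoding indexes a 256-entry Py_UNICODE table,
+encoding looks up a per-high-byte index segment into a byte table. */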
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFastmap(
+    const char *string, 	/* Encoded string */
+    int length,	 		/* size of string in bytes */
+    const Py_UNICODE *table,	/* indexed by byte; REPLACEMENT = undefined */
+    const char *errors		/* error handling */
+    );
+
+typedef struct {
+    unsigned char high;			/* high byte of the code points covered */
+    unsigned char lowfirst, lowlast;	/* low-byte range; 0xff,0x00 = full row */
+    unsigned char undefmark;		/* table value meaning "unmapped" */
+    unsigned short mapindex;		/* segment offset in the table (16 bit) */
+} PyFastmap_EncodingIndex;
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeFastmap(
+    const Py_UNICODE *data, 	/* Unicode char buffer */
+    int length,	 		/* Number of Py_UNICODE chars to encode */
+    const unsigned char *table,	/* Encoded bytes, addressed via index */
+    const PyFastmap_EncodingIndex *index,	/* encoding map index */
+    int indexsize,		/* Number of entries in index */
+    const char *errors		/* error handling (not implemented yet) */
+    );
+
 #ifdef MS_WIN32
 
 /* --- MBCS codecs for Windows -------------------------------------------- */
Index: Modules/_codecsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_codecsmodule.c,v
retrieving revision 2.22
diff -u -r2.22 _codecsmodule.c
--- Modules/_codecsmodule.c	30 Aug 2005 10:23:14 -0000	2.22
+++ Modules/_codecsmodule.c	5 Oct 2005 14:39:34 -0000
@@ -832,6 +832,205 @@
 }
 
 #endif /* MS_WINDOWS */
+
+
+/* --- Fastmap Object ----------------------------------------------------- */
+
+static char *codeckwarglist[] = {"input", "errors", NULL}; /* encode/decode keywords */
+typedef struct {
+    PyObject_HEAD
+    Py_UNICODE *decoding_map;		/* indexed by input byte */
+    unsigned char *encoding_map;	/* byte table addressed via encoding_index */
+    PyFastmap_EncodingIndex *encoding_index;	/* entry 0 serves high byte 0 */
+    int encoding_index_size;		/* number of entries in encoding_index */
+} FastmapCodecObject;
+
+#if SIZEOF_SHORT == 2
+typedef unsigned short fastmap_ucs2_t;	/* one packed decoding-table entry */
+#else
+#error fastmap is not supported on this platform yet.
+#endif
+
+static PyObject *
+fastmap_decode(FastmapCodecObject *self,
+	       PyObject *args, PyObject *kwds)
+{
+    /* decode(input[, errors]): decode with this codec's table and pair the
+       result with the input size via codec_tuple(). */
+    const char *errors = NULL;
+    const char *data;
+    int size;
+    PyObject *decoded;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "t#|z:fastmap_decode",
+				     codeckwarglist, &data, &size, &errors))
+	return NULL;
+    decoded = PyUnicode_DecodeFastmap(data, size, self->decoding_map, errors);
+    return codec_tuple(decoded, size);
+}
+
+static PyObject *
+fastmap_encode(FastmapCodecObject *self,
+	       PyObject *args, PyObject *kwds)
+{
+    /* encode(input[, errors]): coerce input to unicode, encode it with this
+       codec's tables and pair the result with the input length. */
+    PyObject *input, *ustr, *encoded, *result;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|z:fastmap_encode",
+				     codeckwarglist, &input, &errors))
+	return NULL;
+    ustr = PyUnicode_FromObject(input);
+    if (ustr == NULL)
+	return NULL;
+    encoded = PyUnicode_EncodeFastmap(PyUnicode_AS_UNICODE(ustr),
+				      PyUnicode_GET_SIZE(ustr),
+				      self->encoding_map,
+				      self->encoding_index,
+				      self->encoding_index_size,
+				      errors);
+    result = codec_tuple(encoded, PyUnicode_GET_SIZE(ustr));
+    Py_DECREF(ustr);
+    return result;
+}
+
+static struct PyMethodDef fastmap_methods[] = {	/* FastmapCodec methods */
+    {"encode",	(PyCFunction)fastmap_encode, METH_VARARGS | METH_KEYWORDS},
+    {"decode",	(PyCFunction)fastmap_decode, METH_VARARGS | METH_KEYWORDS},
+    {NULL,	NULL},	/* sentinel */
+};
+
+static void
+fastmap_dealloc(FastmapCodecObject *self)
+{   /* release the three tables held by the object, then the object */
+    PyMem_Del(self->encoding_index);
+    PyMem_Del(self->encoding_map);
+    PyMem_Del(self->decoding_map);
+    PyObject_Del(self);
+}
+
+static PyTypeObject FastmapCodec_Type = {	/* type of fastmap_codec() results */
+    PyObject_HEAD_INIT(NULL)
+    0,					/* ob_size */
+    "FastmapCodec",			/* tp_name */
+    sizeof(FastmapCodecObject),		/* tp_basicsize */
+    0,					/* tp_itemsize */
+    /* methods */
+    (destructor)fastmap_dealloc,	/* tp_dealloc */
+    0,					/* tp_print */
+    0,					/* tp_getattr */
+    0,					/* tp_setattr */
+    0,					/* tp_compare */
+    0,					/* tp_repr */
+    0,					/* tp_as_number */
+    0,					/* tp_as_sequence */
+    0,					/* tp_as_mapping */
+    0,					/* tp_hash */
+    0,					/* tp_call */
+    0,					/* tp_str */
+    PyObject_GenericGetAttr,		/* tp_getattro */
+    0,					/* tp_setattro */
+    0,					/* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,			/* tp_flags */
+    0,					/* tp_doc */
+    0,					/* tp_traverse */
+    0,					/* tp_clear */
+    0,					/* tp_richcompare */
+    0,					/* tp_weaklistoffset */
+    0,					/* tp_iter */
+    0,					/* tp_iternext */
+    fastmap_methods,			/* tp_methods */
+};
+
+#define EIDX_ELEM_SIZE	    (1+1+1+1+2) /* !BBBBH */
+static PyObject *
+fastmap_codec(PyObject *ignore, PyObject *args)
+{
+    /* fastmap_codec(decoding_map, encoding_map, encoding_index): copy the
+       three packed tables into a new FastmapCodec; ValueError if malformed. */
+    FastmapCodecObject *self;
+    Py_UNICODE *decoding_map = NULL;
+    unsigned char *encoding_map = NULL;
+    PyFastmap_EncodingIndex *encoding_index = NULL;
+    int encoding_index_size;
+    unsigned char *dmap, *emap, *eidx;
+    int dmapsize, emapsize, eidxsize;
+    int i;
+
+    if (!PyArg_ParseTuple(args, "t#t#t#:fastmap_codec", &dmap, &dmapsize,
+			  &emap, &emapsize, &eidx, &eidxsize))
+	return NULL;
+
+    if (dmapsize != sizeof(fastmap_ucs2_t) * 256) {
+    	PyErr_Format(PyExc_ValueError, "fastmap requires a decoding "
+			"translation table of %d bytes",
+			(int)(sizeof(fastmap_ucs2_t) * 256));
+	return NULL;
+    }
+    if (eidxsize % EIDX_ELEM_SIZE) {
+    	PyErr_Format(PyExc_ValueError,
+		     "encoding index has some trailing data");
+	return NULL;
+    }
+    /* the encoder reads entry 0 unconditionally for code points < 0x100 */
+    if (eidxsize < EIDX_ELEM_SIZE || eidx[0] != 0) {
+	PyErr_SetString(PyExc_ValueError,
+			"encoding index must begin with an entry for high byte 0");
+	return NULL;
+    }
+
+    encoding_index_size = eidxsize / EIDX_ELEM_SIZE;
+    encoding_index = PyMem_New(PyFastmap_EncodingIndex, encoding_index_size);
+    if (encoding_index == NULL)
+	return PyErr_NoMemory();
+    for (i = 0; i < encoding_index_size; i++) {
+	const unsigned char *eidxelem = eidx + i*EIDX_ELEM_SIZE;
+	int mapindex = (eidxelem[4]<<8) | eidxelem[5];
+	/* table bytes the segment can address (<= 0 for an empty range) */
+	int span = eidxelem[2] - eidxelem[1] + 1;
+	encoding_index[i].high = eidxelem[0];
+	encoding_index[i].lowfirst = eidxelem[1];
+	encoding_index[i].lowlast = eidxelem[2];
+	encoding_index[i].undefmark = eidxelem[3];
+	encoding_index[i].mapindex = mapindex;
+	if (eidxelem[1] == 0xff && eidxelem[2] == 0x00)
+	    span = 256;	/* full-row marker: all 256 low bytes are looked up */
+	/* reject offsets the mapindex field cannot hold or that overrun emap */
+	if (encoding_index[i].mapindex != mapindex ||
+	    mapindex + span > emapsize) {
+	    PyErr_Format(PyExc_ValueError,
+			 "encoding index may cause overflow.");
+	    goto onError;
+	}
+    }
+
+    encoding_map = PyMem_New(unsigned char, emapsize);
+    decoding_map = PyMem_New(Py_UNICODE, 256); /* one entry per input byte */
+    if (encoding_map == NULL || decoding_map == NULL) {
+	PyErr_NoMemory();
+	goto onError;
+    }
+    memcpy(encoding_map, emap, emapsize);
+    for (i = 0; i < 256; i++)
+	decoding_map[i] = (Py_UNICODE)((dmap[i*2]<<8) | dmap[i*2 + 1]);
+
+    self = PyObject_New(FastmapCodecObject, &FastmapCodec_Type);
+    if (self == NULL)
+	goto onError;
+    self->decoding_map = decoding_map;
+    self->encoding_map = encoding_map;
+    self->encoding_index = encoding_index;
+    self->encoding_index_size = encoding_index_size;
+
+    return (PyObject *)self;
+
+ onError:
+    PyMem_Del(encoding_index);	/* freeing NULL is a no-op */
+    PyMem_Del(encoding_map);
+    PyMem_Del(decoding_map);
+    return NULL;
+}
+#undef EIDX_ELEM_SIZE
+
 #endif /* Py_USING_UNICODE */
 
 /* --- Error handler registry --------------------------------------------- */
@@ -918,6 +1117,7 @@
     {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
     {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
 #endif
+    {"fastmap_codec", 		fastmap_codec,			METH_VARARGS},
 #endif /* Py_USING_UNICODE */
     {"register_error", 		register_error,			METH_VARARGS,
         register_error__doc__},
@@ -930,4 +1130,7 @@
 init_codecs(void)
 {
     Py_InitModule("_codecs", _codecs_functions);
+
+    if (PyType_Ready(&FastmapCodec_Type) < 0)
+	return;
 }
Index: Tools/scripts/genfastcodec.py
===================================================================
RCS file: Tools/scripts/genfastcodec.py
diff -N Tools/scripts/genfastcodec.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ Tools/scripts/genfastcodec.py	5 Oct 2005 14:39:34 -0000
@@ -0,0 +1,163 @@
+import struct
+import string
+
+UNICODE_REPLACEMENT = 0xFFFD  # stored for bytes that have no decoding
+CODEC_TEMPLATE = string.Template("""\
+\"\"\" Python Fast Mapping Codec generated from XXX with genfastcodec.py.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
+\"\"\"
+
+import codecs
+
+### Decoding Map
+
+decoding_map = (
+$decoding_map)
+
+### Encoding Index
+
+encoding_index = (
+$encoding_index)
+
+### Encoding Map
+
+encoding_map = (
+$encoding_map)
+
+### Codec APIs
+
+_codec = codecs.fastmap_codec(decoding_map, encoding_map, encoding_index)
+
+class Codec(codecs.Codec):
+
+    encode = _codec.encode
+    decode = _codec.decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+    
+### encodings module API
+
+def getregentry():
+
+    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
+""")#"
+
+def generate_decoding_trans(decmap):
+    # 256 big-endian 16-bit code points; a byte that is missing from decmap
+    # or mapped to None (undefined) is emitted as UNICODE_REPLACEMENT.
+    unis = [decmap.get(i) for i in range(256)]
+    return ''.join([struct.pack('!H', u is None and UNICODE_REPLACEMENT or u)
+                    for u in unis])
+
+def generate_encoding_trans(encmap):
+    emapdigest = {}  # high byte -> description of that row's segment
+    # Build the (index, map) byte strings for encoding; one row per high byte.
+    for i in range(256):
+        piecemap = {}  # low byte -> encoded byte for row i
+        for j in range(256):
+            encoded = encmap.get((i<<8) | j)
+            if encoded is not None:
+                piecemap[j] = encoded
+
+        if not piecemap:
+            continue
+
+        # begin creating tight mapping: cover only lowfirst..lowlast
+        d = emapdigest[i] = {'high': i}
+        lowbytes = piecemap.keys()
+        d['lowfirst'] = min(lowbytes)
+        d['lowlast'] = max(lowbytes)
+
+        used = set()  # encoded byte values occurring in this segment
+        mapping = []  # one slot per low byte in range; None = no mapping
+
+        for low in range(d['lowfirst'], d['lowlast']+1):
+            encoded = piecemap.get(low, None)
+            mapping.append(encoded)
+            if encoded is not None:
+                used.add(encoded)
+        # pick a byte value not used in the segment to mark its holes
+        for unusedchk in range(256):
+            if unusedchk not in used:
+                break
+        else:
+            if None in mapping:
+                raise ValueError, "can't get tight in this mapping"
+            unusedchk = None
+
+        if unusedchk is None:
+            if d['lowfirst'] != 0 or d['lowlast'] != 255:
+                raise ValueError, "there's no hole and not full mapping"
+            d['unused'] = None  # full row: no value is free to act as marker
+        else:
+            d['unused'] = unusedchk
+            for k, value in enumerate(mapping):
+                if value is None:
+                    mapping[k] = unusedchk
+
+        d['mapping'] = mapping
+
+    if 0 not in emapdigest:
+        # an entry for high byte 0 must always exist (it is the fast path).
+        emapdigest[0] = {
+            'high': 0,
+            'lowfirst': 1, 'lowlast': 0, # no mapping
+            'mapping': [], 'unused': 0,
+        }
+
+    indexout = []
+    wholemapping = []
+
+    # generate mapping part & mark index positions
+    for high in sorted(emapdigest.keys()):
+        emapdigest[high]['mapbegins'] = len(wholemapping)
+        wholemapping.extend(map(chr, emapdigest[high]['mapping']))
+    # pack one '!BBBBH' index record per segment, in ascending high-byte order
+    for high in sorted(emapdigest.keys()):
+        curpart = emapdigest[high]
+        if curpart['unused'] is None:
+            lowfirst = 0xff # mark as special full map
+            lowlast = 0x00
+            unused = 0x00
+        else:
+            lowfirst, lowlast = curpart['lowfirst'], curpart['lowlast']
+            unused = curpart['unused']
+
+        indexout.append(struct.pack('!BBBBH',
+            curpart['high'], lowfirst, lowlast, unused, curpart['mapbegins']))
+
+    indexpart = ''.join(indexout)
+    mappart = ''.join(wholemapping)
+    # result: packed index records first, then the concatenated map bytes
+    return indexpart, mappart
+
+def hexdumpstr(s, perline=16):
+    # Render s as quoted "\xNN..." string literals, one literal per output
+    # line, each covering at most perline characters of s.
+    rows = [s[pos:pos+perline] for pos in range(0, len(s), perline)]
+    escaped = [''.join(['\\x%02x' % ord(ch) for ch in row]) for row in rows]
+    return ''.join(['"%s"\n' % text for text in escaped])
+
+def gencodec(encoding):
+    # Load encodings.<encoding> and render its maps into CODEC_TEMPLATE.
+    package = __import__('encodings.' + encoding)
+    codecmod = getattr(package, encoding)
+    decode_part = generate_decoding_trans(codecmod.decoding_map)
+    index_part, map_part = generate_encoding_trans(codecmod.encoding_map)
+    return CODEC_TEMPLATE.substitute(encoding_map=hexdumpstr(map_part),
+                                     encoding_index=hexdumpstr(index_part),
+                                     decoding_map=hexdumpstr(decode_part))
+
+if __name__ == '__main__':
+    # Temporary command-line usage; the generated module goes to stdout:
+    # python genfastcodec.py iso8859_1 > Lib/encodings/iso8859_1.py
+    import sys
+    sys.stdout.write(gencodec(sys.argv[1]))


