Package: python-docutils
Version: 0.3.7-2
Severity: normal
Tags: patch
The reST parser does not take the display width of CJK characters into account.
This causes problems in CJK environments.
For example, one Chinese character is as wide as two ASCII
characters, but docutils just counts the number of characters.
All markup except tables still works. In table markup, this problem
makes writing tables very difficult.
For example, if 'CC' stands for one Chinese character and
'a' for one ASCII character, a reST table looks like this:
+----+----+
|CCCC |CCCCCC |
|CCaa |aaCC |
|CCCC |CCaCC |
|aaaa|CCCC |
+----+----+
Here is a patch for this problem. I got it from
http://city.plala.jp/download/rst/ .
This patch has been in use for about one year and it appears
to have no problems.
Its license is:
> Copyright (C) 2004 by Matsumoto,Tadashi
> (E-Mail Address: [EMAIL PROTECTED])
>
> Everyone is permitted to do anything on this program
> including copying, modifying, improving,
> as long as you don't try to pretend that you wrote it.
> i.e., the above copyright notice has to appear in all copies.
> Binary distribution requires original version messages.
> You don't have to ask before copying, redistribution or publishing.
> THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
regards,
-- System Information:
Debian Release: 3.1
APT prefers testing
APT policy: (101, 'testing')
Architecture: i386 (i686)
Kernel: Linux 2.6.8-1-686
Locale: LANG=ja_JP.eucJP, LC_CTYPE=ja_JP.eucJP (charmap=EUC-JP)
Versions of packages python-docutils depends on:
ii python 2.3.5-1 An interactive high-level object-o
ii python2.3-docutils 0.3.7-2 Dependency package for python-docu
-- no debconf information
diff -urP /usr/lib/site-python/docutils/parsers/rst/adjusttable.py docutils/parsers/rst/adjusttable.py
--- /usr/lib/site-python/docutils/parsers/rst/adjusttable.py 1970-01-01 09:00:00.000000000 +0900
+++ docutils/parsers/rst/adjusttable.py 2005-03-28 16:25:20.276759120 +0900
@@ -0,0 +1,85 @@
+"""
+adjusttable.py
+
+Copyright (C) 2004 by Matsumoto,Tadashi
+(E-Mail Address: [EMAIL PROTECTED])
+
+Everyone is permitted to do anything on this program
+including copying, modifying, improving,
+as long as you don't try to pretend that you wrote it.
+i.e., the above copyright notice has to appear in all copies.
+Binary distribution requires original version messages.
+You don't have to ask before copying, redistribution or publishing.
+THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
+"""
+from cStringIO import StringIO
+
+jplib_exists = 1  # set to 0 below when the optional jplib module cannot be imported
+try:
+ import jplib
+except:  # NOTE(review): bare except also hides failures other than ImportError - confirm intended
+ jplib_exists = 0
+
+if jplib_exists:
+ is7bit = jplib.is7bit  # use jplib's implementations when the import succeeded
+ kanjiwidth = jplib.width
+else:
+ import re
+ is7bit = re.compile('^[\000-\177]*$').match  # matches only text made solely of code points 0-127
+ def kanjiwidth(u):  # fallback: count 2 per character with ord() > 255, 1 per other character
+ w = 0
+ for c in u:
+ if ord(c) > 255:  # NOTE(review): code-point threshold used as a stand-in for "wide" - confirm for non-CJK scripts
+ w += 2
+ else:
+ w += 1
+ return w
+
+def adjustgridtable(lines):
+ rlines = []
+ for line in lines:
+ rline = []
+ for col in line.split('|'):
+ if is7bit(col):
+ rline.append(col)
+ else:
+ diff = kanjiwidth(col) - len(col)
+ if diff:
+ rline.append(col+' '*diff)
+ rline = '|'.join(rline)
+ rlines.append(rline)
+ return rlines
+
+def adjustsimpletable(lines, columns):  # returns a new list; lines that are not 7-bit get extra spaces inserted
+ rlines = []
+ limit = len(columns)  # number of (start, end) pairs available in `columns`
+ for line in lines:
+ if is7bit(line):  # pure 7-bit lines are passed through unchanged
+ rlines.append(line)
+ continue
+ f = StringIO()  # buffer in which the adjusted line is assembled as UTF-8 bytes
+ i = 0  # index into `columns` of the column currently being scanned
+ start, end = columns[0]  # NOTE(review): `start` is assigned but never read in this function
+ width = 0  # running width: 2 per character with ord() > 255, 1 per other character
+ kanji = 0  # characters with ord() > 255 seen since padding was last written
+ for c in line:
+ f.write(c.encode('utf-8'))  # each character is written encoded; the result is decoded after the loop
+ if ord(c) > 255:
+ width += 2
+ kanji += 1
+ else:
+ width +=1
+ if width <= end:  # running width has not passed the current column's end value yet
+ continue
+ else:
+ i += 1  # running width passed `end`: advance to the next column
+ if i < limit:
+ if kanji:
+ f.write(' '*kanji)  # one space per wide character counted since the last padding
+ kanji = 0
+ start, end = columns[i]
+ else:
+ end = len(line)*2 + 1  # past the last column: a bound the running width can never exceed
+ rline = f.getvalue()
+ rlines.append(rline.decode('utf-8'))  # decode the assembled UTF-8 bytes again
+ return rlines
diff -urP /usr/lib/site-python/docutils/parsers/rst/states.py docutils/parsers/rst/states.py
--- /usr/lib/site-python/docutils/parsers/rst/states.py 2004-11-26 17:27:55.000000000 +0900
+++ docutils/parsers/rst/states.py 2005-03-28 16:25:20.275759272 +0900
@@ -1,7 +1,7 @@
# Author: David Goodger
# Contact: [EMAIL PROTECTED]
-# Revision: $Revision: 1.86 $
-# Date: $Date: 2004/11/06 19:52:19 $
+# Revision: $Revision: 1.2.10.7 $
+# Date: $Date: 2005/01/07 13:26:03 $
# Copyright: This module has been placed in the public domain.
"""
@@ -117,6 +117,7 @@
from docutils.utils import escape2null, unescape
from docutils.parsers.rst import directives, languages, tableparser, roles
from docutils.parsers.rst.languages import en as _fallback_language_module
+from docutils.parsers.rst import adjusttable
class MarkupError(DataError): pass
@@ -1551,6 +1552,7 @@
blank_finish = 1
try:
block = self.state_machine.get_text_block(flush_left=1)
+ block.data = adjusttable.adjustgridtable(block)
except statemachine.UnexpectedIndentationError, instance:
block, source, lineno = instance.args
messages.append(self.reporter.error('Unexpected indentation.',
diff -urP /usr/lib/site-python/docutils/parsers/rst/tableparser.py docutils/parsers/rst/tableparser.py
--- /usr/lib/site-python/docutils/parsers/rst/tableparser.py 2003-07-06 05:38:46.000000000 +0900
+++ docutils/parsers/rst/tableparser.py 2005-03-28 16:25:20.276759120 +0900
@@ -1,7 +1,7 @@
# Author: David Goodger
# Contact: [EMAIL PROTECTED]
-# Revision: $Revision: 1.9 $
-# Date: $Date: 2003/07/05 22:38:28 $
+# Revision: $Revision: 1.2.10.6 $
+# Date: $Date: 2005/01/07 13:26:04 $
# Copyright: This module has been placed in the public domain.
"""
@@ -25,6 +25,7 @@
import re
import sys
from docutils import DataError
+from docutils.parsers.rst import adjusttable
class TableMarkupError(DataError): pass
@@ -463,6 +464,7 @@
else:
columns = self.columns[:]
span_offset = start
+ lines.data = adjusttable.adjustsimpletable(lines, columns)
self.check_columns(lines, start, columns)
row = self.init_row(columns, start)
for i in range(len(columns)):
diff -urP /usr/lib/site-python/docutils/utils.py docutils/utils.py
--- /usr/lib/site-python/docutils/utils.py 2004-11-07 03:52:35.000000000 +0900
+++ docutils/utils.py 2005-03-28 16:25:20.278758816 +0900
@@ -1,7 +1,7 @@
# Author: David Goodger
# Contact: [EMAIL PROTECTED]
-# Revision: $Revision: 1.40 $
-# Date: $Date: 2004/09/30 13:47:58 $
+# Revision: $Revision: 1.2.10.7 $
+# Date: $Date: 2005/01/07 13:26:02 $
# Copyright: This module has been placed in the public domain.
"""
@@ -494,12 +494,12 @@
parts = []
start = 0
while 1:
- found = text.find('\\', start)
+ found = text.find(u'\\', start)
if found == -1:
parts.append(text[start:])
return ''.join(parts)
parts.append(text[start:found])
- parts.append('\x00' + text[found+1:found+2])
+ parts.append(u'\x00' + text[found+1:found+2])
start = found + 2 # skip character after escape
def unescape(text, restore_backslashes=0):
@@ -508,9 +508,9 @@
Backslash-escaped spaces are also removed.
"""
if restore_backslashes:
- return text.replace('\x00', '\\')
+ return text.replace(u'\x00', u'\\')
else:
- for sep in ['\x00 ', '\x00\n', '\x00']:
+ for sep in [u'\x00 ', u'\x00\n', u'\x00']:
text = ''.join(text.split(sep))
return text