On 06/17/2011 11:00 PM, ZyX wrote:
Reply to message «Re: How to write command for select range of Chinese text»,
sent 09:17:23 17 June 2011, Friday
by Tony Mechelynck:

Well, in Vim /[] constructs are limited to approximately 256 character
values (or maybe 257)
1. 257 exactly.
2. Not collections themselves, but ranges within collections.
3. In current implementation collection with large number of characters should
be very slow.
4. You can always emulate it with `\%(char1\|char2\|...\|char100500\)' though it
will be even slower.

Here is a Python script that does the job. It visually selects Chinese text in a given range. Hopefully it is faster than Vim's builtin calls.

:py sys.path.append(PLUGINPATH)
:py import select_chinese
:py select_chinese.select_chinese(0, 0, 100, 20)

This selects the Chinese text between row 0, col 0 and row 100, col 20.


--
You received this message from the "vim_use" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
#!/usr/bin/python
#vim:fileencoding=utf-8:sw=4:et
'''Visual select Chinese text from a start point to an end point.'''

import vim

def _line_text(buf, row, encoding):
    '''Return line `row` of `buf` as a text (unicode) string.'''
    raw_line = buf[row]
    # Byte strings are decoded; lines that are already text are used as is.
    if isinstance(raw_line, bytes):
        return raw_line.decode(encoding)
    return raw_line


def _byte_col(text, char_col, encoding):
    '''Return the byte offset of character index `char_col` within `text`.'''
    return len(text[:char_col].encode(encoding))


def get_chinese(start_row, start_col, end_row, end_col):
    '''Get the first Chinese text segment in the given range.

    The range is scanned from (start_row, start_col) up to, but not
    including, column end_col of end_row.  Rows are 0-based buffer indexes
    and the column arguments are character indexes into the decoded line.
    Rows and columns lying beyond the buffer or the line are ignored.

    The segment starts at the first character in U+4E00..U+9FFF and stops
    just before the next character that is neither whitespace nor in that
    block.  If no such character follows, the segment runs to the end of
    the scanned range.

    Text is converted with Vim's 'encoding' option (UTF-8 when that is
    empty).

    return tuple (start, end) where
        start = (chinese_start_row, chinese_start_col)
        end = (chinese_end_row, chinese_end_col)
    The returned columns are byte offsets in the encoded line, which is
    what select_chinese() assigns to the window cursor.  Both items are
    None when the range holds no Chinese character.
    '''
    buf = vim.current.buffer
    encoding = vim.eval('&encoding')
    if not encoding:
        encoding = 'UTF-8'

    # end_row is part of the range, but never read past the last line.
    last_row = min(end_row, len(buf) - 1)
    start = None
    row = start_row
    line = u''
    c1 = 0
    for row in range(start_row, last_row + 1):
        line = _line_text(buf, row, encoding)
        c0 = start_col if row == start_row else 0
        c1 = min(end_col, len(line)) if row == end_row else len(line)
        for col in range(c0, c1):
            c = line[col]
            if c.isspace():
                continue
            # NOTE: only the basic CJK block is tested; add the extension
            # blocks (A/B/C/D) here if they are needed.
            if u'\u4E00' <= c <= u'\u9FFF':
                if start is None:
                    start = (row, _byte_col(line, col, encoding))
                continue
            if start is None:
                continue
            # First non-blank, non-Chinese character after the start: the
            # segment ends on the byte just before it.
            if col == 0:
                # Nothing precedes it on this line, so the segment ends at
                # the end of the previous line.
                prev_line = _line_text(buf, row - 1, encoding)
                return start, (row - 1, len(prev_line.encode(encoding)))
            return start, (row, _byte_col(line, col, encoding) - 1)

    if start is None:
        return None, None
    # The segment is still open: close it where the scan stopped.
    return start, (row, _byte_col(line, c1, encoding))

def select_chinese(start_row, start_col, end_row, end_col):
    '''Visual select Chinese from the given range.

    The arguments are passed unchanged to get_chinese().  When it finds a
    segment, the cursor of the current window is moved to the segment
    start, characterwise Visual mode is started and the cursor is moved to
    the segment end.  Nothing happens when no segment is found.
    '''
    start, end = get_chinese(start_row, start_col, end_row, end_col)
    if start is None:
        return
    win = vim.current.window
    # Cursor line numbers start from 1 while the rows found start from 0.
    # Keep the end line inside the buffer so the assignment cannot fail.
    last_line = len(vim.current.buffer)
    win.cursor = (start[0] + 1, start[1])
    # "normal!" runs the builtin "v" even if the user has remapped it.
    vim.command('normal! v')
    win.cursor = (min(end[0] + 1, last_line), end[1])
Reply via email to