Author: Armin Rigo <[email protected]>
Branch: unicode-utf8-re
Changeset: r93302:cb5b89596a2f
Date: 2017-12-08 11:44 +0100
http://bitbucket.org/pypy/pypy/changeset/cb5b89596a2f/

Log:    in-progress

diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -142,6 +142,7 @@
     # Utf8MatchContext.  The non-utf8 implementation is provided
     # by the FixedMatchContext abstract subclass, in order to use
     # the same @not_rpython safety trick as above.
+    ZERO = 0
     @not_rpython
     def next(self, position):
         raise NotImplementedError
@@ -221,9 +222,8 @@
 
 class FixedMatchContext(AbstractMatchContext):
     """Abstract subclass to introduce the default implementation for
-    these position methods.  The Utf8 subclass doesn't inherit from here."""
-
-    ZERO = 0
+    these position methods.  The Utf8MatchContext subclass doesn't
+    inherit from here."""
 
     def next(self, position):
         return position + 1
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -0,0 +1,78 @@
+from rpython.rlib.debug import check_nonneg
+from rpython.rlib.rarithmetic import r_uint, intmask
+from rpython.rlib.rsre.rsre_core import AbstractMatchContext, EndOfString
+from rpython.rlib.rsre import rsre_char
+from rpython.rlib import rutf8
+
+
+class Utf8MatchContext(AbstractMatchContext):
+    """Match context whose subject is a utf-8 string.
+
+    Positions are moved between code points with
+    rutf8.next_codepoint_pos() and rutf8.prev_codepoint_pos()
+    instead of being incremented or decremented by one.
+    """
+
+    def __init__(self, pattern, utf8string, match_start, end, flags):
+        AbstractMatchContext.__init__(self, pattern, match_start, end, flags)
+        self._utf8 = utf8string
+
+    def str(self, index):
+        """Return the code point located at position 'index'."""
+        check_nonneg(index)
+        return rutf8.codepoint_at_pos(self._utf8, index)
+
+    def lowstr(self, index):
+        """Like str(), with the result passed to rsre_char.getlower()."""
+        c = self.str(index)
+        return rsre_char.getlower(c, self.flags)
+
+    def get_single_byte(self, base_position, index):
+        """Return self.str(base_position + index)."""
+        return self.str(base_position + index)
+
+    def fresh_copy(self, start):
+        """Return a new context with the same pattern, string, end and
+        flags, but with 'start' as its match_start."""
+        return Utf8MatchContext(self.pattern, self._utf8, start,
+                                self.end, self.flags)
+
+    def next(self, position):
+        """Return the position of the code point after 'position'."""
+        return rutf8.next_codepoint_pos(self._utf8, position)
+
+    def prev(self, position):
+        """Return the position of the code point before 'position'.
+        Raises EndOfString if 'position' is <= 0."""
+        if position <= 0:
+            raise EndOfString
+        upos = r_uint(position)
+        upos = rutf8.prev_codepoint_pos(self._utf8, upos)
+        position = intmask(upos)
+        assert position >= 0
+        return position
+
+    def next_n(self, position, n, end_position):
+        """Move forward by 'n' code points.  Raises EndOfString if a
+        step would have to start at or after 'end_position'."""
+        for i in range(n):
+            if position >= end_position:
+                raise EndOfString
+            position = rutf8.next_codepoint_pos(self._utf8, position)
+        return position
+
+    def prev_n(self, position, n, start_position):
+        """Move backward by 'n' code points.  Raises EndOfString if a
+        step would have to start at or before 'start_position'."""
+        upos = r_uint(position)
+        for i in range(n):
+            if upos <= r_uint(start_position):
+                raise EndOfString
+            # step backwards, like prev(); not next_codepoint_pos()
+            upos = rutf8.prev_codepoint_pos(self._utf8, upos)
+        position = intmask(upos)
+        assert position >= 0
+        return position
+
+    def slowly_convert_byte_pos_to_index(self, position):
+        
diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py
--- a/rpython/rlib/rsre/test/test_search.py
+++ b/rpython/rlib/rsre/test/test_search.py
@@ -1,7 +1,7 @@
 import re, py
 from rpython.rlib.rsre.test.test_match import get_code, get_code_and_re
 from rpython.rlib.rsre.test import support
-from rpython.rlib.rsre import rsre_core
+from rpython.rlib.rsre import rsre_core, rsre_utf8
 
 
 class BaseTestSearch:
@@ -222,3 +222,8 @@
     search = staticmethod(rsre_core.search)
     match = staticmethod(rsre_core.match)
     Position = staticmethod(lambda n: n)
+
+class TestSearchUtf8(BaseTestSearch):   # same tests, rsre_utf8 entry points
+    search = staticmethod(rsre_utf8.utf8search)
+    match = staticmethod(rsre_utf8.utf8match)
+    Position = staticmethod(lambda n: n)   # NB. identity: only valid for plain ascii
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to