[pypy-commit] pypy py3.6: Use uniformly the technique of @jit.elidable on a family of unicode

arigo Sun, 14 Jul 2019 08:39:27 -0700

Author: Armin Rigo <[email protected]>
Branch: py3.6
Changeset: r96997:6601328a5b4a
Date: 2019-07-14 16:36 +0100
http://bitbucket.org/pypy/pypy/changeset/6601328a5b4a/


Log:    Use uniformly the technique of @jit.elidable on a family of unicode
        methods. Fixes one test in test_pypy_c

diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py 
b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -191,8 +191,9 @@
         loops = log.loops_by_filename(self.filepath)
         loop, = loops
         assert loop.match_by_id('callone', '''
-            p114 = call_r(ConstClass(ll_lower__rpy_stringPtr), p113, 
descr=<Callr . r EF=3>)
+            p114 = call_r(ConstClass(_lower_unicode), p113, descr=<Callr . r 
EF=4>)
             guard_no_exception(descr=...)
+            guard_nonnull_class(p114, ConstClass(W_UnicodeObject), descr=...)
             ''')
         assert loop.match_by_id('calltwo', '')    # nothing
 
diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -398,46 +398,54 @@
         return mod_format(space, w_values, self, fmt_type=FORMAT_UNICODE)
 
     def descr_swapcase(self, space):
-        value = self._utf8
+        return W_UnicodeObject._swapcase_unicode(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _swapcase_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
         builder = rutf8.Utf8StringBuilder(len(value))
         for ch, pos in rutf8.Utf8StringPosIterator(value):
             if unicodedb.isupper(ch):
-                codes = self._lower_char(ch, value, pos)
+                codes = W_UnicodeObject._lower_char(ch, value, pos)
             elif unicodedb.islower(ch):
                 codes = unicodedb.toupper_full(ch)
             else:
                 codes = [ch,]
             for c in codes:
                 builder.append_code(c)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     def descr_title(self, space):
-        if len(self._utf8) == 0:
-            return self
-        return self.title_unicode()
+        return W_UnicodeObject._title_unicode(self._utf8)
 
+    @staticmethod
     @jit.elidable
-    def title_unicode(self):
-        value = self._utf8
+    def _title_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
         builder = rutf8.Utf8StringBuilder(len(value))
         previous_is_cased = False
         for ch, pos in rutf8.Utf8StringPosIterator(value):
             if previous_is_cased:
-                codes = self._lower_char(ch, value, pos)
+                codes = W_UnicodeObject._lower_char(ch, value, pos)
             else:
                 codes = unicodedb.totitle_full(ch)
             for c in codes:
                 builder.append_code(c)
             previous_is_cased = unicodedb.iscased(ch)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
-    def _lower_char(self, ch, value, bytepos):
+    @staticmethod
+    def _lower_char(ch, value, bytepos):
         if ch == 0x3a3:
-            return [self._handle_capital_sigma(value, bytepos), ]
+            return [W_UnicodeObject._handle_capital_sigma(value, bytepos), ]
         else:
             return unicodedb.tolower_full(ch)
 
-    def _handle_capital_sigma(self, value, bytepos):
+    @staticmethod
+    def _handle_capital_sigma(value, bytepos):
         # U+03A3 is in the Final_Sigma context when, it is found like this:
         #\p{cased} \p{case-ignorable}* U+03A3 not(\p{case-ignorable}* 
\p{cased})
         # where \p{xxx} is a character with property xxx.
@@ -585,7 +593,13 @@
         return space.is_w(space.type(w_obj), space.w_unicode)
 
     def descr_casefold(self, space):
-        value = self._utf8
+        return W_UnicodeObject._casefold_unicode(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _casefold_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
         builder = rutf8.Utf8StringBuilder(len(value))
         for ch in rutf8.Utf8StringIterator(value):
             folded = unicodedb.casefold_lookup(ch)
@@ -594,16 +608,22 @@
             else:
                 for r in folded:
                     builder.append_code(r)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     def descr_lower(self, space):
-        value = self._utf8
+        return W_UnicodeObject._lower_unicode(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _lower_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
         builder = rutf8.Utf8StringBuilder(len(value))
         for ch, pos in rutf8.Utf8StringPosIterator(value):
-            codes = self._lower_char(ch, value, pos)
+            codes = W_UnicodeObject._lower_char(ch, value, pos)
             for c in codes:
                 builder.append_code(c)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     def descr_isdecimal(self, space):
         return self._is_generic(space, '_isdecimal')
@@ -770,12 +790,19 @@
         return space.newlist(strs_w)
 
     def descr_upper(self, space):
-        builder = rutf8.Utf8StringBuilder(len(self._utf8))
-        for ch in rutf8.Utf8StringIterator(self._utf8):
+        return W_UnicodeObject._upper_unicode(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _upper_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
+        builder = rutf8.Utf8StringBuilder(len(value))
+        for ch in rutf8.Utf8StringIterator(value):
             codes = unicodedb.toupper_full(ch)
             for c in codes:
                 builder.append_code(c)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     @unwrap_spec(width=int)
     def descr_zfill(self, space, width):
@@ -874,10 +901,13 @@
         return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start)
 
     def descr_capitalize(self, space):
-        if self._len() == 0:
-            return self._empty()
+        return W_UnicodeObject._capitalize_unicode(self._utf8)
 
-        value = self._utf8
+    @staticmethod
+    @jit.elidable
+    def _capitalize_unicode(value):
+        if len(value) == 0:
+            return W_UnicodeObject.EMPTY
         builder = rutf8.Utf8StringBuilder(len(value))
         it = rutf8.Utf8StringPosIterator(value)
         uchar, _ = it.next()
@@ -886,10 +916,10 @@
         for c in codes:
             builder.append_code(c)
         for ch, pos in it:
-            codes = self._lower_char(ch, value, pos)
+            codes = W_UnicodeObject._lower_char(ch, value, pos)
             for c in codes:
                 builder.append_code(c)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' '))
     def descr_center(self, space, width, w_fillchar):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3.6: Use uniformly the technique of @jit.elidable on a family of unicode

Reply via email to