Author: Matti Picus <matti.pi...@gmail.com>
Branch: py3.6
Changeset: r96019:bf156a807410
Date: 2019-02-15 14:56 +0200
http://bitbucket.org/pypy/pypy/changeset/bf156a807410/
Log: merge default into py3.6 diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py --- a/rpython/rlib/rarithmetic.py +++ b/rpython/rlib/rarithmetic.py @@ -729,7 +729,9 @@ """ The JIT special-cases this too. """ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop - return llop.int_force_ge_zero(lltype.Signed, n) + n = llop.int_force_ge_zero(lltype.Signed, n) + assert n >= 0 + return n def int_c_div(x, y): """Return the result of the C-style 'x / y'. This differs from the diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -151,7 +151,10 @@ # The following methods are provided to be overriden in # Utf8MatchContext. The non-utf8 implementation is provided # by the FixedMatchContext abstract subclass, in order to use - # the same @not_rpython safety trick as above. + # the same @not_rpython safety trick as above. If you get a + # "not_rpython" error during translation, either consider + # calling the methods xxx_indirect() instead of xxx(), or if + # applicable add the @specializectx decorator. 
ZERO = 0 @not_rpython def next(self, position): @@ -460,8 +463,7 @@ ptr = self.start_ptr if not self.next_char_ok(ctx, pattern, ptr, self.ppos3): return - assert not isinstance(ctx, AbstractMatchContext) - self.start_ptr = ctx.next(ptr) + self.start_ptr = ctx.next_indirect(ptr) return self.find_first_result(ctx, pattern) def next_char_ok(self, ctx, pattern, ptr, ppos): diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline from rpython.rlib.rstring import StringBuilder -from rpython.rlib import jit, types +from rpython.rlib import jit, types, rarithmetic from rpython.rlib.signature import signature, finishsigs from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint @@ -117,6 +117,12 @@ # chinese wikipedia, they're anywhere between 10% and 30% slower. # In extreme cases (small, only chinese text), they're 40% slower +# The following was found by hand to be more optimal than both, +# on x86-64... +_is_64bit = sys.maxint > 2**32 +_constant_ncp = rarithmetic.r_uint64(0xffff0000ffffffff) + +@always_inline def next_codepoint_pos(code, pos): """Gives the position of the next codepoint after pos. Assumes valid utf8. 'pos' must be before the end of the string. 
@@ -125,6 +131,11 @@ chr1 = ord(code[pos]) if chr1 <= 0x7F: return pos + 1 + if _is_64bit and not jit.we_are_jitted(): + # optimized for Intel x86-64 by hand + return pos + 1 + ( + ((chr1 > 0xDF) << 1) + + rarithmetic.intmask((_constant_ncp >> (chr1 & 0x3F)) & 1)) if chr1 <= 0xDF: return pos + 2 if chr1 <= 0xEF: @@ -162,7 +173,6 @@ ordch1 = ord(code[pos]) if ordch1 <= 0x7F or pos +1 >= lgt: return ordch1 - ordch2 = ord(code[pos+1]) if ordch1 <= 0xDF or pos +2 >= lgt: # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz @@ -518,7 +528,7 @@ break return storage -@jit.dont_look_inside +@jit.elidable def codepoint_position_at_index(utf8, storage, index): """ Return byte index of a character inside utf8 encoded string, given storage of type UTF8_INDEX_STORAGE. The index must be smaller than @@ -546,7 +556,7 @@ pos = next_codepoint_pos(utf8, pos) return pos -@jit.dont_look_inside +@jit.elidable def codepoint_at_index(utf8, storage, index): """ Return codepoint of a character inside utf8 encoded string, given storage of type UTF8_INDEX_STORAGE @@ -564,7 +574,7 @@ bytepos = next_codepoint_pos(utf8, bytepos) return codepoint_at_pos(utf8, bytepos) -@jit.dont_look_inside +@jit.elidable def codepoint_index_at_byte_position(utf8, storage, bytepos): """ Return the character index for which codepoint_position_at_index(index) == bytepos. _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit