[pypy-commit] cffi default: Issue #348

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: 
Changeset: r3052:9b653b9a1c87
Date: 2017-12-06 20:02 +0100
http://bitbucket.org/cffi/cffi/changeset/9b653b9a1c87/

Log:Issue #348

Fix for MSVC complaining about very large strings.

diff --git a/cffi/recompiler.py b/cffi/recompiler.py
--- a/cffi/recompiler.py
+++ b/cffi/recompiler.py
@@ -295,8 +295,9 @@
 base_module_name = self.module_name.split('.')[-1]
 if self.ffi._embedding is not None:
 prnt('#define _CFFI_MODULE_NAME  "%s"' % (self.module_name,))
-prnt('#define _CFFI_PYTHON_STARTUP_CODE  %s' %
- (self._string_literal(self.ffi._embedding),))
+prnt('static const char _CFFI_PYTHON_STARTUP_CODE[] = {')
+self._print_string_literal_in_array(self.ffi._embedding)
+prnt('0 };')
 prnt('#ifdef PYPY_VERSION')
 prnt('# define _CFFI_PYTHON_STARTUP_FUNC  _cffi_pypyinit_%s' % (
 base_module_name,))
@@ -1271,17 +1272,18 @@
   _generate_cpy_extern_python_plus_c_ctx = \
   _generate_cpy_extern_python_ctx
 
-def _string_literal(self, s):
-def _char_repr(c):
-# escape with a '\' the characters '\', '"' or (for trigraphs) '?'
-if c in '\\"?': return '\\' + c
-if ' ' <= c < '\x7F': return c
-if c == '\n': return '\\n'
-return '\\%03o' % ord(c)
-lines = []
-for line in s.splitlines(True) or ['']:
-lines.append('"%s"' % ''.join([_char_repr(c) for c in line]))
-return ' \\\n'.join(lines)
+def _print_string_literal_in_array(self, s):
+prnt = self._prnt
+prnt('// # NB. this is not a string because of a size limit in MSVC')
+for line in s.splitlines(True):
+prnt(('// ' + line).rstrip())
+printed_line = ''
+for c in line:
+if len(printed_line) >= 76:
+prnt(printed_line)
+printed_line = ''
+printed_line += '%d,' % (ord(c),)
+prnt(printed_line)
 
 # --
 # emitting the opcodes for individual types
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: small improvements

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93287:f6b0e685b84d
Date: 2017-12-06 21:00 +0200
http://bitbucket.org/pypy/pypy/changeset/f6b0e685b84d/

Log:small improvements

diff --git a/unicode-bench.py b/unicode-bench.py
--- a/unicode-bench.py
+++ b/unicode-bench.py
@@ -6,6 +6,9 @@
 unicodes = [unicode("u" * LGT + str(i)) for i in range(100)]
 non_ascii_unicodes = [u"u" * LGT + unicode(i) + u"" for i in range(100)]
 
+long_string = u" " * 100
+unicodes = [long_string] * 100
+
 RANGE = 25000 // LGT
 
 def upper(main_l):
@@ -38,11 +41,37 @@
 for i in xrange(RANGE):
 l[0] = main_l[i % 100].find(u"foo")
 
-for func in [upper]:#, lower, isupper, islower]:
+def split(main_l):
+l = [None]
+for i in xrange(RANGE):
+l[0] = main_l[i % 100].split()
+
+def splitlines(main_l):
+l = [None]
+for i in xrange(RANGE):
+l[0] = main_l[i % 100].splitlines()
+
+def iter(main_l):
+l = [None]
+for i in xrange(RANGE // 1):
+for elem in main_l[i % 100]:
+l[0] = elem
+
+def indexing(main_l):
+l = [None]
+for i in xrange(RANGE * 10):
+l[0] = main_l[i % 100][13]
+
+def isspace(main_l):
+l = [None]
+for i in xrange(RANGE // 1):
+l[0] = main_l[i % 100].isspace()
+
+for func in [isspace]:#, lower, isupper, islower]:
 t0 = time.time()
 func(unicodes)
 t1 = time.time()
 print "ascii %s %.2f" % (func.__name__, t1 - t0)
-func(non_ascii_unicodes)
-t2 = time.time()
-print "non-ascii %s %.2f" % (func.__name__, t2 - t1)
+#func(non_ascii_unicodes)
+#t2 = time.time()
+#print "non-ascii %s %.2f" % (func.__name__, t2 - t1)
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: more trivial use cases for Utf8StringIterator

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93288:0c5fc845224f
Date: 2017-12-06 21:00 +0200
http://bitbucket.org/pypy/pypy/changeset/0c5fc845224f/

Log:more trivial use cases for Utf8StringIterator

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -449,10 +449,7 @@
 def _is_generic_loop(self, space, v, func_name):
 func = getattr(self, func_name)
 val = self._utf8
-i = 0
-while i < len(val):
-uchar = rutf8.codepoint_at_pos(val, i)
-i = rutf8.next_codepoint_pos(val, i)
+for uchar in rutf8.Utf8StringIterator(val):
 if not func(uchar):
 return space.w_False
 return space.w_True
@@ -535,11 +532,7 @@
 def descr_istitle(self, space):
 cased = False
 previous_is_cased = False
-val = self._utf8
-i = 0
-while i < len(val):
-uchar = rutf8.codepoint_at_pos(val, i)
-i = rutf8.next_codepoint_pos(val, i)
+for uchar in rutf8.Utf8StringIterator(self._utf8):
 if unicodedb.isupper(uchar) or unicodedb.istitle(uchar):
 if previous_is_cased:
 return space.w_False
@@ -555,16 +548,12 @@
 
 def descr_isupper(self, space):
 cased = False
-i = 0
-val = self._utf8
-while i < len(val):
-uchar = rutf8.codepoint_at_pos(val, i)
+for uchar in rutf8.Utf8StringIterator(self._utf8):
 if (unicodedb.islower(uchar) or
 unicodedb.istitle(uchar)):
 return space.w_False
 if not cased and unicodedb.isupper(uchar):
 cased = True
-i = rutf8.next_codepoint_pos(val, i)
 return space.newbool(cased)
 
 def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: try to improve latin1 handling

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93289:a6a28d7e46a8
Date: 2017-12-06 21:17 +0200
http://bitbucket.org/pypy/pypy/changeset/a6a28d7e46a8/

Log:try to improve latin1 handling

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -149,37 +149,32 @@
 
 def _utf8_encode_latin_1_slowpath(s, errors, errorhandler):
 res = StringBuilder(len(s))
-size = len(s)
 cur = 0
-i = 0
-while i < size:
-if ord(s[i]) <= 0x7F:
-res.append(s[i])
-i += 1
-cur += 1
-else:
-oc = rutf8.codepoint_at_pos(s, i)
-if oc <= 0xFF:
-res.append(chr(oc))
+iter = rutf8.Utf8StringIterator(s)
+try:
+while True:
+ch = iter.next()
+if ch <= 0xFF:
+res.append(chr(ch))
 cur += 1
-i = rutf8.next_codepoint_pos(s, i)
 else:
 r, pos = errorhandler(errors, 'latin1',
   'ordinal not in range(256)', s, cur,
   cur + 1)
-for j in range(pos - cur):
-i = rutf8.next_codepoint_pos(s, i)
 
-j = 0
-while j < len(r):
-c = rutf8.codepoint_at_pos(r, j)
+for c in rutf8.Utf8StringIterator(r):
 if c > 0xFF:
 errorhandler("strict", 'latin1',
  'ordinal not in range(256)', s,
  cur, cur + 1)
-j = rutf8.next_codepoint_pos(r, j)
 res.append(chr(c))
+
+for j in range(pos - cur - 1):
+iter.next()
+
 cur = pos
+except StopIteration:
+pass
 r = res.build()
 return r
 
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: refactor

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93290:c211485151ba
Date: 2017-12-06 21:40 +0200
http://bitbucket.org/pypy/pypy/changeset/c211485151ba/

Log:refactor

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -151,30 +151,30 @@
 res = StringBuilder(len(s))
 cur = 0
 iter = rutf8.Utf8StringIterator(s)
-try:
-while True:
+while True:
+try:
 ch = iter.next()
-if ch <= 0xFF:
-res.append(chr(ch))
-cur += 1
-else:
-r, pos = errorhandler(errors, 'latin1',
-  'ordinal not in range(256)', s, cur,
-  cur + 1)
+except StopIteration:
+break
+if ch <= 0xFF:
+res.append(chr(ch))
+cur += 1
+else:
+r, pos = errorhandler(errors, 'latin1',
+  'ordinal not in range(256)', s, cur,
+  cur + 1)
 
-for c in rutf8.Utf8StringIterator(r):
-if c > 0xFF:
-errorhandler("strict", 'latin1',
- 'ordinal not in range(256)', s,
- cur, cur + 1)
-res.append(chr(c))
+for c in rutf8.Utf8StringIterator(r):
+if c > 0xFF:
+errorhandler("strict", 'latin1',
+ 'ordinal not in range(256)', s,
+ cur, cur + 1)
+res.append(chr(c))
 
-for j in range(pos - cur - 1):
-iter.next()
+for j in range(pos - cur - 1):
+iter.next()
 
-cur = pos
-except StopIteration:
-pass
+cur = pos
 r = res.build()
 return r
 
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: I _think_ it was meant to be like that, or else the interface is very obscure

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93280:67e2516d5db6
Date: 2017-12-06 10:58 +0100
http://bitbucket.org/pypy/pypy/changeset/67e2516d5db6/

Log:I _think_ it was meant to be like that, or else the interface is
very obscure

diff --git a/targetunicode-bench.py b/targetunicode-bench.py
--- a/targetunicode-bench.py
+++ b/targetunicode-bench.py
@@ -33,7 +33,7 @@
 if len(argv) > 2 and argv[2] == "s":
 for i in range(int(argv[1])):
 res_l[0] = descr_upper_s(l[i % 100])
-if len(argv) > 2 and argv[2] == "u":
+elif len(argv) > 2 and argv[2] == "u":
 for i in range(int(argv[1])):
 res_l_2[0] = descr_upper_u(u_l[i % 100])
 else:
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: Should improve the situation already

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93279:87f548473353
Date: 2017-12-06 10:53 +0100
http://bitbucket.org/pypy/pypy/changeset/87f548473353/

Log:Should improve the situation already

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -17,6 +17,7 @@
 
 import sys
 from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize
+from rpython.rlib.objectmodel import always_inline
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib import jit
 from rpython.rlib.signature import signature
@@ -734,6 +735,7 @@
 def __iter__(self):
 return self
 
+@always_inline
 def next(self):
 if self._pos == self._end:
 raise StopIteration
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: use iterator for islower

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93286:55238fb1d18a
Date: 2017-12-06 18:47 +0200
http://bitbucket.org/pypy/pypy/changeset/55238fb1d18a/

Log:use iterator for islower

diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py
--- a/pypy/objspace/std/test/test_liststrategies.py
+++ b/pypy/objspace/std/test/test_liststrategies.py
@@ -7,6 +7,7 @@
 IntOrFloatListStrategy)
 from pypy.objspace.std import listobject
 from pypy.objspace.std.test.test_listobject import TestW_ListObject
+from rpython.rlib.rutf8 import FLAG_ASCII
 
 
 class TestW_ListStrategies(TestW_ListObject):
@@ -600,9 +601,9 @@
 def test_unicode(self):
 l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")])
 assert isinstance(l1.strategy, BytesListStrategy)
-l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)])
+l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, FLAG_ASCII), self.space.newutf8("zwei", 4, FLAG_ASCII)])
 assert isinstance(l2.strategy, UnicodeListStrategy)
-l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)])
+l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, FLAG_ASCII)])
 assert isinstance(l3.strategy, ObjectListStrategy)
 
 def test_listview_bytes(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -524,16 +524,12 @@
 
 def descr_islower(self, space):
 cased = False
-val = self._utf8
-i = 0
-while i < len(val):
-uchar = rutf8.codepoint_at_pos(val, i)
+for uchar in rutf8.Utf8StringIterator(self._utf8):
 if (unicodedb.isupper(uchar) or
 unicodedb.istitle(uchar)):
 return space.w_False
 if not cased and unicodedb.islower(uchar):
 cased = True
-i = rutf8.next_codepoint_pos(val, i)
 return space.newbool(cased)
 
 def descr_istitle(self, space):
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: Another big speed-up

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93283:41d3807f2d87
Date: 2017-12-06 11:21 +0100
http://bitbucket.org/pypy/pypy/changeset/41d3807f2d87/

Log:Another big speed-up

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -680,11 +680,13 @@
 return unicode_escape #, char_escape_helper
 
 class Utf8StringBuilder(object):
+@always_inline
 def __init__(self, size=0):
 self._s = StringBuilder(size)
 self._lgt = 0
 self._flag = FLAG_ASCII
 
+@always_inline
 def append(self, s):
 # for strings
 self._s.append(s)
@@ -692,6 +694,7 @@
 self._lgt += newlgt
 self._flag = combine_flags(self._flag, newflag)
 
+@always_inline
 def append_slice(self, s, start, end):
 self._s.append_slice(s, start, end)
 newlgt, newflag = get_utf8_length_flag(s, start, end)
@@ -699,27 +702,33 @@
 self._flag = combine_flags(self._flag, newflag)
 
 @signature(char(), returns=none())
+@always_inline
 def append_char(self, s):
 # for characters, ascii
 self._lgt += 1
 self._s.append(s)
 
+@always_inline
 def append_code(self, code):
 self._flag = combine_flags(self._flag, get_flag_from_code(code))
 self._lgt += 1
 unichr_as_utf8_append(self._s, code, True)
 
+@always_inline
 def append_utf8(self, utf8, length, flag):
 self._flag = combine_flags(self._flag, flag)
 self._lgt += length
 self._s.append(utf8)
 
+@always_inline
 def build(self):
 return self._s.build()
 
+@always_inline
 def get_flag(self):
 return self._flag
 
+@always_inline
 def get_length(self):
 return self._lgt
 
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: This is probably faster (needs non-ascii to verify)

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93281:91e03fd0b17e
Date: 2017-12-06 11:13 +0100
http://bitbucket.org/pypy/pypy/changeset/91e03fd0b17e/

Log:This is probably faster (needs non-ascii to verify)

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -131,23 +131,20 @@
 ordch2 = ord(code[pos+1])
 if ordch1 <= 0xDF:
 # 110y 10zz ->  0yyy yyzz
-return (((ordch1 & 0x1F) << 6) +# 0b0001
- (ordch2 & 0x3F))   # 0b0011
+return (ordch1 << 6) + ordch2 - (
+   (0xC0   << 6) + 0x80 )
 
 ordch3 = ord(code[pos+2])
 if ordch1 <= 0xEF:
 # 1110 10yy 10zz ->   yyzz
-return (((ordch1 & 0x0F) << 12) + # 0b
-((ordch2 & 0x3F) << 6) +  # 0b0011
-(ordch3 & 0x3F))  # 0b0011
+return (ordch1 << 12) + (ordch2 << 6) + ordch3 - (
+   (0xE0   << 12) + (0x80   << 6) + 0x80 )
 
 ordch4 = ord(code[pos+3])
 if True:
 # 0www 10xx 10yy 10zz -> 000wwwxx  yyzz
-return (((ordch1 & 0x07) << 18) +  # 0b0111
-((ordch2 & 0x3F) << 12) +  # 0b0011
-((ordch3 & 0x3F) << 6) +   # 0b0011
-(ordch4 & 0x3F))   # 0b0011
+return (ordch1 << 18) + (ordch2 << 12) + (ordch3 << 6) + ordch4 - (
+   (0xF0   << 18) + (0x80   << 12) + (0x80   << 6) + 0x80 )
 assert False, "unreachable"
 
 def codepoint_before_pos(code, pos):
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: Inline the ascii part of unichr_as_utf8_append()

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93284:6d7f2e710bd2
Date: 2017-12-06 11:42 +0100
http://bitbucket.org/pypy/pypy/changeset/6d7f2e710bd2/

Log:Inline the ascii part of unichr_as_utf8_append()

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -17,7 +17,7 @@
 
 import sys
 from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize
-from rpython.rlib.objectmodel import always_inline
+from rpython.rlib.objectmodel import always_inline, dont_inline
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib import jit
 from rpython.rlib.signature import signature
@@ -50,6 +50,7 @@
 chr((0x80 | (code & 0x3f
 raise ValueError
 
+@always_inline
 def unichr_as_utf8_append(builder, code, allow_surrogates=False):
 """Encode code (numeric value) as utf8 encoded string
 and emit the result into the given StringBuilder.
@@ -59,13 +60,40 @@
 if code <= r_uint(0x7F):
 # Encode ASCII
 builder.append(chr(code))
-return
+else:
+# Encode non-ASCII, uses a function call
+if allow_surrogates:
+_nonascii_unichr_as_utf8_append(builder, code)
+else:
+_nonascii_unichr_as_utf8_append_nosurrogates(builder, code)
+
+@dont_inline
+def _nonascii_unichr_as_utf8_append(builder, code):
 if code <= r_uint(0x07FF):
 builder.append(chr((0xc0 | (code >> 6
 builder.append(chr((0x80 | (code & 0x3f
 return
 if code <= r_uint(0xFFFF):
-if not allow_surrogates and 0xd800 <= code <= 0xdfff:
+builder.append(chr((0xe0 | (code >> 12
+builder.append(chr((0x80 | ((code >> 6) & 0x3f
+builder.append(chr((0x80 | (code & 0x3f
+return
+if code <= r_uint(0x10FFFF):
+builder.append(chr((0xf0 | (code >> 18
+builder.append(chr((0x80 | ((code >> 12) & 0x3f
+builder.append(chr((0x80 | ((code >> 6) & 0x3f
+builder.append(chr((0x80 | (code & 0x3f
+return
+raise ValueError
+
+@dont_inline
+def _nonascii_unichr_as_utf8_append_nosurrogates(builder, code):
+if code <= r_uint(0x07FF):
+builder.append(chr((0xc0 | (code >> 6
+builder.append(chr((0x80 | (code & 0x3f
+return
+if code <= r_uint(0xFFFF):
+if 0xd800 <= code <= 0xdfff:
 raise ValueError
 builder.append(chr((0xe0 | (code >> 12
 builder.append(chr((0x80 | ((code >> 6) & 0x3f
@@ -79,6 +107,7 @@
 return
 raise ValueError
 
+
 # note - table lookups are really slow. Measured on various elements of obama
 #chinese wikipedia, they're anywhere between 10% and 30% slower.
 #In extreme cases (small, only chinese text), they're 40% slower
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: This reduces the overhead from 50% to 35%

2017-12-06 Thread arigo
Author: Armin Rigo 
Branch: unicode-utf8
Changeset: r93282:cc3f32cc59be
Date: 2017-12-06 11:18 +0100
http://bitbucket.org/pypy/pypy/changeset/cc3f32cc59be/

Log:This reduces the overhead from 50% to 35%

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -734,11 +734,43 @@
 
 @always_inline
 def next(self):
-if self._pos == self._end:
+pos = self._pos
+if pos == self._end:
 raise StopIteration
-ret = codepoint_at_pos(self._utf8, self._pos)
-self._pos = next_codepoint_pos(self._utf8, self._pos)
-return ret
+#- sane-looking version: --
+#ret = codepoint_at_pos(self._utf8, self._pos)
+#self._pos = next_codepoint_pos(self._utf8, self._pos)
+#return ret
+#- manually inlined version follows, with merged checks -
+
+code = self._utf8
+ordch1 = ord(code[pos])
+if ordch1 <= 0x7F:
+self._pos = pos + 1
+return ordch1
+
+ordch2 = ord(code[pos+1])
+if ordch1 <= 0xDF:
+# 110y 10zz ->  0yyy yyzz
+self._pos = pos + 2
+return (ordch1 << 6) + ordch2 - (
+   (0xC0   << 6) + 0x80 )
+
+ordch3 = ord(code[pos+2])
+if ordch1 <= 0xEF:
+# 1110 10yy 10zz ->   yyzz
+self._pos = pos + 3
+return (ordch1 << 12) + (ordch2 << 6) + ordch3 - (
+   (0xE0   << 12) + (0x80   << 6) + 0x80 )
+
+ordch4 = ord(code[pos+3])
+if True:
+# 0www 10xx 10yy 10zz -> 000wwwxx  yyzz
+self._pos = pos + 4
+return (ordch1 << 18) + (ordch2 << 12) + (ordch3 << 6) + ordch4 - (
+   (0xF0   << 18) + (0x80   << 12) + (0x80   << 6) + 0x80 )
+assert False, "unreachable"
+
 
 def decode_latin_1(s):
 if len(s) == 0:
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] buildbot default: no need for non-JIT builds on ARM, schedule backend tests only for RPython changes

2017-12-06 Thread mattip
Author: Matti Picus 
Branch: 
Changeset: r1045:21cd87db82d9
Date: 2017-12-07 00:21 +0200
http://bitbucket.org/pypy/buildbot/changeset/21cd87db82d9/

Log:no need for non-JIT builds on ARM, schedule backend tests only for
RPython changes

diff --git a/bot2/pypybuildbot/arm_master.py b/bot2/pypybuildbot/arm_master.py
--- a/bot2/pypybuildbot/arm_master.py
+++ b/bot2/pypybuildbot/arm_master.py
@@ -1,5 +1,5 @@
 from buildbot.scheduler import Nightly, Triggerable
-from pypybuildbot.util import load
+from pypybuildbot.util import load, isRPython
 
 pypybuilds = load('pypybuildbot.builds')
 ARMCrossLock = pypybuilds.ARMCrossLock
@@ -10,13 +10,13 @@
 crosstranslationargs = ['--platform=arm', '--gcrootfinder=shadowstack']
 crosstranslationjitargs = ['--jit-backend=arm']
 # this one needs a larger timeout due to how it is run
-pypyJitBackendOnlyOwnTestFactoryARM = pypybuilds.Own(
+pypyJitBackendOnlyRPythonTestFactoryARM = pypybuilds.RPython(
 cherrypick=':'.join(["jit/backend/arm",
 "jit/backend/llsupport",
 "jit/backend/test",  # kill this one in case it is 
too slow
 ]),
 timeout=36000)
-pypyJitOnlyOwnTestFactoryARM = pypybuilds.Own(cherrypick="jit", timeout=2 * 3600)
+pypyJitOnlyRPythonTestFactoryARM = pypybuilds.RPython(cherrypick="jit", timeout=2 * 3600)
 pypyOwnTestFactoryARM = pypybuilds.Own(timeout=2 * 3600)
 pypyRPythonTestFactoryARM = pypybuilds.RPython(timeout=2 * 3600)
 
@@ -146,13 +146,20 @@
 BUILDJITLINUXARMHF_RASPBIAN,   # on hhu-cross-raspbianhf, uses 1 core
 BUILDJITLINUXARMHF_RARING, # on hhu-cross-raring-armhf, uses 1 core
 
-BUILDLINUXARM, # on hhu-cross-armel, uses 1 core
-BUILDLINUXARMHF_RASPBIAN,  # on hhu-cross-raspbianhf, uses 1 core
+#BUILDLINUXARM, # on hhu-cross-armel, uses 1 core
+#BUILDLINUXARMHF_RASPBIAN,  # on hhu-cross-raspbianhf, uses 1 core
 
+], branch=None, hour=0, minute=0,
+),
+
+Nightly("nightly-arm-0-01", [
 JITBACKENDONLYLINUXARMEL,  # on hhu-imx.53
 JITBACKENDONLYLINUXARMHF,
 JITBACKENDONLYLINUXARMHF_v7,   # on cubieboard-bob
-], branch=None, hour=0, minute=0),
+], branch='default', hour=0, minute=0, onlyIfChanged=True,
+fileIsImportant=isRPython,
+change_filter=filter.ChangeFilter(branch='default'),
+),
 
 Triggerable("APPLVLLINUXARM_scheduler", [
 APPLVLLINUXARM,# triggered by BUILDLINUXARM, on 
hhu-beagleboard
@@ -182,7 +189,7 @@
   {"name": JITBACKENDONLYLINUXARMEL,
"slavenames": ['hhu-i.mx53'],
"builddir": JITBACKENDONLYLINUXARMEL,
-   "factory": pypyJitBackendOnlyOwnTestFactoryARM,
+   "factory": pypyJitBackendOnlyRPythonTestFactoryARM,
"category": 'linux-armel',
"locks": [ARMBoardLock.access('counting')],
},
@@ -191,7 +198,7 @@
   {"name": JITBACKENDONLYLINUXARMHF,
"slavenames": ['hhu-raspberry-pi', 'hhu-pypy-pi', 'hhu-pypy-pi2'],
"builddir": JITBACKENDONLYLINUXARMHF,
-   "factory": pypyJitBackendOnlyOwnTestFactoryARM,
+   "factory": pypyJitBackendOnlyRPythonTestFactoryARM,
"category": 'linux-armhf',
"locks": [ARMBoardLock.access('counting')],
},
@@ -199,7 +206,7 @@
   {"name": JITBACKENDONLYLINUXARMHF_v7,
"slavenames": ['cubieboard-bob'],
"builddir": JITBACKENDONLYLINUXARMHF_v7,
-   "factory": pypyJitBackendOnlyOwnTestFactoryARM,
+   "factory": pypyJitBackendOnlyRPythonTestFactoryARM,
"category": 'linux-armhf',
"locks": [ARMBoardLock.access('counting')],
},
diff --git a/bot2/pypybuildbot/master.py b/bot2/pypybuildbot/master.py
--- a/bot2/pypybuildbot/master.py
+++ b/bot2/pypybuildbot/master.py
@@ -11,7 +11,7 @@
 #from buildbot import manhole
 from pypybuildbot.pypylist import PyPyList, NumpyStatusList
 from pypybuildbot.ircbot import IRC  # side effects
-from pypybuildbot.util import we_are_debugging
+from pypybuildbot.util import we_are_debugging, isRPython
 from buildbot.changes import filter
 from buildbot.changes.hgpoller import HgPoller
 
@@ -63,7 +63,7 @@
 
 pypybuilds = load('pypybuildbot.builds')
 
-# all ARM buildbot configuration si in arm_master.py
+# all ARM buildbot configuration is in arm_master.py
 ARM = load('pypybuildbot.arm_master')
 
 TannitCPU = pypybuilds.TannitCPU
@@ -271,13 +271,6 @@
  'hhu-pypy-pi2': {'max_builds': 1},
  }
 
-def isRPython(change):
-for fname in change.files:
-if fname.startswith('rpython'):
-log.msg('fileIsImportant filter isRPython got "%s"' % fname)
-return True
-return False
-
 BuildmasterConfig = {
 'slavePortnum': slavePortnum,
 
diff --git a/bot2/pypybuildbot/util.py b/bot2/pypybuildbot/util.py
--- a/bot2/pypybuildbot/util.py
+++ b/bot2/pypybuildbot/util.py
@@ -17,3 +17,10 @@
 if os.path.lexists(dst):
 os.remove(dst)
 os.symlink(src, dst)
+
+def 

[pypy-commit] pypy unicode-utf8: change always_inline to try_inline

2017-12-06 Thread fijal
Author: fijal
Branch: unicode-utf8
Changeset: r93285:0e829cf58e7b
Date: 2017-12-06 15:20 +0200
http://bitbucket.org/pypy/pypy/changeset/0e829cf58e7b/

Log:change always_inline to try_inline

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -17,7 +17,7 @@
 
 import sys
 from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize
-from rpython.rlib.objectmodel import always_inline, dont_inline
+from rpython.rlib.objectmodel import always_inline, dont_inline, try_inline
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib import jit
 from rpython.rlib.signature import signature
@@ -50,7 +50,7 @@
 chr((0x80 | (code & 0x3f
 raise ValueError
 
-@always_inline
+@try_inline
 def unichr_as_utf8_append(builder, code, allow_surrogates=False):
 """Encode code (numeric value) as utf8 encoded string
 and emit the result into the given StringBuilder.
@@ -737,7 +737,7 @@
 self._lgt += 1
 self._s.append(s)
 
-@always_inline
+@try_inline
 def append_code(self, code):
 self._flag = combine_flags(self._flag, get_flag_from_code(code))
 self._lgt += 1
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit