[pypy-commit] pypy str-measure: a branch to start measuring how strings are used

fijal Wed, 10 May 2017 07:38:34 -0700

Author: fijal
Branch: str-measure
Changeset: r91221:9923d9d8b6c5
Date: 2017-05-10 16:17 +0200
http://bitbucket.org/pypy/pypy/changeset/9923d9d8b6c5/


Log:    a branch to start measuring how strings are used

diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -153,6 +153,7 @@
         Like bytecode_trace() but doesn't invoke any other events besides the
         trace function.
         """
+        frame._frame_counter += 1
         if (frame.get_w_f_trace() is None or self.is_tracing or
             self.gettrace() is None):
             return
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -76,6 +76,9 @@
     valuestackdepth = 0 # number of items on valuestack
     lastblock = None
 
+    # XXX string debugging
+    _frame_counter = 0
+
     # other fields:
     
     # builtin - builtin cache, only if honor__builtins__ is True
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -98,6 +98,7 @@
         'decode_long'               : 'interp_magic.decode_long',
         '_promote'                   : 'interp_magic._promote',
         'stack_almost_full'         : 'interp_magic.stack_almost_full',
+        'set_str_debug_file'        : 'interp_debug.set_str_debug_file',
     }
     if sys.platform == 'win32':
         interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp'
diff --git a/pypy/module/__pypy__/interp_debug.py b/pypy/module/__pypy__/interp_debug.py
--- a/pypy/module/__pypy__/interp_debug.py
+++ b/pypy/module/__pypy__/interp_debug.py
@@ -28,3 +28,15 @@
 @jit.dont_look_inside
 def debug_flush(space):
     debug.debug_flush()
+
+class Cache(object):
+    def __init__(self, space):
+        self.w_debug_file = None
+
+def set_str_debug_file(space, w_debug_file):
+    if space.is_none(w_debug_file):
+        w_debug_file = None
+    space.fromcache(Cache).w_debug_file = w_debug_file
+
+def get_str_debug_file(space):
+    return space.fromcache(Cache).w_debug_file
\ No newline at end of file
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -50,15 +50,15 @@
     def nonmovable_carray(self, space):
         return BytearrayBuffer(self.data, False).get_raw_address()
 
-    def _new(self, value):
+    def _new(self, space, value):
         if value is self.data:
             value = value[:]
         return W_BytearrayObject(value)
 
-    def _new_from_buffer(self, buffer):
+    def _new_from_buffer(self, space, buffer):
         return W_BytearrayObject([buffer[i] for i in range(len(buffer))])
 
-    def _new_from_list(self, value):
+    def _new_from_list(self, space, value):
         return W_BytearrayObject(value)
 
     def _empty(self):
@@ -443,10 +443,10 @@
 
     def descr_add(self, space, w_other):
         if isinstance(w_other, W_BytearrayObject):
-            return self._new(self.data + w_other.data)
+            return self._new(space, self.data + w_other.data)
 
         if isinstance(w_other, W_BytesObject):
-            return self._add(self._op_val(space, w_other))
+            return self._add(space, self._op_val(space, w_other))
 
         try:
             buffer = _get_buffer(space, w_other)
@@ -454,11 +454,11 @@
             if e.match(space, space.w_TypeError):
                 return space.w_NotImplemented
             raise
-        return self._add(buffer)
+        return self._add(space, buffer)
 
-    @specialize.argtype(1)
-    def _add(self, other):
-        return self._new(self.data + [other[i] for i in range(len(other))])
+    @specialize.argtype(2)
+    def _add(self, space, other):
+        return self._new(space, self.data + [other[i] for i in range(len(other))])
 
     def descr_reverse(self, space):
         self.data.reverse()
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -481,10 +481,10 @@
                         "found", len(self._value))
         return space.newint(ord(self._value[0]))
 
-    def _new(self, value):
+    def _new(self, space, value):
         return W_BytesObject(value)
 
-    def _new_from_list(self, value):
+    def _new_from_list(self, space, value):
         return W_BytesObject(''.join(value))
 
     def _empty(self):
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -361,7 +361,7 @@
     def newunicode(self, uni):
         assert uni is not None
         assert isinstance(uni, unicode)
-        return W_UnicodeObject(uni)
+        return W_UnicodeObject(self, uni)
 
     def type(self, w_obj):
         jit.promote(w_obj.__class__)
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -20,7 +20,7 @@
         #if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj),
         #                                                space.w_bytes):
         #    return orig_obj
-        return self._new(s[start:stop])
+        return self._new(space, s[start:stop])
 
     def _convert_idx_params(self, space, w_start, w_end):
         value = self._val(space)
@@ -61,7 +61,7 @@
                 if e.match(space, space.w_TypeError):
                     return space.w_NotImplemented
                 raise
-            return self._new(self._val(space) + other)
+            return self._new(space, self._val(space) + other)
 
         # Bytearray overrides this method, CPython doesn't support contacting
         # buffers and strs, and unicodes are always handled above
@@ -77,8 +77,8 @@
         if times <= 0:
             return self._empty()
         if self._len() == 1:
-            return self._new(self._multi_chr(self._val(space)[0]) * times)
-        return self._new(self._val(space) * times)
+            return self._new(space, self._multi_chr(self._val(space)[0]) * times)
+        return self._new(space, self._val(space) * times)
 
     descr_rmul = descr_mul
 
@@ -94,7 +94,7 @@
                 return self._sliced(space, selfvalue, start, stop, self)
             else:
                 ret = _descr_getslice_slowpath(selfvalue, start, step, sl)
-                return self._new_from_list(ret)
+                return self._new_from_list(space, ret)
 
         index = space.getindex_w(w_index, space.w_IndexError, "string index")
         return self._getitem_result(space, index)
@@ -105,7 +105,7 @@
             character = selfvalue[index]
         except IndexError:
             raise oefmt(space.w_IndexError, "string index out of range")
-        return self._new(character)
+        return self._new(space, character)
 
     def descr_getslice(self, space, w_start, w_stop):
         selfvalue = self._val(space)
@@ -125,7 +125,7 @@
         builder.append(self._upper(value[0]))
         for i in range(1, len(value)):
             builder.append(self._lower(value[i]))
-        return self._new(builder.build())
+        return self._new(space, builder.build())
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_center(self, space, width, w_fillchar):
@@ -143,7 +143,7 @@
         else:
             centered = value
 
-        return self._new(centered)
+        return self._new(space, centered)
 
     def descr_count(self, space, w_sub, w_start=None, w_end=None):
         value, start, end = self._convert_idx_params(space, w_start, w_end)
@@ -207,7 +207,7 @@
                                                          tabsize) + token
             oldtoken = token
 
-        return self._new(expanded)
+        return self._new(space, expanded)
 
     def _tabindent(self, token, tabsize):
         """calculates distance behind the token to the next tabstop"""
@@ -442,7 +442,7 @@
             if value and i != 0:
                 sb.append(value)
             sb.append(unwrapped[i])
-        return self._new(sb.build())
+        return self._new(space, sb.build())
 
     def _join_autoconvert(self, space, list_w):
         assert False, 'unreachable'
@@ -459,7 +459,7 @@
             fillchar = self._multi_chr(fillchar[0])
             value = value + fillchar * d
 
-        return self._new(value)
+        return self._new(space, value)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_rjust(self, space, width, w_fillchar):
@@ -473,14 +473,14 @@
             fillchar = self._multi_chr(fillchar[0])
             value = d * fillchar + value
 
-        return self._new(value)
+        return self._new(space, value)
 
     def descr_lower(self, space):
         value = self._val(space)
         builder = self._builder(len(value))
         for i in range(len(value)):
             builder.append(self._lower(value[i]))
-        return self._new(builder.build())
+        return self._new(space, builder.build())
 
     def descr_partition(self, space, w_sub):
         from pypy.objspace.std.bytearrayobject import W_BytearrayObject
@@ -501,11 +501,11 @@
 
             pos = find(value, sub, 0, len(value))
             if pos != -1 and isinstance(self, W_BytearrayObject):
-                w_sub = self._new_from_buffer(sub)
+                w_sub = self._new_from_buffer(space, sub)
 
         if pos == -1:
             if isinstance(self, W_BytearrayObject):
-                self = self._new(value)
+                self = self._new(space, value)
             return space.newtuple([self, self._empty(), self._empty()])
         else:
             return space.newtuple(
@@ -531,11 +531,11 @@
 
             pos = rfind(value, sub, 0, len(value))
             if pos != -1 and isinstance(self, W_BytearrayObject):
-                w_sub = self._new_from_buffer(sub)
+                w_sub = self._new_from_buffer(space, sub)
 
         if pos == -1:
             if isinstance(self, W_BytearrayObject):
-                self = self._new(value)
+                self = self._new(space, value)
             return space.newtuple([self._empty(), self._empty(), self])
         else:
             return space.newtuple(
@@ -557,7 +557,7 @@
         except OverflowError:
             raise oefmt(space.w_OverflowError, "replace string is too long")
 
-        return self._new(res)
+        return self._new(space, res)
 
     @unwrap_spec(maxsplit=int)
     def descr_split(self, space, w_sep=None, maxsplit=-1):
@@ -716,13 +716,13 @@
                 builder.append(self._upper(ch))
             else:
                 builder.append(ch)
-        return self._new(builder.build())
+        return self._new(space, builder.build())
 
     def descr_title(self, space):
         selfval = self._val(space)
         if len(selfval) == 0:
             return self
-        return self._new(self.title(selfval))
+        return self._new(space, self.title(selfval))
 
     @jit.elidable
     def title(self, value):
@@ -764,24 +764,24 @@
             for char in string:
                 if not deletion_table[ord(char)]:
                     buf.append(table[ord(char)])
-        return self._new(buf.build())
+        return self._new(space, buf.build())
 
     def descr_upper(self, space):
         value = self._val(space)
         builder = self._builder(len(value))
         for i in range(len(value)):
             builder.append(self._upper(value[i]))
-        return self._new(builder.build())
+        return self._new(space, builder.build())
 
     @unwrap_spec(width=int)
     def descr_zfill(self, space, width):
         selfval = self._val(space)
         if len(selfval) == 0:
-            return self._new(self._multi_chr(self._chr('0')) * width)
+            return self._new(space, self._multi_chr(self._chr('0')) * width)
         num_zeros = width - len(selfval)
         if num_zeros <= 0:
             # cannot return self, in case it is a subclass of str
-            return self._new(selfval)
+            return self._new(space, selfval)
 
         builder = self._builder(width)
         if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'):
@@ -792,10 +792,10 @@
             start = 0
         builder.append_multiple_char(self._chr('0'), num_zeros)
         builder.append_slice(selfval, start, len(selfval))
-        return self._new(builder.build())
+        return self._new(space, builder.build())
 
     def descr_getnewargs(self, space):
-        return space.newtuple([self._new(self._val(space))])
+        return space.newtuple([self._new(space, self._val(space))])
 
 # ____________________________________________________________
 # helpers for slow paths, moved out because they contain loops
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1,5 +1,6 @@
 """The builtin unicode implementation"""
 
+import py
 from rpython.rlib.objectmodel import (
     compute_hash, compute_unique_id, import_from_mixin,
     enforceargs)
@@ -29,11 +30,26 @@
 class W_UnicodeObject(W_Root):
     import_from_mixin(StringMethods)
     _immutable_fields_ = ['_value']
+    _frame_counter = 0
+    _frame_id = 0
 
     @enforceargs(uni=unicode)
-    def __init__(self, unistr):
+    def __init__(self, space, unistr):
         assert isinstance(unistr, unicode)
         self._value = unistr
+        if space is None:
+            return
+        frame = space.getexecutioncontext().gettopframe()
+        if frame is None:
+            return
+        self._frame_counter = frame._frame_counter
+        self._frame_id = compute_unique_id(frame)
+        from pypy.module.__pypy__.interp_debug import get_str_debug_file
+        w_file = get_str_debug_file(space)
+        if w_file is None:
+            return
+        space.call_function(space.getattr(w_file, space.newtext("write")),
+            space.newtext("descr_new %s %s\n" % (self._frame_counter, self._frame_id)))
 
     def __repr__(self):
         """representation for debugging purposes"""
@@ -43,10 +59,10 @@
         # for testing
         return self._value
 
-    def create_if_subclassed(self):
+    def create_if_subclassed(self, space):
         if type(self) is W_UnicodeObject:
             return self
-        return W_UnicodeObject(self._value)
+        return W_UnicodeObject(space, self._value)
 
     def is_w(self, space, w_other):
         if not isinstance(w_other, W_UnicodeObject):
@@ -105,11 +121,11 @@
                          "found", len(self._value))
         return space.newint(ord(self._value[0]))
 
-    def _new(self, value):
-        return W_UnicodeObject(value)
+    def _new(self, space, value):
+        return W_UnicodeObject(space, value)
 
-    def _new_from_list(self, value):
-        return W_UnicodeObject(u''.join(value))
+    def _new_from_list(self, space, value):
+        return W_UnicodeObject(space, u''.join(value))
 
     def _empty(self):
         return W_UnicodeObject.EMPTY
@@ -222,7 +238,7 @@
 
         assert isinstance(w_value, W_UnicodeObject)
         w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
-        W_UnicodeObject.__init__(w_newobj, w_value._value)
+        W_UnicodeObject.__init__(w_newobj, space, w_value._value)
         return w_newobj
 
     def descr_repr(self, space):
@@ -354,7 +370,7 @@
                     raise oefmt(space.w_TypeError,
                                 "character mapping must return integer, None "
                                 "or unicode")
-        return W_UnicodeObject(u''.join(result))
+        return W_UnicodeObject(space, u''.join(result))
 
     def descr_encode(self, space, w_encoding=None, w_errors=None):
         encoding, errors = _get_encoding_and_errors(space, w_encoding,
@@ -420,7 +436,7 @@
 
 
 def wrapunicode(space, uni):
-    return W_UnicodeObject(uni)
+    return W_UnicodeObject(space, uni)
 
 
 def plain_str2unicode(space, s):
@@ -562,7 +578,7 @@
         return unicode_from_encoded_object(space, w_bytes, encoding, "strict")
     s = space.bytes_w(w_bytes)
     try:
-        return W_UnicodeObject(s.decode("ascii"))
+        return W_UnicodeObject(space, s.decode("ascii"))
     except UnicodeDecodeError:
         # raising UnicodeDecodeError is messy, "please crash for me"
         return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
@@ -967,6 +983,39 @@
         of the specified width. The string S is never truncated.
         """
 
+def setup():
+    from pypy.module.__pypy__.interp_debug import get_str_debug_file
+
+    def wrap(func):
+        d = {'orig': func, 'get_str_debug_file': get_str_debug_file}
+        name = func.__name__
+        orig_args = list(func.__code__.co_varnames[:func.__code__.co_argcount])
+        args = orig_args[:]
+        if func.func_defaults:
+            i = func.__code__.co_argcount - len(func.func_defaults)
+            for j, default in enumerate(func.func_defaults):
+                args[i] = "%s = %r" % (args[i], func.func_defaults[j])
+                i += 1
+        func_args = ", ".join(args)
+        lines = ["def %s(%s):" % (name, func_args),
+        "    w_file = get_str_debug_file(space)",
+        "    if w_file is not None:",
+        "        txt = '%s ' + str(self._frame_counter) + ' ' + str(self._frame_id) + ' '+ '\\n'" % func.func_name,
+        "        space.call_function(space.getattr(w_file, space.newtext('write')), space.newtext(txt))",
+        "    return orig(%s)" % (", ".join(orig_args),)]
+        exec "\n".join(lines) in d
+        if hasattr(func, 'unwrap_spec'):
+            d[name].unwrap_spec = func.unwrap_spec
+        # get it as an unbound method
+        return d[name]
+
+    for k, v in W_UnicodeObject.__dict__.iteritems():
+        if k == 'descr_new':
+            continue
+        if k.startswith('descr_'):
+           setattr(W_UnicodeObject, k, wrap(getattr(W_UnicodeObject, k)))
+
+setup()
 
 W_UnicodeObject.typedef = TypeDef(
     "unicode", basestring_typedef,
@@ -1112,7 +1161,7 @@
     return [s for s in value]
 
 
-W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
+W_UnicodeObject.EMPTY = W_UnicodeObject(None, u'')
 
 
 # Helper for converting int/long
diff --git a/rpython/translator/platform/darwin.py b/rpython/translator/platform/darwin.py
--- a/rpython/translator/platform/darwin.py
+++ b/rpython/translator/platform/darwin.py
@@ -16,6 +16,7 @@
 
     standalone_only = ('-mdynamic-no-pic',)
     shared_only = ()
+    accepts_flto = False
 
     link_flags = (DARWIN_VERSION_MIN,)
     cflags = ('-O3', '-fomit-frame-pointer', DARWIN_VERSION_MIN)
diff --git a/rpython/translator/platform/posix.py b/rpython/translator/platform/posix.py
--- a/rpython/translator/platform/posix.py
+++ b/rpython/translator/platform/posix.py
@@ -11,6 +11,7 @@
     exe_ext = ''
     make_cmd = 'make'
     so_prefixes = ('lib', '')
+    accepts_flto = True
 
     relevant_environ = ('CPATH', 'LIBRARY_PATH', 'C_INCLUDE_PATH')
 
@@ -131,7 +132,7 @@
             cflags = tuple(self.cflags) + tuple(self.standalone_only)
 
         # xxx check which compilers accept this option or not
-        if not config or config.translation.gcrootfinder != 'asmgcc':
+        if (not config or config.translation.gcrootfinder != 'asmgcc') and self.accepts_flto:
             cflags = ('-flto',) + cflags
 
         m = GnuMakefile(path)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy str-measure: a branch to start measuring how strings are used

Reply via email to