Author: fijal Branch: str-measure Changeset: r91221:9923d9d8b6c5 Date: 2017-05-10 16:17 +0200 http://bitbucket.org/pypy/pypy/changeset/9923d9d8b6c5/
Log: a branch to start measuring how strings are used diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -153,6 +153,7 @@ Like bytecode_trace() but doesn't invoke any other events besides the trace function. """ + frame._frame_counter += 1 if (frame.get_w_f_trace() is None or self.is_tracing or self.gettrace() is None): return diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -76,6 +76,9 @@ valuestackdepth = 0 # number of items on valuestack lastblock = None + # XXX string debugging + _frame_counter = 0 + # other fields: # builtin - builtin cache, only if honor__builtins__ is True diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py --- a/pypy/module/__pypy__/__init__.py +++ b/pypy/module/__pypy__/__init__.py @@ -98,6 +98,7 @@ 'decode_long' : 'interp_magic.decode_long', '_promote' : 'interp_magic._promote', 'stack_almost_full' : 'interp_magic.stack_almost_full', + 'set_str_debug_file' : 'interp_debug.set_str_debug_file', } if sys.platform == 'win32': interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp' diff --git a/pypy/module/__pypy__/interp_debug.py b/pypy/module/__pypy__/interp_debug.py --- a/pypy/module/__pypy__/interp_debug.py +++ b/pypy/module/__pypy__/interp_debug.py @@ -28,3 +28,15 @@ @jit.dont_look_inside def debug_flush(space): debug.debug_flush() + +class Cache(object): + def __init__(self, space): + self.w_debug_file = None + +def set_str_debug_file(space, w_debug_file): + if space.is_none(w_debug_file): + w_debug_file = None + space.fromcache(Cache).w_debug_file = w_debug_file + +def get_str_debug_file(space): + return space.fromcache(Cache).w_debug_file \ No newline at end of file diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -50,15 +50,15 @@ def nonmovable_carray(self, space): return BytearrayBuffer(self.data, False).get_raw_address() - def _new(self, value): + def _new(self, space, value): if value is self.data: value = value[:] return W_BytearrayObject(value) - def _new_from_buffer(self, buffer): + def _new_from_buffer(self, space, buffer): return W_BytearrayObject([buffer[i] for i in range(len(buffer))]) - def _new_from_list(self, value): + def _new_from_list(self, space, value): return W_BytearrayObject(value) def _empty(self): @@ -443,10 +443,10 @@ def descr_add(self, space, w_other): if isinstance(w_other, W_BytearrayObject): - return self._new(self.data + w_other.data) + return self._new(space, self.data + w_other.data) if isinstance(w_other, W_BytesObject): - return self._add(self._op_val(space, w_other)) + return self._add(space, self._op_val(space, w_other)) try: buffer = _get_buffer(space, w_other) @@ -454,11 +454,11 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return self._add(buffer) + return self._add(space, buffer) - @specialize.argtype(1) - def _add(self, other): - return self._new(self.data + [other[i] for i in range(len(other))]) + @specialize.argtype(2) + def _add(self, space, other): + return self._new(space, self.data + [other[i] for i in range(len(other))]) def descr_reverse(self, space): self.data.reverse() diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -481,10 +481,10 @@ "found", len(self._value)) return space.newint(ord(self._value[0])) - def _new(self, value): + def _new(self, space, value): return W_BytesObject(value) - def _new_from_list(self, value): + def _new_from_list(self, space, value): return W_BytesObject(''.join(value)) def _empty(self): diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -361,7 +361,7 @@ def newunicode(self, uni): assert uni is not None assert isinstance(uni, unicode) - return W_UnicodeObject(uni) + return W_UnicodeObject(self, uni) def type(self, w_obj): jit.promote(w_obj.__class__) diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -20,7 +20,7 @@ #if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), # space.w_bytes): # return orig_obj - return self._new(s[start:stop]) + return self._new(space, s[start:stop]) def _convert_idx_params(self, space, w_start, w_end): value = self._val(space) @@ -61,7 +61,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return self._new(self._val(space) + other) + return self._new(space, self._val(space) + other) # Bytearray overrides this method, CPython doesn't support contacting # buffers and strs, and unicodes are always handled above @@ -77,8 +77,8 @@ if times <= 0: return self._empty() if self._len() == 1: - return self._new(self._multi_chr(self._val(space)[0]) * times) - return self._new(self._val(space) * times) + return self._new(space, self._multi_chr(self._val(space)[0]) * times) + return self._new(space, self._val(space) * times) descr_rmul = descr_mul @@ -94,7 +94,7 @@ return self._sliced(space, selfvalue, start, stop, self) else: ret = _descr_getslice_slowpath(selfvalue, start, step, sl) - return self._new_from_list(ret) + return self._new_from_list(space, ret) index = space.getindex_w(w_index, space.w_IndexError, "string index") return self._getitem_result(space, index) @@ -105,7 +105,7 @@ character = selfvalue[index] except IndexError: raise oefmt(space.w_IndexError, "string index out of range") - return self._new(character) + return self._new(space, character) def descr_getslice(self, space, w_start, w_stop): selfvalue = self._val(space) @@ -125,7 +125,7 @@ builder.append(self._upper(value[0])) for i in range(1, len(value)): builder.append(self._lower(value[i])) - return self._new(builder.build()) + return self._new(space, builder.build()) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): @@ -143,7 +143,7 @@ else: centered = value - return self._new(centered) + return self._new(space, centered) def descr_count(self, space, w_sub, w_start=None, w_end=None): value, start, end = self._convert_idx_params(space, w_start, w_end) @@ -207,7 +207,7 @@ tabsize) + token oldtoken = token - return self._new(expanded) + return self._new(space, expanded) def _tabindent(self, token, tabsize): """calculates distance behind the token to the next tabstop""" @@ -442,7 +442,7 @@ if value and i != 0: sb.append(value) sb.append(unwrapped[i]) - return self._new(sb.build()) + return self._new(space, sb.build()) def _join_autoconvert(self, space, list_w): assert False, 'unreachable' @@ -459,7 +459,7 @@ fillchar = self._multi_chr(fillchar[0]) value = value + fillchar * d - return self._new(value) + return self._new(space, value) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_rjust(self, space, width, w_fillchar): @@ -473,14 +473,14 @@ fillchar = self._multi_chr(fillchar[0]) value = d * fillchar + value - return self._new(value) + return self._new(space, value) def descr_lower(self, space): value = self._val(space) builder = self._builder(len(value)) for i in range(len(value)): builder.append(self._lower(value[i])) - return self._new(builder.build()) + return self._new(space, builder.build()) def descr_partition(self, space, w_sub): from pypy.objspace.std.bytearrayobject import W_BytearrayObject @@ -501,11 +501,11 @@ pos = find(value, sub, 0, len(value)) if pos != -1 and isinstance(self, W_BytearrayObject): - w_sub = self._new_from_buffer(sub) + w_sub = self._new_from_buffer(space, sub) if pos == -1: if isinstance(self, W_BytearrayObject): - self = self._new(value) + self = self._new(space, value) return space.newtuple([self, self._empty(), self._empty()]) else: return space.newtuple( @@ -531,11 +531,11 @@ pos = rfind(value, sub, 0, len(value)) if pos != -1 and isinstance(self, W_BytearrayObject): - w_sub = self._new_from_buffer(sub) + w_sub = self._new_from_buffer(space, sub) if pos == -1: if isinstance(self, W_BytearrayObject): - self = self._new(value) + self = self._new(space, value) return space.newtuple([self._empty(), self._empty(), self]) else: return space.newtuple( @@ -557,7 +557,7 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - return self._new(res) + return self._new(space, res) @unwrap_spec(maxsplit=int) def descr_split(self, space, w_sep=None, maxsplit=-1): @@ -716,13 +716,13 @@ builder.append(self._upper(ch)) else: builder.append(ch) - return self._new(builder.build()) + return self._new(space, builder.build()) def descr_title(self, space): selfval = self._val(space) if len(selfval) == 0: return self - return self._new(self.title(selfval)) + return self._new(space, self.title(selfval)) @jit.elidable def title(self, value): @@ -764,24 +764,24 @@ for char in string: if not deletion_table[ord(char)]: buf.append(table[ord(char)]) - return self._new(buf.build()) + return self._new(space, buf.build()) def descr_upper(self, space): value = self._val(space) builder = self._builder(len(value)) for i in range(len(value)): builder.append(self._upper(value[i])) - return self._new(builder.build()) + return self._new(space, builder.build()) @unwrap_spec(width=int) def descr_zfill(self, space, width): selfval = self._val(space) if len(selfval) == 0: - return self._new(self._multi_chr(self._chr('0')) * width) + return self._new(space, self._multi_chr(self._chr('0')) * width) num_zeros = width - len(selfval) if num_zeros <= 0: # cannot return self, in case it is a subclass of str - return self._new(selfval) + return self._new(space, selfval) builder = self._builder(width) if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'): @@ -792,10 +792,10 @@ start = 0 builder.append_multiple_char(self._chr('0'), num_zeros) builder.append_slice(selfval, start, len(selfval)) - return self._new(builder.build()) + return self._new(space, builder.build()) def descr_getnewargs(self, space): - return space.newtuple([self._new(self._val(space))]) + return space.newtuple([self._new(space, self._val(space))]) # ____________________________________________________________ # helpers for slow paths, moved out because they contain loops diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1,5 +1,6 @@ """The builtin unicode implementation""" +import py from rpython.rlib.objectmodel import ( compute_hash, compute_unique_id, import_from_mixin, enforceargs) @@ -29,11 +30,26 @@ class W_UnicodeObject(W_Root): import_from_mixin(StringMethods) _immutable_fields_ = ['_value'] + _frame_counter = 0 + _frame_id = 0 @enforceargs(uni=unicode) - def __init__(self, unistr): + def __init__(self, space, unistr): assert isinstance(unistr, unicode) self._value = unistr + if space is None: + return + frame = space.getexecutioncontext().gettopframe() + if frame is None: + return + self._frame_counter = frame._frame_counter + self._frame_id = compute_unique_id(frame) + from pypy.module.__pypy__.interp_debug import get_str_debug_file + w_file = get_str_debug_file(space) + if w_file is None: + return + space.call_function(space.getattr(w_file, space.newtext("write")), + space.newtext("descr_new %s %s\n" % (self._frame_counter, self._frame_id))) def __repr__(self): """representation for debugging purposes""" @@ -43,10 +59,10 @@ # for testing return self._value - def create_if_subclassed(self): + def create_if_subclassed(self, space): if type(self) is W_UnicodeObject: return self - return W_UnicodeObject(self._value) + return W_UnicodeObject(space, self._value) def is_w(self, space, w_other): if not isinstance(w_other, W_UnicodeObject): @@ -105,11 +121,11 @@ "found", len(self._value)) return space.newint(ord(self._value[0])) - def _new(self, value): - return W_UnicodeObject(value) + def _new(self, space, value): + return W_UnicodeObject(space, value) - def _new_from_list(self, value): - return W_UnicodeObject(u''.join(value)) + def _new_from_list(self, space, value): + return W_UnicodeObject(space, u''.join(value)) def _empty(self): return W_UnicodeObject.EMPTY @@ -222,7 +238,7 @@ assert isinstance(w_value, W_UnicodeObject) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) - W_UnicodeObject.__init__(w_newobj, w_value._value) + W_UnicodeObject.__init__(w_newobj, space, w_value._value) return w_newobj def descr_repr(self, space): @@ -354,7 +370,7 @@ raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") - return W_UnicodeObject(u''.join(result)) + return W_UnicodeObject(space, u''.join(result)) def descr_encode(self, space, w_encoding=None, w_errors=None): encoding, errors = _get_encoding_and_errors(space, w_encoding, @@ -420,7 +436,7 @@ def wrapunicode(space, uni): - return W_UnicodeObject(uni) + return W_UnicodeObject(space, uni) def plain_str2unicode(space, s): @@ -562,7 +578,7 @@ return unicode_from_encoded_object(space, w_bytes, encoding, "strict") s = space.bytes_w(w_bytes) try: - return W_UnicodeObject(s.decode("ascii")) + return W_UnicodeObject(space, s.decode("ascii")) except UnicodeDecodeError: # raising UnicodeDecodeError is messy, "please crash for me" return unicode_from_encoded_object(space, w_bytes, "ascii", "strict") @@ -967,6 +983,39 @@ of the specified width. The string S is never truncated. """ +def setup(): + from pypy.module.__pypy__.interp_debug import get_str_debug_file + + def wrap(func): + d = {'orig': func, 'get_str_debug_file': get_str_debug_file} + name = func.__name__ + orig_args = list(func.__code__.co_varnames[:func.__code__.co_argcount]) + args = orig_args[:] + if func.func_defaults: + i = func.__code__.co_argcount - len(func.func_defaults) + for j, default in enumerate(func.func_defaults): + args[i] = "%s = %r" % (args[i], func.func_defaults[j]) + i += 1 + func_args = ", ".join(args) + lines = ["def %s(%s):" % (name, func_args), + " w_file = get_str_debug_file(space)", + " if w_file is not None:", + " txt = '%s ' + str(self._frame_counter) + ' ' + str(self._frame_id) + ' '+ '\\n'" % func.func_name, + " space.call_function(space.getattr(w_file, space.newtext('write')), space.newtext(txt))", + " return orig(%s)" % (", ".join(orig_args),)] + exec "\n".join(lines) in d + if hasattr(func, 'unwrap_spec'): + d[name].unwrap_spec = func.unwrap_spec + # get it as an unbound method + return d[name] + + for k, v in W_UnicodeObject.__dict__.iteritems(): + if k == 'descr_new': + continue + if k.startswith('descr_'): + setattr(W_UnicodeObject, k, wrap(getattr(W_UnicodeObject, k))) + +setup() W_UnicodeObject.typedef = TypeDef( "unicode", basestring_typedef, @@ -1112,7 +1161,7 @@ return [s for s in value] -W_UnicodeObject.EMPTY = W_UnicodeObject(u'') +W_UnicodeObject.EMPTY = W_UnicodeObject(None, u'') # Helper for converting int/long diff --git a/rpython/translator/platform/darwin.py b/rpython/translator/platform/darwin.py --- a/rpython/translator/platform/darwin.py +++ b/rpython/translator/platform/darwin.py @@ -16,6 +16,7 @@ standalone_only = ('-mdynamic-no-pic',) shared_only = () + accepts_flto = False link_flags = (DARWIN_VERSION_MIN,) cflags = ('-O3', '-fomit-frame-pointer', DARWIN_VERSION_MIN) diff --git a/rpython/translator/platform/posix.py b/rpython/translator/platform/posix.py --- a/rpython/translator/platform/posix.py +++ b/rpython/translator/platform/posix.py @@ -11,6 +11,7 @@ exe_ext = '' make_cmd = 'make' so_prefixes = ('lib', '') + accepts_flto = True relevant_environ = ('CPATH', 'LIBRARY_PATH', 'C_INCLUDE_PATH') @@ -131,7 +132,7 @@ cflags = tuple(self.cflags) + tuple(self.standalone_only) # xxx check which compilers accept this option or not - if not config or config.translation.gcrootfinder != 'asmgcc': + if (not config or config.translation.gcrootfinder != 'asmgcc') and self.accepts_flto: cflags = ('-flto',) + cflags m = GnuMakefile(path) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit