Author: fijal
Branch: 
Changeset: r80265:f377aa5469e9
Date: 2015-10-16 11:38 +0200
http://bitbucket.org/pypy/pypy/changeset/f377aa5469e9/
Log: merge diff too long, truncating to 2000 out of 15271 lines diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,14 @@ .hg .svn +# VIM +*.swp +*.swo + *.pyc *.pyo *~ +__pycache__/ bin/pypy-c include/*.h @@ -22,4 +27,6 @@ pypy/translator/goal/pypy-c pypy/translator/goal/target*-c release/ +!pypy/tool/release/ rpython/_cache/ +__pycache__/ diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py --- a/lib_pypy/cffi/ffiplatform.py +++ b/lib_pypy/cffi/ffiplatform.py @@ -14,17 +14,7 @@ LIST_OF_FILE_NAMES = ['sources', 'include_dirs', 'library_dirs', 'extra_objects', 'depends'] -def _hack_at_distutils(): - # Windows-only workaround for some configurations: see - # https://bugs.python.org/issue23246 (Python 2.7.9) - if sys.platform == "win32": - try: - import setuptools # for side-effects, patches distutils - except ImportError: - pass - def get_extension(srcfilename, modname, sources=(), **kwds): - _hack_at_distutils() # *before* the following import from distutils.core import Extension allsources = [srcfilename] allsources.extend(sources) @@ -47,7 +37,6 @@ def _build(tmpdir, ext): # XXX compact but horrible :-( - _hack_at_distutils() from distutils.core import Distribution import distutils.errors # diff --git a/lib_pypy/cffi/verifier.py b/lib_pypy/cffi/verifier.py --- a/lib_pypy/cffi/verifier.py +++ b/lib_pypy/cffi/verifier.py @@ -22,6 +22,16 @@ s = s.encode('ascii') super(NativeIO, self).write(s) +def _hack_at_distutils(): + # Windows-only workaround for some configurations: see + # https://bugs.python.org/issue23246 (Python 2.7 with + # a specific MS compiler suite download) + if sys.platform == "win32": + try: + import setuptools # for side-effects, patches distutils + except ImportError: + pass + class Verifier(object): @@ -112,6 +122,7 @@ return basename def get_extension(self): + _hack_at_distutils() # backward compatibility hack if not self._has_source: with self.ffi._lock: if not self._has_source: diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-15.11.0.rst release-2.6.1.rst release-2.6.0.rst release-2.5.1.rst diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. toctree:: whatsnew-head.rst + whatsnew-15.11.0.rst whatsnew-2.6.1.rst whatsnew-2.6.0.rst whatsnew-2.5.1.rst diff --git a/pypy/doc/release-15.11.0.rst b/pypy/doc/release-15.11.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-15.11.0.rst @@ -0,0 +1,191 @@ +============ +PyPy 15.11.0 +============ + +We're pleased and proud to unleash PyPy 15.11, a major update of the PyPy +python2.7.10 compatible interpreter with a Just In Time compiler. +We have improved `warmup time and memory overhead used for tracing`_, added +`vectorization`_ for numpy and general loops where possible on x86 hardware, +refactored rough edges in rpython, and increased functionality of numpy. + +You can download the PyPy 15.11 release here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. + +We would also like to thank our contributors (7 new ones since PyPy 2.6.0) and +encourage new people to join the project. 
PyPy has many +layers and we need help with all of them: `PyPy`_ and `RPython`_ documentation +improvements, tweaking popular `modules`_ to run on pypy, or general `help`_ +with making RPython's JIT even better. + + +Vectorization +============= + +Richard Plangger began work in March and continued over a Google Summer of Code +to add a vectorization step to the trace optimizer. The step recognizes common +constructs and emits SIMD code where possible, much as any modern compiler does. +This vectorization happens while tracing running code, so it is actually easier +at run-time to determine the +availability of possible vectorization than it is for ahead-of-time compilers. + +Availability of SIMD hardware is detected at run time, without needing to +precompile various code paths into the executable. + +Internal Refactoring and Warmup Time Improvement +================================================ + +Maciej Fijalkowski and Armin Rigo refactored internals of rpython that now allow +PyPy to more efficiently use `guards`_ in jitted code. They also rewrote unrolling, +leading to a warmup time improvement of 20% or so at the cost of a minor +regression in jitted code speed. + +Numpy +===== + +Our implementation of numpy continues to improve. ndarray and the numeric dtypes +are very close to feature-complete; record, string and unicode dtypes are mostly +supported. We have reimplemented numpy linalg, random and fft as cffi-1.0 +modules that call out to the same underlying libraries that upstream numpy uses. +Please try it out, especially using the new vectorization (via --jit vec=1 on the +command line) and let us know what is missing for your code. + +CFFI +==== + +While not applicable only to PyPy, `cffi`_ is arguably our most significant +contribution to the python ecosystem. Armin Rigo continued improving it, +and PyPy reaps the benefits of cffi-1.3: improved manangement of object +lifetimes, __stdcall on Win32, ffi.memmove(), ... + +.. _`warmup time and memory overhead used for tracing`: http://morepypy.blogspot.com/2015/10 +.. _`vectorization`: http://pypyvecopt.blogspot.co.at/ +.. _`guards`: http://rpython.readthedocs.org/en/latest/glossary.html + +.. _`PyPy`: http://doc.pypy.org +.. _`RPython`: https://rpython.readthedocs.org +.. _`cffi`: https://cffi.readthedocs.org +.. _`modules`: http://doc.pypy.org/en/latest/project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: http://doc.pypy.org/en/latest/project-ideas.html + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7. It's fast (`pypy and cpython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +This release supports **x86** machines on most common operating systems +(Linux 32/64, Mac OS X 64, Windows 32, OpenBSD_, freebsd_), +as well as newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux. + +We also welcome developers of other +`dynamic languages`_ to see what RPython can do for them. + +.. _`pypy and cpython 2.7.x`: http://speed.pypy.org +.. _OpenBSD: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/lang/pypy +.. _freebsd: https://svnweb.freebsd.org/ports/head/lang/pypy/ +.. 
_`dynamic languages`: http://pypyjs.org + +Highlights (since 2.6.1 release two months ago) +=============================================== + +* Bug Fixes + + * Applied OPENBSD downstream fixes + + * Fix a crash on non-linux when running more than 20 threads + + * In cffi, ffi.new_handle() is more cpython compliant + + * Accept unicode in functions inside the _curses cffi backend exactly like cpython + + * Fix a segfault in itertools.islice() + + * Use gcrootfinder=shadowstack by default, asmgcc on linux only + + * Fix ndarray.copy() for upstream compatability when copying non-contiguous arrays + + * Fix assumption that lltype.UniChar is unsigned + + * Fix a subtle bug with stacklets on shadowstack + + * Improve support for the cpython capi in cpyext (our capi compatibility + layer). Fixing these issues inspired some thought about cpyext in general, + stay tuned for more improvements + + * When loading dynamic libraries, in case of a certain loading error, retry + loading the library assuming it is actually a linker script, like on Arch + and Gentoo + + * Issues reported with our previous release were resolved_ after reports from users on + our issue tracker at https://bitbucket.org/pypy/pypy/issues or on IRC at + #pypy + +* New features: + + * Add an optimization pass to vectorize loops using x86 SIMD intrinsics. + + * Support __stdcall on Windows in CFFI + + * Improve debug logging when using PYPYLOG=??? + + * Deal with platforms with no RAND_egd() in OpenSSL + + * Enable building _vmprof in translation on OS/X by default + +* Numpy: + + * Add support for ndarray.ctypes + + * Fast path for mixing numpy scalars and floats + + * Add support for creating Fortran-ordered ndarrays + + * Fix casting failures in linalg (by extending ufunc casting) + + * Recognize and disallow (for now) pickling of ndarrays with objects + embedded in them + +* Performance improvements and refactorings: + + * Reuse hashed keys across dictionaries and sets + + * Refactor JIT interals to improve warmup time by 20% or so at the cost of a + minor regression in JIT speed + + * Recognize patterns of common sequences in the JIT backends and optimize them + + * Make the garbage collecter more intcremental over external_malloc() calls + + * Share guard resume data where possible which reduces memory usage + + * Fast path for zip(list, list) + + * Reduce the number of checks in the JIT for lst[a:] + + * Move the non-optimizable part of callbacks outside the JIT + + * Factor in field immutability when invalidating heap information + + * Unroll itertools.izip_longest() with two sequences + + * Minor optimizations after analyzing output from `vmprof`_ and trace logs + + * Remove many class attributes in rpython classes + + * Handle getfield_gc_pure* and getfield_gc_* uniformly in heap.py + +.. _`vmprof`: https://vmprof.readthedocs.org +.. _resolved: http://doc.pypy.org/en/latest/whatsnew-15.11.0.html + +Please try it out and let us know what you think. We welcome feedback, +we know you are using PyPy, please tell us about it! + +Cheers + +The PyPy Team + diff --git a/pypy/doc/whatsnew-15.11.0.rst b/pypy/doc/whatsnew-15.11.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-15.11.0.rst @@ -0,0 +1,87 @@ +======================== +What's new in PyPy 15.11 +======================== + +.. this is a revision shortly after release-2.6.1 +.. startrev: 07769be4057b + +.. 
branch: keys_with_hash +Improve the performance of dict.update() and a bunch of methods from +sets, by reusing the hash value stored in one dict when inspecting +or changing another dict with that key. + +.. branch: optresult-unroll +A major refactoring of the ResOperations that kills Box. Also rewrote +unrolling to enable future enhancements. Should improve warmup time +by 20% or so. + +.. branch: optimize-cond-call +Optimize common sequences of operations like +``int_lt/cond_call`` in the JIT backends + +.. branch: missing_openssl_include +Fix for missing headers in OpenBSD, already applied in downstream ports + +.. branch: gc-more-incremental +Remove a source of non-incremental-ness in the GC: now +external_malloc() no longer runs gc_step_until() any more. If there +is a currently-running major collection, we do only so many steps +before returning. This number of steps depends on the size of the +allocated object. It is controlled by tracking the general progress +of these major collection steps and the size of old objects that +keep adding up between them. + +.. branch: remember-tracing-counts +Reenable jithooks + +.. branch: detect_egd2 + +.. branch: shadowstack-no-move-2 +Issue #2141: fix a crash on Windows and OS/X and ARM when running +at least 20 threads. + +.. branch: numpy-ctypes + +Add support for ndarray.ctypes property. + +.. branch: share-guard-info + +Share guard resume data between consecutive guards that have only +pure operations and guards in between. + +.. branch: issue-2148 + +Fix performance regression on operations mixing numpy scalars and Python +floats, cf. issue #2148. + +.. branch: cffi-stdcall +Win32: support '__stdcall' in CFFI. + +.. branch: callfamily + +Refactorings of annotation and rtyping of function calls. + +.. branch: fortran-order + +Allow creation of fortran-ordered ndarrays + +.. branch: type_system-cleanup + +Remove some remnants of the old ootypesystem vs lltypesystem dichotomy. + +.. branch: cffi-handle-lifetime + +ffi.new_handle() returns handles that work more like CPython's: they +remain valid as long as the target exists (unlike the previous +version, where handles become invalid *before* the __del__ is called). + +.. branch: ufunc-casting + +allow automatic casting in ufuncs (and frompypyfunc) to cast the +arguments to the allowed function type declarations, fixes various +failures in linalg cffi functions + +.. branch: vecopt +.. branch: vecopt-merge + +A new optimization pass to use emit vectorized loops diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,82 +1,8 @@ -======================= -What's new in PyPy 2.6+ -======================= +========================= +What's new in PyPy 15.11+ +========================= -.. this is a revision shortly after release-2.6.1 -.. startrev: 07769be4057b +.. this is a revision shortly after release-15.11.0 +.. startrev: d924723d483b -.. branch: keys_with_hash -Improve the performance of dict.update() and a bunch of methods from -sets, by reusing the hash value stored in one dict when inspecting -or changing another dict with that key. -.. branch: optresult-unroll -A major refactoring of the ResOperations that kills Box. Also rewrote -unrolling to enable future enhancements. Should improve warmup time -by 20% or so. - -.. branch: optimize-cond-call -Optimize common sequences of operations like -``int_lt/cond_call`` in the JIT backends - -.. 
branch: missing_openssl_include -Fix for missing headers in OpenBSD, already applied in downstream ports - -.. branch: gc-more-incremental -Remove a source of non-incremental-ness in the GC: now -external_malloc() no longer runs gc_step_until() any more. If there -is a currently-running major collection, we do only so many steps -before returning. This number of steps depends on the size of the -allocated object. It is controlled by tracking the general progress -of these major collection steps and the size of old objects that -keep adding up between them. - -.. branch: remember-tracing-counts -Reenable jithooks - -.. branch: detect_egd2 - -.. branch: shadowstack-no-move-2 -Issue #2141: fix a crash on Windows and OS/X and ARM when running -at least 20 threads. - -.. branch: numpy-ctypes - -Add support for ndarray.ctypes property. - -.. branch: share-guard-info - -Share guard resume data between consecutive guards that have only -pure operations and guards in between. - -.. branch: issue-2148 - -Fix performance regression on operations mixing numpy scalars and Python -floats, cf. issue #2148. - -.. branch: cffi-stdcall -Win32: support '__stdcall' in CFFI. - -.. branch: callfamily - -Refactorings of annotation and rtyping of function calls. - -.. branch: fortran-order - -Allow creation of fortran-ordered ndarrays - -.. branch: type_system-cleanup - -Remove some remnants of the old ootypesystem vs lltypesystem dichotomy. - -.. branch: cffi-handle-lifetime - -ffi.new_handle() returns handles that work more like CPython's: they -remain valid as long as the target exists (unlike the previous -version, where handles become invalid *before* the __del__ is called). - -.. branch: ufunc-casting - -allow automatic casting in ufuncs (and frompypyfunc) to cast the -arguments to the allowed function type declarations, fixes various -failures in linalg cffi functions diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py --- a/pypy/module/cpyext/__init__.py +++ b/pypy/module/cpyext/__init__.py @@ -66,6 +66,7 @@ import pypy.module.cpyext.codecs import pypy.module.cpyext.pyfile import pypy.module.cpyext.pystrtod +import pypy.module.cpyext.pytraceback # now that all rffi_platform.Struct types are registered, configure them api.configure_types() diff --git a/pypy/module/cpyext/include/Python.h b/pypy/module/cpyext/include/Python.h --- a/pypy/module/cpyext/include/Python.h +++ b/pypy/module/cpyext/include/Python.h @@ -126,6 +126,7 @@ #include "fileobject.h" #include "pysignals.h" #include "pythread.h" +#include "traceback.h" /* Missing definitions */ #include "missing.h" diff --git a/pypy/module/cpyext/include/frameobject.h b/pypy/module/cpyext/include/frameobject.h --- a/pypy/module/cpyext/include/frameobject.h +++ b/pypy/module/cpyext/include/frameobject.h @@ -4,7 +4,7 @@ extern "C" { #endif -typedef struct { +typedef struct _frame { PyObject_HEAD PyCodeObject *f_code; PyObject *f_globals; diff --git a/pypy/module/cpyext/include/traceback.h b/pypy/module/cpyext/include/traceback.h --- a/pypy/module/cpyext/include/traceback.h +++ b/pypy/module/cpyext/include/traceback.h @@ -4,7 +4,15 @@ extern "C" { #endif -typedef PyObject PyTracebackObject; +struct _frame; + +typedef struct _traceback { + PyObject_HEAD + struct _traceback *tb_next; + struct _frame *tb_frame; + int tb_lasti; + int tb_lineno; +} PyTracebackObject; #ifdef __cplusplus } diff --git a/pypy/module/cpyext/pytraceback.py b/pypy/module/cpyext/pytraceback.py new file mode 100644 --- /dev/null +++ 
b/pypy/module/cpyext/pytraceback.py @@ -0,0 +1,50 @@ +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.api import ( + PyObjectFields, generic_cpy_call, CONST_STRING, CANNOT_FAIL, Py_ssize_t, + cpython_api, bootstrap_function, cpython_struct, build_type_checkers) +from pypy.module.cpyext.pyobject import ( + PyObject, make_ref, from_ref, Py_DecRef, make_typedescr, borrow_from) +from pypy.module.cpyext.frameobject import PyFrameObject +from rpython.rlib.unroll import unrolling_iterable +from pypy.interpreter.error import OperationError +from pypy.interpreter.pytraceback import PyTraceback +from pypy.interpreter import pycode + + +PyTracebackObjectStruct = lltype.ForwardReference() +PyTracebackObject = lltype.Ptr(PyTracebackObjectStruct) +PyTracebackObjectFields = PyObjectFields + ( + ("tb_next", PyTracebackObject), + ("tb_frame", PyFrameObject), + ("tb_lasti", rffi.INT), + ("tb_lineno", rffi.INT), +) +cpython_struct("PyTracebackObject", PyTracebackObjectFields, PyTracebackObjectStruct) + +@bootstrap_function +def init_traceback(space): + make_typedescr(PyTraceback.typedef, + basestruct=PyTracebackObject.TO, + attach=traceback_attach, + dealloc=traceback_dealloc) + + +def traceback_attach(space, py_obj, w_obj): + py_traceback = rffi.cast(PyTracebackObject, py_obj) + traceback = space.interp_w(PyTraceback, w_obj) + if traceback.next is None: + w_next_traceback = None + else: + w_next_traceback = space.wrap(traceback.next) + py_traceback.c_tb_next = rffi.cast(PyTracebackObject, make_ref(space, w_next_traceback)) + py_traceback.c_tb_frame = rffi.cast(PyFrameObject, make_ref(space, space.wrap(traceback.frame))) + rffi.setintfield(py_traceback, 'c_tb_lasti', traceback.lasti) + rffi.setintfield(py_traceback, 'c_tb_lineno',traceback.get_lineno()) + +@cpython_api([PyObject], lltype.Void, external=False) +def traceback_dealloc(space, py_obj): + py_traceback = rffi.cast(PyTracebackObject, py_obj) + Py_DecRef(space, rffi.cast(PyObject, py_traceback.c_tb_next)) + Py_DecRef(space, rffi.cast(PyObject, py_traceback.c_tb_frame)) + from pypy.module.cpyext.object import PyObject_dealloc + PyObject_dealloc(space, py_obj) diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py --- a/pypy/module/cpyext/slotdefs.py +++ b/pypy/module/cpyext/slotdefs.py @@ -61,6 +61,30 @@ args_w = space.fixedview(w_args) return generic_cpy_call(space, func_binary, w_self, args_w[0]) +def wrap_binaryfunc_l(space, w_self, w_args, func): + func_binary = rffi.cast(binaryfunc, func) + check_num_args(space, w_args, 1) + args_w = space.fixedview(w_args) + + if not space.is_true(space.issubtype(space.type(args_w[0]), + space.type(w_self))): + raise OperationError(space.w_NotImplementedError, space.wrap( + "NotImplemented")) + + return generic_cpy_call(space, func_binary, w_self, args_w[0]) + +def wrap_binaryfunc_r(space, w_self, w_args, func): + func_binary = rffi.cast(binaryfunc, func) + check_num_args(space, w_args, 1) + args_w = space.fixedview(w_args) + + if not space.is_true(space.issubtype(space.type(args_w[0]), + space.type(w_self))): + raise OperationError(space.w_NotImplementedError, space.wrap( + "NotImplemented")) + + return generic_cpy_call(space, func_binary, args_w[0], w_self) + def wrap_inquirypred(space, w_self, w_args, func): func_inquiry = rffi.cast(inquiry, func) check_num_args(space, w_args, 0) diff --git a/pypy/module/cpyext/test/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_traceback.py 
@@ -0,0 +1,40 @@ +from rpython.rtyper.lltypesystem import lltype, rffi +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.pyobject import PyObject, make_ref, from_ref +from pypy.module.cpyext.pytraceback import PyTracebackObject +from pypy.interpreter.pytraceback import PyTraceback +from pypy.interpreter.pyframe import PyFrame + +class TestPyTracebackObject(BaseApiTest): + def test_traceback(self, space, api): + w_traceback = space.appexec([], """(): + import sys + try: + 1/0 + except: + return sys.exc_info()[2] + """) + py_obj = make_ref(space, w_traceback) + py_traceback = rffi.cast(PyTracebackObject, py_obj) + assert (from_ref(space, rffi.cast(PyObject, py_traceback.c_ob_type)) is + space.gettypeobject(PyTraceback.typedef)) + + traceback = space.interp_w(PyTraceback, w_traceback) + assert traceback.lasti == py_traceback.c_tb_lasti + assert traceback.get_lineno() == py_traceback.c_tb_lineno + assert space.eq_w(space.getattr(w_traceback, space.wrap("tb_lasti")), + space.wrap(py_traceback.c_tb_lasti)) + assert space.is_w(space.getattr(w_traceback, space.wrap("tb_frame")), + from_ref(space, rffi.cast(PyObject, + py_traceback.c_tb_frame))) + + while not space.is_w(w_traceback, space.w_None): + assert space.is_w( + w_traceback, + from_ref(space, rffi.cast(PyObject, py_traceback))) + w_traceback = space.getattr(w_traceback, space.wrap("tb_next")) + py_traceback = py_traceback.c_tb_next + + assert lltype.normalizeptr(py_traceback) is None + + api.Py_DecRef(py_obj) diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -589,6 +589,48 @@ assert bool(module.newInt(-1)) raises(ValueError, bool, module.newInt(-42)) + def test_binaryfunc(self): + module = self.import_extension('foo', [ + ("new_obj", "METH_NOARGS", + """ + FooObject *fooObj; + + Foo_Type.tp_as_number = &foo_as_number; + foo_as_number.nb_add = foo_nb_add_call; + if (PyType_Ready(&Foo_Type) < 0) return NULL; + fooObj = PyObject_New(FooObject, &Foo_Type); + if (!fooObj) { + return NULL; + } + + return (PyObject *)fooObj; + """)], + """ + typedef struct + { + PyObject_HEAD + } FooObject; + + static PyObject * + foo_nb_add_call(PyObject *self, PyObject *other) + { + return PyInt_FromLong(42); + } + + PyTypeObject Foo_Type = { + PyObject_HEAD_INIT(0) + /*ob_size*/ 0, + /*tp_name*/ "Foo", + /*tp_basicsize*/ sizeof(FooObject), + }; + static PyNumberMethods foo_as_number; + """) + a = module.new_obj() + b = module.new_obj() + c = 3 + assert (a + b) == 42 + raises(NotImplementedError, "b + c") + def test_tp_new_in_subclass_of_type(self): skip("BROKEN") module = self.import_module(name='foo3') diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -2,6 +2,7 @@ It should not be imported by the module itself """ import re +import py from pypy.interpreter import special from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root, ObjSpace from pypy.interpreter.error import OperationError @@ -12,6 +13,10 @@ from pypy.module.micronumpy.ndarray import W_NDimArray from pypy.module.micronumpy.ctors import array from pypy.module.micronumpy.descriptor import get_dtype_cache +from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary +from pypy.interpreter.executioncontext import (ExecutionContext, ActionFlag, + UserDelAction) +from 
pypy.interpreter.pyframe import PyFrame class BogusBytecode(Exception): @@ -32,12 +37,11 @@ class BadToken(Exception): pass - SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any", "unegative", "flat", "tostring", "count_nonzero", "argsort", "cumsum", "logical_xor_reduce"] -TWO_ARG_FUNCTIONS = ["dot", 'take', 'searchsorted'] -TWO_ARG_FUNCTIONS_OR_NONE = ['view', 'astype'] +TWO_ARG_FUNCTIONS = ["dot", 'take', 'searchsorted', 'multiply'] +TWO_ARG_FUNCTIONS_OR_NONE = ['view', 'astype', 'reshape'] THREE_ARG_FUNCTIONS = ['where'] class W_TypeObject(W_Root): @@ -57,6 +61,10 @@ w_OverflowError = W_TypeObject("OverflowError") w_NotImplementedError = W_TypeObject("NotImplementedError") w_AttributeError = W_TypeObject("AttributeError") + w_StopIteration = W_TypeObject("StopIteration") + w_KeyError = W_TypeObject("KeyError") + w_SystemExit = W_TypeObject("SystemExit") + w_KeyboardInterrupt = W_TypeObject("KeyboardInterrupt") w_None = None w_bool = W_TypeObject("bool") @@ -72,13 +80,26 @@ w_dict = W_TypeObject("dict") w_object = W_TypeObject("object") w_buffer = W_TypeObject("buffer") + w_type = W_TypeObject("type") - def __init__(self): + def __init__(self, config=None): """NOT_RPYTHON""" self.fromcache = InternalSpaceCache(self).getorbuild self.w_Ellipsis = special.Ellipsis() self.w_NotImplemented = special.NotImplemented() + if config is None: + from pypy.config.pypyoption import get_pypy_config + config = get_pypy_config(translating=False) + self.config = config + + self.interned_strings = make_weak_value_dictionary(self, str, W_Root) + self.builtin = DictObject({}) + self.FrameClass = PyFrame + self.threadlocals = ThreadLocals() + self.actionflag = ActionFlag() # changed by the signal module + self.check_signal_action = None # changed by the signal module + def _freeze_(self): return True @@ -89,12 +110,17 @@ return isinstance(w_obj, ListObject) or isinstance(w_obj, W_NDimArray) def len(self, w_obj): - assert isinstance(w_obj, ListObject) - return self.wrap(len(w_obj.items)) + if isinstance(w_obj, ListObject): + return self.wrap(len(w_obj.items)) + elif isinstance(w_obj, DictObject): + return self.wrap(len(w_obj.items)) + raise NotImplementedError def getattr(self, w_obj, w_attr): assert isinstance(w_attr, StringObject) - return w_obj.getdictvalue(self, w_attr.v) + if isinstance(w_obj, DictObject): + return w_obj.getdictvalue(self, w_attr) + return None def isinstance_w(self, w_obj, w_tp): try: @@ -102,6 +128,22 @@ except AttributeError: return False + def iter(self, w_iter): + if isinstance(w_iter, ListObject): + raise NotImplementedError + #return IterObject(space, w_iter.items) + elif isinstance(w_iter, DictObject): + return IterDictObject(self, w_iter) + + def next(self, w_iter): + return w_iter.next() + + def contains(self, w_iter, w_key): + if isinstance(w_iter, DictObject): + return self.wrap(w_key in w_iter.items) + + raise NotImplementedError + def decode_index4(self, w_idx, size): if isinstance(w_idx, IntObject): return (self.int_w(w_idx), 0, 0, 1) @@ -123,6 +165,10 @@ lgt = (stop - start - 1) / step + 1 return (start, stop, step, lgt) + def unicode_from_object(self, w_item): + # XXX + return StringObject("") + @specialize.argtype(1) def wrap(self, obj): if isinstance(obj, float): @@ -145,7 +191,55 @@ def newcomplex(self, r, i): return ComplexObject(r, i) + def newfloat(self, f): + return self.float(f) + + def le(self, w_obj1, w_obj2): + assert isinstance(w_obj1, boxes.W_GenericBox) + assert isinstance(w_obj2, boxes.W_GenericBox) + return w_obj1.descr_le(self, w_obj2) + + 
def lt(self, w_obj1, w_obj2): + assert isinstance(w_obj1, boxes.W_GenericBox) + assert isinstance(w_obj2, boxes.W_GenericBox) + return w_obj1.descr_lt(self, w_obj2) + + def ge(self, w_obj1, w_obj2): + assert isinstance(w_obj1, boxes.W_GenericBox) + assert isinstance(w_obj2, boxes.W_GenericBox) + return w_obj1.descr_ge(self, w_obj2) + + def add(self, w_obj1, w_obj2): + assert isinstance(w_obj1, boxes.W_GenericBox) + assert isinstance(w_obj2, boxes.W_GenericBox) + return w_obj1.descr_add(self, w_obj2) + + def sub(self, w_obj1, w_obj2): + return self.wrap(1) + + def mul(self, w_obj1, w_obj2): + assert isinstance(w_obj1, boxes.W_GenericBox) + assert isinstance(w_obj2, boxes.W_GenericBox) + return w_obj1.descr_mul(self, w_obj2) + + def pow(self, w_obj1, w_obj2, _): + return self.wrap(1) + + def neg(self, w_obj1): + return self.wrap(0) + + def repr(self, w_obj1): + return self.wrap('fake') + def getitem(self, obj, index): + if isinstance(obj, DictObject): + w_dict = obj.getdict(self) + if w_dict is not None: + try: + return w_dict[index] + except KeyError, e: + raise OperationError(self.w_KeyError, self.wrap("key error")) + assert isinstance(obj, ListObject) assert isinstance(index, IntObject) return obj.items[index.intval] @@ -191,12 +285,24 @@ return w_obj.v raise NotImplementedError + def unicode_w(self, w_obj): + # XXX + if isinstance(w_obj, StringObject): + return unicode(w_obj.v) + raise NotImplementedError + def int(self, w_obj): if isinstance(w_obj, IntObject): return w_obj assert isinstance(w_obj, boxes.W_GenericBox) return self.int(w_obj.descr_int(self)) + def long(self, w_obj): + if isinstance(w_obj, LongObject): + return w_obj + assert isinstance(w_obj, boxes.W_GenericBox) + return self.int(w_obj.descr_long(self)) + def str(self, w_obj): if isinstance(w_obj, StringObject): return w_obj @@ -240,9 +346,29 @@ def gettypefor(self, w_obj): return W_TypeObject(w_obj.typedef.name) - def call_function(self, tp, w_dtype): + def call_function(self, tp, w_dtype, *args): + if tp is self.w_float: + if isinstance(w_dtype, boxes.W_Float64Box): + return FloatObject(float(w_dtype.value)) + if isinstance(w_dtype, boxes.W_Float32Box): + return FloatObject(float(w_dtype.value)) + if isinstance(w_dtype, boxes.W_Int64Box): + return FloatObject(float(int(w_dtype.value))) + if isinstance(w_dtype, boxes.W_Int32Box): + return FloatObject(float(int(w_dtype.value))) + if isinstance(w_dtype, boxes.W_Int16Box): + return FloatObject(float(int(w_dtype.value))) + if isinstance(w_dtype, boxes.W_Int8Box): + return FloatObject(float(int(w_dtype.value))) + if isinstance(w_dtype, IntObject): + return FloatObject(float(w_dtype.intval)) + if tp is self.w_int: + if isinstance(w_dtype, FloatObject): + return IntObject(int(w_dtype.floatval)) + return w_dtype + @specialize.arg(2) def call_method(self, w_obj, s, *args): # XXX even the hacks have hacks return getattr(w_obj, 'descr_' + s)(self, *args) @@ -258,21 +384,21 @@ def newtuple(self, list_w): return ListObject(list_w) - def newdict(self): - return {} + def newdict(self, module=True): + return DictObject({}) - def setitem(self, dict, item, value): - dict[item] = value + def newint(self, i): + if isinstance(i, IntObject): + return i + return IntObject(i) - def len_w(self, w_obj): - if isinstance(w_obj, ListObject): - return len(w_obj.items) - # XXX array probably - assert False + def setitem(self, obj, index, value): + obj.items[index] = value def exception_match(self, w_exc_type, w_check_class): - # Good enough for now - raise NotImplementedError + assert 
isinstance(w_exc_type, W_TypeObject) + assert isinstance(w_check_class, W_TypeObject) + return w_exc_type.name == w_check_class.name class FloatObject(W_Root): tp = FakeSpace.w_float @@ -283,6 +409,9 @@ tp = FakeSpace.w_bool def __init__(self, boolval): self.intval = boolval +FakeSpace.w_True = BoolObject(True) +FakeSpace.w_False = BoolObject(False) + class IntObject(W_Root): tp = FakeSpace.w_int @@ -299,6 +428,33 @@ def __init__(self, items): self.items = items +class DictObject(W_Root): + tp = FakeSpace.w_dict + def __init__(self, items): + self.items = items + + def getdict(self, space): + return self.items + + def getdictvalue(self, space, key): + return self.items[key] + +class IterDictObject(W_Root): + def __init__(self, space, w_dict): + self.space = space + self.items = w_dict.items.items() + self.i = 0 + + def __iter__(self): + return self + + def next(self): + space = self.space + if self.i >= len(self.items): + raise OperationError(space.w_StopIteration, space.wrap("stop iteration")) + self.i += 1 + return self.items[self.i-1][0] + class SliceObject(W_Root): tp = FakeSpace.w_slice def __init__(self, start, stop, step): @@ -414,6 +570,15 @@ w_rhs = IntObject(int(w_rhs.floatval)) assert isinstance(w_lhs, W_NDimArray) w_res = w_lhs.descr_getitem(interp.space, w_rhs) + if isinstance(w_rhs, IntObject): + if isinstance(w_res, boxes.W_Float64Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) + if isinstance(w_res, boxes.W_Float32Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) + if isinstance(w_res, boxes.W_Int64Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) + if isinstance(w_res, boxes.W_Int32Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) else: raise NotImplementedError if (not isinstance(w_res, W_NDimArray) and @@ -425,9 +590,22 @@ def __repr__(self): return '(%r %s %r)' % (self.lhs, self.name, self.rhs) -class FloatConstant(Node): +class NumberConstant(Node): def __init__(self, v): - self.v = float(v) + if isinstance(v, int): + self.v = v + elif isinstance(v, float): + self.v = v + else: + assert isinstance(v, str) + assert len(v) > 0 + c = v[-1] + if c == 'f': + self.v = float(v[:-1]) + elif c == 'i': + self.v = int(v[:-1]) + else: + self.v = float(v) def __repr__(self): return "Const(%s)" % self.v @@ -519,8 +697,24 @@ def execute(self, interp): if self.v == 'int': dtype = get_dtype_cache(interp.space).w_int64dtype + elif self.v == 'int8': + dtype = get_dtype_cache(interp.space).w_int8dtype + elif self.v == 'int16': + dtype = get_dtype_cache(interp.space).w_int16dtype + elif self.v == 'int32': + dtype = get_dtype_cache(interp.space).w_int32dtype + elif self.v == 'uint': + dtype = get_dtype_cache(interp.space).w_uint64dtype + elif self.v == 'uint8': + dtype = get_dtype_cache(interp.space).w_uint8dtype + elif self.v == 'uint16': + dtype = get_dtype_cache(interp.space).w_uint16dtype + elif self.v == 'uint32': + dtype = get_dtype_cache(interp.space).w_uint32dtype elif self.v == 'float': dtype = get_dtype_cache(interp.space).w_float64dtype + elif self.v == 'float32': + dtype = get_dtype_cache(interp.space).w_float32dtype else: raise BadToken('unknown v to dtype "%s"' % self.v) return dtype @@ -556,8 +750,13 @@ raise ArgumentMismatch if self.name == "sum": if len(self.args)>1: - w_res = arr.descr_sum(interp.space, + var = self.args[1] + if isinstance(var, DtypeClass): + w_res = arr.descr_sum(interp.space, None, var.execute(interp)) + else: + w_res = 
arr.descr_sum(interp.space, self.args[1].execute(interp)) + else: w_res = arr.descr_sum(interp.space) elif self.name == "prod": @@ -577,10 +776,10 @@ w_res = logical_xor.reduce(interp.space, arr, None) elif self.name == "unegative": neg = ufuncs.get(interp.space).negative - w_res = neg.call(interp.space, [arr], None, None, None) + w_res = neg.call(interp.space, [arr], None, 'unsafe', None) elif self.name == "cos": cos = ufuncs.get(interp.space).cos - w_res = cos.call(interp.space, [arr], None, None, None) + w_res = cos.call(interp.space, [arr], None, 'unsafe', None) elif self.name == "flat": w_res = arr.descr_get_flatiter(interp.space) elif self.name == "argsort": @@ -598,6 +797,8 @@ raise ArgumentNotAnArray if self.name == "dot": w_res = arr.descr_dot(interp.space, arg) + elif self.name == 'multiply': + w_res = arr.descr_mul(interp.space, arg) elif self.name == 'take': w_res = arr.descr_take(interp.space, arg) elif self.name == "searchsorted": @@ -617,7 +818,7 @@ if self.name == "where": w_res = where(interp.space, arr, arg1, arg2) else: - assert False + assert False # unreachable code elif self.name in TWO_ARG_FUNCTIONS_OR_NONE: if len(self.args) != 2: raise ArgumentMismatch @@ -626,6 +827,11 @@ w_res = arr.descr_view(interp.space, arg) elif self.name == 'astype': w_res = arr.descr_astype(interp.space, arg) + elif self.name == 'reshape': + w_arg = self.args[1] + assert isinstance(w_arg, ArrayConstant) + order = -1 + w_res = arr.reshape(interp.space, w_arg.wrap(interp.space), order) else: assert False else: @@ -645,7 +851,7 @@ return W_NDimArray.new_scalar(interp.space, dtype, w_res) _REGEXES = [ - ('-?[\d\.]+', 'number'), + ('-?[\d\.]+(i|f)?', 'number'), ('\[', 'array_left'), (':', 'colon'), ('\w+', 'identifier'), @@ -719,7 +925,7 @@ start = 0 else: if tokens.get(0).name != 'colon': - return FloatConstant(start_tok.v) + return NumberConstant(start_tok.v) start = int(start_tok.v) tokens.pop() if not tokens.get(0).name in ['colon', 'number']: @@ -751,8 +957,30 @@ stack.append(ArrayClass()) elif token.v.strip(' ') == 'int': stack.append(DtypeClass('int')) + elif token.v.strip(' ') == 'int8': + stack.append(DtypeClass('int8')) + elif token.v.strip(' ') == 'int16': + stack.append(DtypeClass('int16')) + elif token.v.strip(' ') == 'int32': + stack.append(DtypeClass('int32')) + elif token.v.strip(' ') == 'int64': + stack.append(DtypeClass('int')) + elif token.v.strip(' ') == 'uint': + stack.append(DtypeClass('uint')) + elif token.v.strip(' ') == 'uint8': + stack.append(DtypeClass('uint8')) + elif token.v.strip(' ') == 'uint16': + stack.append(DtypeClass('uint16')) + elif token.v.strip(' ') == 'uint32': + stack.append(DtypeClass('uint32')) + elif token.v.strip(' ') == 'uint64': + stack.append(DtypeClass('uint')) elif token.v.strip(' ') == 'float': stack.append(DtypeClass('float')) + elif token.v.strip(' ') == 'float32': + stack.append(DtypeClass('float32')) + elif token.v.strip(' ') == 'float64': + stack.append(DtypeClass('float')) else: stack.append(Variable(token.v.strip(' '))) elif token.name == 'array_left': @@ -805,7 +1033,7 @@ while True: token = tokens.pop() if token.name == 'number': - elems.append(FloatConstant(token.v)) + elems.append(NumberConstant(token.v)) elif token.name == 'array_left': elems.append(ArrayConstant(self.parse_array_const(tokens))) elif token.name == 'paren_left': diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py --- a/pypy/module/micronumpy/concrete.py +++ b/pypy/module/micronumpy/concrete.py @@ -70,7 +70,10 @@ @jit.unroll_safe def 
setslice(self, space, arr): - if len(arr.get_shape()) > len(self.get_shape()): + if arr.get_size() == 1: + # we can always set self[:] = scalar + pass + elif len(arr.get_shape()) > len(self.get_shape()): # record arrays get one extra dimension if not self.dtype.is_record() or \ len(arr.get_shape()) > len(self.get_shape()) + 1: diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py --- a/pypy/module/micronumpy/ctors.py +++ b/pypy/module/micronumpy/ctors.py @@ -86,6 +86,9 @@ def _array(space, w_object, w_dtype=None, copy=True, w_order=None, subok=False): + # numpy testing calls array(type(array([]))) and expects a ValueError + if space.isinstance_w(w_object, space.w_type): + raise oefmt(space.w_ValueError, "cannot create ndarray from type instance") # for anything that isn't already an array, try __array__ method first if not isinstance(w_object, W_NDimArray): w_array = try_array_method(space, w_object, w_dtype) diff --git a/pypy/module/micronumpy/flatiter.py b/pypy/module/micronumpy/flatiter.py --- a/pypy/module/micronumpy/flatiter.py +++ b/pypy/module/micronumpy/flatiter.py @@ -97,7 +97,7 @@ finally: self.iter.reset(self.state, mutate=True) - def descr___array_wrap__(self, space, obj): + def descr___array_wrap__(self, space, obj, w_context=None): return obj W_FlatIterator.typedef = TypeDef("numpy.flatiter", diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py --- a/pypy/module/micronumpy/iterators.py +++ b/pypy/module/micronumpy/iterators.py @@ -83,6 +83,12 @@ self._indices = indices self.offset = offset + def same(self, other): + if self.offset == other.offset and \ + self.index == other.index and \ + self._indices == other._indices: + return self.iterator.same_shape(other.iterator) + return False class ArrayIter(object): _immutable_fields_ = ['contiguous', 'array', 'size', 'ndim_m1', 'shape_m1[*]', @@ -100,6 +106,7 @@ self.array = array self.size = size self.ndim_m1 = len(shape) - 1 + # self.shape_m1 = [s - 1 for s in shape] self.strides = strides self.backstrides = backstrides @@ -113,6 +120,17 @@ factors[ndim-i-1] = factors[ndim-i] * shape[ndim-i] self.factors = factors + def same_shape(self, other): + """ Iterating over the same element """ + if not self.contiguous or not other.contiguous: + return False + return (self.contiguous == other.contiguous and + self.array.dtype is self.array.dtype and + self.shape_m1 == other.shape_m1 and + self.strides == other.strides and + self.backstrides == other.backstrides and + self.factors == other.factors) + @jit.unroll_safe def reset(self, state=None, mutate=False): index = 0 @@ -138,9 +156,13 @@ indices = state._indices offset = state.offset if self.contiguous: - offset += self.array.dtype.elsize + elsize = self.array.dtype.elsize + jit.promote(elsize) + offset += elsize elif self.ndim_m1 == 0: - offset += self.strides[0] + stride = self.strides[0] + jit.promote(stride) + offset += stride else: for i in xrange(self.ndim_m1, -1, -1): idx = indices[i] @@ -192,7 +214,7 @@ return state.index >= self.size def getitem(self, state): - assert state.iterator is self + # assert state.iterator is self return self.array.getitem(state.offset) def getitem_bool(self, state): @@ -203,7 +225,6 @@ assert state.iterator is self self.array.setitem(state.offset, elem) - def AxisIter(array, shape, axis): strides = array.get_strides() backstrides = array.get_backstrides() diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ 
b/pypy/module/micronumpy/loop.py @@ -2,6 +2,7 @@ operations. This is the place to look for all the computations that iterate over all the array elements. """ +import py from pypy.interpreter.error import OperationError from rpython.rlib import jit from rpython.rlib.rstring import StringBuilder @@ -13,11 +14,6 @@ from pypy.interpreter.argument import Arguments -call2_driver = jit.JitDriver( - name='numpy_call2', - greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'], - reds='auto') - def call2(space, shape, func, calc_dtype, w_lhs, w_rhs, out): if w_lhs.get_size() == 1: w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype) @@ -38,24 +34,104 @@ out_iter, out_state = out.create_iter(shape) shapelen = len(shape) res_dtype = out.get_dtype() - while not out_iter.done(out_state): - call2_driver.jit_merge_point(shapelen=shapelen, func=func, - calc_dtype=calc_dtype, res_dtype=res_dtype) - if left_iter: - w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype) - left_state = left_iter.next(left_state) - if right_iter: - w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype) - right_state = right_iter.next(right_state) - out_iter.setitem(out_state, func(calc_dtype, w_left, w_right).convert_to( - space, res_dtype)) - out_state = out_iter.next(out_state) - return out + call2_func = try_to_share_iterators_call2(left_iter, right_iter, + left_state, right_state, out_state) + params = (space, shapelen, func, calc_dtype, res_dtype, out, + w_left, w_right, left_iter, right_iter, out_iter, + left_state, right_state, out_state) + return call2_func(*params) + +def try_to_share_iterators_call2(left_iter, right_iter, left_state, right_state, out_state): + # these are all possible iterator sharing combinations + # left == right == out + # left == right + # left == out + # right == out + right_out_equal = False + if right_iter: + # rhs is not a scalar + if out_state.same(right_state): + right_out_equal = True + # + if not left_iter: + # lhs is a scalar + if right_out_equal: + return call2_advance_out_left + else: + # worst case, nothing can be shared and lhs is a scalar + return call2_advance_out_left_right + else: + # lhs is NOT a scalar + if out_state.same(left_state): + # (2) out and left are the same -> remove left + if right_out_equal: + # the best case + return call2_advance_out + else: + return call2_advance_out_right + else: + if right_out_equal: + # right and out are equal, only advance left and out + return call2_advance_out_left + else: + if right_iter and right_state.same(left_state): + # left and right are equal, but still need to advance out + return call2_advance_out_left_eq_right + else: + # worst case, nothing can be shared + return call2_advance_out_left_right + + assert 0, "logical problem with the selection of the call2 case" + +def generate_call2_cases(name, left_state, right_state): + call2_driver = jit.JitDriver(name='numpy_call2_' + name, + greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'], + reds='auto', vectorize=True) + # + advance_left_state = left_state == "left_state" + advance_right_state = right_state == "right_state" + code = """ + def method(space, shapelen, func, calc_dtype, res_dtype, out, + w_left, w_right, left_iter, right_iter, out_iter, + left_state, right_state, out_state): + while not out_iter.done(out_state): + call2_driver.jit_merge_point(shapelen=shapelen, func=func, + calc_dtype=calc_dtype, res_dtype=res_dtype) + if left_iter: + w_left = left_iter.getitem({left_state}).convert_to(space, calc_dtype) + if right_iter: + 
w_right = right_iter.getitem({right_state}).convert_to(space, calc_dtype) + w_out = func(calc_dtype, w_left, w_right) + out_iter.setitem(out_state, w_out.convert_to(space, res_dtype)) + out_state = out_iter.next(out_state) + if advance_left_state and left_iter: + left_state = left_iter.next(left_state) + if advance_right_state and right_iter: + right_state = right_iter.next(right_state) + # + # if not set to None, the values will be loop carried + # (for the var,var case), forcing the vectorization to unpack + # the vector registers at the end of the loop + if left_iter: + w_left = None + if right_iter: + w_right = None + return out + """ + exec(py.code.Source(code.format(left_state=left_state,right_state=right_state)).compile(), locals()) + method.__name__ = "call2_" + name + return method + +call2_advance_out = generate_call2_cases("inc_out", "out_state", "out_state") +call2_advance_out_left = generate_call2_cases("inc_out_left", "left_state", "out_state") +call2_advance_out_right = generate_call2_cases("inc_out_right", "out_state", "right_state") +call2_advance_out_left_eq_right = generate_call2_cases("inc_out_left_eq_right", "left_state", "left_state") +call2_advance_out_left_right = generate_call2_cases("inc_out_left_right", "left_state", "right_state") call1_driver = jit.JitDriver( name='numpy_call1', - greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'], - reds='auto') + greens=['shapelen', 'share_iterator', 'func', 'calc_dtype', 'res_dtype'], + reds='auto', vectorize=True) def call1(space, shape, func, calc_dtype, w_obj, w_ret): obj_iter, obj_state = w_obj.create_iter(shape) @@ -63,13 +139,24 @@ out_iter, out_state = w_ret.create_iter(shape) shapelen = len(shape) res_dtype = w_ret.get_dtype() + share_iterator = out_state.same(obj_state) while not out_iter.done(out_state): call1_driver.jit_merge_point(shapelen=shapelen, func=func, + share_iterator=share_iterator, calc_dtype=calc_dtype, res_dtype=res_dtype) - elem = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) + if share_iterator: + # use out state as param to getitem + elem = obj_iter.getitem(out_state).convert_to(space, calc_dtype) + else: + elem = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) out_iter.setitem(out_state, func(calc_dtype, elem).convert_to(space, res_dtype)) - out_state = out_iter.next(out_state) - obj_state = obj_iter.next(obj_state) + if share_iterator: + # only advance out, they share the same iteration space + out_state = out_iter.next(out_state) + else: + out_state = out_iter.next(out_state) + obj_state = obj_iter.next(obj_state) + elem = None return w_ret call_many_to_one_driver = jit.JitDriver( @@ -145,7 +232,7 @@ vals[i] = in_dtypes[i].coerce(space, in_iters[i].getitem(in_states[i])) w_arglist = space.newlist(vals) w_outvals = space.call_args(func, Arguments.frompacked(space, w_arglist)) - # w_outvals should be a tuple, but func can return a single value as well + # w_outvals should be a tuple, but func can return a single value as well if space.isinstance_w(w_outvals, space.w_tuple): batch = space.listview(w_outvals) for i in range(len(batch)): @@ -161,7 +248,7 @@ setslice_driver = jit.JitDriver(name='numpy_setslice', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def setslice(space, shape, target, source): if not shape: @@ -239,7 +326,8 @@ reduce_flat_driver = jit.JitDriver( name='numpy_reduce_flat', - greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], reds = 'auto') + greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], reds = 
'auto', + vectorize = True) def reduce_flat(space, func, w_arr, calc_dtype, done_func, identity): obj_iter, obj_state = w_arr.create_iter() @@ -260,10 +348,10 @@ obj_state = obj_iter.next(obj_state) return cur_value - reduce_driver = jit.JitDriver( name='numpy_reduce', - greens=['shapelen', 'func', 'dtype'], reds='auto') + greens=['shapelen', 'func', 'dtype'], reds='auto', + vectorize=True) def reduce(space, func, w_arr, axis_flags, dtype, out, identity): out_iter, out_state = out.create_iter() @@ -298,7 +386,7 @@ accumulate_flat_driver = jit.JitDriver( name='numpy_accumulate_flat', greens=['shapelen', 'func', 'dtype', 'out_dtype'], - reds='auto') + reds='auto', vectorize=True) def accumulate_flat(space, func, w_arr, calc_dtype, w_out, identity): arr_iter, arr_state = w_arr.create_iter() @@ -325,7 +413,9 @@ accumulate_driver = jit.JitDriver( name='numpy_accumulate', - greens=['shapelen', 'func', 'calc_dtype'], reds='auto') + greens=['shapelen', 'func', 'calc_dtype'], + reds='auto', + vectorize=True) def accumulate(space, func, w_arr, axis, calc_dtype, w_out, identity): @@ -375,7 +465,8 @@ where_driver = jit.JitDriver(name='numpy_where', greens = ['shapelen', 'dtype', 'arr_dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def where(space, out, shape, arr, x, y, dtype): out_iter, out_state = out.create_iter(shape) @@ -416,7 +507,6 @@ state = x_state return out - def _new_argmin_argmax(op_name): arg_driver = jit.JitDriver(name='numpy_' + op_name, greens = ['shapelen', 'dtype'], @@ -481,7 +571,8 @@ dot_driver = jit.JitDriver(name = 'numpy_dot', greens = ['dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def multidim_dot(space, left, right, result, dtype, right_critical_dim): ''' assumes left, right are concrete arrays @@ -524,8 +615,8 @@ lval = left_impl.getitem(i1).convert_to(space, dtype) rval = right_impl.getitem(i2).convert_to(space, dtype) oval = dtype.itemtype.add(oval, dtype.itemtype.mul(lval, rval)) - i1 += s1 - i2 += s2 + i1 += jit.promote(s1) + i2 += jit.promote(s2) outi.setitem(outs, oval) outs = outi.next(outs) rights = righti.next(rights) @@ -535,7 +626,8 @@ count_all_true_driver = jit.JitDriver(name = 'numpy_count', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def count_all_true_concrete(impl): s = 0 @@ -556,7 +648,8 @@ nonzero_driver = jit.JitDriver(name = 'numpy_nonzero', greens = ['shapelen', 'dims', 'dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def nonzero(res, arr, box): res_iter, res_state = res.create_iter() @@ -578,7 +671,8 @@ getitem_filter_driver = jit.JitDriver(name = 'numpy_getitem_bool', greens = ['shapelen', 'arr_dtype', 'index_dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def getitem_filter(res, arr, index): res_iter, res_state = res.create_iter() @@ -606,7 +700,8 @@ setitem_filter_driver = jit.JitDriver(name = 'numpy_setitem_bool', greens = ['shapelen', 'arr_dtype', 'index_dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def setitem_filter(space, arr, index, value): arr_iter, arr_state = arr.create_iter() @@ -635,7 +730,8 @@ flatiter_getitem_driver = jit.JitDriver(name = 'numpy_flatiter_getitem', greens = ['dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def flatiter_getitem(res, base_iter, base_state, step): ri, rs = res.create_iter() @@ -649,7 +745,8 @@ flatiter_setitem_driver = jit.JitDriver(name = 'numpy_flatiter_setitem', greens = ['dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def flatiter_setitem(space, dtype, val, arr_iter, 
arr_state, step, length): val_iter, val_state = val.create_iter() @@ -758,7 +855,8 @@ byteswap_driver = jit.JitDriver(name='numpy_byteswap_driver', greens = ['dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def byteswap(from_, to): dtype = from_.dtype @@ -773,7 +871,8 @@ choose_driver = jit.JitDriver(name='numpy_choose_driver', greens = ['shapelen', 'mode', 'dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def choose(space, arr, choices, shape, dtype, out, mode): shapelen = len(shape) @@ -807,7 +906,8 @@ clip_driver = jit.JitDriver(name='numpy_clip_driver', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def clip(space, arr, shape, min, max, out): assert min or max @@ -842,7 +942,8 @@ round_driver = jit.JitDriver(name='numpy_round_driver', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', + vectorize=True) def round(space, arr, dtype, shape, decimals, out): arr_iter, arr_state = arr.create_iter(shape) diff --git a/pypy/module/micronumpy/strides.py b/pypy/module/micronumpy/strides.py --- a/pypy/module/micronumpy/strides.py +++ b/pypy/module/micronumpy/strides.py @@ -7,6 +7,7 @@ # structures to describe slicing class BaseChunk(object): + _attrs_ = ['step','out_dim'] pass diff --git a/pypy/module/micronumpy/test/test_compile.py b/pypy/module/micronumpy/test/test_compile.py --- a/pypy/module/micronumpy/test/test_compile.py +++ b/pypy/module/micronumpy/test/test_compile.py @@ -1,6 +1,6 @@ import py from pypy.module.micronumpy.compile import (numpy_compile, Assignment, - ArrayConstant, FloatConstant, Operator, Variable, RangeConstant, Execute, + ArrayConstant, NumberConstant, Operator, Variable, RangeConstant, Execute, FunctionCall, FakeSpace, W_NDimArray) @@ -25,30 +25,30 @@ interp = self.compile(code) assert isinstance(interp.code.statements[0].expr, ArrayConstant) st = interp.code.statements[0] - assert st.expr.items == [FloatConstant(1), FloatConstant(2), - FloatConstant(3)] + assert st.expr.items == [NumberConstant(1), NumberConstant(2), + NumberConstant(3)] def test_array_literal2(self): code = "a = [[1],[2],[3]]" interp = self.compile(code) assert isinstance(interp.code.statements[0].expr, ArrayConstant) st = interp.code.statements[0] - assert st.expr.items == [ArrayConstant([FloatConstant(1)]), - ArrayConstant([FloatConstant(2)]), - ArrayConstant([FloatConstant(3)])] + assert st.expr.items == [ArrayConstant([NumberConstant(1)]), + ArrayConstant([NumberConstant(2)]), + ArrayConstant([NumberConstant(3)])] def test_expr_1(self): code = "b = a + 1" interp = self.compile(code) assert (interp.code.statements[0].expr == - Operator(Variable("a"), "+", FloatConstant(1))) + Operator(Variable("a"), "+", NumberConstant(1))) def test_expr_2(self): code = "b = a + b - 3" interp = self.compile(code) assert (interp.code.statements[0].expr == Operator(Operator(Variable("a"), "+", Variable("b")), "-", - FloatConstant(3))) + NumberConstant(3))) def test_expr_3(self): # an equivalent of range @@ -60,13 +60,13 @@ code = "3 + a" interp = self.compile(code) assert interp.code.statements[0] == Execute( - Operator(FloatConstant(3), "+", Variable("a"))) + Operator(NumberConstant(3), "+", Variable("a"))) def test_array_access(self): code = "a -> 3" interp = self.compile(code) assert interp.code.statements[0] == Execute( - Operator(Variable("a"), "->", FloatConstant(3))) + Operator(Variable("a"), "->", NumberConstant(3))) def test_function_call(self): code = "sum(a)" @@ -81,7 +81,7 @@ """ interp = self.compile(code) assert 
interp.code.statements[0] == Assignment( - 'a', Operator(Variable('b'), "+", FloatConstant(3))) + 'a', Operator(Variable('b'), "+", NumberConstant(3))) class TestRunner(object): @@ -272,6 +272,14 @@ """) assert interp.results[0].value == 3 + def test_any(self): + interp = self.run(""" + a = [0,0,0,0,0.1,0,0,0,0] + b = any(a) + b -> 0 + """) + assert interp.results[0].value == 1 + def test_where(self): interp = self.run(''' a = [1, 0, 3, 0] diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py --- a/pypy/module/micronumpy/test/test_ndarray.py +++ b/pypy/module/micronumpy/test/test_ndarray.py @@ -292,6 +292,8 @@ a = np.array('123', dtype='intp') assert a == 123 assert a.dtype == np.intp + # required for numpy test suite + raises(ValueError, np.array, type(a)) def test_array_copy(self): from numpy import array diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py --- a/pypy/module/micronumpy/test/test_ufuncs.py +++ b/pypy/module/micronumpy/test/test_ufuncs.py @@ -319,6 +319,28 @@ assert out0.dtype in (int, complex) assert (out0 == in0 * 2).all() + def test_frompyfunc_scalar(self): + import sys + import numpy as np + if '__pypy__' not in sys.builtin_module_names: + skip('PyPy only frompyfunc extension') + + def summer(in0): + out = np.empty(1, in0.dtype) + out[0] = in0.sum() + return out + + pysummer = np.frompyfunc([summer, summer], 1, 1, + dtypes=[np.dtype(int), np.dtype(int), + np.dtype(complex), np.dtype(complex)], + stack_inputs=False, signature='(m,m)->()', + ) + for d in [np.dtype(float), np.dtype('uint8'), np.dtype('complex64')]: + in0 = np.arange(4, dtype=d).reshape(1, 2, 2) + out0 = pysummer(in0) + assert out0 == in0.sum() + assert out0.dtype in (int, complex) + def test_ufunc_kwargs(self): from numpy import ufunc, frompyfunc, arange, dtype def adder(a, b): diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -4,17 +4,37 @@ import py from rpython.jit.metainterp.test.support import LLJitMixin +from rpython.jit.backend.x86.test.test_basic import Jit386Mixin from rpython.jit.metainterp.warmspot import reset_jit, get_stats +from rpython.jit.metainterp.jitprof import Profiler +from rpython.jit.metainterp import counter +from rpython.rlib.jit import Counters +from rpython.rlib.rarithmetic import intmask from pypy.module.micronumpy import boxes from pypy.module.micronumpy.compile import FakeSpace, Parser, InterpreterState from pypy.module.micronumpy.base import W_NDimArray +from rpython.jit.backend.detect_cpu import getcpuclass -py.test.skip('move these to pypyjit/test_pypy_c/test_micronumpy') +CPU = getcpuclass() +if not CPU.vector_extension: + py.test.skip("this cpu %s has no implemented vector backend" % CPU) + +def get_profiler(): + from rpython.jit.metainterp import pyjitpl + return pyjitpl._warmrunnerdesc.metainterp_sd.profiler class TestNumpyJit(LLJitMixin): + enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll" graph = None interp = None + def setup_method(self, method): + if not self.CPUClass.vector_extension: + py.test.skip("needs vector extension to run (for now)") + + def assert_float_equal(self, f1, f2, delta=0.0001): + assert abs(f1-f2) < delta + def setup_class(cls): default = """ a = [1,2,3,4] @@ -52,12 +72,29 @@ w_res = i.getitem(s) if isinstance(w_res, boxes.W_Float64Box): return w_res.value + if isinstance(w_res, 
boxes.W_Float32Box): + return float(w_res.value) elif isinstance(w_res, boxes.W_Int64Box): return float(w_res.value) + elif isinstance(w_res, boxes.W_Int32Box): + return float(int(w_res.value)) + elif isinstance(w_res, boxes.W_Int16Box): + return float(int(w_res.value)) + elif isinstance(w_res, boxes.W_Int8Box): + return float(int(w_res.value)) + elif isinstance(w_res, boxes.W_UInt64Box): + return float(intmask(w_res.value)) + elif isinstance(w_res, boxes.W_UInt32Box): + return float(intmask(w_res.value)) + elif isinstance(w_res, boxes.W_UInt16Box): + return float(intmask(w_res.value)) + elif isinstance(w_res, boxes.W_UInt8Box): + return float(intmask(w_res.value)) elif isinstance(w_res, boxes.W_LongBox): return float(w_res.value) elif isinstance(w_res, boxes.W_BoolBox): return float(w_res.value) + print "ERROR: did not implement return type for interpreter" raise TypeError(w_res) if self.graph is None: @@ -65,122 +102,354 @@ listops=True, listcomp=True, backendopt=True, - graph_and_interp_only=True) + graph_and_interp_only=True, + ProfilerClass=Profiler, + vec=True) self.__class__.interp = interp self.__class__.graph = graph + def check_vectorized(self, expected_tried, expected_success): + profiler = get_profiler() + tried = profiler.get_counter(Counters.OPT_VECTORIZE_TRY) + success = profiler.get_counter(Counters.OPT_VECTORIZED) + assert tried >= success + assert tried == expected_tried + assert success == expected_success + def run(self, name): self.compile_graph() + profiler = get_profiler() + profiler.start() reset_jit() i = self.code_mapping[name] retval = self.interp.eval_graph(self.graph, [i]) return retval - def define_add(): + def define_float32_copy(): + return """ + a = astype(|30|, float32) + x1 = a -> 7 + x2 = a -> 8 + x3 = a -> 9 + x4 = a -> 10 + r = x1 + x2 + x3 + x4 + r + """ + def test_float32_copy(self): + result = self.run("float32_copy") + assert int(result) == 7+8+9+10 + self.check_vectorized(1, 1) + + def define_int32_copy(): + return """ + a = astype(|30|, int32) + x1 = a -> 7 + x2 = a -> 8 + x3 = a -> 9 + x4 = a -> 10 + x1 + x2 + x3 + x4 + """ + def test_int32_copy(self): + result = self.run("int32_copy") + assert int(result) == 7+8+9+10 + self.check_vectorized(1, 1) + + def define_float32_add(): + return """ + a = astype(|30|, float32) + b = a + a + b -> 15 + """ + def test_float32_add(self): + result = self.run("float32_add") + self.assert_float_equal(result, 15.0 + 15.0) + self.check_vectorized(2, 2) + + def define_float_add(): return """ a = |30| b = a + a _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
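As a rough sketch of the kind of loop the new vectorization pass targets (this is not part of the changeset above; the script name, array size and use of the PyPy-compatible numpy port are assumptions), something like the following can be tried with the --jit vec=1 flag mentioned in the release notes, e.g. "pypy --jit vec=1 vec_demo.py":

    # vec_demo.py -- illustrative only; mirrors the float32 element-wise
    # addition pattern exercised by define_float32_add in test_zjit.py above.
    import numpy as np

    def add_arrays(n=1000000):
        # two float32 arrays added element-wise: a loop shape the SIMD
        # pass can recognize while tracing
        a = np.arange(n, dtype=np.float32)
        b = np.arange(n, dtype=np.float32)
        c = a + b
        return float(c[15])

    if __name__ == '__main__':
        print(add_arrays())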