Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r87317:ce0e1fb75fc8
Date: 2016-09-22 09:23 +0200
http://bitbucket.org/pypy/pypy/changeset/ce0e1fb75fc8/

Log:    merge default

diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -507,7 +507,7 @@
     def test_bug_1(self):
         import _cffi_backend as _cffi1_backend
         ffi = _cffi1_backend.FFI()
-        q = ffi.new("char[]", "abcd")
+        q = ffi.new("char[]", b"abcd")
         p = ffi.cast("char(*)(void)", q)
         raises(TypeError, ffi.string, p)
 
diff --git a/rpython/doc/jit/backend.rst b/rpython/doc/jit/backend.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/jit/backend.rst
@@ -0,0 +1,263 @@
+=========================
+PyPy's assembler backends
+=========================
+
+Draft notes about the organization of assembler backends in the PyPy JIT, in 2016
+==================================================================================
+
+
+Input: a linear sequence of instructions, called a "trace".
+
+A trace is a sequence of instructions in SSA form.  Most instructions
+correspond to one or a few CPU-level instructions.  There are a few
+meta-instructions like `label` and debugging stuff.  All branching is
+done with guards, which are instructions that check that a condition is
+true and exit the trace if not.  A failing guard can have a new trace
+added to it later, called a "bridge".  A patched guard becomes a direct
+`Jcond` instruction going to the bridge, with no indirection, no
+register spilling, etc.
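+
+For illustration, here is the kind of RPython-level loop that produces a
+short trace, together with a hand-written sketch of the corresponding
+trace (the operation names are real ones, but the listing is simplified
+and is not actual JIT output)::
+
+    def f(n):
+        total = 0
+        i = 0
+        while i < n:        # the loop condition becomes a guard in the trace
+            total += i
+            i += 1
+        return total
+
+    # roughly corresponds to a trace of the form:
+    #   label(i0, i1, i2)
+    #   i3 = int_lt(i1, i0)
+    #   guard_true(i3)       # a failing guard may later grow a bridge
+    #   i4 = int_add(i2, i1)
+    #   i5 = int_add(i1, 1)
+    #   jump(i0, i5, i4)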
+
+A trace ends with either a `return` or a `jump to label`.  The target
+label is either inside the same trace, or in some older one.  For
+historical reasons we call a "loop" a trace that is not a bridge.  The
+machine code that we generate is organized as a forest of trees; the
+trunk of the tree is a "loop", and the branches are all bridges
+(branching off the trunk or off another branch).
+
+* every trunk or branch that ends in a `jump to label` can target a
+  label from a different tree, too.
+
+* the whole process of assembling a loop or a branch is basically
+  single-threaded, so no synchronization issue there (including to patch
+  older generated instructions).
+
+* the generated assembler has got a "frame" in %rbp, which is actually
+  not on the stack at all, but is a GC object (called a "jitframe").
+  Spilling goes there.
+
+* the guards are `Jcond` to a very small piece of generated code, which
+  is basically pushing a couple of constants on the stack and then
+  jumping to the general guard-recovery code.  That code will save the
+  registers into the jitframe and then exit the whole generated
+  function.  The caller of that generated function checks how it
+  finished: if it finished by hitting a guard, then the caller is
+  responsible for calling the "blackhole interpreter".  This is the part
+  of the front-end that recovers from failing guards and finishes
+  running the frame (including, possibly, by jumping again into
+  generated assembler).
+
+
+Details about the JITting process:
+
+* front-end and optimization pass
+
+* rewrite (includes GC-related transformations as well as simplifications)
+
+* assembler generation
+
+
+Front-end and optimization pass
+-------------------------------
+
+Not discussed here in detail.  This produces loops and bridges using an
+instruction set that is "high-level" in some sense: it contains
+instructions like "new"/"new_array", and
+"setfield"/"setarrayitem"/"setinteriorfield" which describe the action
+of storing a value in a specific field of a structure or array.  For
+example, the "setfield" action might implicitly require a GC write
+barrier.  This is the high-level trace that we send to the following
+step.
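+
+A hedged example of what "high-level" means here (the names are
+simplified and the trace line in the comment is illustrative, not
+literal JIT output)::
+
+    class Node(object):
+        def __init__(self, next):
+            self.next = next
+
+    def link(a, b):
+        # shows up in the high-level trace as something like
+        #   setfield_gc(p0, p1, descr=<Field Node.next>)
+        # whether this needs a GC write barrier is still implicit here
+        a.next = b
+
+    link(Node(None), Node(None))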
+
+
+Rewrite
+-------
+
+A mostly but not completely CPU-independent phase: lowers some
+instructions.  For example, the variants of "new" are lowered to
+"malloc" and a few "gc_store": it bumps the pointer of the GC and then
+sets a few fields explicitly in the newly allocated structure.  The
+"setfield" is replaced with a "cond_gc_wb_call" (conditional call to the
+write barrier) if needed, followed by a "gc_store".
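+
+A conceptual sketch of this lowering (plain Python, not the real rewrite
+code; the operation tuples, offsets and sizes below are made up)::
+
+    def rewrite_new_array(length, itemsize, header_size, type_id):
+        ops = []
+        total_size = header_size + itemsize * length
+        ops.append(('malloc', total_size))             # bump the nursery pointer
+        ops.append(('gc_store', 'result', 0, type_id)) # fill in the GC header
+        ops.append(('gc_store', 'result', 8, length))  # fill in the length field
+        return ops
+
+    # e.g. an array of 10 machine words with a (made-up) 16-byte header:
+    print(rewrite_new_array(10, 8, 16, type_id=0x1234))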
+
+The "gc_store" instruction can be encoded in a single MOV assembler
+instruction, but is not as flexible as a MOV.  The address is always
+specified as "some GC pointer + an offset".  We don't have the notion of
+interior pointers for GC objects.
+
+A different instruction, "gc_store_indexed", offers additional operands,
+which can be mapped to a single MOV instruction using forms like
+`[rax+8*rcx+24]`.
+
+Some other complex instructions pass through to the backend, which must
+deal with them: for example, "card marking" in the GC.  (Writing an
+object pointer inside an array would require walking the whole array
+later to find "young" references. Instead of that, we flip a bit for
+every range of 128 entries.  This is a common GC optimization.)  Setting
+the card bit of a GC object requires a sequence of assembler
+instructions that depends too much on the target CPU to be expressed
+explicitly here (moreover, it contains a few branches, which are hard to
+express at this level).
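+
+As a minimal illustration of the card-marking arithmetic only (not the
+actual GC code; the layout of the card area is made up), assuming one
+card bit per 128 array entries and 8 card bits packed per byte::
+
+    CARD_SIZE = 128            # array entries covered by one card bit
+    CARD_BITS_PER_BYTE = 8
+
+    def card_position(index):
+        card = index // CARD_SIZE                # which card covers this entry
+        byte_ofs = card // CARD_BITS_PER_BYTE    # byte inside the card area
+        bit_mask = 1 << (card % CARD_BITS_PER_BYTE)
+        return byte_ofs, bit_mask
+
+    # a write at index 300 falls into card 2, i.e. byte 0, bit 2:
+    assert card_position(300) == (0, 1 << 2)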
+
+
+Assembly
+--------
+
+No fancy code generation techniques: a greedy forward pass that tries
+to avoid some pitfalls.
+
+
+Handling instructions
+~~~~~~~~~~~~~~~~~~~~~
+
+* One by one (forward direction).  Each instruction asks the register
+  allocator to ensure that some arguments are in registers (not in the
+  jitframe); asks for a register to put its result into; and asks for
+  additional scratch registers that will be freed at the end of the
+  instruction.  There is a special case for boolean variables: they are
+  stored in the condition code flags instead of being materialized as a
+  0/1 value.  (They are materialized later, except in the common case
+  where they are only used by the next `guard_false` or `guard_true` and
+  then forgotten.)
+
+* Instruction arguments are loaded into a register on demand.  This
+  makes the backend quite easy to write, but leads to some bad
+  decisions.
+
+
+Linear scan register allocation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Although it's always a linear trace that we consider, we don't use
+advanced techniques for register allocation: we do forward, on-demand
+allocation as the backend produces the assembler.  When it asks for a
+register to put some value into, we give it any free register, without
+consideration for what will be done with it later.  We compute the
+longevity of all variables, but only use it when choosing which register
+to spill (we spill the variable with the longest longevity).
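+
+A toy model of this policy (not the real register allocator; the class
+and the numbers are made up for illustration)::
+
+    class ToyAllocator(object):
+        def __init__(self, registers, longevity):
+            self.free = list(registers)    # free machine registers
+            self.bindings = {}             # variable -> register
+            self.longevity = longevity     # variable -> position of last use
+
+        def get_register(self, var):
+            if var in self.bindings:
+                return self.bindings[var]
+            if not self.free:
+                # spill the live variable with the longest longevity
+                victim = max(self.bindings, key=lambda v: self.longevity[v])
+                self.free.append(self.bindings.pop(victim))
+                # (the real backend would also emit a store into the jitframe)
+            reg = self.free.pop()
+            self.bindings[var] = reg
+            return reg
+
+    alloc = ToyAllocator(['r1', 'r2'], {'a': 10, 'b': 3, 'c': 7})
+    alloc.get_register('a')
+    alloc.get_register('b')
+    alloc.get_register('c')    # no free register: 'a' (longevity 10) is spilled
+    assert 'a' not in alloc.bindings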
+
+This works to some extent because it is well integrated with the earlier
+optimization pass. Loops are unrolled once by the optimization pass to
+allow more powerful optimizations---the optimization pass itself is the
+place that benefits the most, but it also has benefits here in the
+assembly pass.  These are:
+
+* The first peeled iteration initializes the register binding of each
+  variable on its first use.
+
+* This means that when the trace loop proper is reached, its values are
+  already sitting in allocated registers.
+
+* The same applies to the registers that are already allocated when
+  exiting bridges.
+
+[Try to better allocate registers to match the ABI (minor to no benefit
+in the current state)]
+
+
+More complex mappings
+~~~~~~~~~~~~~~~~~~~~~
+
+Some instructions generate more complex code.  These are either or both of:
+
+* complex instructions generating some local control flow, like
+  "cond_gc_wb_call" (for write barriers), "call_assembler" (a call
+  followed by a few checks).
+
+* instructions that invoke custom assembler helpers, like the slow-path
+  of write barriers or the slow-path of allocations.  These slow-paths
+  are typically generated too, so that we are not constrained by the
+  usual calling conventions.
+
+
+GC pointers
+~~~~~~~~~~~
+
+Around most CALL instructions, we need to record a description of where
+the GC pointers are (registers and stack frame).  This is needed in case
+the CALL invokes a garbage collection.  The GC pointers can move; the
+pointers in the registers and stack frame are updated by the GC.  That's
+one reason why we don't have explicit interior pointers.
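+
+A toy illustration of the idea (not PyPy's actual encoding): describe as
+a bitmask which register numbers hold GC pointers at a CALL site, so that
+a moving GC can find and update exactly those::
+
+    def encode_gcmap(gc_register_numbers):
+        bits = 0
+        for regno in gc_register_numbers:
+            bits |= 1 << regno
+        return bits
+
+    def decode_gcmap(bits):
+        return [regno for regno in range(64) if bits & (1 << regno)]
+
+    assert decode_gcmap(encode_gcmap([1, 5, 13])) == [1, 5, 13]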
+
+GC pointers can appear as constants in the trace.  We are busy changing
+that to use a constant table and `MOV REG, (%RIP+offset)`.  The
+"constant" in the table is actually updated by the GC if the object
+moves.
+
+
+Vectorization
+~~~~~~~~~~~~~
+
+An optimization developed to use SIMD instructions for trace loops.  The
+primary idea was to use it to speed up micronumpy.  It runs several
+passes over the already-optimized trace.
+
+In short: it builds dependencies for an unrolled trace loop, gathers
+pairs/packs of operations that could be executed in parallel, and
+finally schedules the operations.
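+
+A much-simplified sketch of the "pairing" idea (not the real vectorizer;
+the tuple representation of operations is made up)::
+
+    def find_pairs(trace):
+        # pack two copies of the same element-wise operation that touch
+        # adjacent array indices into one candidate SIMD pair
+        pairs = []
+        for a in trace:
+            for b in trace:
+                if (a is not b and a[0] == b[0]      # same operation
+                        and a[1] == b[1]             # same array
+                        and b[2] == a[2] + 1):       # adjacent indices
+                    pairs.append((a, b))
+        return pairs
+
+    # (op, array, index) triples from a trace unrolled once:
+    unrolled = [('load', 'a', 0), ('load', 'a', 1),
+                ('store', 'a', 0), ('store', 'a', 1)]
+    assert find_pairs(unrolled) == [(('load', 'a', 0), ('load', 'a', 1)),
+                                    (('store', 'a', 0), ('store', 'a', 1))]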
+
+What it added to the code base:
+
+* Dependencies between trace operations can be constructed
+
+* Code motion of guards to relax dependencies
+
+* A scheduler to reorder the trace
+
+* Array bounds check removal (especially for unrolled traces)
+
+What it can do:
+
+* Transform vector loops (element-wise operations)
+
+* Accumulation (`reduce([...], operator, 0)`).  Requires the operation to
+  be associative and commutative (see the small illustration after this
+  list)
+
+* SSE 4.1 as "vector backend"
+
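+As a plain-Python illustration of the associativity/commutativity
+requirement mentioned in the accumulation item above (nothing
+JIT-specific; the two-lane split mimics what a SIMD reduction does)::
+
+    import operator
+
+    def reduce_two_lanes(values, op, zero):
+        # accumulate even and odd elements separately, as a 2-lane
+        # SIMD unit would, then combine the two partial results
+        lanes = [zero, zero]
+        for i, v in enumerate(values):
+            lanes[i % 2] = op(lanes[i % 2], v)
+        return op(lanes[0], lanes[1])
+
+    data = [1, 2, 3, 4, 5]
+    assert reduce_two_lanes(data, operator.add, 0) == sum(data)
+    # with a non-associative operator the reordering changes the result:
+    assert reduce_two_lanes(data, operator.sub, 0) != (0 - 1 - 2 - 3 - 4 - 5)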
+
+We do not
+~~~~~~~~~
+
+* Keep tracing data around to reoptimize the trace tree. (Once a trace
+  is compiled, minimal data is kept.)  This is one reason (there are
+  others in the front-end) for the following result: JIT-compiling a
+  small loop with two common paths ends up as one "loop" and one bridge
+  assembled, and the bridge-following path is slightly less efficient.
+  This is notably because this bridge is assembled with two constraints:
+  the input registers are fixed (from the guard), and the output
+  registers are fixed (from the jump target); usually these two sets of
+  fixed registers are different, and copying around is needed.
+
+* We don't join trace tails: we only assemble *trees*.
+
+* We don't do any reordering (neither of trace instructions nor of
+  individual assembler instructions)
+
+* We don't do any cross-instruction optimization that makes sense only
+  for the backend and can't easily be expressed at a higher level.  I'm
+  sure there are tons of examples of that, but e.g. loading a large
+  constant in a register that will survive for several instructions;
+  moving out of loops *parts* of some instruction like the address
+  calculation; etc. etc.
+
+* Other optimization opportunities I can think about: look at the
+  function prologue/epilogue; look at the overhead (small but not zero)
+  at the start of a bridge.  Also check if the way guards are
+  implemented makes sense.  Also, we generate large-ish sequences of
+  assembler instructions with tons of `Jcond` that are almost never
+  followed; any optimization opportunity there?  (They all go forward,
+  if it changes anything.)  In theory we could also replace some of
+  these with a signal handler on segfault (e.g. `guard_nonnull_class`).
+
+
+A GCC or LLVM backend?
+~~~~~~~~~~~~~~~~~~~~~~
+
+At least for comparison we'd like a JIT backend that emits its code
+using GCC or LLVM (irrespective of the time it would take).  But it's
+hard to map the guards reasonably well to the C language or to LLVM IR.
+The problems are: (1) we have many guards, and we would like to avoid
+having many paths that each do a full
+saving-all-local-variables-that-are-still-alive; (2) it's hard to patch
+a guard when a bridge is compiled from it; (3) instructions like a CALL
+need to expose the local variables that are GC pointers; CALL_MAY_FORCE
+needs to expose *all* local variables for optional off-line
+reconstruction of the interpreter state.
+
diff --git a/rpython/doc/jit/index.rst b/rpython/doc/jit/index.rst
--- a/rpython/doc/jit/index.rst
+++ b/rpython/doc/jit/index.rst
@@ -26,6 +26,7 @@
    optimizer
    virtualizable
    vectorization
+   backend
 
 - :doc:`Overview <overview>`: motivating our approach
 
@@ -34,5 +35,8 @@
 - :doc:`Optimizer <optimizer>`: the step between tracing and writing
   machine code
 
-- :doc:`Virtulizable <virtualizable>` how virtualizables work and what they are
-  (in other words how to make frames more efficient).
+- :doc:`Virtualizable <virtualizable>`: how virtualizables work and what
+  they are (in other words how to make frames more efficient).
+
+- :doc:`Assembler backend <backend>`: draft notes about the organization
+  of the assembler backends
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -710,6 +710,12 @@
 
 # 6. a conditional call (for now always with no exception raised)
 class CondCallOperation(BaseCallOperation):
+
+    def filter(self, builder):
+        if not builder.cpu.supports_cond_call_value and \
+           self.opnum == rop.COND_CALL_VALUE_I:
+            raise CannotProduceOperation
+
     def produce_into(self, builder, r):
         fail_subset = builder.subset_of_intvars(r)
         if self.opnum == rop.COND_CALL:
diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py
--- a/rpython/rlib/clibffi.py
+++ b/rpython/rlib/clibffi.py
@@ -359,12 +359,13 @@
     tpe.members[n] = lltype.nullptr(FFI_TYPE_P.TO)
     return tpe
 
+@specialize.memo()
 def cast_type_to_ffitype(tp):
     """ This function returns ffi representation of rpython type tp
     """
     return TYPE_MAP[tp]
-cast_type_to_ffitype._annspecialcase_ = 'specialize:memo'
 
+@specialize.argtype(1)
 def push_arg_as_ffiptr(ffitp, arg, ll_buf):
     # This is for primitive types.  Note that the exact type of 'arg' may be
     # different from the expected 'c_size'.  To cope with that, we fall back
@@ -396,7 +397,6 @@
                 arg >>= 8
         else:
             raise AssertionError
-push_arg_as_ffiptr._annspecialcase_ = 'specialize:argtype(1)'
 
 
 # type defs for callback and closure userdata
@@ -470,12 +470,12 @@
 FUNCFLAG_USE_ERRNO = 8
 FUNCFLAG_USE_LASTERROR = 16
 
+@specialize.arg(1)     # hack :-/
 def get_call_conv(flags, from_jit):
     if _WIN32 and not _WIN64 and (flags & FUNCFLAG_CDECL == 0):
         return FFI_STDCALL
     else:
         return FFI_DEFAULT_ABI
-get_call_conv._annspecialcase_ = 'specialize:arg(1)'     # hack :-/
 
 
 class AbstractFuncPtr(object):
@@ -599,6 +599,7 @@
         else:
             self.restype_size = -1
 
+    @specialize.argtype(1)
     def push_arg(self, value):
         #if self.pushed_args == self.argnum:
         #    raise TypeError("Too many arguments, eats %d, pushed %d" %
@@ -618,7 +619,6 @@
         push_arg_as_ffiptr(self.argtypes[self.pushed_args], value,
                            self.ll_args[self.pushed_args])
         self.pushed_args += 1
-    push_arg._annspecialcase_ = 'specialize:argtype(1)'
 
     def _check_args(self):
         if self.pushed_args < self.argnum:
@@ -627,6 +627,7 @@
     def _clean_args(self):
         self.pushed_args = 0
 
+    @specialize.arg(1)
     def call(self, RES_TP):
         self._check_args()
         ffires = c_ffi_call(self.ll_cif, self.funcsym,
@@ -645,7 +646,6 @@
         self._clean_args()
         check_fficall_result(ffires, self.flags)
         return res
-    call._annspecialcase_ = 'specialize:arg(1)'
 
     def __del__(self):
         if self.ll_args:
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -280,6 +280,7 @@
 
 
 @oopspec("jit.isconstant(value)")
+@specialize.call_location()
 def isconstant(value):
     """
     While tracing, returns whether or not the value is currently known to be
@@ -289,9 +290,9 @@
     This is for advanced usage only.
     """
     return NonConstant(False)
-isconstant._annspecialcase_ = "specialize:call_location"
 
 @oopspec("jit.isvirtual(value)")
+@specialize.call_location()
 def isvirtual(value):
     """
     Returns if this value is virtual, while tracing, it's relatively
@@ -300,7 +301,6 @@
     This is for advanced usage only.
     """
     return NonConstant(False)
-isvirtual._annspecialcase_ = "specialize:call_location"
 
 @specialize.call_location()
 def loop_unrolling_heuristic(lst, size, cutoff=2):
@@ -401,28 +401,27 @@
         hop.exception_cannot_occur()
         return hop.inputconst(lltype.Signed, _we_are_jitted)
 
-
+@oopspec('jit.current_trace_length()')
 def current_trace_length():
     """During JIT tracing, returns the current trace length (as a constant).
     If not tracing, returns -1."""
     if NonConstant(False):
         return 73
     return -1
-current_trace_length.oopspec = 'jit.current_trace_length()'
 
+@oopspec('jit.debug(string, arg1, arg2, arg3, arg4)')
 def jit_debug(string, arg1=-sys.maxint-1, arg2=-sys.maxint-1,
                       arg3=-sys.maxint-1, arg4=-sys.maxint-1):
     """When JITted, cause an extra operation JIT_DEBUG to appear in
     the graphs.  Should not be left after debugging."""
     keepalive_until_here(string) # otherwise the whole function call is removed
-jit_debug.oopspec = 'jit.debug(string, arg1, arg2, arg3, arg4)'
 
+@oopspec('jit.assert_green(value)')
+@specialize.argtype(0)
 def assert_green(value):
     """Very strong assert: checks that 'value' is a green
     (a JIT compile-time constant)."""
     keepalive_until_here(value)
-assert_green._annspecialcase_ = 'specialize:argtype(0)'
-assert_green.oopspec = 'jit.assert_green(value)'
 
 class AssertGreenFailed(Exception):
     pass
@@ -457,6 +456,7 @@
 # ____________________________________________________________
 # VRefs
 
+@oopspec('virtual_ref(x)')
 @specialize.argtype(0)
 def virtual_ref(x):
     """Creates a 'vref' object that contains a reference to 'x'.  Calls
@@ -467,14 +467,13 @@
     dereferenced (by the call syntax 'vref()'), it returns 'x', which is
     then forced."""
     return DirectJitVRef(x)
-virtual_ref.oopspec = 'virtual_ref(x)'
 
+@oopspec('virtual_ref_finish(x)')
 @specialize.argtype(1)
 def virtual_ref_finish(vref, x):
     """See docstring in virtual_ref(x)"""
     keepalive_until_here(x)   # otherwise the whole function call is removed
     _virtual_ref_finish(vref, x)
-virtual_ref_finish.oopspec = 'virtual_ref_finish(x)'
 
 def non_virtual_ref(x):
     """Creates a 'vref' that just returns x when called; nothing more special.
@@ -825,6 +824,7 @@
     jit_opencoder_model
     """
 
+@specialize.arg(0)
 def set_user_param(driver, text):
     """Set the tunable JIT parameters from a user-supplied string
     following the format 'param=value,param=value', or 'off' to
@@ -860,7 +860,6 @@
                     break
             else:
                 raise ValueError
-set_user_param._annspecialcase_ = 'specialize:arg(0)'
 
 # ____________________________________________________________
 #
diff --git a/rpython/rlib/listsort.py b/rpython/rlib/listsort.py
--- a/rpython/rlib/listsort.py
+++ b/rpython/rlib/listsort.py
@@ -1,4 +1,5 @@
 from rpython.rlib.rarithmetic import ovfcheck
+from rpython.rlib.objectmodel import specialize
 
 
 ## ------------------------------------------------------------------------
@@ -141,6 +142,12 @@
         # or, IOW, the first k elements of a should precede key, and the last
         # n-k should follow key.
 
+        # hint for the annotator: the argument 'rightmost' is always passed in as
+        # a constant (either True or False), so we can specialize the function for
+        # the two cases.  (This is actually needed for technical reasons: the
+        # variable 'lower' must contain a known method, which is the case in each
+        # specialized version but not in the unspecialized one.)
+        @specialize.arg(4)
         def gallop(self, key, a, hint, rightmost):
             assert 0 <= hint < a.len
             if rightmost:
@@ -212,12 +219,6 @@
             assert lastofs == ofs         # so a[ofs-1] < key <= a[ofs]
             return ofs
 
-        # hint for the annotator: the argument 'rightmost' is always passed in as
-        # a constant (either True or False), so we can specialize the function for
-        # the two cases.  (This is actually needed for technical reasons: the
-        # variable 'lower' must contain a known method, which is the case in each
-        # specialized version but not in the unspecialized one.)
-        gallop._annspecialcase_ = "specialize:arg(4)"
 
         # ____________________________________________________________
 
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -37,6 +37,7 @@
 from rpython.rlib import objectmodel
 from rpython.flowspace.model import Constant, const
 from rpython.flowspace.specialcase import register_flow_sc
+from rpython.rlib.objectmodel import specialize
 
 """
 Long-term target:
@@ -135,14 +136,15 @@
     # We deal directly with overflow there anyway.
     return r_longlonglong(n)
 
+@specialize.argtype(0)
 def widen(n):
     from rpython.rtyper.lltypesystem import lltype
     if _should_widen_type(lltype.typeOf(n)):
         return intmask(n)
     else:
         return n
-widen._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def _should_widen_type(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Bool:
@@ -153,19 +155,18 @@
     assert issubclass(r_class, base_int)
     return r_class.BITS < LONG_BIT or (
         r_class.BITS == LONG_BIT and r_class.SIGNED)
-_should_widen_type._annspecialcase_ = 'specialize:memo'
 
 # the replacement for sys.maxint
 maxint = int(LONG_TEST - 1)
 # for now, it should be equal to sys.maxint on all supported platforms
 assert maxint == sys.maxint
 
+@specialize.argtype(0)
 def is_valid_int(r):
     if objectmodel.we_are_translated():
         return isinstance(r, int)
     return isinstance(r, (base_int, int, long, bool)) and (
         -maxint - 1 <= r <= maxint)
-is_valid_int._annspecialcase_ = 'specialize:argtype(0)'
 
 def ovfcheck(r):
     "NOT_RPYTHON"
@@ -225,12 +226,12 @@
         return build_int(None, self_type.SIGNED, max(self_type.BITS, other_type.BITS))
     raise AssertionError("Merging these types (%s, %s) is not supported" % (self_type, other_type))
 
+@specialize.memo()
 def signedtype(t):
     if t in (bool, int, long):
         return True
     else:
         return t.SIGNED
-signedtype._annspecialcase_ = 'specialize:memo'
 
 def normalizedinttype(t):
     if t is int:
@@ -241,11 +242,12 @@
         assert t.BITS <= r_longlong.BITS
         return build_int(None, t.SIGNED, r_longlong.BITS)
 
+@specialize.argtype(0)
 def most_neg_value_of_same_type(x):
     from rpython.rtyper.lltypesystem import lltype
     return most_neg_value_of(lltype.typeOf(x))
-most_neg_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def most_neg_value_of(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -256,13 +258,13 @@
         return r_class(-(r_class.MASK >> 1) - 1)
     else:
         return r_class(0)
-most_neg_value_of._annspecialcase_ = 'specialize:memo'
 
+@specialize.argtype(0)
 def most_pos_value_of_same_type(x):
     from rpython.rtyper.lltypesystem import lltype
     return most_pos_value_of(lltype.typeOf(x))
-most_pos_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
 
+@specialize.memo()
 def most_pos_value_of(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -273,8 +275,8 @@
         return r_class(r_class.MASK >> 1)
     else:
         return r_class(r_class.MASK)
-most_pos_value_of._annspecialcase_ = 'specialize:memo'
 
+@specialize.memo()
 def is_signed_integer_type(tp):
     from rpython.rtyper.lltypesystem import lltype, rffi
     if tp is lltype.Signed:
@@ -284,7 +286,6 @@
         return r_class.SIGNED
     except KeyError:
         return False   # not an integer type
-is_signed_integer_type._annspecialcase_ = 'specialize:memo'
 
 def highest_bit(n):
     """
@@ -676,7 +677,7 @@
     from rpython.rtyper.lltypesystem.lloperation import llop
     return llop.int_mod(lltype.Signed, x, y)
 
-@objectmodel.specialize.ll()
+@specialize.ll()
 def byteswap(arg):
     """ Convert little->big endian and the opposite
     """
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -90,16 +90,16 @@
 
 FIVEARY_CUTOFF = 8
 
+@specialize.argtype(0)
 def _mask_digit(x):
     return UDIGIT_MASK(x & MASK)
-_mask_digit._annspecialcase_ = 'specialize:argtype(0)'
 
 def _widen_digit(x):
     return rffi.cast(LONG_TYPE, x)
 
+@specialize.argtype(0)
 def _store_digit(x):
     return rffi.cast(STORE_TYPE, x)
-_store_digit._annspecialcase_ = 'specialize:argtype(0)'
 
 def _load_unsigned_digit(x):
     return rffi.cast(UNSIGNED_TYPE, x)
@@ -175,11 +175,11 @@
         return _load_unsigned_digit(self._digits[x])
     udigit._always_inline_ = True
 
+    @specialize.argtype(2)
     def setdigit(self, x, val):
         val = _mask_digit(val)
         assert val >= 0
         self._digits[x] = _store_digit(val)
-    setdigit._annspecialcase_ = 'specialize:argtype(2)'
     setdigit._always_inline_ = True
 
     def numdigits(self):
@@ -1312,6 +1312,7 @@
 
     return res
 
+@specialize.argtype(0)
 def digits_from_nonneg_long(l):
     digits = []
     while True:
@@ -1319,8 +1320,8 @@
         l = l >> SHIFT
         if not l:
             return digits[:] # to make it non-resizable
-digits_from_nonneg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def digits_for_most_neg_long(l):
     # This helper only works if 'l' is the most negative integer of its
     # type, which in base 2 looks like: 1000000..0000
@@ -1335,8 +1336,8 @@
     assert l & MASK == l
     digits.append(_store_digit(l))
     return digits[:] # to make it non-resizable
-digits_for_most_neg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def args_from_rarith_int1(x):
     if x > 0:
         return digits_from_nonneg_long(x), 1
@@ -1348,11 +1349,10 @@
     else:
         # the most negative integer! hacks needed...
         return digits_for_most_neg_long(x), -1
-args_from_rarith_int1._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
 def args_from_rarith_int(x):
     return args_from_rarith_int1(widen(x))
-args_from_rarith_int._annspecialcase_ = "specialize:argtype(0)"
 # ^^^ specialized by the precise type of 'x', which is typically a r_xxx
 #     instance from rlib.rarithmetic
 
@@ -1909,6 +1909,7 @@
         i += 1
     return borrow
 
+@specialize.argtype(2)
 def _muladd1(a, n, extra=0):
     """Multiply by a single digit and add a single digit, ignoring the sign.
     """
@@ -1926,7 +1927,7 @@
     z.setdigit(i, carry)
     z._normalize()
     return z
-_muladd1._annspecialcase_ = "specialize:argtype(2)"
+
 def _v_lshift(z, a, m, d):
     """ Shift digit vector a[0:m] d bits left, with 0 <= d < SHIFT. Put
         * result in z[0:m], and return the d bits shifted out of the top.
@@ -2178,6 +2179,7 @@
         ad = -ad
     return ad
 
+@specialize.arg(0)
 def _loghelper(func, arg):
     """
     A decent logarithm is easy to compute even for huge bigints, but libm can't
@@ -2195,7 +2197,6 @@
     # CAUTION:  e*SHIFT may overflow using int arithmetic,
     # so force use of double. */
     return func(x) + (e * float(SHIFT) * func(2.0))
-_loghelper._annspecialcase_ = 'specialize:arg(0)'
 
 # ____________________________________________________________
 
@@ -2519,6 +2520,7 @@
     return output.build()
 
 
+@specialize.arg(1)
 def _bitwise(a, op, b): # '&', '|', '^'
     """ Bitwise and/or/xor operations """
 
@@ -2598,8 +2600,8 @@
         return z
 
     return z.invert()
-_bitwise._annspecialcase_ = "specialize:arg(1)"
-
+
+@specialize.arg(1)
 def _int_bitwise(a, op, b): # '&', '|', '^'
     """ Bitwise and/or/xor operations """
 
@@ -2682,7 +2684,6 @@
         return z
 
     return z.invert()
-_int_bitwise._annspecialcase_ = "specialize:arg(1)"
 
 ULONGLONG_BOUND = r_ulonglong(1L << (r_longlong.BITS-1))
 LONGLONG_MIN = r_longlong(-(1L << (r_longlong.BITS-1)))
diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py
--- a/rpython/rlib/rmmap.py
+++ b/rpython/rlib/rmmap.py
@@ -10,7 +10,7 @@
 from rpython.rtyper.lltypesystem import rffi, lltype
 from rpython.rlib import rposix
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.rlib.nonconst import NonConstant
 from rpython.rlib.rarithmetic import intmask
 
@@ -239,12 +239,12 @@
     _, _VirtualProtect_safe = winexternal('VirtualProtect',
                                   [rffi.VOIDP, rffi.SIZE_T, DWORD, LPDWORD],
                                   BOOL)
+    @specialize.ll()
     def VirtualProtect(addr, size, mode, oldmode_ptr):
         return _VirtualProtect_safe(addr,
                                rffi.cast(rffi.SIZE_T, size),
                                rffi.cast(DWORD, mode),
                                oldmode_ptr)
-    VirtualProtect._annspecialcase_ = 'specialize:ll'
     VirtualFree, VirtualFree_safe = winexternal('VirtualFree',
                               [rffi.VOIDP, rffi.SIZE_T, DWORD], BOOL)
 
diff --git a/rpython/rlib/rstruct/runpack.py b/rpython/rlib/rstruct/runpack.py
--- a/rpython/rlib/rstruct/runpack.py
+++ b/rpython/rlib/rstruct/runpack.py
@@ -7,6 +7,7 @@
 from struct import unpack
 from rpython.rlib.rstruct.formatiterator import FormatIterator
 from rpython.rlib.rstruct.error import StructError
+from rpython.rlib.objectmodel import specialize
 
 class MasterReader(object):
     def __init__(self, s):
@@ -99,14 +100,14 @@
         self._create_unpacking_func()
         return True
 
+@specialize.memo()
 def create_unpacker(unpack_str):
     fmtiter = FrozenUnpackIterator(unpack_str)
     fmtiter.interpret(unpack_str)
     assert fmtiter._freeze_()
     return fmtiter
-create_unpacker._annspecialcase_ = 'specialize:memo'
 
+@specialize.arg(0)
 def runpack(fmt, input):
     unpacker = create_unpacker(fmt)
     return unpacker.unpack(input)
-runpack._annspecialcase_ = 'specialize:arg(0)'