Author: Armin Rigo <[email protected]>
Branch: py3k
Changeset: r87293:f854ee56f616
Date: 2016-09-21 22:33 +0200
http://bitbucket.org/pypy/pypy/changeset/f854ee56f616/
Log: hg merge default
diff --git a/rpython/doc/jit/backend.rst b/rpython/doc/jit/backend.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/jit/backend.rst
@@ -0,0 +1,263 @@
+=========================
+PyPy's assembler backends
+=========================
+
+Draft notes about the organization of assembler backends in the PyPy JIT, in 2016
+==================================================================================
+
+
+Input: a linear sequence of instructions, called a "trace".
+
+A trace is a sequence of instructions in SSA form. Most instructions
+correspond to one or a few CPU-level instructions. There are a few
+meta-instructions like `label` and debugging stuff. All branching is
+done with guards, which are instructions that check that a condition is
+true and exit the trace if not. A failing guard can have a new trace
+added to it later, called a "bridge". A patched guard becomes a direct
+`Jcond` instruction going to the bridge, with no indirection, no
+register spilling, etc.
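+
+For illustration, a small trace fragment might look like this (schematic,
+in roughly the notation of the JIT logs; the operation names and the
+failargs in square brackets are only illustrative)::
+
+    i2 = int_add(i0, i1)
+    i3 = int_lt(i2, 1000)
+    guard_true(i3) [i0, i2]     # exits the trace if i3 is false
+    ...
+    jump(i2)                    # to a label in this or some older trace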
+
+A trace ends with either a `return` or a `jump to label`. The target
+label is either inside the same trace, or in some older one. For
+historical reasons we call a "loop" a trace that is not a bridge. The
+machine code that we generate is organized as a forest of trees; the
+trunk of each tree is a "loop", and the branches are all bridges
+(branching off the trunk or off another branch).
+
+* every trunk or branch that ends in a `jump to label` can target a
+ label from a different tree, too.
+
+* the whole process of assembling a loop or a branch is basically
+ single-threaded, so there is no synchronization issue (including when
+ patching older generated instructions).
+
+* the generated assembler has a "frame" in %rbp, which is actually
+ not on the stack at all, but is a GC object (called a "jitframe").
+ Spilling goes there.
+
+* the guards are `Jcond` to a very small piece of generated code, which
+ is basically pushing a couple of constants on the stack and then
+ jumping to the general guard-recovery code. That code will save the
+ registers into the jitframe and then exit the whole generated
+ function. The caller of that generated function checks how it
+ finished: if it finished by hitting a guard, then the caller is
+ responsible for calling the "blackhole interpreter". This is the part
+ of the front-end that recovers from failing guards and finishes
+ running the frame (including, possibly, by jumping again into
+ generated assembler).
+
+
+Details about the JITting process:
+
+* front-end and optimization pass
+
+* rewrite (includes GC-related transformations as well as simplifications)
+
+* assembler generation
+
+
+Front-end and optimization pass
+-------------------------------
+
+Not discussed here in detail. This produces loops and bridges using an
+instruction set that is "high-level" in some sense: it contains
+instructions like "new"/"new_array", and
+"setfield"/"setarrayitem"/"setinteriorfield" which describe the action
+of storing a value in a precise field of the structure or array. For
+example, the "setfield" action might require implicitly a GC write
+barrier. This is the high-level trace that we send to the following
+step.
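+
+For illustration, a fragment of such a high-level trace could look like
+this (schematic; the operation names follow the style of the JIT logs and
+the descr contents are elided)::
+
+    p1 = new(descr=<SizeDescr ...>)
+    setfield_gc(p1, i0, descr=<FieldDescr ...>)
+    setarrayitem_gc(p2, i1, p1, descr=<ArrayDescr ...>)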
+
+
+Rewrite
+-------
+
+A mostly but not completely CPU-independent phase: lowers some
+instructions. For example, the variants of "new" are lowered to
+"malloc" and a few "gc_store": it bumps the pointer of the GC and then
+sets a few fields explicitly in the newly allocated structure. The
+"setfield" is replaced with a "cond_gc_wb_call" (conditional call to the
+write barrier) if needed, followed by a "gc_store".
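+
+Schematically, rewriting a "new" followed by a "setfield" on some older
+object could produce something like the following (using the simplified
+names from this document; the sizes and offsets are invented for
+illustration)::
+
+    p1 = malloc(32)                  # bump the GC's nursery pointer by 32 bytes
+    gc_store(p1, 0, <type id>, 8)    # fill the GC header explicitly
+    gc_store(p1, 16, i0, 8)          # the lowered "setfield" on the new object
+    cond_gc_wb_call(p0)              # write barrier for the older object p0
+    gc_store(p0, 24, p1, 8)          # the lowered "setfield" on p0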
+
+The "gc_store" instruction can be encoded in a single MOV assembler
+instruction, but is not as flexible as a MOV. The address is always
+specified as "some GC pointer + an offset". We don't have the notion of
+interior pointer for GC objects.
+
+A different instruction, "gc_store_indexed", offers additional operands,
+which can be mapped to a single MOV instruction using forms like
+`[rax+8*rcx+24]`.
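+
+For instance, a "gc_store_indexed" whose extra operands are an index, a
+scale of 8 and an offset of 24 can be emitted as a single x86-64
+instruction along these lines (registers picked arbitrarily)::
+
+    MOV [RAX + 8*RCX + 24], RDX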
+
+Some other complex instructions pass through to the backend, which must
+deal with them: for example, "card marking" in the GC. (Writing an
+object pointer inside an array would require walking the whole array
+later to find "young" references. Instead of that, we flip a bit for
+every range of 128 entries. This is a common GC optimization.) Setting
+the card bit of a GC object requires a sequence of assembler
+instructions that depends too much on the target CPU to be expressed
+explicitly here (moreover, it contains a few branches, which are hard to
+express at this level).
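+
+As a rough sketch of the arithmetic involved (not the backend's actual
+code, which is CPU-specific, and ignoring where the card bytes really
+live), marking the card for a store at a given array index could look
+like this, with 128 entries per card as described above::
+
+    CARD_SIZE = 128            # array entries covered by one card bit
+
+    def mark_card(card_bytes, array_index):
+        card = array_index // CARD_SIZE
+        byte_index, bit = card // 8, card % 8
+        card_bytes[byte_index] |= (1 << bit)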
+
+
+Assembly
+--------
+
+No fancy code generation technique: a greedy forward pass that tries
+to avoid some pitfalls.
+
+
+Handling instructions
+~~~~~~~~~~~~~~~~~~~~~
+
+* One by one (forward direction). Each instruction asks the register
+ allocator to ensure that some arguments are in registers (not in the
+ jitframe); asks for a register to put its result into; and asks for
+ additional scratch registers that will be freed at the end of the
+ instruction. There is a special case for boolean variables: they are
+ stored in the condition code flags instead of being materialized as a
+ 0/1 value. (They are materialized later, except in the common case
+ where they are only used by the next `guard_false` or `guard_true` and
+ then forgotten.)
+
+* Instruction arguments are loaded into a register on demand. This
+ makes the backend quite easy to write, but leads to some bad
+ decisions.
+
+
+Linear scan register allocation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Although it's always a linear trace that we consider, we don't use
+advanced techniques for register allocation: we do forward, on-demand
+allocation as the backend produces the assembler. When it asks for a
+register to put some value into, we give it any free register, without
+consideration for what will be done with it later. We compute the
+longevity of all variables, but only use it when choosing which register
+to spill (we spill the variable with the longest longevity).
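+
+A minimal sketch of that spilling heuristic (illustrative Python, not the
+register allocator's actual code)::
+
+    def pick_variable_to_spill(bindings, longevity):
+        # 'bindings' maps the variables currently held in registers to
+        # their register; 'longevity[v]' is the index of the last
+        # instruction that uses v.  Spill the one that stays alive longest.
+        return max(bindings, key=lambda v: longevity[v])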
+
+This on-demand scheme works to some extent because it is well integrated with the earlier
+optimization pass. Loops are unrolled once by the optimization pass to
+allow more powerful optimizations---the optimization pass itself is the
+place that benefits the most, but it also has benefits here in the
+assembly pass. These are:
+
+* The first peeling initializes the register bindings on first use.
+
+* This leads to registers that are already allocated at the label of the
+ trace loop.
+
+* The same applies to the registers already allocated when exiting
+ through bridges.
+
+[Try to better allocate registers to match the ABI (minor to no benefit
+in the current state)]
+
+
+More complex mappings
+~~~~~~~~~~~~~~~~~~~~~
+
+Some instructions generate more complex code. These are either or both of:
+
+* complex instructions generating some local control flow, like
+ "cond_gc_wb_call" (for write barriers), "call_assembler" (a call
+ followed by a few checks).
+
+* instructions that invoke custom assembler helpers, like the slow-path
+ of write barriers or the slow-path of allocations. These slow-paths
+ are typically generated too, so that we are not constrained by the
+ usual calling conventions.
+
+
+GC pointers
+~~~~~~~~~~~
+
+Around most CALL instructions, we need to record a description of where
+the GC pointers are (registers and stack frame). This is needed in case
+the CALL invokes a garbage collection. The GC pointers can move; the
+pointers in the registers and stack frame are updated by the GC. That's
+one reason why we don't have explicit interior pointers.
+
+GC pointers can appear as constants in the trace. We are busy changing
+that to use a constant table and `MOV REG, (%RIP+offset)`. The
+"constant" in the table is actually updated by the GC if the object
+moves.
+
+
+Vectorization
+~~~~~~~~~~~~~
+
+An optimization developed to use SIMD instructions for trace loops. The
+primary idea was to use it to speed up micro numpy. It runs several
+passes over the already optimized trace.
+
+In short: it builds the dependencies for an unrolled trace loop, gathers
+pairs/packs of operations that could be executed in parallel, and finally
+schedules the operations.
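+
+For instance, pairs of scalar operations from the unrolled loop can be
+packed into single vector operations, roughly like this (hypothetical
+operation names)::
+
+    # two unrolled iterations of c[i] = a[i] + b[i] ...
+    f1 = raw_load_f(p_a, i0)
+    f2 = raw_load_f(p_b, i0)
+    f3 = float_add(f1, f2)
+    raw_store(p_c, i0, f3)
+    f4 = raw_load_f(p_a, i1)
+    f5 = raw_load_f(p_b, i1)
+    f6 = float_add(f4, f5)
+    raw_store(p_c, i1, f6)
+
+    # ... are packed, pair by pair, into vector operations:
+    v1 = vec_load_f(p_a, i0, 2)
+    v2 = vec_load_f(p_b, i0, 2)
+    v3 = vec_float_add(v1, v2)
+    vec_store(p_c, i0, v3, 2)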
+
+What it added to the code base:
+
+* Dependencies can be constructed
+
+* Code motion of guards to relax dependencies
+
+* Scheduler to reorder trace
+
+* Array bound check removal (especially for unrolled traces)
+
+What can it do:
+
+* Transform vector loops (element-wise operations)
+
+* Accumulation (`reduce([...], operator, 0)`). Requires the operation to
+ be associative and commutative.
+
+* SSE 4.1 as "vector backend"
+
+
+We do not
+~~~~~~~~~
+
+* Keep tracing data around to reoptimize the trace tree. (Once a trace
+ is compiled, minimal data is kept.) This is one reason (there are
+ others in the front-end) for the following result: JIT-compiling a
+ small loop with two common paths ends up as one "loop" and one bridge
+ assembled, and the bridge-following path is slightly less efficient.
+ This is notably because this bridge is assembled with two constraints:
+ the input registers are fixed (from the guard), and the output
+ registers are fixed (from the jump target); usually these two sets of
+ fixed registers are different, and copying around is needed.
+
+* We don't join trace tails: we only assemble *trees*.
+
+* We don't do any reordering (neither of trace instructions nor of
+ individual assembler instructions)
+
+* We don't do any cross-instruction optimization that makes sense only
+ for the backend and can't easily be expressed at a higher level. I'm
+ sure there are tons of examples of that, but e.g. loading a large
+ constant in a register that will survive for several instructions;
+ moving out of loops *parts* of some instruction like the address
+ calculation; etc. etc.
+
+* Other optimization opportunities I can think of: look at the
+ function prologue/epilogue; look at the overhead (small but not zero)
+ at the start of a bridge. Also check if the way guards are
+ implemented makes sense. Also, we generate large-ish sequences of
+ assembler instructions with tons of `Jcond` that are almost never
+ followed; any optimization opportunity there? (They all go forward,
+ if it changes anything.) In theory we could also replace some of
+ these with a signal handler on segfault (e.g. `guard_nonnull_class`).
+
+
+A GCC or LLVM backend?
+~~~~~~~~~~~~~~~~~~~~~~
+
+At least for comparison we'd like a JIT backend that emits its code
+using GCC or LLVM (irrespective of the time it would take). But it's
+hard to map the guards reasonably well to the C language or to LLVM IR.
+The problems are: (1) we have many guards, we would like to avoid having
+many paths that each do a full
+saving-all-local-variables-that-are-still-alive; (2) it's hard to patch
+a guard when a bridge is compiled from it; (3) instructions like a CALL
+need to expose the local variables that are GC pointers; CALL_MAY_FORCE
+needs to expose *all* local variables for optional off-line
+reconstruction of the interpreter state.
+
diff --git a/rpython/doc/jit/index.rst b/rpython/doc/jit/index.rst
--- a/rpython/doc/jit/index.rst
+++ b/rpython/doc/jit/index.rst
@@ -26,6 +26,7 @@
optimizer
virtualizable
vectorization
+ backend
- :doc:`Overview <overview>`: motivating our approach
@@ -34,5 +35,8 @@
- :doc:`Optimizer <optimizer>`: the step between tracing and writing
machine code
-- :doc:`Virtulizable <virtualizable>` how virtualizables work and what they are
- (in other words how to make frames more efficient).
+- :doc:`Virtualizable <virtualizable>`: how virtualizables work and what
+ they are (in other words how to make frames more efficient).
+
+- :doc:`Assembler backend <backend>`: draft notes about the organization
+ of the assembler backends
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -710,6 +710,12 @@
# 6. a conditional call (for now always with no exception raised)
class CondCallOperation(BaseCallOperation):
+
+ def filter(self, builder):
+ if not builder.cpu.supports_cond_call_value and \
+ self.opnum == rop.COND_CALL_VALUE_I:
+ raise CannotProduceOperation
+
def produce_into(self, builder, r):
fail_subset = builder.subset_of_intvars(r)
if self.opnum == rop.COND_CALL:
diff --git a/rpython/jit/backend/test/zll_stress.py b/rpython/jit/backend/test/zll_stress.py
--- a/rpython/jit/backend/test/zll_stress.py
+++ b/rpython/jit/backend/test/zll_stress.py
@@ -1,6 +1,7 @@
from rpython.jit.backend.test.test_random import check_random_function, Random
from rpython.jit.backend.test.test_ll_random import LLtypeOperationBuilder
from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.jit.metainterp.resoperation import rop
import platform
CPU = getcpuclass()
diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py
--- a/rpython/rlib/clibffi.py
+++ b/rpython/rlib/clibffi.py
@@ -359,12 +359,13 @@
tpe.members[n] = lltype.nullptr(FFI_TYPE_P.TO)
return tpe
+@specialize.memo()
def cast_type_to_ffitype(tp):
""" This function returns ffi representation of rpython type tp
"""
return TYPE_MAP[tp]
-cast_type_to_ffitype._annspecialcase_ = 'specialize:memo'
+@specialize.argtype(1)
def push_arg_as_ffiptr(ffitp, arg, ll_buf):
# This is for primitive types. Note that the exact type of 'arg' may be
# different from the expected 'c_size'. To cope with that, we fall back
@@ -396,7 +397,6 @@
arg >>= 8
else:
raise AssertionError
-push_arg_as_ffiptr._annspecialcase_ = 'specialize:argtype(1)'
# type defs for callback and closure userdata
@@ -470,12 +470,12 @@
FUNCFLAG_USE_ERRNO = 8
FUNCFLAG_USE_LASTERROR = 16
+@specialize.arg(1) # hack :-/
def get_call_conv(flags, from_jit):
if _WIN32 and not _WIN64 and (flags & FUNCFLAG_CDECL == 0):
return FFI_STDCALL
else:
return FFI_DEFAULT_ABI
-get_call_conv._annspecialcase_ = 'specialize:arg(1)' # hack :-/
class AbstractFuncPtr(object):
@@ -599,6 +599,7 @@
else:
self.restype_size = -1
+ @specialize.argtype(1)
def push_arg(self, value):
#if self.pushed_args == self.argnum:
# raise TypeError("Too many arguments, eats %d, pushed %d" %
@@ -618,7 +619,6 @@
push_arg_as_ffiptr(self.argtypes[self.pushed_args], value,
self.ll_args[self.pushed_args])
self.pushed_args += 1
- push_arg._annspecialcase_ = 'specialize:argtype(1)'
def _check_args(self):
if self.pushed_args < self.argnum:
@@ -627,6 +627,7 @@
def _clean_args(self):
self.pushed_args = 0
+ @specialize.arg(1)
def call(self, RES_TP):
self._check_args()
ffires = c_ffi_call(self.ll_cif, self.funcsym,
@@ -645,7 +646,6 @@
self._clean_args()
check_fficall_result(ffires, self.flags)
return res
- call._annspecialcase_ = 'specialize:arg(1)'
def __del__(self):
if self.ll_args:
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -280,6 +280,7 @@
@oopspec("jit.isconstant(value)")
+@specialize.call_location()
def isconstant(value):
"""
While tracing, returns whether or not the value is currently known to be
@@ -289,9 +290,9 @@
This is for advanced usage only.
"""
return NonConstant(False)
-isconstant._annspecialcase_ = "specialize:call_location"
@oopspec("jit.isvirtual(value)")
+@specialize.call_location()
def isvirtual(value):
"""
Returns if this value is virtual, while tracing, it's relatively
@@ -300,7 +301,6 @@
This is for advanced usage only.
"""
return NonConstant(False)
-isvirtual._annspecialcase_ = "specialize:call_location"
@specialize.call_location()
def loop_unrolling_heuristic(lst, size, cutoff=2):
@@ -401,28 +401,27 @@
hop.exception_cannot_occur()
return hop.inputconst(lltype.Signed, _we_are_jitted)
-
+@oopspec('jit.current_trace_length()')
def current_trace_length():
"""During JIT tracing, returns the current trace length (as a constant).
If not tracing, returns -1."""
if NonConstant(False):
return 73
return -1
-current_trace_length.oopspec = 'jit.current_trace_length()'
+@oopspec('jit.debug(string, arg1, arg2, arg3, arg4)')
def jit_debug(string, arg1=-sys.maxint-1, arg2=-sys.maxint-1,
arg3=-sys.maxint-1, arg4=-sys.maxint-1):
"""When JITted, cause an extra operation JIT_DEBUG to appear in
the graphs. Should not be left after debugging."""
keepalive_until_here(string) # otherwise the whole function call is removed
-jit_debug.oopspec = 'jit.debug(string, arg1, arg2, arg3, arg4)'
+@oopspec('jit.assert_green(value)')
+@specialize.argtype(0)
def assert_green(value):
"""Very strong assert: checks that 'value' is a green
(a JIT compile-time constant)."""
keepalive_until_here(value)
-assert_green._annspecialcase_ = 'specialize:argtype(0)'
-assert_green.oopspec = 'jit.assert_green(value)'
class AssertGreenFailed(Exception):
pass
@@ -457,6 +456,7 @@
# ____________________________________________________________
# VRefs
+@oopspec('virtual_ref(x)')
@specialize.argtype(0)
def virtual_ref(x):
"""Creates a 'vref' object that contains a reference to 'x'. Calls
@@ -467,14 +467,13 @@
dereferenced (by the call syntax 'vref()'), it returns 'x', which is
then forced."""
return DirectJitVRef(x)
-virtual_ref.oopspec = 'virtual_ref(x)'
+@oopspec('virtual_ref_finish(x)')
@specialize.argtype(1)
def virtual_ref_finish(vref, x):
"""See docstring in virtual_ref(x)"""
keepalive_until_here(x) # otherwise the whole function call is removed
_virtual_ref_finish(vref, x)
-virtual_ref_finish.oopspec = 'virtual_ref_finish(x)'
def non_virtual_ref(x):
"""Creates a 'vref' that just returns x when called; nothing more special.
@@ -831,6 +830,7 @@
jit_opencoder_model
"""
+@specialize.arg(0)
def set_user_param(driver, text):
"""Set the tunable JIT parameters from a user-supplied string
following the format 'param=value,param=value', or 'off' to
@@ -866,7 +866,6 @@
break
else:
raise ValueError
-set_user_param._annspecialcase_ = 'specialize:arg(0)'
# ____________________________________________________________
#
diff --git a/rpython/rlib/listsort.py b/rpython/rlib/listsort.py
--- a/rpython/rlib/listsort.py
+++ b/rpython/rlib/listsort.py
@@ -1,4 +1,5 @@
from rpython.rlib.rarithmetic import ovfcheck
+from rpython.rlib.objectmodel import specialize
## ------------------------------------------------------------------------
@@ -141,6 +142,12 @@
# or, IOW, the first k elements of a should precede key, and the last
# n-k should follow key.
+ # hint for the annotator: the argument 'rightmost' is always passed in as
+ # a constant (either True or False), so we can specialize the function for
+ # the two cases. (This is actually needed for technical reasons: the
+ # variable 'lower' must contain a known method, which is the case in each
+ # specialized version but not in the unspecialized one.)
+ @specialize.arg(4)
def gallop(self, key, a, hint, rightmost):
assert 0 <= hint < a.len
if rightmost:
@@ -212,12 +219,6 @@
assert lastofs == ofs # so a[ofs-1] < key <= a[ofs]
return ofs
- # hint for the annotator: the argument 'rightmost' is always passed in as
- # a constant (either True or False), so we can specialize the function for
- # the two cases. (This is actually needed for technical reasons: the
- # variable 'lower' must contain a known method, which is the case in each
- # specialized version but not in the unspecialized one.)
- gallop._annspecialcase_ = "specialize:arg(4)"
# ____________________________________________________________
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -37,6 +37,7 @@
from rpython.rlib import objectmodel
from rpython.flowspace.model import Constant, const
from rpython.flowspace.specialcase import register_flow_sc
+from rpython.rlib.objectmodel import specialize
"""
Long-term target:
@@ -135,14 +136,15 @@
# We deal directly with overflow there anyway.
return r_longlonglong(n)
+@specialize.argtype(0)
def widen(n):
from rpython.rtyper.lltypesystem import lltype
if _should_widen_type(lltype.typeOf(n)):
return intmask(n)
else:
return n
-widen._annspecialcase_ = 'specialize:argtype(0)'
+@specialize.memo()
def _should_widen_type(tp):
from rpython.rtyper.lltypesystem import lltype, rffi
if tp is lltype.Bool:
@@ -153,19 +155,18 @@
assert issubclass(r_class, base_int)
return r_class.BITS < LONG_BIT or (
r_class.BITS == LONG_BIT and r_class.SIGNED)
-_should_widen_type._annspecialcase_ = 'specialize:memo'
# the replacement for sys.maxint
maxint = int(LONG_TEST - 1)
# for now, it should be equal to sys.maxint on all supported platforms
assert maxint == sys.maxint
+@specialize.argtype(0)
def is_valid_int(r):
if objectmodel.we_are_translated():
return isinstance(r, int)
return isinstance(r, (base_int, int, long, bool)) and (
-maxint - 1 <= r <= maxint)
-is_valid_int._annspecialcase_ = 'specialize:argtype(0)'
def ovfcheck(r):
"NOT_RPYTHON"
@@ -225,12 +226,12 @@
return build_int(None, self_type.SIGNED, max(self_type.BITS,
other_type.BITS))
raise AssertionError("Merging these types (%s, %s) is not supported" %
(self_type, other_type))
+@specialize.memo()
def signedtype(t):
if t in (bool, int, long):
return True
else:
return t.SIGNED
-signedtype._annspecialcase_ = 'specialize:memo'
def normalizedinttype(t):
if t is int:
@@ -241,11 +242,12 @@
assert t.BITS <= r_longlong.BITS
return build_int(None, t.SIGNED, r_longlong.BITS)
+@specialize.argtype(0)
def most_neg_value_of_same_type(x):
from rpython.rtyper.lltypesystem import lltype
return most_neg_value_of(lltype.typeOf(x))
-most_neg_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
+@specialize.memo()
def most_neg_value_of(tp):
from rpython.rtyper.lltypesystem import lltype, rffi
if tp is lltype.Signed:
@@ -256,13 +258,13 @@
return r_class(-(r_class.MASK >> 1) - 1)
else:
return r_class(0)
-most_neg_value_of._annspecialcase_ = 'specialize:memo'
+@specialize.argtype(0)
def most_pos_value_of_same_type(x):
from rpython.rtyper.lltypesystem import lltype
return most_pos_value_of(lltype.typeOf(x))
-most_pos_value_of_same_type._annspecialcase_ = 'specialize:argtype(0)'
+@specialize.memo()
def most_pos_value_of(tp):
from rpython.rtyper.lltypesystem import lltype, rffi
if tp is lltype.Signed:
@@ -273,8 +275,8 @@
return r_class(r_class.MASK >> 1)
else:
return r_class(r_class.MASK)
-most_pos_value_of._annspecialcase_ = 'specialize:memo'
+@specialize.memo()
def is_signed_integer_type(tp):
from rpython.rtyper.lltypesystem import lltype, rffi
if tp is lltype.Signed:
@@ -284,7 +286,6 @@
return r_class.SIGNED
except KeyError:
return False # not an integer type
-is_signed_integer_type._annspecialcase_ = 'specialize:memo'
def highest_bit(n):
"""
@@ -676,7 +677,7 @@
from rpython.rtyper.lltypesystem.lloperation import llop
return llop.int_mod(lltype.Signed, x, y)
[email protected]()
[email protected]()
def byteswap(arg):
""" Convert little->big endian and the opposite
"""
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -90,16 +90,16 @@
FIVEARY_CUTOFF = 8
+@specialize.argtype(0)
def _mask_digit(x):
return UDIGIT_MASK(x & MASK)
-_mask_digit._annspecialcase_ = 'specialize:argtype(0)'
def _widen_digit(x):
return rffi.cast(LONG_TYPE, x)
+@specialize.argtype(0)
def _store_digit(x):
return rffi.cast(STORE_TYPE, x)
-_store_digit._annspecialcase_ = 'specialize:argtype(0)'
def _load_unsigned_digit(x):
return rffi.cast(UNSIGNED_TYPE, x)
@@ -175,11 +175,11 @@
return _load_unsigned_digit(self._digits[x])
udigit._always_inline_ = True
+ @specialize.argtype(2)
def setdigit(self, x, val):
val = _mask_digit(val)
assert val >= 0
self._digits[x] = _store_digit(val)
- setdigit._annspecialcase_ = 'specialize:argtype(2)'
setdigit._always_inline_ = True
def numdigits(self):
@@ -1312,6 +1312,7 @@
return res
+@specialize.argtype(0)
def digits_from_nonneg_long(l):
digits = []
while True:
@@ -1319,8 +1320,8 @@
l = l >> SHIFT
if not l:
return digits[:] # to make it non-resizable
-digits_from_nonneg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
def digits_for_most_neg_long(l):
# This helper only works if 'l' is the most negative integer of its
# type, which in base 2 looks like: 1000000..0000
@@ -1335,8 +1336,8 @@
assert l & MASK == l
digits.append(_store_digit(l))
return digits[:] # to make it non-resizable
-digits_for_most_neg_long._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
def args_from_rarith_int1(x):
if x > 0:
return digits_from_nonneg_long(x), 1
@@ -1348,11 +1349,10 @@
else:
# the most negative integer! hacks needed...
return digits_for_most_neg_long(x), -1
-args_from_rarith_int1._annspecialcase_ = "specialize:argtype(0)"
-
+
+@specialize.argtype(0)
def args_from_rarith_int(x):
return args_from_rarith_int1(widen(x))
-args_from_rarith_int._annspecialcase_ = "specialize:argtype(0)"
# ^^^ specialized by the precise type of 'x', which is typically a r_xxx
# instance from rlib.rarithmetic
@@ -1909,6 +1909,7 @@
i += 1
return borrow
+@specialize.argtype(2)
def _muladd1(a, n, extra=0):
"""Multiply by a single digit and add a single digit, ignoring the sign.
"""
@@ -1926,7 +1927,7 @@
z.setdigit(i, carry)
z._normalize()
return z
-_muladd1._annspecialcase_ = "specialize:argtype(2)"
+
def _v_lshift(z, a, m, d):
""" Shift digit vector a[0:m] d bits left, with 0 <= d < SHIFT. Put
* result in z[0:m], and return the d bits shifted out of the top.
@@ -2178,6 +2179,7 @@
ad = -ad
return ad
+@specialize.arg(0)
def _loghelper(func, arg):
"""
A decent logarithm is easy to compute even for huge bigints, but libm can't
@@ -2195,7 +2197,6 @@
# CAUTION: e*SHIFT may overflow using int arithmetic,
# so force use of double. */
return func(x) + (e * float(SHIFT) * func(2.0))
-_loghelper._annspecialcase_ = 'specialize:arg(0)'
# ____________________________________________________________
@@ -2519,6 +2520,7 @@
return output.build()
+@specialize.arg(1)
def _bitwise(a, op, b): # '&', '|', '^'
""" Bitwise and/or/xor operations """
@@ -2598,8 +2600,8 @@
return z
return z.invert()
-_bitwise._annspecialcase_ = "specialize:arg(1)"
-
+
+@specialize.arg(1)
def _int_bitwise(a, op, b): # '&', '|', '^'
""" Bitwise and/or/xor operations """
@@ -2682,7 +2684,6 @@
return z
return z.invert()
-_int_bitwise._annspecialcase_ = "specialize:arg(1)"
ULONGLONG_BOUND = r_ulonglong(1L << (r_longlong.BITS-1))
LONGLONG_MIN = r_longlong(-(1L << (r_longlong.BITS-1)))
diff --git a/rpython/rlib/rmmap.py b/rpython/rlib/rmmap.py
--- a/rpython/rlib/rmmap.py
+++ b/rpython/rlib/rmmap.py
@@ -10,7 +10,7 @@
from rpython.rtyper.lltypesystem import rffi, lltype
from rpython.rlib import rposix
from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.rlib.nonconst import NonConstant
from rpython.rlib.rarithmetic import intmask
@@ -239,12 +239,12 @@
_, _VirtualProtect_safe = winexternal('VirtualProtect',
[rffi.VOIDP, rffi.SIZE_T, DWORD, LPDWORD],
BOOL)
+ @specialize.ll()
def VirtualProtect(addr, size, mode, oldmode_ptr):
return _VirtualProtect_safe(addr,
rffi.cast(rffi.SIZE_T, size),
rffi.cast(DWORD, mode),
oldmode_ptr)
- VirtualProtect._annspecialcase_ = 'specialize:ll'
VirtualFree, VirtualFree_safe = winexternal('VirtualFree',
[rffi.VOIDP, rffi.SIZE_T, DWORD], BOOL)
diff --git a/rpython/rlib/rstruct/runpack.py b/rpython/rlib/rstruct/runpack.py
--- a/rpython/rlib/rstruct/runpack.py
+++ b/rpython/rlib/rstruct/runpack.py
@@ -7,6 +7,7 @@
from struct import unpack
from rpython.rlib.rstruct.formatiterator import FormatIterator
from rpython.rlib.rstruct.error import StructError
+from rpython.rlib.objectmodel import specialize
class MasterReader(object):
def __init__(self, s):
@@ -99,14 +100,14 @@
self._create_unpacking_func()
return True
+@specialize.memo()
def create_unpacker(unpack_str):
fmtiter = FrozenUnpackIterator(unpack_str)
fmtiter.interpret(unpack_str)
assert fmtiter._freeze_()
return fmtiter
-create_unpacker._annspecialcase_ = 'specialize:memo'
+@specialize.arg(0)
def runpack(fmt, input):
unpacker = create_unpacker(fmt)
return unpacker.unpack(input)
-runpack._annspecialcase_ = 'specialize:arg(0)'