Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77958:cec809035d0c
Date: 2015-06-08 15:25 +0200
http://bitbucket.org/pypy/pypy/changeset/cec809035d0c/
Log: moved out guard strengthening (on arith level) and scheduling from
vectorize.py and schedule.py
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py
b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -736,79 +736,6 @@
return dot
raise NotImplementedError("dot only for debug purpose")
-class SchedulerData(object):
- pass
-class Scheduler(object):
- def __init__(self, graph, sched_data):
- assert isinstance(sched_data, SchedulerData)
- self.graph = graph
- self.schedulable_nodes = self.graph.schedulable_nodes
- self.sched_data = sched_data
-
- def has_more(self):
- return len(self.schedulable_nodes) > 0
-
- def next(self, position):
- i = self._next(self.schedulable_nodes)
- if i >= 0:
- candidate = self.schedulable_nodes[i]
- del self.schedulable_nodes[i]
- return self.schedule(candidate, position)
-
- raise RuntimeError("schedule failed cannot continue")
-
- def _next(self, candidate_list):
- i = len(candidate_list)-1
- while i >= 0:
- candidate = candidate_list[i]
- if candidate.emitted:
- del candidate_list[i]
- i -= 1
- continue
- if self.schedulable(candidate):
- return i
- i -= 1
- return -1
-
- def schedulable(self, candidate):
- if candidate.pack:
- for node in candidate.pack.operations:
- if node.depends_count() > 0:
- return False
- return candidate.depends_count() == 0
-
- def schedule(self, candidate, position):
- if candidate.pack:
- pack = candidate.pack
- vops = self.sched_data.as_vector_operation(pack)
- for node in pack.operations:
- self.scheduled(node, position)
- return vops
- else:
- self.scheduled(candidate, position)
- return [candidate.getoperation()]
-
- def scheduled(self, node, position):
- node.position = position
- for dep in node.provides()[:]: # COPY
- to = dep.to
- node.remove_edge_to(to)
- if not to.emitted and to.depends_count() == 0:
- # sorts them by priority
- nodes = self.schedulable_nodes
- i = len(nodes)-1
- while i >= 0:
- itnode = nodes[i]
- if itnode.priority < to.priority:
- nodes.insert(i+1, to)
- break
- i -= 1
- else:
- nodes.insert(0, to)
- node.clear_dependencies()
- node.emitted = True
-
-
class IntegralForwardModification(object):
""" Calculates integral modifications on integer boxes. """
def __init__(self, memory_refs, index_vars, comparison_vars,
invariant_vars):
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py
b/rpython/jit/metainterp/optimizeopt/guard.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -0,0 +1,224 @@
+"""
+NOTE: this guard strengthening optimization is only used in the vecopt.
+It also needs the information about integral modifications
+gathered with IntegralForwardModification.
+"""
+
+class Guard(object):
+    """ Pairs a guard operation with the comparison that computes its
+        boolean argument, so that two guards can be checked for
+        implication.
+
+        index            -- position of the guard in the operation list
+        op               -- the guard operation itself
+        cmp_op           -- operation producing the guarded boolean
+                            (None if the guard is not tracked)
+        lhs, rhs         -- the compared values: the IndexVar of an
+                            argument if one is known, else the argument
+        lhs_arg, rhs_arg -- the original arguments of cmp_op
+    """
+    def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+        self.index = index
+        self.op = op
+        self.cmp_op = cmp_op
+        self.lhs = lhs
+        self.lhs_arg = lhs_arg
+        self.rhs = rhs
+        self.rhs_arg = rhs_arg
+        # set when another guard makes this one redundant
+        self.implied = False
+        # set by inhert_attributes() when this guard replaces an earlier one
+        self.stronger = False
+
+    def implies(self, guard, opt):
+        """ Returns True if this guard implies the given guard. Guards
+            with a different guard opnum or a different comparison opnum
+            never imply each other.
+        """
+        if self.op.getopnum() != guard.op.getopnum():
+            return False
+
+        own_cmp_opnum = opt._get_key(self.cmp_op)[1]
+        other_cmp_opnum = opt._get_key(guard.cmp_op)[1]
+        if own_cmp_opnum != other_cmp_opnum:
+            return False
+
+        # same operation. compare() result:
+        #   negative: first < second
+        #   zero:     first == second
+        #   positive: first > second
+        lc = self.compare(self.lhs, guard.lhs)
+        rc = self.compare(self.rhs, guard.rhs)
+        opnum = self.get_compare_opnum()
+        if opnum == -1:
+            return False
+        if opnum == rop.INT_LT or opnum == rop.INT_LE:
+            return lc >= 0 and rc >= 0
+        if opnum == rop.INT_GT:
+            if lc < 0:
+                return rc >= 0
+            return lc == 0 and rc > 0
+        if opnum == rop.INT_GE:
+            return lc <= 0 and rc >= 0
+        return False
+
+    def get_compare_opnum(self):
+        """ Returns the opnum of the comparison for a guard_true, for
+            any other guard the 'boolinverse' of the comparison.
+        """
+        if self.op.getopnum() == rop.GUARD_TRUE:
+            return self.cmp_op.getopnum()
+        return self.cmp_op.boolinverse
+
+    def inhert_attributes(self, other):
+        """ Marks this guard as the stronger one and takes over the index,
+            the descr attributes, rd_frame_info_list, rd_snapshot and the
+            fail args of the other guard.
+        """
+        own_op = self.op
+        other_op = other.op
+        assert isinstance(other_op, GuardResOp)
+        assert isinstance(own_op, GuardResOp)
+        self.stronger = True
+        self.index = other.index
+
+        own_op.getdescr().copy_all_attributes_from(other_op.getdescr())
+        own_op.rd_frame_info_list = other_op.rd_frame_info_list
+        own_op.rd_snapshot = other_op.rd_snapshot
+        own_op.setfailargs(other_op.getfailargs())
+
+    def compare(self, key1, key2):
+        """ Three way comparison of two values of the same kind (Box,
+            ConstInt or IndexVar). Raises AssertionError for anything else.
+        """
+        if isinstance(key1, Box):
+            assert isinstance(key2, Box)
+            assert key1 is key2 # key of hash enforces this
+            return 0
+        #
+        if isinstance(key1, ConstInt):
+            assert isinstance(key2, ConstInt)
+            v1 = key1.value
+            v2 = key2.value
+            if v1 < v2:
+                return -1
+            if v1 > v2:
+                return 1
+            return 0
+        #
+        if isinstance(key1, IndexVar):
+            assert isinstance(key2, IndexVar)
+            return key1.compare(key2)
+        #
+        raise AssertionError("cannot compare: " + str(key1) + " <=> " + str(key2))
+
+    def emit_varops(self, opt, var, old_arg):
+        """ For an IndexVar: emits its operations, renames old_arg to the
+            resulting box and returns that box. Anything else is returned
+            as it is.
+        """
+        if not isinstance(var, IndexVar):
+            return var
+        box = var.emit_operations(opt)
+        opt.renamer.start_renaming(old_arg, box)
+        return box
+
+    def emit_operations(self, opt):
+        """ Emits a fresh comparison followed by a clone of the guard
+            that checks the result of that comparison.
+        """
+        opnum = opt._get_key(self.cmp_op)[1]
+        # create trace instructions for the index
+        box_lhs = self.emit_varops(opt, self.lhs, self.lhs_arg)
+        box_rhs = self.emit_varops(opt, self.rhs, self.rhs_arg)
+        box_result = self.cmp_op.result.clonebox()
+        compare_op = ResOperation(opnum, [box_lhs, box_rhs], box_result)
+        opt.emit_operation(compare_op)
+        # guard
+        guard = self.op.clone()
+        guard.setarg(0, box_result)
+        opt.emit_operation(guard)
+
+
+class GuardStrengthenOpt(object):
+    """ Strengthens guard_true/guard_false operations that check an
+        integer comparison (INT_LT .. INT_GE). Guards that compare the
+        same variables with the same operation share a key. An implied
+        guard is dropped, a stronger later guard is emitted at the
+        position of the earlier one.
+
+        index_vars -- dictionary that is looked up with boxes (arguments
+                      of comparisons, results of operations) and yields
+                      IndexVar objects
+    """
+    def __init__(self, index_vars):
+        self.index_vars = index_vars
+        self._newoperations = []
+        self._same_as = {}
+
+    def find_compare_guard_bool(self, boolarg, operations, index):
+        """ Searches backwards, starting right before index, for the
+            operation whose result is boolarg. Raises AssertionError
+            if there is none.
+        """
+        i = index - 1
+        # most likely hit in the first iteration
+        while i >= 0: # include operations[0] as well
+            op = operations[i]
+            if op.result and op.result == boolarg:
+                return op
+            i -= 1
+
+        raise AssertionError("guard_true/false first arg not defined")
+
+    def _get_key(self, cmp_op):
+        """ Returns the tuple (lhs, comparison opnum, rhs) identifying the
+            guarded comparison. An argument that has an index variable is
+            replaced by the variable of that index variable. Returns
+            (None, 0, None) if cmp_op is not an integer comparison.
+        """
+        if cmp_op and rop.INT_LT <= cmp_op.getopnum() <= rop.INT_GE:
+            lhs_arg = cmp_op.getarg(0)
+            rhs_arg = cmp_op.getarg(1)
+            lhs_index_var = self.index_vars.get(lhs_arg, None)
+            rhs_index_var = self.index_vars.get(rhs_arg, None)
+
+            cmp_opnum = cmp_op.getopnum()
+            # get the key, this identifies the guarded operation
+            if lhs_index_var and rhs_index_var:
+                key = (lhs_index_var.getvariable(), cmp_opnum,
+                       rhs_index_var.getvariable())
+            elif lhs_index_var:
+                key = (lhs_index_var.getvariable(), cmp_opnum, rhs_arg)
+            elif rhs_index_var:
+                # use the variable (not the IndexVar object) like the
+                # branches above, otherwise two guards on the same
+                # variable never end up with the same key
+                key = (lhs_arg, cmp_opnum, rhs_index_var.getvariable())
+            else:
+                key = (lhs_arg, cmp_opnum, rhs_arg)
+            return key
+        return (None, 0, None)
+
+    def get_key(self, guard_bool, operations, i):
+        """ Key (see _get_key) of the comparison checked by guard_bool,
+            which is located at index i in operations.
+        """
+        cmp_op = self.find_compare_guard_bool(guard_bool.getarg(0),
+                                              operations, i)
+        return self._get_key(cmp_op)
+
+    def propagate_all_forward(self, loop):
+        """ strengthens the guards that protect an integral value.
+            The first pass decides for each guard_true/guard_false whether
+            it is kept, implied or replaced by a stronger one; the second
+            pass rebuilds the operation list and assigns it to
+            loop.operations.
+        """
+        strongest_guards = {}
+        guards = {}
+        # the guards are ordered. guards[i] is before guards[j] iff i < j
+        operations = loop.operations
+        for i,op in enumerate(operations):
+            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE,
+                                                   rop.GUARD_FALSE):
+                cmp_op = self.find_compare_guard_bool(op.getarg(0),
+                                                      operations, i)
+                key = self._get_key(cmp_op)
+                if key[0] is not None:
+                    lhs_arg = cmp_op.getarg(0)
+                    lhs = self.index_vars.get(lhs_arg, lhs_arg)
+                    rhs_arg = cmp_op.getarg(1)
+                    rhs = self.index_vars.get(rhs_arg, rhs_arg)
+                    guard = Guard(i, op, cmp_op,
+                                  lhs, lhs_arg,
+                                  rhs, rhs_arg)
+                    other = strongest_guards.get(key, None)
+                    if not other:
+                        strongest_guards[key] = guard
+                        # nothing known, at this position emit the guard
+                        guards[i] = guard
+                    else: # implicit index(strongest) < index(current)
+                        if guard.implies(other, self):
+                            guard.inhert_attributes(other)
+
+                            strongest_guards[key] = guard
+                            guards[other.index] = guard
+                            # do not mark as emit
+                            continue
+                        elif other.implies(guard, self):
+                            guard.implied = True
+                        # mark as emit
+                        guards[i] = guard
+                else:
+                    # guard_true/false that does not check an integer
+                    # comparison: emit it unchanged
+                    guards[i] = Guard(i, op, None, None, None, None, None)
+
+        strongest_guards = None
+        #
+        self.renamer = Renamer()
+        for i,op in enumerate(operations):
+            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE,
+                                                   rop.GUARD_FALSE):
+                guard = guards.get(i, None)
+                if not guard or guard.implied:
+                    # this guard is implied or marked as not emitted (= None)
+                    continue
+                if guard.stronger:
+                    guard.emit_operations(self)
+                    continue
+            if op.result:
+                index_var = self.index_vars.get(op.result, None)
+                if index_var:
+                    if not index_var.is_identity():
+                        index_var.emit_operations(self, op.result)
+                        continue
+            self.emit_operation(op)
+
+        loop.operations = self._newoperations[:]
+
+    def emit_operation(self, op):
+        """ Applies the current renaming to op and appends it to the
+            new operation list.
+        """
+        self.renamer.rename(op)
+        self._newoperations.append(op)
+
+
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -0,0 +1,266 @@
+
+class SchedulerData(object):
+    """ Base class of the data object a Scheduler is created with. """
+class Scheduler(object):
+    """ Hands out the nodes of a dependency graph one after another.
+        A node can be scheduled as soon as it (and, if it is part of a
+        pack, every node of that pack) depends on nothing anymore.
+    """
+    def __init__(self, graph, sched_data):
+        assert isinstance(sched_data, SchedulerData)
+        self.graph = graph
+        self.schedulable_nodes = self.graph.schedulable_nodes
+        self.sched_data = sched_data
+
+    def has_more(self):
+        """ True as long as the list of schedulable nodes is not empty """
+        return len(self.schedulable_nodes) > 0
+
+    def next(self, position):
+        """ Removes the next schedulable node from the list and returns
+            the list of operations emitted for it at the given position.
+            Raises AssertionError if no node can be scheduled.
+        """
+        idx = self._next(self.schedulable_nodes)
+        if idx < 0:
+            raise AssertionError("schedule failed cannot continue. possible reason: cycle")
+        candidate = self.schedulable_nodes.pop(idx)
+        return self.schedule(candidate, position)
+
+    def _next(self, candidate_list):
+        """ Walks the list from the back, drops already emitted nodes and
+            returns the index of the first schedulable node, -1 if none.
+        """
+        for idx in range(len(candidate_list)-1, -1, -1):
+            candidate = candidate_list[idx]
+            if candidate.emitted:
+                # only shifts entries behind idx, which are visited already
+                del candidate_list[idx]
+            elif self.schedulable(candidate):
+                return idx
+        return -1
+
+    def schedulable(self, candidate):
+        """ A node is schedulable if its depends count is zero. For a
+            packed node this must hold for all nodes of the pack.
+        """
+        pack = candidate.pack
+        if pack:
+            for member in pack.operations:
+                if member.depends_count() > 0:
+                    return False
+        return candidate.depends_count() == 0
+
+    def schedule(self, candidate, position):
+        """ Marks the candidate (or every node of its pack) as scheduled
+            and returns the operations to emit: the vector operations of
+            the pack or a list with the single operation of the node.
+        """
+        pack = candidate.pack
+        if not pack:
+            self.scheduled(candidate, position)
+            return [candidate.getoperation()]
+        vops = self.sched_data.as_vector_operation(pack)
+        for member in pack.operations:
+            self.scheduled(member, position)
+        return vops
+
+    def scheduled(self, node, position):
+        """ Records the position of node, removes its outgoing edges and
+            adds each successor that has no dependency left to the
+            schedulable nodes. Finally node is flagged as emitted.
+        """
+        node.position = position
+        nodes = self.schedulable_nodes
+        for dep in node.provides()[:]: # COPY
+            to = dep.to
+            node.remove_edge_to(to)
+            if to.emitted or to.depends_count() != 0:
+                continue
+            # sorts them by priority: insert right behind the last node
+            # with a lower priority, at the front if there is none
+            slot = len(nodes)
+            while slot > 0 and not (nodes[slot-1].priority < to.priority):
+                slot -= 1
+            nodes.insert(slot, to)
+        node.clear_dependencies()
+        node.emitted = True
+
+
+# Pack types used to describe the arguments and results of the
+# vector operation converters defined below.
+# NOTE(review): the part of this new file shown in the diff contains no
+# import for PackType, INT or FLOAT -- confirm they are in scope.
+# NOTE(review): the arguments look like (type, size, signed[, count]),
+# judging from the gettype()/getsize()/getcount()/signed uses below --
+# confirm against the PackType constructor.
+PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
+PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
+# NOTE(review): built with INT and True, exactly like PT_INT_GENERIC --
+# presumably FLOAT was intended; verify before changing.
+PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_INT64 = PackType(INT, 8, True)
+PT_INT32_2 = PackType(INT, 4, True, 2)
+PT_INT_GENERIC = PackType(INT, -1, True)
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
+
+# result pack types of the generic integer / float converters
+INT_RES = PT_INT_GENERIC
+FLOAT_RES = PT_FLOAT_GENERIC
+
+class OpToVectorOpConv(OpToVectorOp):
+    """ Converter for cast operations: one argument of the pack type
+        intype is turned into a result of the pack type outtype.
+    """
+    def __init__(self, intype, outtype):
+        self.from_size = intype.getsize()
+        self.to_size = outtype.getsize()
+        OpToVectorOp.__init__(self, (intype, ), outtype)
+
+    def determine_input_type(self, op):
+        """ the pack type of the first (and only) argument """
+        return self.arg_ptypes[0]
+
+    def determine_output_type(self, op):
+        """ the result pack type given to the constructor """
+        return self.result_ptype
+
+    def split_pack(self, pack):
+        """ A cast to a smaller size is handled by OpToVectorOp.
+            Otherwise the result is limited to the number of elements
+            of size to_size that fit into vec_reg_size.
+        """
+        if self.from_size > self.to_size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        first_op = pack.operations[0].getoperation()
+        _, vbox = self.sched_data.getvector_of_box(first_op.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.to_size <= vec_reg_size:
+            return len(pack.operations)
+        return vec_reg_size // self.to_size
+
+    def new_result_vector_box(self):
+        """ Creates the vector box for the result. The element count is
+            reduced if count * to_size exceeds vec_reg_size.
+        """
+        out = self.output_type
+        elem_size = self.to_size
+        elem_count = out.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if elem_count * elem_size > vec_reg_size:
+            elem_count = vec_reg_size // elem_size
+        return BoxVector(out.gettype(), elem_count, elem_size, out.signed)
+
+
+class SignExtToVectorOp(OpToVectorOp):
+    """ Converter for the sign extension operation. The target size is
+        the constant second argument of the operation; it is only known
+        after split_pack() has been called (-1 before).
+    """
+    def __init__(self, intype, outtype):
+        OpToVectorOp.__init__(self, intype, outtype)
+        self.size = -1
+
+    def split_pack(self, pack):
+        """ Reads the target size from the first operation of the pack
+            and stores it in self.size. A cast to a smaller size is
+            handled by OpToVectorOp, otherwise the result is limited to
+            the number of elements that fit into vec_reg_size.
+        """
+        first_op = pack.operations[0].getoperation()
+        sizearg = first_op.getarg(1)
+        assert isinstance(sizearg, ConstInt)
+        self.size = sizearg.value
+        if self.input_type.getsize() > self.size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        _, vbox = self.sched_data.getvector_of_box(first_op.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.size <= vec_reg_size:
+            return vbox.getcount()
+        return vec_reg_size // self.size
+
+    def new_result_vector_box(self):
+        """ Creates the vector box for the result: type of the output
+            type, count and signedness of the input type, element size
+            self.size. The count is reduced to fit into vec_reg_size.
+        """
+        box_type = self.output_type.gettype()
+        elem_count = self.input_type.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if elem_count * self.size > vec_reg_size:
+            elem_count = vec_reg_size // self.size
+        return BoxVector(box_type, elem_count, self.size,
+                         self.input_type.signed)
+
+
+class LoadToVectorLoad(OpToVectorOp):
+    """ Converter for load operations. There is no vector argument,
+        the result type is derived from the descr of the operation.
+    """
+    def __init__(self):
+        OpToVectorOp.__init__(self, (), PT_GENERIC)
+
+    def determine_input_type(self, op):
+        """ a load has no input pack type """
+        return None
+
+    def determine_output_type(self, op):
+        """ pack type for the descr of op and the vector register size """
+        vec_reg_size = self.sched_data.vec_reg_size
+        return PackType.by_descr(op.getdescr(), vec_reg_size)
+
+    def before_argument_transform(self, args):
+        """ appends the number of operations in the pack as a constant """
+        op_count = len(self.pack.operations)
+        args.append(ConstInt(op_count))
+
+    def getsplitsize(self):
+        """ the element size of the output type """
+        return self.output_type.getsize()
+
+    def new_result_vector_box(self):
+        """ Creates the vector box for the loaded values: one element
+            per operation of the pack.
+        """
+        out = self.output_type
+        return BoxVector(out.gettype(), len(self.pack.operations),
+                         out.getsize(), out.signed)
+
+
+class StoreToVectorStore(OpToVectorOp):
+    """ Converter for store operations. Only the third argument has a
+        pack type, there is no result. The input type is derived from
+        the descr of the operation.
+    """
+    def __init__(self):
+        OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
+        self.has_descr = True
+
+    def determine_input_type(self, op):
+        """ pack type for the descr of op and the vector register size """
+        vec_reg_size = self.sched_data.vec_reg_size
+        return PackType.by_descr(op.getdescr(), vec_reg_size)
+
+    def determine_output_type(self, op):
+        """ a store has no output pack type """
+        return None
+
+
+# Converter instances that are shared by several entries of the
+# ROP_ARG_RES_VECTOR table. The tuple lists the pack type of each
+# argument, the second parameter is the pack type of the result.
+INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
+FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
+FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
+LOAD_TRANS = LoadToVectorLoad()
+STORE_TRANS = StoreToVectorStore()
+
+# Maps a vector operation number to the converter that builds the vector
+# operation(s) for a pack. VecScheduleData.as_vector_operation looks up
+# the 'vector' attribute of the first operation of a pack in this table.
+# Note that the following definition is specific to x86 machines.
+ROP_ARG_RES_VECTOR = {
+ # integer arithmetic and bitwise operations
+ rop.VEC_INT_ADD: INT_OP_TO_VOP,
+ rop.VEC_INT_SUB: INT_OP_TO_VOP,
+ rop.VEC_INT_MUL: INT_OP_TO_VOP,
+ rop.VEC_INT_AND: INT_OP_TO_VOP,
+ rop.VEC_INT_OR: INT_OP_TO_VOP,
+ rop.VEC_INT_XOR: INT_OP_TO_VOP,
+
+ rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
+
+ # float operations; the comparison yields an integer result type
+ rop.VEC_FLOAT_ADD: FLOAT_OP_TO_VOP,
+ rop.VEC_FLOAT_SUB: FLOAT_OP_TO_VOP,
+ rop.VEC_FLOAT_MUL: FLOAT_OP_TO_VOP,
+ rop.VEC_FLOAT_TRUEDIV: FLOAT_OP_TO_VOP,
+ rop.VEC_FLOAT_ABS: FLOAT_SINGLE_ARG_OP_TO_VOP,
+ rop.VEC_FLOAT_NEG: FLOAT_SINGLE_ARG_OP_TO_VOP,
+ rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
INT_RES),
+
+ # memory operations
+ rop.VEC_RAW_LOAD: LOAD_TRANS,
+ rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
+ rop.VEC_RAW_STORE: STORE_TRANS,
+ rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
+
+ # casts, each given as (input pack type, output pack type)
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2,
PT_FLOAT_2),
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2,
PT_DOUBLE_2),
+ rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
+ rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
+}
+
+class VecScheduleData(SchedulerData):
+    """ Scheduler data of the vectorizer. Remembers which box lives in
+        which vector box (and at which offset) and collects the
+        invariant operations and invariant vector variables.
+    """
+    def __init__(self, vec_reg_size):
+        self.box_to_vbox = {}
+        self.vec_reg_size = vec_reg_size
+        self.invariant_oplist = []
+        self.invariant_vector_vars = []
+        self.expanded_map = {}
+
+    def as_vector_operation(self, pack):
+        """ Returns the list of operations that replaces the given pack.
+            Raises NotImplementedError if ROP_ARG_RES_VECTOR has no
+            converter for the vector operation of the pack.
+        """
+        assert len(pack.operations) > 1
+        self.pack = pack
+        # properties that hold for the pack are:
+        # + isomorphism (see func above)
+        # + tight packed (no room between vector elems)
+
+        first_op = pack.operations[0].getoperation()
+        converter = ROP_ARG_RES_VECTOR.get(first_op.vector, None)
+        if converter is None:
+            raise NotImplementedError("missing vecop for '%s'" % (first_op.getopname(),))
+        vector_ops = []
+        converter.as_vector_operation(pack, self, vector_ops)
+        return vector_ops
+
+    def getvector_of_box(self, arg):
+        """ (offset, vector box) of arg, (-1, None) if it is unknown """
+        return self.box_to_vbox.get(arg, (-1, None))
+
+    def setvector_of_box(self, box, off, vector):
+        """ remembers that box is located at offset off in vector """
+        self.box_to_vbox[box] = (off, vector)
+
+    def prepend_invariant_operations(self, oplist):
+        """ Without invariant operations oplist is returned as it is.
+            Otherwise the invariant vector variables are appended to the
+            arguments of the label (first operation) and of the jump
+            (last operation), and a new list is returned that starts
+            with the invariant operations.
+        """
+        if len(self.invariant_oplist) == 0:
+            return oplist
+
+        label = oplist[0]
+        assert label.getopnum() == rop.LABEL
+        jump = oplist[-1]
+        assert jump.getopnum() == rop.JUMP
+
+        label_args = label.getarglist()
+        jump_args = jump.getarglist()
+        for invariant in self.invariant_vector_vars:
+            label_args.append(invariant)
+            jump_args.append(invariant)
+
+        oplist[0] = label.copy_and_change(label.getopnum(), label_args,
+                                          None, label.getdescr())
+        oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args,
+                                          None, jump.getdescr())
+
+        return self.invariant_oplist + oplist
+
+
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -9,7 +9,9 @@
from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer,
Optimization
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
- MemoryRef, Scheduler, SchedulerData, Node, IndexVar)
+ MemoryRef, Node, IndexVar)
+from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleData,
Scheduler
+from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -509,224 +511,6 @@
rec_snap = self.rename_rd_snapshot(snapshot.prev, clone)
return Snapshot(rec_snap, boxes)
-class Guard(object):
- """ An object wrapper around a guard. Helps to determine
- if one guard implies another
- """
- def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
- self.index = index
- self.op = op
- self.cmp_op = cmp_op
- self.lhs = lhs
- self.rhs = rhs
- self.lhs_arg = lhs_arg
- self.rhs_arg = rhs_arg
- self.implied = False
- self.stronger = False
-
- def implies(self, guard, opt):
- if self.op.getopnum() != guard.op.getopnum():
- return False
-
- my_key = opt._get_key(self.cmp_op)
- ot_key = opt._get_key(guard.cmp_op)
-
- if my_key[1] == ot_key[1]:
- # same operation
- lc = self.compare(self.lhs, guard.lhs)
- rc = self.compare(self.rhs, guard.rhs)
- opnum = self.get_compare_opnum()
- if opnum == -1:
- return False
- # x < y = -1,-2,...
- # x == y = 0
- # x > y = 1,2,...
- if opnum == rop.INT_LT:
- return (lc > 0 and rc >= 0) or (lc == 0 and rc >= 0)
- if opnum == rop.INT_LE:
- return (lc >= 0 and rc >= 0) or (lc == 0 and rc >= 0)
- if opnum == rop.INT_GT:
- return (lc < 0 and rc >= 0) or (lc == 0 and rc > 0)
- if opnum == rop.INT_GE:
- return (lc <= 0 and rc >= 0) or (lc == 0 and rc >= 0)
- return False
-
- def get_compare_opnum(self):
- opnum = self.op.getopnum()
- if opnum == rop.GUARD_TRUE:
- return self.cmp_op.getopnum()
- else:
- return self.cmp_op.boolinverse
-
- def inhert_attributes(self, other):
- myop = self.op
- otherop = other.op
- assert isinstance(otherop, GuardResOp)
- assert isinstance(myop, GuardResOp)
- self.stronger = True
- self.index = other.index
-
- descr = myop.getdescr()
- descr.copy_all_attributes_from(other.op.getdescr())
- myop.rd_frame_info_list = otherop.rd_frame_info_list
- myop.rd_snapshot = otherop.rd_snapshot
- myop.setfailargs(otherop.getfailargs())
-
- def compare(self, key1, key2):
- if isinstance(key1, Box):
- assert isinstance(key2, Box)
- assert key1 is key2 # key of hash enforces this
- return 0
- #
- if isinstance(key1, ConstInt):
- assert isinstance(key2, ConstInt)
- v1 = key1.value
- v2 = key2.value
- if v1 == v2:
- return 0
- elif v1 < v2:
- return -1
- else:
- return 1
- #
- if isinstance(key1, IndexVar):
- assert isinstance(key2, IndexVar)
- return key1.compare(key2)
- #
- raise AssertionError("cannot compare: " + str(key1) + " <=> " +
str(key2))
-
- def emit_varops(self, opt, var, old_arg):
- if isinstance(var, IndexVar):
- box = var.emit_operations(opt)
- opt.renamer.start_renaming(old_arg, box)
- return box
- else:
- return var
-
- def emit_operations(self, opt):
- lhs, opnum, rhs = opt._get_key(self.cmp_op)
- # create trace instructions for the index
- box_lhs = self.emit_varops(opt, self.lhs, self.lhs_arg)
- box_rhs = self.emit_varops(opt, self.rhs, self.rhs_arg)
- box_result = self.cmp_op.result.clonebox()
- opt.emit_operation(ResOperation(opnum, [box_lhs, box_rhs], box_result))
- # guard
- guard = self.op.clone()
- guard.setarg(0, box_result)
- opt.emit_operation(guard)
-
-class GuardStrengthenOpt(object):
- def __init__(self, index_vars):
- self.index_vars = index_vars
- self._newoperations = []
- self._same_as = {}
-
- def find_compare_guard_bool(self, boolarg, operations, index):
- i = index - 1
- # most likely hit in the first iteration
- while i > 0:
- op = operations[i]
- if op.result and op.result == boolarg:
- return op
- i -= 1
-
- raise AssertionError("guard_true/false first arg not defined")
-
- def _get_key(self, cmp_op):
- if cmp_op and rop.INT_LT <= cmp_op.getopnum() <= rop.INT_GE:
- lhs_arg = cmp_op.getarg(0)
- rhs_arg = cmp_op.getarg(1)
- lhs_index_var = self.index_vars.get(lhs_arg, None)
- rhs_index_var = self.index_vars.get(rhs_arg, None)
-
- cmp_opnum = cmp_op.getopnum()
- # get the key, this identifies the guarded operation
- if lhs_index_var and rhs_index_var:
- key = (lhs_index_var.getvariable(), cmp_opnum,
rhs_index_var.getvariable())
- elif lhs_index_var:
- key = (lhs_index_var.getvariable(), cmp_opnum, rhs_arg)
- elif rhs_index_var:
- key = (lhs_arg, cmp_opnum, rhs_index_var)
- else:
- key = (lhs_arg, cmp_opnum, rhs_arg)
- return key
- return (None, 0, None)
-
- def get_key(self, guard_bool, operations, i):
- cmp_op = self.find_compare_guard_bool(guard_bool.getarg(0),
operations, i)
- return self._get_key(cmp_op)
-
- def propagate_all_forward(self, loop):
- """ strengthens the guards that protect an integral value """
- strongest_guards = {}
- guards = {}
- # the guards are ordered. guards[i] is before guards[j] iff i < j
- operations = loop.operations
- last_guard = None
- for i,op in enumerate(operations):
- op = operations[i]
- if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE,
rop.GUARD_FALSE):
- cmp_op = self.find_compare_guard_bool(op.getarg(0),
operations, i)
- key = self._get_key(cmp_op)
- if key[0] is not None:
- lhs_arg = cmp_op.getarg(0)
- lhs = self.index_vars.get(lhs_arg, lhs_arg)
- rhs_arg = cmp_op.getarg(1)
- rhs = self.index_vars.get(rhs_arg, rhs_arg)
- other = strongest_guards.get(key, None)
- if not other:
- guard = Guard(i, op, cmp_op,
- lhs, lhs_arg,
- rhs, rhs_arg)
- strongest_guards[key] = guard
- # nothing known, at this position emit the guard
- guards[i] = guard
- else: # implicit index(strongest) < index(current)
- guard = Guard(i, op, cmp_op,
- lhs, lhs_arg, rhs, rhs_arg)
- if guard.implies(other, self):
- guard.inhert_attributes(other)
-
- strongest_guards[key] = guard
- guards[other.index] = guard
- # do not mark as emit
- continue
- elif other.implies(guard, self):
- guard.implied = True
- # mark as emit
- guards[i] = guard
- else:
- # emit non guard_true/false guards
- guards[i] = Guard(i, op, None, None, None, None, None)
-
- strongest_guards = None
- #
- self.renamer = Renamer()
- last_op_idx = len(operations)-1
- for i,op in enumerate(operations):
- op = operations[i]
- if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE,
rop.GUARD_FALSE):
- guard = guards.get(i, None)
- if not guard or guard.implied:
- # this guard is implied or marked as not emitted (= None)
- continue
- if guard.stronger:
- guard.emit_operations(self)
- continue
- if op.result:
- index_var = self.index_vars.get(op.result, None)
- if index_var:
- if not index_var.is_identity():
- index_var.emit_operations(self, op.result)
- continue
- self.emit_operation(op)
-
- loop.operations = self._newoperations[:]
-
- def emit_operation(self, op):
- self.renamer.rename(op)
- self._newoperations.append(op)
-
class CostModel(object):
def __init__(self, threshold):
self.threshold = threshold
@@ -1103,201 +887,6 @@
invariant_vars.append(vbox)
return vbox
-class OpToVectorOpConv(OpToVectorOp):
- def __init__(self, intype, outtype):
- self.from_size = intype.getsize()
- self.to_size = outtype.getsize()
- OpToVectorOp.__init__(self, (intype, ), outtype)
-
- def determine_input_type(self, op):
- return self.arg_ptypes[0]
-
- def determine_output_type(self, op):
- return self.result_ptype
-
- def split_pack(self, pack):
- if self.from_size > self.to_size:
- # cast down
- return OpToVectorOp.split_pack(self, pack)
- op0 = pack.operations[0].getoperation()
- _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
- vec_reg_size = self.sched_data.vec_reg_size
- if vbox.getcount() * self.to_size > vec_reg_size:
- return vec_reg_size // self.to_size
- return len(pack.operations)
-
- def new_result_vector_box(self):
- type = self.output_type.gettype()
- size = self.to_size
- count = self.output_type.getcount()
- vec_reg_size = self.sched_data.vec_reg_size
- if count * size > vec_reg_size:
- count = vec_reg_size // size
- signed = self.output_type.signed
- return BoxVector(type, count, size, signed)
-
-class SignExtToVectorOp(OpToVectorOp):
- def __init__(self, intype, outtype):
- OpToVectorOp.__init__(self, intype, outtype)
- self.size = -1
-
- def split_pack(self, pack):
- op0 = pack.operations[0].getoperation()
- sizearg = op0.getarg(1)
- assert isinstance(sizearg, ConstInt)
- self.size = sizearg.value
- if self.input_type.getsize() > self.size:
- # cast down
- return OpToVectorOp.split_pack(self, pack)
- _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
- vec_reg_size = self.sched_data.vec_reg_size
- if vbox.getcount() * self.size > vec_reg_size:
- return vec_reg_size // self.size
- return vbox.getcount()
-
- def new_result_vector_box(self):
- type = self.output_type.gettype()
- count = self.input_type.getcount()
- vec_reg_size = self.sched_data.vec_reg_size
- if count * self.size > vec_reg_size:
- count = vec_reg_size // self.size
- signed = self.input_type.signed
- return BoxVector(type, count, self.size, signed)
-
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
-
-class LoadToVectorLoad(OpToVectorOp):
- def __init__(self):
- OpToVectorOp.__init__(self, (), PT_GENERIC)
-
- def determine_input_type(self, op):
- return None
-
- def determine_output_type(self, op):
- return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-
- def before_argument_transform(self, args):
- args.append(ConstInt(len(self.pack.operations)))
-
- def getsplitsize(self):
- return self.output_type.getsize()
-
- def new_result_vector_box(self):
- type = self.output_type.gettype()
- size = self.output_type.getsize()
- count = len(self.pack.operations)
- signed = self.output_type.signed
- return BoxVector(type, count, size, signed)
-
-class StoreToVectorStore(OpToVectorOp):
- def __init__(self):
- OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
- self.has_descr = True
-
- def determine_input_type(self, op):
- return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-
- def determine_output_type(self, op):
- return None
-
-PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
-PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
-PT_FLOAT_GENERIC = PackType(INT, -1, True)
-PT_INT64 = PackType(INT, 8, True)
-PT_INT32_2 = PackType(INT, 4, True, 2)
-PT_INT_GENERIC = PackType(INT, -1, True)
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
-
-INT_RES = PT_INT_GENERIC
-FLOAT_RES = PT_FLOAT_GENERIC
-
-INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
-FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
-FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
-LOAD_TRANS = LoadToVectorLoad()
-STORE_TRANS = StoreToVectorStore()
-
-# note that the following definition is x86 machine
-# specific.
-ROP_ARG_RES_VECTOR = {
- rop.VEC_INT_ADD: INT_OP_TO_VOP,
- rop.VEC_INT_SUB: INT_OP_TO_VOP,
- rop.VEC_INT_MUL: INT_OP_TO_VOP,
- rop.VEC_INT_AND: INT_OP_TO_VOP,
- rop.VEC_INT_OR: INT_OP_TO_VOP,
- rop.VEC_INT_XOR: INT_OP_TO_VOP,
-
- rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
-
- rop.VEC_FLOAT_ADD: FLOAT_OP_TO_VOP,
- rop.VEC_FLOAT_SUB: FLOAT_OP_TO_VOP,
- rop.VEC_FLOAT_MUL: FLOAT_OP_TO_VOP,
- rop.VEC_FLOAT_TRUEDIV: FLOAT_OP_TO_VOP,
- rop.VEC_FLOAT_ABS: FLOAT_SINGLE_ARG_OP_TO_VOP,
- rop.VEC_FLOAT_NEG: FLOAT_SINGLE_ARG_OP_TO_VOP,
- rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
INT_RES),
-
- rop.VEC_RAW_LOAD: LOAD_TRANS,
- rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
- rop.VEC_RAW_STORE: STORE_TRANS,
- rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
-
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2,
PT_FLOAT_2),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2,
PT_DOUBLE_2),
- rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
- rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
-}
-
-class VecScheduleData(SchedulerData):
- def __init__(self, vec_reg_size):
- self.box_to_vbox = {}
- self.vec_reg_size = vec_reg_size
- self.invariant_oplist = []
- self.invariant_vector_vars = []
- self.expanded_map = {}
-
- def as_vector_operation(self, pack):
- op_count = len(pack.operations)
- assert op_count > 1
- self.pack = pack
- # properties that hold for the pack are:
- # + isomorphism (see func above)
- # + tight packed (no room between vector elems)
-
- op0 = pack.operations[0].getoperation()
- tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
- if tovector is None:
- raise NotImplementedError("missing vecop for '%s'" %
(op0.getopname(),))
- oplist = []
- tovector.as_vector_operation(pack, self, oplist)
- return oplist
-
- def getvector_of_box(self, arg):
- return self.box_to_vbox.get(arg, (-1, None))
-
- def setvector_of_box(self, box, off, vector):
- self.box_to_vbox[box] = (off, vector)
-
- def prepend_invariant_operations(self, oplist):
- if len(self.invariant_oplist) > 0:
- label = oplist[0]
- assert label.getopnum() == rop.LABEL
- jump = oplist[-1]
- assert jump.getopnum() == rop.JUMP
-
- label_args = label.getarglist()
- jump_args = jump.getarglist()
- for var in self.invariant_vector_vars:
- label_args.append(var)
- jump_args.append(var)
-
- oplist[0] = label.copy_and_change(label.getopnum(), label_args,
None, label.getdescr())
- oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args,
None, jump.getdescr())
-
- return self.invariant_oplist + oplist
-
- return oplist
-
def isomorphic(l_op, r_op):
""" Subject of definition """
if l_op.getopnum() == r_op.getopnum():
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit