Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77958:cec809035d0c
Date: 2015-06-08 15:25 +0200
http://bitbucket.org/pypy/pypy/changeset/cec809035d0c/

Log:    moved guard strengthening (on arith level) out of vectorize.py into
        guard.py, and scheduling out of dependency.py and vectorize.py into
        schedule.py

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -736,79 +736,6 @@
             return dot
         raise NotImplementedError("dot only for debug purpose")
 
-class SchedulerData(object):
-    pass
-class Scheduler(object):
-    def __init__(self, graph, sched_data):
-        assert isinstance(sched_data, SchedulerData)
-        self.graph = graph
-        self.schedulable_nodes = self.graph.schedulable_nodes
-        self.sched_data = sched_data
-
-    def has_more(self):
-        return len(self.schedulable_nodes) > 0
-
-    def next(self, position):
-        i = self._next(self.schedulable_nodes)
-        if i >= 0:
-            candidate = self.schedulable_nodes[i]
-            del self.schedulable_nodes[i]
-            return self.schedule(candidate, position)
-
-        raise RuntimeError("schedule failed cannot continue")
-
-    def _next(self, candidate_list):
-        i = len(candidate_list)-1
-        while i >= 0:
-            candidate = candidate_list[i]
-            if candidate.emitted:
-                del candidate_list[i]
-                i -= 1
-                continue
-            if self.schedulable(candidate):
-                return i
-            i -= 1
-        return -1
-
-    def schedulable(self, candidate):
-        if candidate.pack:
-            for node in candidate.pack.operations:
-                if node.depends_count() > 0:
-                    return False
-        return candidate.depends_count() == 0
-
-    def schedule(self, candidate, position):
-        if candidate.pack:
-            pack = candidate.pack
-            vops = self.sched_data.as_vector_operation(pack)
-            for node in pack.operations:
-                self.scheduled(node, position)
-            return vops
-        else:
-            self.scheduled(candidate, position)
-            return [candidate.getoperation()]
-
-    def scheduled(self, node, position):
-        node.position = position
-        for dep in node.provides()[:]: # COPY
-            to = dep.to
-            node.remove_edge_to(to)
-            if not to.emitted and to.depends_count() == 0:
-                # sorts them by priority
-                nodes = self.schedulable_nodes
-                i = len(nodes)-1
-                while i >= 0:
-                    itnode = nodes[i]
-                    if itnode.priority < to.priority:
-                        nodes.insert(i+1, to)
-                        break
-                    i -= 1
-                else:
-                    nodes.insert(0, to)
-        node.clear_dependencies()
-        node.emitted = True
-
-
 class IntegralForwardModification(object):
     """ Calculates integral modifications on integer boxes. """
     def __init__(self, memory_refs, index_vars, comparison_vars, invariant_vars):
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -0,0 +1,224 @@
+"""
+NOTE this strengthing optimization is only used in the vecopt.
+It needs also the information about integral modifications
+gathered with IntegralForwardModification
+"""
+
+class Guard(object):
+    """ An object wrapper around a guard. Helps to determine
+        if one guard implies another
+    """
+    def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+        self.index = index
+        self.op = op
+        self.cmp_op = cmp_op
+        self.lhs = lhs
+        self.rhs = rhs
+        self.lhs_arg = lhs_arg
+        self.rhs_arg = rhs_arg
+        self.implied = False
+        self.stronger = False
+
+    def implies(self, guard, opt):
+        if self.op.getopnum() != guard.op.getopnum():
+            return False
+
+        my_key = opt._get_key(self.cmp_op)
+        ot_key = opt._get_key(guard.cmp_op)
+
+        if my_key[1] == ot_key[1]:
+            # same operation
+            lc = self.compare(self.lhs, guard.lhs)
+            rc = self.compare(self.rhs, guard.rhs)
+            opnum = self.get_compare_opnum()
+            if opnum == -1:
+                return False
+            # x < y  = -1,-2,...
+            # x == y = 0
+            # x > y  = 1,2,...
+            if opnum == rop.INT_LT:
+                return (lc > 0 and rc >= 0) or (lc == 0 and rc >= 0)
+            if opnum == rop.INT_LE:
+                return (lc >= 0 and rc >= 0) or (lc == 0 and rc >= 0)
+            if opnum == rop.INT_GT:
+                return (lc < 0 and rc >= 0) or (lc == 0 and rc > 0)
+            if opnum == rop.INT_GE:
+                return (lc <= 0 and rc >= 0) or (lc == 0 and rc >= 0)
+        return False
+
+    def get_compare_opnum(self):
+        opnum = self.op.getopnum()
+        if opnum == rop.GUARD_TRUE:
+            return self.cmp_op.getopnum()
+        else:
+            return self.cmp_op.boolinverse
+
+    def inhert_attributes(self, other):
+        myop = self.op
+        otherop = other.op
+        assert isinstance(otherop, GuardResOp)
+        assert isinstance(myop, GuardResOp)
+        self.stronger = True
+        self.index = other.index
+
+        descr = myop.getdescr()
+        descr.copy_all_attributes_from(other.op.getdescr())
+        myop.rd_frame_info_list = otherop.rd_frame_info_list
+        myop.rd_snapshot = otherop.rd_snapshot
+        myop.setfailargs(otherop.getfailargs())
+
+    def compare(self, key1, key2):
+        if isinstance(key1, Box):
+            assert isinstance(key2, Box)
+            assert key1 is key2 # key of hash enforces this
+            return 0
+        #
+        if isinstance(key1, ConstInt):
+            assert isinstance(key2, ConstInt)
+            v1 = key1.value
+            v2 = key2.value
+            if v1 == v2:
+                return 0
+            elif v1 < v2:
+                return -1
+            else:
+                return 1
+        #
+        if isinstance(key1, IndexVar):
+            assert isinstance(key2, IndexVar)
+            return key1.compare(key2)
+        #
+        raise AssertionError("cannot compare: " + str(key1) + " <=> " + str(key2))
+
+    def emit_varops(self, opt, var, old_arg):
+        if isinstance(var, IndexVar):
+            box = var.emit_operations(opt)
+            opt.renamer.start_renaming(old_arg, box)
+            return box
+        else:
+            return var
+
+    def emit_operations(self, opt):
+        lhs, opnum, rhs = opt._get_key(self.cmp_op)
+        # create trace instructions for the index
+        box_lhs = self.emit_varops(opt, self.lhs, self.lhs_arg)
+        box_rhs = self.emit_varops(opt, self.rhs, self.rhs_arg)
+        box_result = self.cmp_op.result.clonebox()
+        opt.emit_operation(ResOperation(opnum, [box_lhs, box_rhs], box_result))
+        # guard
+        guard = self.op.clone()
+        guard.setarg(0, box_result)
+        opt.emit_operation(guard)
+
+class GuardStrengthenOpt(object):
+    def __init__(self, index_vars):
+        self.index_vars = index_vars
+        self._newoperations = []
+        self._same_as = {}
+
+    def find_compare_guard_bool(self, boolarg, operations, index):
+        i = index - 1
+        # most likely hit in the first iteration
+        while i > 0:
+            op = operations[i]
+            if op.result and op.result == boolarg:
+                return op
+            i -= 1
+
+        raise AssertionError("guard_true/false first arg not defined")
+
+    def _get_key(self, cmp_op):
+        if cmp_op and rop.INT_LT <= cmp_op.getopnum() <= rop.INT_GE:
+            lhs_arg = cmp_op.getarg(0)
+            rhs_arg = cmp_op.getarg(1)
+            lhs_index_var = self.index_vars.get(lhs_arg, None)
+            rhs_index_var = self.index_vars.get(rhs_arg, None)
+
+            cmp_opnum = cmp_op.getopnum()
+            # get the key, this identifies the guarded operation
+            if lhs_index_var and rhs_index_var:
+                key = (lhs_index_var.getvariable(), cmp_opnum, rhs_index_var.getvariable())
+            elif lhs_index_var:
+                key = (lhs_index_var.getvariable(), cmp_opnum, rhs_arg)
+            elif rhs_index_var:
+                key = (lhs_arg, cmp_opnum, rhs_index_var)
+            else:
+                key = (lhs_arg, cmp_opnum, rhs_arg)
+            return key
+        return (None, 0, None)
+
+    def get_key(self, guard_bool, operations, i):
+        cmp_op = self.find_compare_guard_bool(guard_bool.getarg(0), operations, i)
+        return self._get_key(cmp_op)
+
+    def propagate_all_forward(self, loop):
+        """ strengthens the guards that protect an integral value """
+        strongest_guards = {}
+        guards = {}
+        # the guards are ordered. guards[i] is before guards[j] iff i < j
+        operations = loop.operations
+        last_guard = None
+        for i,op in enumerate(operations):
+            op = operations[i]
+            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
+                cmp_op = self.find_compare_guard_bool(op.getarg(0), operations, i)
+                key = self._get_key(cmp_op)
+                if key[0] is not None:
+                    lhs_arg = cmp_op.getarg(0)
+                    lhs = self.index_vars.get(lhs_arg, lhs_arg)
+                    rhs_arg = cmp_op.getarg(1)
+                    rhs = self.index_vars.get(rhs_arg, rhs_arg)
+                    other = strongest_guards.get(key, None)
+                    if not other:
+                        guard = Guard(i, op, cmp_op,
+                                      lhs, lhs_arg,
+                                      rhs, rhs_arg)
+                        strongest_guards[key] = guard
+                        # nothing known, at this position emit the guard
+                        guards[i] = guard
+                    else: # implicit index(strongest) < index(current)
+                        guard = Guard(i, op, cmp_op,
+                                      lhs, lhs_arg, rhs, rhs_arg)
+                        if guard.implies(other, self):
+                            guard.inhert_attributes(other)
+
+                            strongest_guards[key] = guard
+                            guards[other.index] = guard
+                            # do not mark as emit
+                            continue
+                        elif other.implies(guard, self):
+                            guard.implied = True
+                        # mark as emit
+                        guards[i] = guard
+                else:
+                    # emit non guard_true/false guards
+                    guards[i] = Guard(i, op, None, None, None, None, None)
+
+        strongest_guards = None
+        #
+        self.renamer = Renamer()
+        last_op_idx = len(operations)-1
+        for i,op in enumerate(operations):
+            op = operations[i]
+            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
+                guard = guards.get(i, None)
+                if not guard or guard.implied:
+                    # this guard is implied or marked as not emitted (= None)
+                    continue
+                if guard.stronger:
+                    guard.emit_operations(self)
+                    continue
+            if op.result:
+                index_var = self.index_vars.get(op.result, None)
+                if index_var:
+                    if not index_var.is_identity():
+                        index_var.emit_operations(self, op.result)
+                        continue
+            self.emit_operation(op)
+
+        loop.operations = self._newoperations[:]
+
+    def emit_operation(self, op):
+        self.renamer.rename(op)
+        self._newoperations.append(op)
+
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -0,0 +1,266 @@
+
+class SchedulerData(object):
+    pass
+class Scheduler(object):
+    def __init__(self, graph, sched_data):
+        assert isinstance(sched_data, SchedulerData)
+        self.graph = graph
+        self.schedulable_nodes = self.graph.schedulable_nodes
+        self.sched_data = sched_data
+
+    def has_more(self):
+        return len(self.schedulable_nodes) > 0
+
+    def next(self, position):
+        i = self._next(self.schedulable_nodes)
+        if i >= 0:
+            candidate = self.schedulable_nodes[i]
+            del self.schedulable_nodes[i]
+            return self.schedule(candidate, position)
+
+        raise AssertionError("schedule failed cannot continue. possible reason: cycle")
+
+    def _next(self, candidate_list):
+        i = len(candidate_list)-1
+        while i >= 0:
+            candidate = candidate_list[i]
+            if candidate.emitted:
+                del candidate_list[i]
+                i -= 1
+                continue
+            if self.schedulable(candidate):
+                return i
+            i -= 1
+        return -1
+
+    def schedulable(self, candidate):
+        if candidate.pack:
+            for node in candidate.pack.operations:
+                if node.depends_count() > 0:
+                    return False
+        return candidate.depends_count() == 0
+
+    def schedule(self, candidate, position):
+        if candidate.pack:
+            pack = candidate.pack
+            vops = self.sched_data.as_vector_operation(pack)
+            for node in pack.operations:
+                self.scheduled(node, position)
+            return vops
+        else:
+            self.scheduled(candidate, position)
+            return [candidate.getoperation()]
+
+    def scheduled(self, node, position):
+        node.position = position
+        for dep in node.provides()[:]: # COPY
+            to = dep.to
+            node.remove_edge_to(to)
+            if not to.emitted and to.depends_count() == 0:
+                # sorts them by priority
+                nodes = self.schedulable_nodes
+                i = len(nodes)-1
+                while i >= 0:
+                    itnode = nodes[i]
+                    if itnode.priority < to.priority:
+                        nodes.insert(i+1, to)
+                        break
+                    i -= 1
+                else:
+                    nodes.insert(0, to)
+        node.clear_dependencies()
+        node.emitted = True
+
+PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
+PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
+PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_INT64 = PackType(INT, 8, True)
+PT_INT32_2 = PackType(INT, 4, True, 2)
+PT_INT_GENERIC = PackType(INT, -1, True)
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
+
+INT_RES = PT_INT_GENERIC
+FLOAT_RES = PT_FLOAT_GENERIC
+
+class OpToVectorOpConv(OpToVectorOp):
+    def __init__(self, intype, outtype):
+        self.from_size = intype.getsize()
+        self.to_size = outtype.getsize()
+        OpToVectorOp.__init__(self, (intype, ), outtype)
+
+    def determine_input_type(self, op):
+        return self.arg_ptypes[0]
+
+    def determine_output_type(self, op):
+        return self.result_ptype
+
+    def split_pack(self, pack):
+        if self.from_size > self.to_size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        op0 = pack.operations[0].getoperation()
+        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.to_size > vec_reg_size:
+            return vec_reg_size // self.to_size
+        return len(pack.operations)
+
+    def new_result_vector_box(self):
+        type = self.output_type.gettype()
+        size = self.to_size
+        count = self.output_type.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if count * size > vec_reg_size:
+            count = vec_reg_size // size
+        signed = self.output_type.signed
+        return BoxVector(type, count, size, signed)
+
+class SignExtToVectorOp(OpToVectorOp):
+    def __init__(self, intype, outtype):
+        OpToVectorOp.__init__(self, intype, outtype)
+        self.size = -1
+
+    def split_pack(self, pack):
+        op0 = pack.operations[0].getoperation()
+        sizearg = op0.getarg(1)
+        assert isinstance(sizearg, ConstInt)
+        self.size = sizearg.value
+        if self.input_type.getsize() > self.size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.size > vec_reg_size:
+            return vec_reg_size // self.size
+        return vbox.getcount()
+
+    def new_result_vector_box(self):
+        type = self.output_type.gettype()
+        count = self.input_type.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if count * self.size > vec_reg_size:
+            count = vec_reg_size // self.size
+        signed = self.input_type.signed
+        return BoxVector(type, count, self.size, signed)
+
+class LoadToVectorLoad(OpToVectorOp):
+    def __init__(self):
+        OpToVectorOp.__init__(self, (), PT_GENERIC)
+
+    def determine_input_type(self, op):
+        return None
+
+    def determine_output_type(self, op):
+        return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
+
+    def before_argument_transform(self, args):
+        args.append(ConstInt(len(self.pack.operations)))
+
+    def getsplitsize(self):
+        return self.output_type.getsize()
+
+    def new_result_vector_box(self):
+        type = self.output_type.gettype()
+        size = self.output_type.getsize()
+        count = len(self.pack.operations)
+        signed = self.output_type.signed
+        return BoxVector(type, count, size, signed)
+
+class StoreToVectorStore(OpToVectorOp):
+    def __init__(self):
+        OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
+        self.has_descr = True
+
+    def determine_input_type(self, op):
+        return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
+
+    def determine_output_type(self, op):
+        return None
+
+INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
+FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
+FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
+LOAD_TRANS = LoadToVectorLoad()
+STORE_TRANS = StoreToVectorStore()
+
+# note that the following definition is x86 machine
+# specific.
+ROP_ARG_RES_VECTOR = {
+    rop.VEC_INT_ADD:     INT_OP_TO_VOP,
+    rop.VEC_INT_SUB:     INT_OP_TO_VOP,
+    rop.VEC_INT_MUL:     INT_OP_TO_VOP,
+    rop.VEC_INT_AND:     INT_OP_TO_VOP,
+    rop.VEC_INT_OR:      INT_OP_TO_VOP,
+    rop.VEC_INT_XOR:     INT_OP_TO_VOP,
+
+    rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
+
+    rop.VEC_FLOAT_ADD:   FLOAT_OP_TO_VOP,
+    rop.VEC_FLOAT_SUB:   FLOAT_OP_TO_VOP,
+    rop.VEC_FLOAT_MUL:   FLOAT_OP_TO_VOP,
+    rop.VEC_FLOAT_TRUEDIV:   FLOAT_OP_TO_VOP,
+    rop.VEC_FLOAT_ABS:   FLOAT_SINGLE_ARG_OP_TO_VOP,
+    rop.VEC_FLOAT_NEG:   FLOAT_SINGLE_ARG_OP_TO_VOP,
+    rop.VEC_FLOAT_EQ:    OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), INT_RES),
+
+    rop.VEC_RAW_LOAD:         LOAD_TRANS,
+    rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
+    rop.VEC_RAW_STORE:        STORE_TRANS,
+    rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
+
+    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
+    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
+    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
+    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
+}
+
+class VecScheduleData(SchedulerData):
+    def __init__(self, vec_reg_size):
+        self.box_to_vbox = {}
+        self.vec_reg_size = vec_reg_size
+        self.invariant_oplist = []
+        self.invariant_vector_vars = []
+        self.expanded_map = {}
+
+    def as_vector_operation(self, pack):
+        op_count = len(pack.operations)
+        assert op_count > 1
+        self.pack = pack
+        # properties that hold for the pack are:
+        # + isomorphism (see func above)
+        # + tight packed (no room between vector elems)
+
+        op0 = pack.operations[0].getoperation()
+        tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
+        if tovector is None:
+            raise NotImplementedError("missing vecop for '%s'" % (op0.getopname(),))
+        oplist = []
+        tovector.as_vector_operation(pack, self, oplist)
+        return oplist
+
+    def getvector_of_box(self, arg):
+        return self.box_to_vbox.get(arg, (-1, None))
+
+    def setvector_of_box(self, box, off, vector):
+        self.box_to_vbox[box] = (off, vector)
+
+    def prepend_invariant_operations(self, oplist):
+        if len(self.invariant_oplist) > 0:
+            label = oplist[0]
+            assert label.getopnum() == rop.LABEL
+            jump = oplist[-1]
+            assert jump.getopnum() == rop.JUMP
+
+            label_args = label.getarglist()
+            jump_args = jump.getarglist()
+            for var in self.invariant_vector_vars:
+                label_args.append(var)
+                jump_args.append(var)
+
+            oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, label.getdescr())
+            oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, jump.getdescr())
+
+            return self.invariant_oplist + oplist
+
+        return oplist
+
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -9,7 +9,9 @@
 from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
 from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, 
-        MemoryRef, Scheduler, SchedulerData, Node, IndexVar)
+        MemoryRef, Node, IndexVar)
+from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleData, Scheduler
+from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -509,224 +511,6 @@
         rec_snap = self.rename_rd_snapshot(snapshot.prev, clone)
         return Snapshot(rec_snap, boxes)
 
-class Guard(object):
-    """ An object wrapper around a guard. Helps to determine
-        if one guard implies another
-    """
-    def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
-        self.index = index
-        self.op = op
-        self.cmp_op = cmp_op
-        self.lhs = lhs
-        self.rhs = rhs
-        self.lhs_arg = lhs_arg
-        self.rhs_arg = rhs_arg
-        self.implied = False
-        self.stronger = False
-
-    def implies(self, guard, opt):
-        if self.op.getopnum() != guard.op.getopnum():
-            return False
-
-        my_key = opt._get_key(self.cmp_op)
-        ot_key = opt._get_key(guard.cmp_op)
-
-        if my_key[1] == ot_key[1]:
-            # same operation
-            lc = self.compare(self.lhs, guard.lhs)
-            rc = self.compare(self.rhs, guard.rhs)
-            opnum = self.get_compare_opnum()
-            if opnum == -1:
-                return False
-            # x < y  = -1,-2,...
-            # x == y = 0
-            # x > y  = 1,2,...
-            if opnum == rop.INT_LT:
-                return (lc > 0 and rc >= 0) or (lc == 0 and rc >= 0)
-            if opnum == rop.INT_LE:
-                return (lc >= 0 and rc >= 0) or (lc == 0 and rc >= 0)
-            if opnum == rop.INT_GT:
-                return (lc < 0 and rc >= 0) or (lc == 0 and rc > 0)
-            if opnum == rop.INT_GE:
-                return (lc <= 0 and rc >= 0) or (lc == 0 and rc >= 0)
-        return False
-
-    def get_compare_opnum(self):
-        opnum = self.op.getopnum()
-        if opnum == rop.GUARD_TRUE:
-            return self.cmp_op.getopnum()
-        else:
-            return self.cmp_op.boolinverse
-
-    def inhert_attributes(self, other):
-        myop = self.op
-        otherop = other.op
-        assert isinstance(otherop, GuardResOp)
-        assert isinstance(myop, GuardResOp)
-        self.stronger = True
-        self.index = other.index
-
-        descr = myop.getdescr()
-        descr.copy_all_attributes_from(other.op.getdescr())
-        myop.rd_frame_info_list = otherop.rd_frame_info_list
-        myop.rd_snapshot = otherop.rd_snapshot
-        myop.setfailargs(otherop.getfailargs())
-
-    def compare(self, key1, key2):
-        if isinstance(key1, Box):
-            assert isinstance(key2, Box)
-            assert key1 is key2 # key of hash enforces this
-            return 0
-        #
-        if isinstance(key1, ConstInt):
-            assert isinstance(key2, ConstInt)
-            v1 = key1.value
-            v2 = key2.value
-            if v1 == v2:
-                return 0
-            elif v1 < v2:
-                return -1
-            else:
-                return 1
-        #
-        if isinstance(key1, IndexVar):
-            assert isinstance(key2, IndexVar)
-            return key1.compare(key2)
-        #
-        raise AssertionError("cannot compare: " + str(key1) + " <=> " + str(key2))
-
-    def emit_varops(self, opt, var, old_arg):
-        if isinstance(var, IndexVar):
-            box = var.emit_operations(opt)
-            opt.renamer.start_renaming(old_arg, box)
-            return box
-        else:
-            return var
-
-    def emit_operations(self, opt):
-        lhs, opnum, rhs = opt._get_key(self.cmp_op)
-        # create trace instructions for the index
-        box_lhs = self.emit_varops(opt, self.lhs, self.lhs_arg)
-        box_rhs = self.emit_varops(opt, self.rhs, self.rhs_arg)
-        box_result = self.cmp_op.result.clonebox()
-        opt.emit_operation(ResOperation(opnum, [box_lhs, box_rhs], box_result))
-        # guard
-        guard = self.op.clone()
-        guard.setarg(0, box_result)
-        opt.emit_operation(guard)
-
-class GuardStrengthenOpt(object):
-    def __init__(self, index_vars):
-        self.index_vars = index_vars
-        self._newoperations = []
-        self._same_as = {}
-
-    def find_compare_guard_bool(self, boolarg, operations, index):
-        i = index - 1
-        # most likely hit in the first iteration
-        while i > 0:
-            op = operations[i]
-            if op.result and op.result == boolarg:
-                return op
-            i -= 1
-
-        raise AssertionError("guard_true/false first arg not defined")
-
-    def _get_key(self, cmp_op):
-        if cmp_op and rop.INT_LT <= cmp_op.getopnum() <= rop.INT_GE:
-            lhs_arg = cmp_op.getarg(0)
-            rhs_arg = cmp_op.getarg(1)
-            lhs_index_var = self.index_vars.get(lhs_arg, None)
-            rhs_index_var = self.index_vars.get(rhs_arg, None)
-
-            cmp_opnum = cmp_op.getopnum()
-            # get the key, this identifies the guarded operation
-            if lhs_index_var and rhs_index_var:
-                key = (lhs_index_var.getvariable(), cmp_opnum, rhs_index_var.getvariable())
-            elif lhs_index_var:
-                key = (lhs_index_var.getvariable(), cmp_opnum, rhs_arg)
-            elif rhs_index_var:
-                key = (lhs_arg, cmp_opnum, rhs_index_var)
-            else:
-                key = (lhs_arg, cmp_opnum, rhs_arg)
-            return key
-        return (None, 0, None)
-
-    def get_key(self, guard_bool, operations, i):
-        cmp_op = self.find_compare_guard_bool(guard_bool.getarg(0), operations, i)
-        return self._get_key(cmp_op)
-
-    def propagate_all_forward(self, loop):
-        """ strengthens the guards that protect an integral value """
-        strongest_guards = {}
-        guards = {}
-        # the guards are ordered. guards[i] is before guards[j] iff i < j
-        operations = loop.operations
-        last_guard = None
-        for i,op in enumerate(operations):
-            op = operations[i]
-            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
-                cmp_op = self.find_compare_guard_bool(op.getarg(0), operations, i)
-                key = self._get_key(cmp_op)
-                if key[0] is not None:
-                    lhs_arg = cmp_op.getarg(0)
-                    lhs = self.index_vars.get(lhs_arg, lhs_arg)
-                    rhs_arg = cmp_op.getarg(1)
-                    rhs = self.index_vars.get(rhs_arg, rhs_arg)
-                    other = strongest_guards.get(key, None)
-                    if not other:
-                        guard = Guard(i, op, cmp_op,
-                                      lhs, lhs_arg,
-                                      rhs, rhs_arg)
-                        strongest_guards[key] = guard
-                        # nothing known, at this position emit the guard
-                        guards[i] = guard
-                    else: # implicit index(strongest) < index(current)
-                        guard = Guard(i, op, cmp_op,
-                                      lhs, lhs_arg, rhs, rhs_arg)
-                        if guard.implies(other, self):
-                            guard.inhert_attributes(other)
-
-                            strongest_guards[key] = guard
-                            guards[other.index] = guard
-                            # do not mark as emit
-                            continue
-                        elif other.implies(guard, self):
-                            guard.implied = True
-                        # mark as emit
-                        guards[i] = guard
-                else:
-                    # emit non guard_true/false guards
-                    guards[i] = Guard(i, op, None, None, None, None, None)
-
-        strongest_guards = None
-        #
-        self.renamer = Renamer()
-        last_op_idx = len(operations)-1
-        for i,op in enumerate(operations):
-            op = operations[i]
-            if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
-                guard = guards.get(i, None)
-                if not guard or guard.implied:
-                    # this guard is implied or marked as not emitted (= None)
-                    continue
-                if guard.stronger:
-                    guard.emit_operations(self)
-                    continue
-            if op.result:
-                index_var = self.index_vars.get(op.result, None)
-                if index_var:
-                    if not index_var.is_identity():
-                        index_var.emit_operations(self, op.result)
-                        continue
-            self.emit_operation(op)
-
-        loop.operations = self._newoperations[:]
-
-    def emit_operation(self, op):
-        self.renamer.rename(op)
-        self._newoperations.append(op)
-
 class CostModel(object):
     def __init__(self, threshold):
         self.threshold = threshold
@@ -1103,201 +887,6 @@
         invariant_vars.append(vbox)
         return vbox
 
-class OpToVectorOpConv(OpToVectorOp):
-    def __init__(self, intype, outtype):
-        self.from_size = intype.getsize()
-        self.to_size = outtype.getsize()
-        OpToVectorOp.__init__(self, (intype, ), outtype)
-
-    def determine_input_type(self, op):
-        return self.arg_ptypes[0]
-
-    def determine_output_type(self, op):
-        return self.result_ptype
-
-    def split_pack(self, pack):
-        if self.from_size > self.to_size:
-            # cast down
-            return OpToVectorOp.split_pack(self, pack)
-        op0 = pack.operations[0].getoperation()
-        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
-        vec_reg_size = self.sched_data.vec_reg_size
-        if vbox.getcount() * self.to_size > vec_reg_size:
-            return vec_reg_size // self.to_size
-        return len(pack.operations)
-
-    def new_result_vector_box(self):
-        type = self.output_type.gettype()
-        size = self.to_size
-        count = self.output_type.getcount()
-        vec_reg_size = self.sched_data.vec_reg_size
-        if count * size > vec_reg_size:
-            count = vec_reg_size // size
-        signed = self.output_type.signed
-        return BoxVector(type, count, size, signed)
-
-class SignExtToVectorOp(OpToVectorOp):
-    def __init__(self, intype, outtype):
-        OpToVectorOp.__init__(self, intype, outtype)
-        self.size = -1
-
-    def split_pack(self, pack):
-        op0 = pack.operations[0].getoperation()
-        sizearg = op0.getarg(1)
-        assert isinstance(sizearg, ConstInt)
-        self.size = sizearg.value
-        if self.input_type.getsize() > self.size:
-            # cast down
-            return OpToVectorOp.split_pack(self, pack)
-        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
-        vec_reg_size = self.sched_data.vec_reg_size
-        if vbox.getcount() * self.size > vec_reg_size:
-            return vec_reg_size // self.size
-        return vbox.getcount()
-
-    def new_result_vector_box(self):
-        type = self.output_type.gettype()
-        count = self.input_type.getcount()
-        vec_reg_size = self.sched_data.vec_reg_size
-        if count * self.size > vec_reg_size:
-            count = vec_reg_size // self.size
-        signed = self.input_type.signed
-        return BoxVector(type, count, self.size, signed)
-
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
-
-class LoadToVectorLoad(OpToVectorOp):
-    def __init__(self):
-        OpToVectorOp.__init__(self, (), PT_GENERIC)
-
-    def determine_input_type(self, op):
-        return None
-
-    def determine_output_type(self, op):
-        return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-
-    def before_argument_transform(self, args):
-        args.append(ConstInt(len(self.pack.operations)))
-
-    def getsplitsize(self):
-        return self.output_type.getsize()
-
-    def new_result_vector_box(self):
-        type = self.output_type.gettype()
-        size = self.output_type.getsize()
-        count = len(self.pack.operations)
-        signed = self.output_type.signed
-        return BoxVector(type, count, size, signed)
-
-class StoreToVectorStore(OpToVectorOp):
-    def __init__(self):
-        OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
-        self.has_descr = True
-
-    def determine_input_type(self, op):
-        return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-
-    def determine_output_type(self, op):
-        return None
-
-PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
-PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
-PT_FLOAT_GENERIC = PackType(INT, -1, True)
-PT_INT64 = PackType(INT, 8, True)
-PT_INT32_2 = PackType(INT, 4, True, 2)
-PT_INT_GENERIC = PackType(INT, -1, True)
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
-
-INT_RES = PT_INT_GENERIC
-FLOAT_RES = PT_FLOAT_GENERIC
-
-INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
-FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
-FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
-LOAD_TRANS = LoadToVectorLoad()
-STORE_TRANS = StoreToVectorStore()
-
-# note that the following definition is x86 machine
-# specific.
-ROP_ARG_RES_VECTOR = {
-    rop.VEC_INT_ADD:     INT_OP_TO_VOP,
-    rop.VEC_INT_SUB:     INT_OP_TO_VOP,
-    rop.VEC_INT_MUL:     INT_OP_TO_VOP,
-    rop.VEC_INT_AND:     INT_OP_TO_VOP,
-    rop.VEC_INT_OR:      INT_OP_TO_VOP,
-    rop.VEC_INT_XOR:     INT_OP_TO_VOP,
-
-    rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
-
-    rop.VEC_FLOAT_ADD:   FLOAT_OP_TO_VOP,
-    rop.VEC_FLOAT_SUB:   FLOAT_OP_TO_VOP,
-    rop.VEC_FLOAT_MUL:   FLOAT_OP_TO_VOP,
-    rop.VEC_FLOAT_TRUEDIV:   FLOAT_OP_TO_VOP,
-    rop.VEC_FLOAT_ABS:   FLOAT_SINGLE_ARG_OP_TO_VOP,
-    rop.VEC_FLOAT_NEG:   FLOAT_SINGLE_ARG_OP_TO_VOP,
-    rop.VEC_FLOAT_EQ:    OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), INT_RES),
-
-    rop.VEC_RAW_LOAD:         LOAD_TRANS,
-    rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
-    rop.VEC_RAW_STORE:        STORE_TRANS,
-    rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
-
-    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
-    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
-    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
-    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
-}
-
-class VecScheduleData(SchedulerData):
-    def __init__(self, vec_reg_size):
-        self.box_to_vbox = {}
-        self.vec_reg_size = vec_reg_size
-        self.invariant_oplist = []
-        self.invariant_vector_vars = []
-        self.expanded_map = {}
-
-    def as_vector_operation(self, pack):
-        op_count = len(pack.operations)
-        assert op_count > 1
-        self.pack = pack
-        # properties that hold for the pack are:
-        # + isomorphism (see func above)
-        # + tight packed (no room between vector elems)
-
-        op0 = pack.operations[0].getoperation()
-        tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
-        if tovector is None:
-            raise NotImplementedError("missing vecop for '%s'" % (op0.getopname(),))
-        oplist = []
-        tovector.as_vector_operation(pack, self, oplist)
-        return oplist
-
-    def getvector_of_box(self, arg):
-        return self.box_to_vbox.get(arg, (-1, None))
-
-    def setvector_of_box(self, box, off, vector):
-        self.box_to_vbox[box] = (off, vector)
-
-    def prepend_invariant_operations(self, oplist):
-        if len(self.invariant_oplist) > 0:
-            label = oplist[0]
-            assert label.getopnum() == rop.LABEL
-            jump = oplist[-1]
-            assert jump.getopnum() == rop.JUMP
-
-            label_args = label.getarglist()
-            jump_args = jump.getarglist()
-            for var in self.invariant_vector_vars:
-                label_args.append(var)
-                jump_args.append(var)
-
-            oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, label.getdescr())
-            oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, jump.getdescr())
-
-            return self.invariant_oplist + oplist
-
-        return oplist
-
 def isomorphic(l_op, r_op):
     """ Subject of definition """
     if l_op.getopnum() == r_op.getopnum():
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to