[pypy-commit] pypy shadowstack-perf-2: in-progress, hypothesis testing of the bitmask encoding

arigo Wed, 11 May 2016 11:24:02 -0700

Author: Armin Rigo <[email protected]>
Branch: shadowstack-perf-2
Changeset: r84384:56e5c4403abf
Date: 2016-05-11 20:22 +0200
http://bitbucket.org/pypy/pypy/changeset/56e5c4403abf/


Log:    in-progress, hypothesis testing of the bitmask encoding

diff --git a/rpython/memory/gctransform/shadowcolor.py 
b/rpython/memory/gctransform/shadowcolor.py
--- a/rpython/memory/gctransform/shadowcolor.py
+++ b/rpython/memory/gctransform/shadowcolor.py
@@ -1,5 +1,7 @@
-from rpython.flowspace.model import mkentrymap, Variable
+from rpython.rtyper.lltypesystem import lltype, llmemory
+from rpython.flowspace.model import mkentrymap, Variable, Constant
 from rpython.tool.algo.regalloc import perform_register_allocation
+from rpython.translator.unsimplify import varoftype
 
 
 def is_trivial_rewrite(op):
@@ -77,6 +79,8 @@
                 for v in op.args:
                     assert v in interesting_vars   # must be pushed just above
                     pending_succ.append((block, v))
+    if not interesting_vars:
+        return None
 
     # If there is a path from a gc_pop_roots(v) to a subsequent
     # gc_push_roots(w) where w contains the same value as v along that
@@ -96,36 +100,115 @@
 
 def allocate_registers(graph):
     interesting_vars = find_interesting_variables(graph)
+    if not interesting_vars:
+        return None
     regalloc = perform_register_allocation(graph, 
interesting_vars.__contains__)
+    regalloc.find_num_colors()
     return regalloc
 
 
-def move_pushes_earlier(graph):
+def _gc_save_root(index, var):
+    c_index = Constant(index, lltype.Signed)
+    return SpaceOperation('gc_save_root', [c_index, var],
+                          varoftype(lltype.Void))
+
+c_NULL = Constant(lltype.nullptr(llmemory.GCREF.TO), llmemory.GCREF)
+
+def make_bitmask(filled):
+    n = filled.count(False)
+    if n == 0:
+        return (None, None)
+    if n == 1:
+        return (filled.index(False), c_NULL)
+    bitmask = 0
+    last_index = 0
+    for i in range(len(filled)):
+        if not filled[i]:
+            bitmask <<= (i - last_index)
+            last_index = i
+            bitmask |= 1
+    return (last_index, Constant(bitmask, lltype.Signed))
+
+
+def expand_push_roots(graph, regalloc):
+    """Expand gc_push_roots into a series of gc_save_root, including
+    writing a bitmask tag to mark some entries as not-in-use
+    """
+    for block in graph.iterblocks():
+        any_change = False
+        newops = []
+        for op in block.operations:
+            if op.opname == 'gc_push_roots':
+                if regalloc is None:
+                    assert len(op.args) == 0
+                else:
+                    filled = [False] * regalloc.numcolors
+                    for v in op.args:
+                        index = regalloc.getcolor(v)
+                        assert not filled[index]
+                        filled[index] = True
+                        newops.append(_gc_save_root(index, v))
+                    bitmask_index, bitmask_v = make_bitmask(filled)
+                    if bitmask_index is not None:
+                        newops.append(_gc_save_root(bitmask_index, bitmask_v))
+                any_change = True
+            else:
+                newops.append(op)
+        if any_change:
+            block.operations = newops
+
+
+def move_pushes_earlier(graph, regalloc):
     """gc_push_roots and gc_pop_roots are pushes/pops to the shadowstack,
     immediately enclosing the operation that needs them (typically a call).
     Here, we try to move individual pushes earlier, in fact as early as
     possible under the following conditions: we only move it across vars
     that are 'interesting_vars'; and we stop when we encounter the
-    operation that produces the value, or when we encounter a gc_pop_roots
-    that pops off the same stack location.  In the latter case, if that
-    gc_pop_roots pops the same value out of the same stack location, then
-    success: we can remove the gc_push_root on that path.
+    operation that produces the value, or when we encounter a gc_pop_roots.
+    In the latter case, if that gc_pop_roots pops the same value out of the
+    same stack location, then success: we can remove the gc_push_root on
+    that path.
 
     If the process succeeds to remove the gc_push_root along at least
     one path, we generate it explicitly on the other paths, and we
     remove the original gc_push_root.  If the process doesn't succeed
     in doing any such removal, we don't do anything.
+    """
+    # Concrete example (assembler tested on x86-64 gcc 5.3 and clang 3.7):
+    #
+    # ----original----           ----move_pushes_earlier----
+    #
+    # while (a > 10) {           *foo = b;
+    #     *foo = b;              while (a > 10) {
+    #     a = g(a);                  a = g(a);
+    #     b = *foo;                  b = *foo;
+    #                                // *foo = b;
+    # }                          }
+    # return b;                  return b;
+    #
+    # => the store and the       => the store is before, and gcc/clang
+    # load are in the loop,      moves the load after the loop
+    # even in the assembler      (the commented-out '*foo=b' is removed
+    #                            by this function, but gcc/clang would
+    #                            also remove it)
 
-    Note that it would be possible to do exactly the same in the
-    opposite direction by exchanging the roles of "push/earlier" and
-    "pop/later".  I think doing both is pointless---one direction is
-    enough.  The direction we chose here keeps gc_pop_roots unmodified.
-    The C compiler should be better at discarding them if unused.
-    """
-    
     x.x.x.x
 
 
+def expand_push_pop_roots(graph):
+    xxxxxxxxx
+    for block in graph.iterblocks():
+        for op in block.operations:
+            if op.opname == 'gc_push_roots':
+                for v in op.args:
+                    interesting_vars.add(v)
+                    pending_pred.append((block, v))
+            elif op.opname == 'gc_pop_roots':
+                for v in op.args:
+                    assert v in interesting_vars   # must be pushed just above
+                    pending_succ.append((block, v))
+
+
 def postprocess_graph(gct, graph):
     """Collect information about the gc_push_roots and gc_pop_roots
     added in this complete graph, and replace them with real operations.
diff --git a/rpython/memory/gctransform/shadowstack.py 
b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -29,11 +29,14 @@
     def push_roots(self, hop, keep_current_args=False):
         livevars = self.get_livevars_for_roots(hop, keep_current_args)
         self.num_pushs += len(livevars)
-        hop.genop("gc_push_roots", livevars)   # even if len(livevars) == 0
+        if livevars:
+            hop.genop("gc_push_roots", livevars)
         return livevars
 
     def pop_roots(self, hop, livevars):
-        hop.genop("gc_pop_roots", list(livevars)) # even if len(livevars) == 0
+        hop.genop("gc_pop_roots", livevars)
+        # NB. we emit it even if len(livevars) == 0; this is needed for
+        # shadowcolor.move_pushes_earlier()
 
 
 class ShadowStackRootWalker(BaseRootWalker):
diff --git a/rpython/memory/gctransform/test/test_shadowcolor.py 
b/rpython/memory/gctransform/test/test_shadowcolor.py
--- a/rpython/memory/gctransform/test/test_shadowcolor.py
+++ b/rpython/memory/gctransform/test/test_shadowcolor.py
@@ -3,6 +3,7 @@
 from rpython.rtyper.test.test_llinterp import gengraph
 from rpython.conftest import option
 from rpython.memory.gctransform.shadowcolor import *
+from hypothesis import given, strategies
 
 
 def make_graph(f, argtypes):
@@ -242,3 +243,25 @@
     graph = make_graph(f, [llmemory.GCREF, int, llmemory.GCREF])
     regalloc = allocate_registers(graph)
     assert summary_regalloc(regalloc) == [('a', 1)] * 2 + [('c', 0)] * 2
+
+@given(strategies.lists(strategies.booleans()))
+def test_make_bitmask(boollist):
+    index, c = make_bitmask(boollist)
+    if index is None:
+        assert c is None
+    else:
+        assert 0 <= index < len(boollist)
+        assert boollist[index] == False
+        if c == c_NULL:
+            bitmask = 1
+        else:
+            assert c.concretetype == lltype.Signed
+            bitmask = c.value
+        while bitmask:
+            if bitmask & 1:
+                assert index >= 0
+                assert boollist[index] == False
+                boollist[index] = True
+            bitmask >>= 1
+            index -= 1
+    assert boollist == [True] * len(boollist)
diff --git a/rpython/rtyper/lltypesystem/lloperation.py 
b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -513,8 +513,12 @@
     'gc_rawrefcount_from_obj':          LLOp(sideeffects=False),
     'gc_rawrefcount_to_obj':            LLOp(sideeffects=False),
 
-    'gc_push_roots'       : LLOp(),
-    'gc_pop_roots'        : LLOp(),
+    'gc_push_roots'        : LLOp(),  # temporary: list of roots to save
+    'gc_pop_roots'         : LLOp(),  # temporary: list of roots to restore
+    'gc_enter_roots_frame' : LLOp(),  # reserve N entries, save local frame pos
+    'gc_leave_roots_frame' : LLOp(),  # restore shadowstack ptr from saved pos
+    'gc_save_root'         : LLOp(),  # save value Y in shadowstack pos X
+    'gc_restore_root'      : LLOp(),  # restore value Y from shadowstack pos X
 
     # ------- JIT & GC interaction, only for some GCs ----------
 
diff --git a/rpython/tool/algo/regalloc.py b/rpython/tool/algo/regalloc.py
--- a/rpython/tool/algo/regalloc.py
+++ b/rpython/tool/algo/regalloc.py
@@ -117,6 +117,13 @@
                 for v in block.getvariables():
                     print '\t', v, '\t', self.getcolor(v)
 
+    def find_num_colors(self):
+        if self._coloring:
+            numcolors = max(self._coloring.values()) + 1
+        else:
+            numcolors = 0
+        self.numcolors = numcolors
+
     def getcolor(self, v):
         return self._coloring[self._unionfind.find_rep(v)]
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy shadowstack-perf-2: in-progress, hypothesis testing of the bitmask encoding

Reply via email to