Author: Armin Rigo <[email protected]>
Branch: sandbox-2
Changeset: r97259:906b820ecdac
Date: 2019-08-26 10:51 +0200
http://bitbucket.org/pypy/pypy/changeset/906b820ecdac/

Log:    in-progress: enforce a review on functions manipulating directly
        low-level pointers or doing any other strange things

diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -226,6 +226,38 @@
     func._not_rpython_ = True
     return func
 
+def sandbox_review(reviewed=False, check_caller=False, abort=False):
+    """Mark a function as reviewed for sandboxing purposes.
+    This should not be necessary on any function written in "normal" RPython
+    code, but only on functions using some lloperation that is not
+    whitelisted in rpython.translator.sandbox.graphchecker.
+
+    Call this with one of the three flags set to True:
+
+      *reviewed*: This function is fine and any other code can call it.
+      If the function contains external calls, they will still be replaced with
+      stubs using I/O to communicate with the parent process (as long as they
+      are not marked sandboxsafe themselves).
+
+      *check_caller*: This function is fine, but you should still check the
+      callers; they must all have a sandbox_review() as well.
+
+      *abort*: An abort is prepended to the function's code, making the
+      whole process abort if it is called at runtime.
+
+    """
+    assert reviewed + check_caller + abort == 1
+    def wrap(func):
+        assert not hasattr(func, '_sandbox_review_')
+        if reviewed:
+            func._sandbox_review_ = 'reviewed'
+        if check_caller:
+            func._sandbox_review_ = 'check_caller'
+        if abort:
+            func._sandbox_review_ = 'abort'
+        return func
+    return wrap
+
 
 # ____________________________________________________________
 
diff --git a/rpython/rlib/rstack.py b/rpython/rlib/rstack.py
--- a/rpython/rlib/rstack.py
+++ b/rpython/rlib/rstack.py
@@ -6,6 +6,7 @@
 import py
 
 from rpython.rlib.objectmodel import we_are_translated, fetch_translated_config
+from rpython.rlib.objectmodel import sandbox_review
 from rpython.rlib.rarithmetic import r_uint
 from rpython.rlib import rgc
 from rpython.rtyper.lltypesystem import lltype, rffi
@@ -39,6 +40,7 @@
 _stack_criticalcode_stop = llexternal('LL_stack_criticalcode_stop', [],
                                       lltype.Void, lambda: None)
 
+@sandbox_review(reviewed=True)
 def stack_check():
     if not we_are_translated():
         return
@@ -64,6 +66,7 @@
 stack_check._always_inline_ = True
 stack_check._dont_insert_stackcheck_ = True
 
+@sandbox_review(check_caller=True)
 @rgc.no_collect
 def stack_check_slowpath(current):
     if ord(_stack_too_big_slowpath(current)):
@@ -72,6 +75,7 @@
 stack_check_slowpath._dont_inline_ = True
 stack_check_slowpath._dont_insert_stackcheck_ = True
 
+@sandbox_review(reviewed=True)
 def stack_almost_full():
     """Return True if the stack is more than 15/16th full."""
     if not we_are_translated():
diff --git a/rpython/rtyper/lltypesystem/rffi.py 
b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -9,6 +9,7 @@
 from rpython.tool.sourcetools import func_with_new_name
 from rpython.rlib.objectmodel import Symbolic, specialize, not_rpython
 from rpython.rlib.objectmodel import keepalive_until_here, enforceargs
+from rpython.rlib.objectmodel import sandbox_review
 from rpython.rlib import rarithmetic, rgc
 from rpython.rtyper.extregistry import ExtRegistryEntry
 from rpython.rlib.unroll import unrolling_iterable
@@ -214,6 +215,8 @@
         #
         call_external_function = func_with_new_name(call_external_function,
                                                     'ccall_' + name)
+        call_external_function = sandbox_review(check_caller=True)(
+            call_external_function)
         # don't inline, as a hack to guarantee that no GC pointer is alive
         # anywhere in call_external_function
     else:
@@ -251,6 +254,8 @@
                                                         'ccall_' + name)
             call_external_function = jit.dont_look_inside(
                 call_external_function)
+            call_external_function = sandbox_review(check_caller=True)(
+                call_external_function)
 
     def _oops():
         raise AssertionError("can't pass (any more) a unicode string"
@@ -329,8 +334,12 @@
     # for debugging, stick ll func ptr to that
     wrapper._ptr = funcptr
     wrapper = func_with_new_name(wrapper, name)
+    wrapper = sandbox_review(reviewed=True)(wrapper)
     return wrapper
 
+def sandbox_check_type(TYPE):
+    return not isinstance(TYPE, lltype.Primitive) or TYPE == llmemory.Address
+
 
 class CallbackHolder:
     def __init__(self):
@@ -838,6 +847,7 @@
 
     # str -> (buf, llobj, flag)
     # Can't inline this because of the raw address manipulation.
+    @sandbox_review(reviewed=True)
     @jit.dont_look_inside
     def get_nonmovingbuffer_ll(data):
         """
@@ -891,6 +901,7 @@
     get_nonmovingbuffer_ll._annenforceargs_ = [strtype]
 
 
+    @sandbox_review(reviewed=True)
     @jit.dont_look_inside
     def get_nonmovingbuffer_ll_final_null(data):
         tup = get_nonmovingbuffer_ll(data)
@@ -902,6 +913,7 @@
 
     # args-from-tuple-returned-by-get_nonmoving_buffer() -> None
     # Can't inline this because of the raw address manipulation.
+    @sandbox_review(reviewed=True)
     @jit.dont_look_inside
     def free_nonmovingbuffer_ll(buf, llobj, flag):
         """
diff --git a/rpython/rtyper/lltypesystem/rstr.py 
b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -3,7 +3,8 @@
 from rpython.annotator import model as annmodel
 from rpython.rlib import jit, types, objectmodel, rgc
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
-    ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
+    ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline,
+    sandbox_review)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rtyper.error import TyperError
@@ -59,6 +60,7 @@
                 llmemory.itemoffsetof(TP.chars, 0) +
                 llmemory.sizeof(CHAR_TP) * item)
 
+    @sandbox_review(check_caller=True)
     @signature(types.any(), types.any(), types.int(), returns=types.any())
     @specialize.arg(0)
     def _get_raw_buf(TP, src, ofs):
@@ -75,6 +77,7 @@
     _get_raw_buf._always_inline_ = True
 
     @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, 
length)')
+    @sandbox_review(reviewed=True)
     @signature(types.any(), types.any(), types.int(), types.int(), 
types.int(), returns=types.none())
     def copy_string_contents(src, dst, srcstart, dststart, length):
         """Copies 'length' characters from the 'src' string to the 'dst'
@@ -112,6 +115,7 @@
     copy_string_contents = func_with_new_name(copy_string_contents,
                                               'copy_%s_contents' % name)
 
+    @sandbox_review(reviewed=True)
     @jit.oopspec('stroruni.copy_string_to_raw(src, ptrdst, srcstart, length)')
     def copy_string_to_raw(src, ptrdst, srcstart, length):
         """
@@ -141,6 +145,7 @@
     copy_string_to_raw._always_inline_ = True
     copy_string_to_raw = func_with_new_name(copy_string_to_raw, 
'copy_%s_to_raw' % name)
 
+    @sandbox_review(reviewed=True)
     @jit.dont_look_inside
     @signature(types.any(), types.any(), types.int(), types.int(),
                returns=types.none())
@@ -1258,6 +1263,7 @@
         return hop.gendirectcall(cls.ll_join_strs, size, vtemp)
 
     @staticmethod
+    @sandbox_review(reviewed=True)
     @jit.dont_look_inside
     def ll_string2list(RESLIST, src):
         length = len(src.chars)
diff --git a/rpython/translator/backendopt/all.py 
b/rpython/translator/backendopt/all.py
--- a/rpython/translator/backendopt/all.py
+++ b/rpython/translator/backendopt/all.py
@@ -113,7 +113,7 @@
     if config.profile_based_inline and not secondary:
         threshold = config.profile_based_inline_threshold
         heuristic = get_function(config.profile_based_inline_heuristic)
-        inline.instrument_inline_candidates(graphs, threshold)
+        inline.instrument_inline_candidates(translator, graphs, threshold)
         counters = translator.driver_instrument_result(
             config.profile_based_inline)
         n = len(counters)
diff --git a/rpython/translator/backendopt/inline.py 
b/rpython/translator/backendopt/inline.py
--- a/rpython/translator/backendopt/inline.py
+++ b/rpython/translator/backendopt/inline.py
@@ -548,7 +548,8 @@
     return (0.9999 * measure_median_execution_cost(graph) +
             count), True       # may be NaN
 
-def inlinable_static_callers(graphs, store_calls=False, ok_to_call=None):
+def inlinable_static_callers(translator, graphs, store_calls=False,
+                             ok_to_call=None):
     if ok_to_call is None:
         ok_to_call = set(graphs)
     result = []
@@ -558,6 +559,7 @@
         else:
             result.append((parentgraph, graph))
     #
+    dont_inline = make_dont_inline_checker(translator)
     for parentgraph in graphs:
         for block in parentgraph.iterblocks():
             for op in block.operations:
@@ -565,13 +567,12 @@
                     funcobj = op.args[0].value._obj
                     graph = getattr(funcobj, 'graph', None)
                     if graph is not None and graph in ok_to_call:
-                        if getattr(getattr(funcobj, '_callable', None),
-                                   '_dont_inline_', False):
+                        if dont_inline(funcobj):
                             continue
                         add(parentgraph, block, op, graph)
     return result
 
-def instrument_inline_candidates(graphs, threshold):
+def instrument_inline_candidates(translator, graphs, threshold):
     cache = {None: False}
     def candidate(graph):
         try:
@@ -581,6 +582,7 @@
             cache[graph] = res
             return res
     n = 0
+    dont_inline = make_dont_inline_checker(translator)
     for parentgraph in graphs:
         for block in parentgraph.iterblocks():
             ops = block.operations
@@ -592,8 +594,7 @@
                     funcobj = op.args[0].value._obj
                     graph = getattr(funcobj, 'graph', None)
                     if graph is not None:
-                        if getattr(getattr(funcobj, '_callable', None),
-                                   '_dont_inline_', False):
+                        if dont_inline(funcobj):
                             continue
                     if candidate(graph):
                         tag = Constant('inline', Void)
@@ -610,6 +611,18 @@
     return (hasattr(graph, 'func') and
             getattr(graph.func, '_always_inline_', None))
 
+def make_dont_inline_checker(translator):
+    sandbox = translator.config.translation.sandbox
+
+    def dont_inline(funcobj):
+        func = getattr(funcobj, '_callable', None)
+        if sandbox:
+            review = getattr(func, '_sandbox_review_', None)
+            if review is not None and review != 'check_caller':
+                return True
+        return getattr(func, '_dont_inline_', False)
+    return dont_inline
+
 def auto_inlining(translator, threshold=None,
                   callgraph=None,
                   call_count_pred=None,
@@ -621,7 +634,7 @@
     callers = {}     # {graph: {graphs-that-call-it}}
     callees = {}     # {graph: {graphs-that-it-calls}}
     if callgraph is None:
-        callgraph = inlinable_static_callers(translator.graphs)
+        callgraph = inlinable_static_callers(translator, translator.graphs)
     for graph1, graph2 in callgraph:
         callers.setdefault(graph2, {})[graph1] = True
         callees.setdefault(graph1, {})[graph2] = True
@@ -727,7 +740,8 @@
                                 if not hasattr(graph, 'exceptiontransformed')])
     else:
         ok_to_call = None
-    callgraph = inlinable_static_callers(graphs, ok_to_call=ok_to_call)
+    callgraph = inlinable_static_callers(translator, graphs,
+                                         ok_to_call=ok_to_call)
     count = auto_inlining(translator, threshold, callgraph=callgraph,
                           heuristic=heuristic,
                           call_count_pred=call_count_pred)
diff --git a/rpython/translator/backendopt/test/test_inline.py 
b/rpython/translator/backendopt/test/test_inline.py
--- a/rpython/translator/backendopt/test/test_inline.py
+++ b/rpython/translator/backendopt/test/test_inline.py
@@ -100,7 +100,7 @@
         call_count_pred = None
         if call_count_check:
             call_count_pred = lambda lbl: True
-            instrument_inline_candidates(t.graphs, threshold)
+            instrument_inline_candidates(t, t.graphs, threshold)
 
         if remove_same_as:
             for graph in t.graphs:
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -65,6 +65,11 @@
 
     def __init__(self, translator, entrypoint, config, gcpolicy=None,
                  gchooks=None, secondary_entrypoints=()):
+        #
+        if config.translation.sandbox:
+            assert not config.translation.thread
+            gchooks = None     # no custom gc hooks
+        #
         self.translator = translator
         self.entrypoint = entrypoint
         self.entrypoint_name = getattr(self.entrypoint, 'func_name', None)
diff --git a/rpython/translator/driver.py b/rpython/translator/driver.py
--- a/rpython/translator/driver.py
+++ b/rpython/translator/driver.py
@@ -412,6 +412,10 @@
         if translator.annotator is not None:
             translator.frozen = True
 
+        if self.config.translation.sandbox:
+            from rpython.translator.sandbox import graphchecker
+            graphchecker.check_all_graphs(self.translator)
+
         standalone = self.standalone
         get_gchooks = self.extra.get('get_gchooks', lambda: None)
         gchooks = get_gchooks()
diff --git a/rpython/translator/sandbox/graphchecker.py 
b/rpython/translator/sandbox/graphchecker.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/graphchecker.py
@@ -0,0 +1,112 @@
+"""Logic to check the operations in all the user graphs.
+This runs at the start of the database-c step, so it excludes the
+graphs produced later, notably for the GC.  These are "low-level"
+graphs that are assumed to be safe.
+"""
+
+from rpython.flowspace.model import SpaceOperation, Constant
+from rpython.rtyper.rmodel import inputconst
+from rpython.rtyper.lltypesystem import lltype, llmemory, rstr
+from rpython.rtyper.lltypesystem.rffi import sandbox_check_type
+from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS
+from rpython.translator.unsimplify import varoftype
+from rpython.tool.ansi_print import AnsiLogger
+
+class UnsafeException(Exception):
+    pass
+
+log = AnsiLogger("sandbox")
+
+safe_operations = set([
+    'keepalive', 'threadlocalref_get', 'threadlocalref_store',
+    'malloc', 'malloc_varsize', 'free',
+    'getfield', 'getarrayitem', 'getinteriorfield',
+    'gc_thread_run',
+    ])
+gc_set_operations = set([
+    'setfield', 'setarrayitem', 'setinteriorfield',
+    ])
+for opname, opdesc in LL_OPERATIONS.items():
+    if opdesc.tryfold:
+        safe_operations.add(opname)
+
+def graph_review(graph):
+    return getattr(getattr(graph, 'func', None), '_sandbox_review_', None)
+
+def make_abort_graph(graph):
+    ll_err = rstr.conststr("reached forbidden function %r" % (graph.name,))
+    c_err = inputconst(lltype.typeOf(ll_err), ll_err)
+    op = SpaceOperation('debug_fatalerror', [c_err], varoftype(lltype.Void))
+    graph.startblock.operations.insert(0, op)
+
+
+
+class GraphChecker(object):
+
+    def __init__(self, translator):
+        self.translator = translator
+
+    def graph_is_unsafe(self, graph):
+        for block, op in graph.iterblockops():
+            opname = op.opname
+
+            if opname in safe_operations:
+                pass
+
+            elif opname in gc_set_operations:
+                if op.args[0].concretetype.TO._gckind != 'gc':
+                    return "non-GC memory write: %r" % (op,)
+
+            elif opname == 'direct_call':
+                c_target = op.args[0]
+                assert isinstance(c_target, Constant)
+                TYPE = lltype.typeOf(c_target.value)
+                assert isinstance(TYPE.TO, lltype.FuncType)
+                obj = c_target.value._obj
+                if hasattr(obj, 'graph'):
+                    g2 = obj.graph
+                    if graph_review(g2) == 'check_caller':
+                        return "caller has not been checked: %r" % (op,)
+                elif getattr(obj, 'sandboxsafe', False):
+                    pass
+                elif getattr(obj, 'external', None) is not None:
+                    # either obj._safe_not_sandboxed is True, and then it's
+                    # fine; or obj._safe_not_sandboxed is False, and then
+                    # this will be transformed into a stdin/stdout stub
+                    pass
+                else:
+                    return "direct_call to %r" % (obj,)
+
+            elif opname == 'force_cast':
+                if sandbox_check_type(op.result.concretetype):
+                    return "force_cast to pointer type: %r" % (op,)
+                if sandbox_check_type(op.args[0].concretetype):
+                    return "force_cast from pointer type: %r" % (op,)
+            else:
+                return "unsupported llop: %r" % (opname,)
+
+    def check(self):
+        unsafe = {}
+        for graph in self.translator.graphs:
+            review = graph_review(graph)
+            if review is not None:
+                if review in ('reviewed', 'check_caller'):
+                    continue
+                elif review == 'abort':
+                    make_abort_graph(graph)
+                    continue
+                else:
+                    assert False, repr(review)
+
+            problem = self.graph_is_unsafe(graph)
+            if problem is not None:
+                unsafe[graph] = problem
+        if unsafe:
+            raise UnsafeException(
+                '\n'.join('%r: %s' % kv for kv in unsafe.items()))
+
+
+def check_all_graphs(translator):
+    log("Checking the graphs for sandbox-unsafe operations")
+    checker = GraphChecker(translator)
+    checker.check()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to