Author: Armin Rigo <[email protected]>
Branch: sandbox-2
Changeset: r97259:906b820ecdac
Date: 2019-08-26 10:51 +0200
http://bitbucket.org/pypy/pypy/changeset/906b820ecdac/
Log: in-progress: enforce a review on functions manipulating directly
low-level pointers or doing any other strange things
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -226,6 +226,38 @@
func._not_rpython_ = True
return func
+def sandbox_review(reviewed=False, check_caller=False, abort=False):
+ """Mark a function as reviewed for sandboxing purposes.
+ This should not be necessary on any function written in "normal" RPython
+ code, but only on functions using some lloperation that is not
+ whitelisted in rpython.translator.sandbox.graphchecker.
+
+ Call this with one of the three flags set to True:
+
+ *reviewed*: This function is fine and any other code can call it.
+ If the function contains external calls, they will still be replaced with
+ stubs using I/O to communicate with the parent process (as long as they
+ are not marked sandboxsafe themselves).
+
+ *check_caller*: This function is fine, but you should still check the
+ callers; they must all have a sandbox_review() as well.
+
+ *abort*: An abort is prepended to the function's code, making the
+ whole process abort if it is called at runtime.
+
+ """
+ assert reviewed + check_caller + abort == 1
+ def wrap(func):
+ assert not hasattr(func, '_sandbox_review_')
+ if reviewed:
+ func._sandbox_review_ = 'reviewed'
+ if check_caller:
+ func._sandbox_review_ = 'check_caller'
+ if abort:
+ func._sandbox_review_ = 'abort'
+ return func
+ return wrap
+
# ____________________________________________________________
diff --git a/rpython/rlib/rstack.py b/rpython/rlib/rstack.py
--- a/rpython/rlib/rstack.py
+++ b/rpython/rlib/rstack.py
@@ -6,6 +6,7 @@
import py
from rpython.rlib.objectmodel import we_are_translated, fetch_translated_config
+from rpython.rlib.objectmodel import sandbox_review
from rpython.rlib.rarithmetic import r_uint
from rpython.rlib import rgc
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -39,6 +40,7 @@
_stack_criticalcode_stop = llexternal('LL_stack_criticalcode_stop', [],
lltype.Void, lambda: None)
+@sandbox_review(reviewed=True)
def stack_check():
if not we_are_translated():
return
@@ -64,6 +66,7 @@
stack_check._always_inline_ = True
stack_check._dont_insert_stackcheck_ = True
+@sandbox_review(check_caller=True)
@rgc.no_collect
def stack_check_slowpath(current):
if ord(_stack_too_big_slowpath(current)):
@@ -72,6 +75,7 @@
stack_check_slowpath._dont_inline_ = True
stack_check_slowpath._dont_insert_stackcheck_ = True
+@sandbox_review(reviewed=True)
def stack_almost_full():
"""Return True if the stack is more than 15/16th full."""
if not we_are_translated():
diff --git a/rpython/rtyper/lltypesystem/rffi.py
b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -9,6 +9,7 @@
from rpython.tool.sourcetools import func_with_new_name
from rpython.rlib.objectmodel import Symbolic, specialize, not_rpython
from rpython.rlib.objectmodel import keepalive_until_here, enforceargs
+from rpython.rlib.objectmodel import sandbox_review
from rpython.rlib import rarithmetic, rgc
from rpython.rtyper.extregistry import ExtRegistryEntry
from rpython.rlib.unroll import unrolling_iterable
@@ -214,6 +215,8 @@
#
call_external_function = func_with_new_name(call_external_function,
'ccall_' + name)
+ call_external_function = sandbox_review(check_caller=True)(
+ call_external_function)
# don't inline, as a hack to guarantee that no GC pointer is alive
# anywhere in call_external_function
else:
@@ -251,6 +254,8 @@
'ccall_' + name)
call_external_function = jit.dont_look_inside(
call_external_function)
+ call_external_function = sandbox_review(check_caller=True)(
+ call_external_function)
def _oops():
raise AssertionError("can't pass (any more) a unicode string"
@@ -329,8 +334,12 @@
# for debugging, stick ll func ptr to that
wrapper._ptr = funcptr
wrapper = func_with_new_name(wrapper, name)
+ wrapper = sandbox_review(reviewed=True)(wrapper)
return wrapper
+def sandbox_check_type(TYPE):
+ return not isinstance(TYPE, lltype.Primitive) or TYPE == llmemory.Address
+
class CallbackHolder:
def __init__(self):
@@ -838,6 +847,7 @@
# str -> (buf, llobj, flag)
# Can't inline this because of the raw address manipulation.
+ @sandbox_review(reviewed=True)
@jit.dont_look_inside
def get_nonmovingbuffer_ll(data):
"""
@@ -891,6 +901,7 @@
get_nonmovingbuffer_ll._annenforceargs_ = [strtype]
+ @sandbox_review(reviewed=True)
@jit.dont_look_inside
def get_nonmovingbuffer_ll_final_null(data):
tup = get_nonmovingbuffer_ll(data)
@@ -902,6 +913,7 @@
# args-from-tuple-returned-by-get_nonmoving_buffer() -> None
# Can't inline this because of the raw address manipulation.
+ @sandbox_review(reviewed=True)
@jit.dont_look_inside
def free_nonmovingbuffer_ll(buf, llobj, flag):
"""
diff --git a/rpython/rtyper/lltypesystem/rstr.py
b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -3,7 +3,8 @@
from rpython.annotator import model as annmodel
from rpython.rlib import jit, types, objectmodel, rgc
from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
- ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
+ ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline,
+ sandbox_review)
from rpython.rlib.signature import signature
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rtyper.error import TyperError
@@ -59,6 +60,7 @@
llmemory.itemoffsetof(TP.chars, 0) +
llmemory.sizeof(CHAR_TP) * item)
+ @sandbox_review(check_caller=True)
@signature(types.any(), types.any(), types.int(), returns=types.any())
@specialize.arg(0)
def _get_raw_buf(TP, src, ofs):
@@ -75,6 +77,7 @@
_get_raw_buf._always_inline_ = True
@jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart,
length)')
+ @sandbox_review(reviewed=True)
@signature(types.any(), types.any(), types.int(), types.int(),
types.int(), returns=types.none())
def copy_string_contents(src, dst, srcstart, dststart, length):
"""Copies 'length' characters from the 'src' string to the 'dst'
@@ -112,6 +115,7 @@
copy_string_contents = func_with_new_name(copy_string_contents,
'copy_%s_contents' % name)
+ @sandbox_review(reviewed=True)
@jit.oopspec('stroruni.copy_string_to_raw(src, ptrdst, srcstart, length)')
def copy_string_to_raw(src, ptrdst, srcstart, length):
"""
@@ -141,6 +145,7 @@
copy_string_to_raw._always_inline_ = True
copy_string_to_raw = func_with_new_name(copy_string_to_raw,
'copy_%s_to_raw' % name)
+ @sandbox_review(reviewed=True)
@jit.dont_look_inside
@signature(types.any(), types.any(), types.int(), types.int(),
returns=types.none())
@@ -1258,6 +1263,7 @@
return hop.gendirectcall(cls.ll_join_strs, size, vtemp)
@staticmethod
+ @sandbox_review(reviewed=True)
@jit.dont_look_inside
def ll_string2list(RESLIST, src):
length = len(src.chars)
diff --git a/rpython/translator/backendopt/all.py
b/rpython/translator/backendopt/all.py
--- a/rpython/translator/backendopt/all.py
+++ b/rpython/translator/backendopt/all.py
@@ -113,7 +113,7 @@
if config.profile_based_inline and not secondary:
threshold = config.profile_based_inline_threshold
heuristic = get_function(config.profile_based_inline_heuristic)
- inline.instrument_inline_candidates(graphs, threshold)
+ inline.instrument_inline_candidates(translator, graphs, threshold)
counters = translator.driver_instrument_result(
config.profile_based_inline)
n = len(counters)
diff --git a/rpython/translator/backendopt/inline.py
b/rpython/translator/backendopt/inline.py
--- a/rpython/translator/backendopt/inline.py
+++ b/rpython/translator/backendopt/inline.py
@@ -548,7 +548,8 @@
return (0.9999 * measure_median_execution_cost(graph) +
count), True # may be NaN
-def inlinable_static_callers(graphs, store_calls=False, ok_to_call=None):
+def inlinable_static_callers(translator, graphs, store_calls=False,
+ ok_to_call=None):
if ok_to_call is None:
ok_to_call = set(graphs)
result = []
@@ -558,6 +559,7 @@
else:
result.append((parentgraph, graph))
#
+ dont_inline = make_dont_inline_checker(translator)
for parentgraph in graphs:
for block in parentgraph.iterblocks():
for op in block.operations:
@@ -565,13 +567,12 @@
funcobj = op.args[0].value._obj
graph = getattr(funcobj, 'graph', None)
if graph is not None and graph in ok_to_call:
- if getattr(getattr(funcobj, '_callable', None),
- '_dont_inline_', False):
+ if dont_inline(funcobj):
continue
add(parentgraph, block, op, graph)
return result
-def instrument_inline_candidates(graphs, threshold):
+def instrument_inline_candidates(translator, graphs, threshold):
cache = {None: False}
def candidate(graph):
try:
@@ -581,6 +582,7 @@
cache[graph] = res
return res
n = 0
+ dont_inline = make_dont_inline_checker(translator)
for parentgraph in graphs:
for block in parentgraph.iterblocks():
ops = block.operations
@@ -592,8 +594,7 @@
funcobj = op.args[0].value._obj
graph = getattr(funcobj, 'graph', None)
if graph is not None:
- if getattr(getattr(funcobj, '_callable', None),
- '_dont_inline_', False):
+ if dont_inline(funcobj):
continue
if candidate(graph):
tag = Constant('inline', Void)
@@ -610,6 +611,18 @@
return (hasattr(graph, 'func') and
getattr(graph.func, '_always_inline_', None))
+def make_dont_inline_checker(translator):
+ sandbox = translator.config.translation.sandbox
+
+ def dont_inline(funcobj):
+ func = getattr(funcobj, '_callable', None)
+ if sandbox:
+ review = getattr(func, '_sandbox_review_', None)
+ if review is not None and review != 'check_caller':
+ return True
+ return getattr(func, '_dont_inline_', False)
+ return dont_inline
+
def auto_inlining(translator, threshold=None,
callgraph=None,
call_count_pred=None,
@@ -621,7 +634,7 @@
callers = {} # {graph: {graphs-that-call-it}}
callees = {} # {graph: {graphs-that-it-calls}}
if callgraph is None:
- callgraph = inlinable_static_callers(translator.graphs)
+ callgraph = inlinable_static_callers(translator, translator.graphs)
for graph1, graph2 in callgraph:
callers.setdefault(graph2, {})[graph1] = True
callees.setdefault(graph1, {})[graph2] = True
@@ -727,7 +740,8 @@
if not hasattr(graph, 'exceptiontransformed')])
else:
ok_to_call = None
- callgraph = inlinable_static_callers(graphs, ok_to_call=ok_to_call)
+ callgraph = inlinable_static_callers(translator, graphs,
+ ok_to_call=ok_to_call)
count = auto_inlining(translator, threshold, callgraph=callgraph,
heuristic=heuristic,
call_count_pred=call_count_pred)
diff --git a/rpython/translator/backendopt/test/test_inline.py
b/rpython/translator/backendopt/test/test_inline.py
--- a/rpython/translator/backendopt/test/test_inline.py
+++ b/rpython/translator/backendopt/test/test_inline.py
@@ -100,7 +100,7 @@
call_count_pred = None
if call_count_check:
call_count_pred = lambda lbl: True
- instrument_inline_candidates(t.graphs, threshold)
+ instrument_inline_candidates(t, t.graphs, threshold)
if remove_same_as:
for graph in t.graphs:
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -65,6 +65,11 @@
def __init__(self, translator, entrypoint, config, gcpolicy=None,
gchooks=None, secondary_entrypoints=()):
+ #
+ if config.translation.sandbox:
+ assert not config.translation.thread
+ gchooks = None # no custom gc hooks
+ #
self.translator = translator
self.entrypoint = entrypoint
self.entrypoint_name = getattr(self.entrypoint, 'func_name', None)
diff --git a/rpython/translator/driver.py b/rpython/translator/driver.py
--- a/rpython/translator/driver.py
+++ b/rpython/translator/driver.py
@@ -412,6 +412,10 @@
if translator.annotator is not None:
translator.frozen = True
+ if self.config.translation.sandbox:
+ from rpython.translator.sandbox import graphchecker
+ graphchecker.check_all_graphs(self.translator)
+
standalone = self.standalone
get_gchooks = self.extra.get('get_gchooks', lambda: None)
gchooks = get_gchooks()
diff --git a/rpython/translator/sandbox/graphchecker.py
b/rpython/translator/sandbox/graphchecker.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/sandbox/graphchecker.py
@@ -0,0 +1,112 @@
+"""Logic to check the operations in all the user graphs.
+This runs at the start of the database-c step, so it excludes the
+graphs produced later, notably for the GC. These are "low-level"
+graphs that are assumed to be safe.
+"""
+
+from rpython.flowspace.model import SpaceOperation, Constant
+from rpython.rtyper.rmodel import inputconst
+from rpython.rtyper.lltypesystem import lltype, llmemory, rstr
+from rpython.rtyper.lltypesystem.rffi import sandbox_check_type
+from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS
+from rpython.translator.unsimplify import varoftype
+from rpython.tool.ansi_print import AnsiLogger
+
+class UnsafeException(Exception):
+ pass
+
+log = AnsiLogger("sandbox")
+
+safe_operations = set([
+ 'keepalive', 'threadlocalref_get', 'threadlocalref_store',
+ 'malloc', 'malloc_varsize', 'free',
+ 'getfield', 'getarrayitem', 'getinteriorfield',
+ 'gc_thread_run',
+ ])
+gc_set_operations = set([
+ 'setfield', 'setarrayitem', 'setinteriorfield',
+ ])
+for opname, opdesc in LL_OPERATIONS.items():
+ if opdesc.tryfold:
+ safe_operations.add(opname)
+
+def graph_review(graph):
+ return getattr(getattr(graph, 'func', None), '_sandbox_review_', None)
+
+def make_abort_graph(graph):
+ ll_err = rstr.conststr("reached forbidden function %r" % (graph.name,))
+ c_err = inputconst(lltype.typeOf(ll_err), ll_err)
+ op = SpaceOperation('debug_fatalerror', [c_err], varoftype(lltype.Void))
+ graph.startblock.operations.insert(0, op)
+
+
+
+class GraphChecker(object):
+
+ def __init__(self, translator):
+ self.translator = translator
+
+ def graph_is_unsafe(self, graph):
+ for block, op in graph.iterblockops():
+ opname = op.opname
+
+ if opname in safe_operations:
+ pass
+
+ elif opname in gc_set_operations:
+ if op.args[0].concretetype.TO._gckind != 'gc':
+ return "non-GC memory write: %r" % (op,)
+
+ elif opname == 'direct_call':
+ c_target = op.args[0]
+ assert isinstance(c_target, Constant)
+ TYPE = lltype.typeOf(c_target.value)
+ assert isinstance(TYPE.TO, lltype.FuncType)
+ obj = c_target.value._obj
+ if hasattr(obj, 'graph'):
+ g2 = obj.graph
+ if graph_review(g2) == 'check_caller':
+ return "caller has not been checked: %r" % (op,)
+ elif getattr(obj, 'sandboxsafe', False):
+ pass
+ elif getattr(obj, 'external', None) is not None:
+ # either obj._safe_not_sandboxed is True, and then it's
+ # fine; or obj._safe_not_sandboxed is False, and then
+ # this will be transformed into a stdin/stdout stub
+ pass
+ else:
+ return "direct_call to %r" % (obj,)
+
+ elif opname == 'force_cast':
+ if sandbox_check_type(op.result.concretetype):
+ return "force_cast to pointer type: %r" % (op,)
+ if sandbox_check_type(op.args[0].concretetype):
+ return "force_cast from pointer type: %r" % (op,)
+ else:
+ return "unsupported llop: %r" % (opname,)
+
+ def check(self):
+ unsafe = {}
+ for graph in self.translator.graphs:
+ review = graph_review(graph)
+ if review is not None:
+ if review in ('reviewed', 'check_caller'):
+ continue
+ elif review == 'abort':
+ make_abort_graph(graph)
+ continue
+ else:
+ assert False, repr(review)
+
+ problem = self.graph_is_unsafe(graph)
+ if problem is not None:
+ unsafe[graph] = problem
+ if unsafe:
+ raise UnsafeException(
+ '\n'.join('%r: %s' % kv for kv in unsafe.items()))
+
+
+def check_all_graphs(translator):
+ log("Checking the graphs for sandbox-unsafe operations")
+ checker = GraphChecker(translator)
+ checker.check()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit