Author: Maciej Fijalkowski <[email protected]>
Branch: fast-newarray
Changeset: r63383:c8b472530e5f
Date: 2013-04-15 20:16 +0200
http://bitbucket.org/pypy/pypy/changeset/c8b472530e5f/
Log: finish implementing the fast path of varsize malloc (not quite done for
32-bit and not quite done for strings)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -415,6 +415,7 @@
return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
type_id, size,
False, False, False)
+
self.generate_function('malloc_nursery', malloc_nursery_slowpath,
[lltype.Signed])
@@ -567,6 +568,9 @@
def get_malloc_slowpath_addr(self):
return self.get_malloc_fn_addr('malloc_nursery')
+ def get_malloc_slowpath_array_addr(self):
+ return self.get_malloc_fn_addr('malloc_array')
+
# ____________________________________________________________
def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -173,6 +173,14 @@
[lltype.Signed, jitframe.JITFRAMEPTR],
lltype.Signed)
+ def malloc_array(itemsize, tid, num_elem):
+ self.calls.append((itemsize, tid, num_elem))
+ return 13
+
+ self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+ [lltype.Signed] * 3,
+ lltype.Signed)
+
def get_nursery_free_addr(self):
return rffi.cast(lltype.Signed, self.addrs)
@@ -182,6 +190,9 @@
def get_malloc_slowpath_addr(self):
return self.get_malloc_fn_addr('malloc_nursery')
+ def get_malloc_slowpath_array_addr(self):
+ return self.malloc_slowpath_array_fnptr
+
def check_nothing_in_nursery(self):
# CALL_MALLOC_NURSERY should not write anything in the nursery
for i in range(64):
@@ -254,7 +265,7 @@
[i0, i1, i2]
p0 = call_malloc_nursery_varsize(8, i0, descr=arraydescr)
p1 = call_malloc_nursery_varsize(5, i1, descr=arraydescr)
- guard_true(i0) [p0, p1]
+ guard_false(i0) [p0, p1]
'''
self.interpret(ops, [1, 2, 3],
namespace={'arraydescr': arraydescr})
@@ -275,22 +286,22 @@
self.cpu = self.getcpu(None)
ops = '''
[i0, i1, i2]
- p0 = call_malloc_nursery_varsize(8, i0)
- p1 = call_malloc_nursery_varsize(5, i1)
- guard_true(i0) [p0, p1]
+ p0 = call_malloc_nursery_varsize(8, i0, descr=arraydescr)
+ p1 = call_malloc_nursery_varsize(5, i1, descr=arraydescr)
+ p3 = call_malloc_nursery_varsize(5, i2, descr=arraydescr)
+ p4 = call_malloc_nursery_varsize(5, i2, descr=arraydescr)
+ # overflow
+ guard_false(i0) [p0, p1, p3, p4]
'''
- self.interpret(ops, [10, 2, 3])
+ A = lltype.GcArray(lltype.Signed)
+ arraydescr = self.cpu.arraydescrof(A)
+ arraydescr.tid = 15
+ self.interpret(ops, [10, 3, 3],
+ namespace={'arraydescr': arraydescr})
# check the returned pointers
gc_ll_descr = self.cpu.gc_ll_descr
- nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- ref = lambda n: self.cpu.get_ref_value(self.deadframe, n)
- assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
- assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 2*WORD + 8*1
- # check the nursery content and state
- gc_ll_descr.check_nothing_in_nursery()
- assert gc_ll_descr.addrs[0] == nurs_adr + 4 * WORD + 8*1 + 5*2
- # slowpath never called
- assert gc_ll_descr.calls == []
+ assert gc_ll_descr.calls == [(8, 15, 10), (5, 15, 3)]
+ # one fit, one was too large, one was not fitting
def test_malloc_slowpath(self):
def check(frame):
@@ -490,6 +501,15 @@
self.malloc_slowpath_fnptr = llhelper_args(malloc_slowpath,
[lltype.Signed],
lltype.Signed)
+
+ def malloc_array(itemsize, tid, num_elem):
+ import pdb
+ pdb.set_trace()
+
+ self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+ [lltype.Signed] * 3,
+ lltype.Signed)
+
self.all_nurseries = []
def init_nursery(self, nursery_size=None):
@@ -545,6 +565,9 @@
def get_malloc_slowpath_addr(self):
return self.malloc_slowpath_fnptr
+ def get_malloc_slowpath_array_addr(self):
+ return self.malloc_slowpath_array_fnptr
+
def get_nursery_free_addr(self):
return self.nursery_addr
@@ -805,7 +828,7 @@
cpu.compile_loop(loop.inputargs, loop.operations, token)
frame = lltype.cast_opaque_ptr(JITFRAMEPTR,
cpu.execute_token(token, 1, a))
-
+
assert getmap(frame).count('1') == 4
def test_call_gcmap_no_guard(self):
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -166,11 +166,14 @@
# store the gc pattern
mc.MOV_rs(ecx.value, WORD)
mc.MOV_br(ofs, ecx.value)
- addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
- mc.SUB_rr(edi.value, eax.value) # compute the size we want
- # the arg is already in edi
+ if varsize:
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
+ else:
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
mc.SUB_ri(esp.value, 16 - WORD)
if not varsize:
+ mc.SUB_rr(edi.value, eax.value) # compute the size we want
+ # the arg is already in edi
if IS_X86_32:
mc.MOV_sr(0, edi.value)
if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
@@ -179,7 +182,13 @@
# for tests only
mc.MOV_rr(esi.value, ebp.value)
else:
- return 0
+ if IS_X86_32:
+ xxx
+ else:
+ # offset is 1 extra for call + 1 for SUB above
+ mc.MOV_rs(edi.value, WORD * 3) # itemsize
+ mc.MOV_rs(esi.value, WORD * 5)
+ mc.MOV_rs(edx.value, WORD * 4) # lengthloc
extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
mc.MOV_bi(extra_ofs, 16)
mc.CALL(imm(addr))
@@ -2351,8 +2360,7 @@
self.mc.CMP(lengthloc, imm(maxlength))
self.mc.J_il8(rx86.Conditions['G'], 0) # patched later
jmp_adr0 = self.mc.get_relative_pos()
- self.mc.MOV(edi, heap(nursery_free_adr))
- self.mc.MOV(eax, edi)
+ self.mc.MOV(eax, heap(nursery_free_adr))
self.mc.MOV(edi, lengthloc)
self.mc.IMUL(edi, imm(itemsize))
self.mc.ADD(edi, imm(WORD * 2))
@@ -2365,6 +2373,9 @@
offset = self.mc.get_relative_pos() - jmp_adr0
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr0-1, chr(offset))
+ self.mc.MOV_si(WORD, itemsize)
+ self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc)
+ self.mc.MOV_si(WORD * 3, arraydescr.tid)
# save the gcmap
self.push_gcmap(self.mc, gcmap, mov=True)
self.mc.CALL(imm(self.malloc_slowpath_varsize))
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit