Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78012:6497803fbba6
Date: 2015-06-10 14:10 +0200
http://bitbucket.org/pypy/pypy/changeset/6497803fbba6/
Log: added a new field to the resume guard descr to handle accumulation
variables at guard exit; implemented the accumulation for float
(64/32 bit) for x86 at guard exit (still need to fill in the info in
the new field of resume guard descrs)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1809,6 +1809,12 @@
"""
self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
startpos = self.mc.get_relative_pos()
+ # accumulation of a vectorized loop needs to patch
+ # some vector registers (e.g. sum).
+ if guardtok.faildescr.update_at_exit is not None:
+ for pae in guardtok.faildescr.update_at_exit:
+ self._update_at_exit(guardtok.fail_locs,pae)
+ guardtok.fail_descr.update_at_exit = None
fail_descr, target = self.store_info_on_descr(startpos, guardtok)
self.mc.PUSH(imm(fail_descr))
self.push_gcmap(self.mc, guardtok.gcmap, push=True)
@@ -2471,6 +2477,41 @@
# vector operations
# ________________________________________
+    def _accum_update_at_exit(self, fail_locs, accum_descr):
+        """ If accumulation is done in this loop, at the guard exit
+        some vector registers must be adjusted to yield the correct value.
+
+        fail_locs is indexed with accum_descr.position to find the
+        location of the accumulator; accum_descr also provides the
+        vector_var, the scalar_var and the accumulation operator.
+        Only the '+' operator is handled, every other operator raises
+        NotImplementedError. """
+        loc = fail_locs[accum_descr.position]
+        vector_var = accum_descr.vector_var
+        scalar_var = accum_descr.scalar_var
+        if accum_descr.operator == '+':
+            # reduction using plus
+            self._accum_reduce_sum(vector_var, scalar_var, loc)
+        else:
+            raise NotImplementedError("accum operator %s not implemented" %
+                                      (accum_descr.operator))
+
+    def _accum_reduce_sum(self, vector_var, scalar_var, regloc):
+        """ Emit the horizontal adds that gather the sum of the float
+        elements of regloc at its first element position.
+
+        Handles float vectors with an item size of 8 or 4 bytes; anything
+        else raises NotImplementedError. """
+        assert isinstance(vector_var, BoxVector)
+        assert isinstance(scalar_var, Box)
+        #
+        if vector_var.gettype() == FLOAT:
+            itemsize = vector_var.getsize()
+            if itemsize == 8:
+                # a single horizontal add is enough:
+                # r = (r[0]+r[1],r[0]+r[1])
+                # the bits above the low 64 are dirty, which does not matter
+                self.mc.HADDPD(regloc, regloc)
+                return
+            if itemsize == 4:
+                # first pass:
+                # r = (r[0]+r[1],r[2]+r[3],r[0]+r[1],r[2]+r[3])
+                self.mc.HADDPS(regloc, regloc)
+                # the second pass gathers the whole sum at the first
+                # element position; the bits above the low 32 are dirty,
+                # which does not matter
+                self.mc.HADDPS(regloc, regloc)
+                return
+        #
+        raise NotImplementedError("reduce sum for %s not impl." % vector_var)
+
def genop_vec_getarrayitem_raw(self, op, arglocs, resloc):
# considers item scale (raw_load does not)
base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
diff --git a/rpython/jit/metainterp/compile.py
b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -488,7 +488,8 @@
class ResumeGuardDescr(ResumeDescr):
_attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals',
- 'rd_frame_info_list', 'rd_pendingfields', 'status')
+ 'rd_frame_info_list', 'rd_pendingfields', 'status',
+ 'update_at_exit')
rd_numb = lltype.nullptr(NUMBERING)
rd_count = 0
@@ -498,6 +499,7 @@
rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
status = r_uint(0)
+ update_at_exit = None
def copy_all_attributes_from(self, other):
assert isinstance(other, ResumeGuardDescr)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit