Author: Richard Plangger <planri...@gmail.com> Branch: ppc-vsx-support Changeset: r85449:946ddc31e87b Date: 2016-06-29 15:01 +0200 http://bitbucket.org/pypy/pypy/changeset/946ddc31e87b/
Log: change the code builder to resolve an issue with the (float) vector registers 32-63, because 0-31 overlap with the floating point registers. Currently 0-31 are not used for vector register allocation; they come in handy to splat floating point values diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py --- a/rpython/jit/backend/ppc/codebuilder.py +++ b/rpython/jit/backend/ppc/codebuilder.py @@ -65,6 +65,7 @@ XX2 = Form("fvrT", "fvrB", "XO6") XX3 = Form("fvrT", "fvrA", "fvrB", "XO9") XX3_2 = Form("fvrT", "fvrA", "fvrB", "OE", "XO11") +XX3_splat = Form("fvrT", "fvrA", "fvrB", "DM", "XO13", "OE") XV = Form("ivrT", "rA", "rB", "XO1") VX = Form("ivrT", "ivrA", "ivrB", "XO8") VC = Form("ivrT", "ivrA", "ivrB", "XO12", "OE") @@ -653,6 +654,14 @@ vcmpequdx = VC(4, XO12=199, OE=1) vcmpequd = VC(4, XO12=199, OE=0) + # permute/splat + # splat low of A, and low of B + xxspltdl = XX3_splat(60, XO13=10, OE=0, DM=0b00) + # splat high of A, and high of B + xxspltdh = XX3_splat(60, XO13=10, OE=0, DM=0b11) + # generic splat + xxspltd = XX3_splat(60, XO13=10, OE=0) + # INTEGER # ------- diff --git a/rpython/jit/backend/ppc/ppc_field.py b/rpython/jit/backend/ppc/ppc_field.py --- a/rpython/jit/backend/ppc/ppc_field.py +++ b/rpython/jit/backend/ppc/ppc_field.py @@ -44,8 +44,8 @@ "TO": ( 6, 10), "UIMM": (16, 31), "fvrT": (6, 31, 'unsigned', regname._V, 'overlap'), - "fvrA": (11, 31, 'unsigned', regname._V, 'overlap'), - "fvrB": (16, 31, 'unsigned', regname._V, 'overlap'), + "fvrA": (11, 29, 'unsigned', regname._V, 'overlap'), + "fvrB": (16, 30, 'unsigned', regname._V, 'overlap'), # low vector register T (low in a sense: # can only address 32 vector registers) "ivrT": (6, 10, 'unsigned', regname._V), @@ -66,6 +66,8 @@ "XO10": (26, 31), "XO11": (22, 28), "XO12": (22, 31), + "XO13": (24, 28), + "DM": (22, 23), "LL": ( 9, 10), "SIM": (11, 15), } diff --git a/rpython/jit/backend/ppc/rassemblermaker.py b/rpython/jit/backend/ppc/rassemblermaker.py --- 
a/rpython/jit/backend/ppc/rassemblermaker.py +++ b/rpython/jit/backend/ppc/rassemblermaker.py @@ -50,10 +50,10 @@ body.append('vrT1 = (%s & 31) << 21 | (%s & 32) >> 5' % (value, value)) value = 'vrT1' elif field.name == 'fvrA': - body.append('fvrA1 = ((%s & 31) << 15 | (%s & 32) >> 5) << 2' % (value, value)) + body.append('fvrA1 = (%s & 31) << 14 | (%s & 32) >> 5' % (value, value)) value = 'fvrA1' elif field.name == 'fvrB': - body.append('fvrB1 = ((%s & 31) << 10 | (%s & 32) >> 5) << 1' % (value, value)) + body.append('fvrB1 = (%s & 31) << 10 | (%s & 32) >> 5' % (value, value)) value = 'fvrB1' if isinstance(field, IField): body.append('v |= ((%3s >> 2) & r_uint(%#05x)) << 2' % (value, field.mask)) diff --git a/rpython/jit/backend/ppc/register.py b/rpython/jit/backend/ppc/register.py --- a/rpython/jit/backend/ppc/register.py +++ b/rpython/jit/backend/ppc/register.py @@ -4,7 +4,7 @@ ALL_REGS = [RegisterLocation(i) for i in range(32)] ALL_FLOAT_REGS = [FPRegisterLocation(i) for i in range(32)] ALL_INTEGER_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(32)] -ALL_FLOAT_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(64)] +ALL_FLOAT_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(32,64)] r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16,\ r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31\ @@ -19,12 +19,12 @@ ivr24, ivr25, ivr26, ivr27, ivr28, ivr29, ivr30, ivr31\ = ALL_FLOAT_REGS -vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7, vr8, vr9, vr10, vr11, vr12, vr13, \ - vr14, vr15, vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23, vr24, vr25, \ - vr26, vr27, vr28, vr29, vr30, vr31, vr32, vr33, vr34, vr35, vr36, vr37, \ - vr38, vr39, vr40, vr41, vr42, vr43, vr44, vr45, vr46, vr47, vr48, \ - vr49, vr50, vr51, vr52, vr53, vr54, vr55, vr56, vr57, vr58, vr59, vr60, \ - vr61, vr62, vr63 = ALL_FLOAT_VECTOR_REGS +# the first 32 vector register are partly shared with the normal floating point +# registers, since there are 
so many registers, we just take the upper 32 ones +vr32, vr33, vr34, vr35, vr36, vr37, \ + vr38, vr39, vr40, vr41, vr42, vr43, vr44, vr45, vr46, vr47, vr48, \ + vr49, vr50, vr51, vr52, vr53, vr54, vr55, vr56, vr57, vr58, vr59, vr60, \ + vr61, vr62, vr63 = ALL_FLOAT_VECTOR_REGS NONVOLATILES = [r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py --- a/rpython/jit/backend/ppc/vector_ext.py +++ b/rpython/jit/backend/ppc/vector_ext.py @@ -479,7 +479,10 @@ self.mc.load_imm(tloc, srcloc.value) self.mc.lxvd2x(res, 0, tloc.value) elif size == 8: - self.mc.vmr(res, srcloc.value, srcloc.value) + # splat the low of src to both slots in res + src = srcloc.value + #import pdb; pdb.set_trace() + self.mc.xxspltdl(res, src, src) else: notimplemented("[ppc/assembler] vec expand in this combination not supported") @@ -804,7 +807,7 @@ l0 = self.expand_float(op.bytesize, arg) res = self.force_allocate_vector_reg(op) else: - l0 = self.ensure_vector_reg(arg) + l0 = self.ensure_reg(arg) res = self.force_allocate_vector_reg(op) return [res, l0] diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -440,7 +440,7 @@ myjitdriver.jit_merge_point() va[i] = va[i] + variable i += 1 - val = va[0] + val = va[d//2] lltype.free(va, flavor='raw') return val res = self.meta_interp(f, [60,58.4547]) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit