Chad Versace <chad.vers...@linux.intel.com> writes: > ir->remove(); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > index 324e665..0ff296c 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > @@ -923,6 +923,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst, > } > > void > +fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src) > +{ > + assert(intel->gen >= 7); > + > + /* src has the form of unpackHalf2x16's input: > + * > + * w z y x > + * |undef|undef|undef|0xhhhhllll|
I don't understand what "w z y x" mean here. I thought src was just a scalar uint (one uint per pixel). > + assert(src.type == BRW_REGISTER_TYPE_UD); > + src.type = BRW_REGISTER_TYPE_UW; > + if (src.vstride > 0) > + ++src.vstride; > + if (src.hstride > 0) > + ++src.hstride; This ++hstride/++vstride looks totally wrong to me. We use a vstride of 8 in 16-wide for temporaries -- doesn't this totally trash that? I think what you want here is an unconditional *= 2 for both values. > + src.subnr += 2; > + > + brw_F16TO32(p, dst, src); > @@ -2261,6 +2276,69 @@ fs_visitor::emit_fb_writes() > } > > void > +fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y) > +{ > + if (intel->gen < 7) > + assert(!"packHalf2x16 should be lowered"); > + > + /* uint dst; */ > + assert(dst.type == BRW_REGISTER_TYPE_UD); > + > + /* float x; */ > + assert(x.type == BRW_REGISTER_TYPE_F); > + > + /* float y; */ > + assert(y.type == BRW_REGISTER_TYPE_F); > + > + /* uint tmp; */ > + fs_reg tmp(this, glsl_type::uint_type); > + > + /* dst = f32to16(x); */ > + emit(BRW_OPCODE_F32TO16, dst, x); > + > + /* tmp = f32to16(y); */ > + emit(BRW_OPCODE_F32TO16, tmp, y); > + > + /* tmp <<= 16; */ > + emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u)); > + > + /* dst |= tmp; */ > + emit(BRW_OPCODE_OR, dst, dst, tmp); > +} These comments before every op aren't helping me, since they just say the same as what the code below says. Actually, I think this code can bet simpler, given: "The destination must be DWord aligned, the result is stored in the lower Word for each channel, data in the upper Word of the destination is untouched" So: emit(BRW_OPCODE_F32TO16, dst, y); emit(BRW_OPCODE_SHL, dst, dst, fs_reg(16u)); emit(BRW_OPCODE_F32TO16, dst, x); > +void > +fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0) > +{ > + if (intel->gen < 7) > + assert(!"unpackHalf2x16 should be lowered"); > + > + /* float dst; */ > + assert(dst.type == BRW_REGISTER_TYPE_F); > + > + /* uint src0; */ > + assert(src0.type == BRW_REGISTER_TYPE_UD); > + > + /* dst = f16to32(src0); */ > + emit(BRW_OPCODE_F16TO32, dst, src0); > +} Drop these comments.
pgplJxSxzpzPg.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev