We need to emit two 32-bit load messages to load a full dvec4. If only
one or two double components are needed, dead-code elimination will
remove the second message.

We also need to shuffle the result of the 32-bit messages to form
valid 64-bit SIMD4x2 data.
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 46 +++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 04e95a7..f234e65 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -829,31 +829,49 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                                nir->info.num_ubos - 1);
       }
 
-      src_reg offset;
+      src_reg offset_reg;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset = brw_imm_ud(const_offset->u32[0] & ~15);
+         offset_reg = src_reg(this, glsl_type::uint_type);
+         emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset->u32[0] & 
~15)));
       } else {
-         offset = get_nir_src(instr->src[1], nir_type_uint32, 1);
+         offset_reg = get_nir_src(instr->src[1], nir_type_uint32, 1);
       }
 
-      src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
-      packed_consts.type = dest.type;
+      src_reg packed_consts;
+      if (nir_dest_bit_size(instr->dest) == 32) {
+         packed_consts = src_reg(this, glsl_type::vec4_type);
+         emit_pull_constant_load_reg(dst_reg(packed_consts),
+                                     surf_index,
+                                     offset_reg,
+                                     NULL, NULL /* before_block/inst */);
+      } else {
+         src_reg temp = src_reg(this, glsl_type::dvec4_type);
+         src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
+
+         emit_pull_constant_load_reg(dst_reg(temp_float),
+                                     surf_index, offset_reg, NULL, NULL);
 
-      emit_pull_constant_load_reg(dst_reg(packed_consts),
-                                  surf_index,
-                                  offset,
-                                  NULL, NULL /* before_block/inst */);
+         emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
+         emit_pull_constant_load_reg(dst_reg(offset(temp_float, 1)),
+                                     surf_index, offset_reg, NULL, NULL);
+
+         packed_consts = src_reg(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(dst_reg(packed_consts), temp, false);
+      }
 
       packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
       if (const_offset) {
-         packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4);
+         unsigned type_size = type_sz(dest.type);
+         packed_consts.swizzle +=
+            BRW_SWIZZLE4(const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size);
       }
 
-      emit(MOV(dest, packed_consts));
+      emit(MOV(dest, retype(packed_consts, dest.type)));
+
       break;
    }
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to