[Mesa-dev] [PATCH] ac/nir_to_llvm: fix interpolateAt* for structs

2019-01-21 Thread Timothy Arceri
This fixes the arb_gpu_shader5 interpolateAt* tests that contain
structs.
---

 Extra piglit tests for structs:

 https://patchwork.freedesktop.org/patch/279466/

 src/amd/common/ac_nir_to_llvm.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 161a8b2c34..ea0da850f2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2924,12 +2924,10 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
 
}
 
-   LLVMValueRef array_idx = ctx->ac.i32_0;
+   LLVMValueRef attrib_idx = ctx->ac.i32_0;
while(deref_instr->deref_type != nir_deref_type_var) {
if (deref_instr->deref_type == nir_deref_type_array) {
-   unsigned array_size = 
glsl_get_aoa_size(deref_instr->type);
-   if (!array_size)
-   array_size = 1;
+   unsigned array_size = 
glsl_count_attribute_slots(deref_instr->type, false);
 
LLVMValueRef offset;
nir_const_value *const_value = 
nir_src_as_const_value(deref_instr->arr.index);
@@ -2942,23 +2940,26 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,
  LLVMConstInt(ctx->ac.i32, 
array_size, false), "");
}
 
-   array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, 
offset, "");
+   attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, 
offset, "");
deref_instr = nir_src_as_deref(deref_instr->parent);
+   } else if (deref_instr->deref_type == nir_deref_type_struct) {
+   LLVMValueRef offset;
+   unsigned sidx = deref_instr->strct.index;
+   deref_instr = nir_src_as_deref(deref_instr->parent);
+   offset = LLVMConstInt(ctx->ac.i32, 
glsl_get_record_location_offset(deref_instr->type, sidx), false);
+   attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, 
offset, "");
} else {
unreachable("Unsupported deref type");
}
 
}
 
-   unsigned input_array_size = glsl_get_aoa_size(var->type);
-   if (!input_array_size)
-   input_array_size = 1;
-
+   unsigned attrib_size = glsl_count_attribute_slots(var->type, false);
for (chan = 0; chan < 4; chan++) {
-   LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, 
input_array_size));
+   LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, 
attrib_size));
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
-   for (unsigned idx = 0; idx < input_array_size; ++idx) {
+   for (unsigned idx = 0; idx < attrib_size; ++idx) {
LLVMValueRef v, attr_number;
 
attr_number = LLVMConstInt(ctx->ac.i32, input_base + 
idx, false);
@@ -2981,7 +2982,7 @@ static LLVMValueRef visit_interp(struct ac_nir_context 
*ctx,

LLVMConstInt(ctx->ac.i32, idx, false), "");
}
 
-   result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, 
array_idx, "");
+   result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, 
attrib_idx, "");
 
}
return ac_build_varying_gather_values(>ac, result, 
instr->num_components,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi/nir: add missing piece for bindless image support

2019-01-21 Thread Timothy Arceri
This fixes some piglit tests and is was TGSI does.
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 65da6384c7..0ac737a062 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -939,6 +939,12 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 
/* dynamic_index is the bindless handle */
if (image) {
+   /* For simplicity, bindless image descriptors use fixed
+* 16-dword slots for now.
+*/
+   dynamic_index = LLVMBuildMul(ctx->ac.builder, 
dynamic_index,
+LLVMConstInt(ctx->i32, 2, 0), "");
+
return si_load_image_desc(ctx, list, dynamic_index, 
desc_type,
  dcc_off, true);
}
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] ac/nir_to_llvm: add bindless support for uniform handles

2019-01-21 Thread Timothy Arceri
---
 src/amd/common/ac_nir_to_llvm.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c558873bbe..161a8b2c34 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3303,6 +3303,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
}
 }
 
+static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
+   unsigned base_index,
+   unsigned constant_index,
+   LLVMValueRef dynamic_index)
+{
+   LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
+   LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+ LLVMConstInt(ctx->ac.i32, 
constant_index, 0), "");
+
+   /* Bindless uniforms are 64bit so multiple index by 8 */
+   index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 
8, 0), "");
+   offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
+
+   LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
+
+   LLVMValueRef ret = ac_build_buffer_load(>ac, ubo_index, 1, NULL, 
offset,
+   NULL, 0, false, false, true, 
true);
+
+   return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
+}
+
 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
 nir_deref_instr *deref_instr,
 enum ac_descriptor_type desc_type,
@@ -3353,8 +3374,15 @@ static LLVMValueRef get_sampler_desc(struct 
ac_nir_context *ctx,
descriptor_set = deref_instr->var->data.descriptor_set;
 
if (deref_instr->var->data.bindless) {
+   /* For now just assert on unhandled variable types */
+   assert(deref_instr->var->data.mode == nir_var_uniform);
+
base_index = deref_instr->var->data.driver_location;
bindless = true;
+
+   index = index ? index : ctx->ac.i32_0;
+   index = get_bindless_index_from_uniform(ctx, base_index,
+   constant_index, 
index);
} else
base_index = deref_instr->var->data.binding;
}
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93551] Divinity: Original Sin Enhanced Edition(Native) crash on start

2019-01-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93551

--- Comment #76 from Thomas Crider  ---
I found the issue - I had my mesa compiled without glvnd due to an issue with
it and dying light. the glxcmds patch only works without glvnd. I would need to
patch that portion into libglvnd for it to work, which isnt ideal

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 25/42] intel/compiler: workaround for SIMD8 half-float MAD in gen8

2019-01-21 Thread Jason Ekstrand
On Mon, Jan 21, 2019 at 4:55 AM Iago Toral  wrote:

> On Fri, 2019-01-18 at 11:51 -0600, Jason Ekstrand wrote:
>
> On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga 
> wrote:
>
> Broadwell hardware has a bug that manifests in SIMD8 executions of
> 16-bit MAD instructions when any of the sources is a Y or W component.
> We pack these components in the same SIMD register as components X and
> Z respectively, but starting at offset 16B (so they live in the second
> half of the register). The problem does not exist in SKL or later.
>
> We work around this issue by moving any such sources to a temporary
> starting at offset 0B. We want to do this after the main optimization loop
> to prevent copy-propagation and friends to undo the fix.
>
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/compiler/brw_fs.cpp | 48 +++
>  src/intel/compiler/brw_fs.h   |  1 +
>  2 files changed, 49 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 0b3ec94e2d2..d6096cd667d 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6540,6 +6540,48 @@ fs_visitor::optimize()
> validate();
>  }
>
> +/**
> + * Broadwell hardware has a bug that manifests in SIMD8 executions of
> 16-bit
> + * MAD instructions when any of the sources is a Y or W component. We pack
> + * these components in the same SIMD register as components X and Z
> + * respectively, but starting at offset 16B (so they live in the second
> half
> + * of the register).
>
>
> What exactly do you mean by a Y or W component?  Is this for the case
> where you have a scalar that happens to land at certain offsets?  Or does
> it apply to regular stride == 1 MADs?  If it applied in the stride == 1
> case, then I really don't see what this is doing to fix it.  It might help
> if you provided some before and after assembly example.
>
>
> This happens when a source to a half-float MAD instruction starts at
> offset 16B, which at the time I wrote this patch, happened when we were
> packing the Y component or the W component of a 16-bit vec2/vec4 into the
> second half of a SIMD register. I have an old version of that branch and
> the CTS tests and was able to reproduce the problem. Here is a code trace
> which is not working as expected, making some CTS tests fail:
>
> send(8) g16<1>UWg19<8,8,1>UD
> data ( DC byte scattered read, 0, 4) mlen 1
> rlen 1 { align1 1Q };
> mov(8)  g14.8<1>HF  g16<16,8,2>HF   { align1
> 1Q };
> mad(8)  g18<1>HF-g17<4,4,1>HF   g14.8<4,4,1>HF  g11<4,4,1>HF
> { align16 1Q };
> mov(8)  g21<2>W g18<8,8,1>W { align1
> 1Q };
>
> If we run this pass, we would produce the same code, only that we would
> replace the MAD instruction with this:
>
> mov(8)  g22<1>HFg14.8<8,8,1>HF  { align1
> WE_all 1Q };
> mad(8)  g18<1>HF-g17<4,4,1>HF   g22<4,4,1>HFg11<4,4,1>HF
> { align16 1Q };
>
> Which makes the test pass.
>
> It seems our compiler produces different code now than when I found this
> and these same tests now pass without this pass because we simply don't hit
> that scenario any more. It seems as if our shuffling code after a load is
> not attempting to pack 2 16-bit vector components in each VGRF anymore as
> we used to do when we implemented 16-bit storage and therefore we no longer
> hit this scenario. Independently of whether this change was intended or a
> bug, the hardware bug is there so I think we still want to have code to dea
> with it.
>

Thanks for the info!  It makes a lot more sense now.  Does this only apply
to wide reads or does it also fail with a stride of 0?  Also, is it only 16
or is it all non-zero values or everything >= 16?  It'd be good if we could
get more data.


>
> Also, this seems like something that should go in the new region
> restrictions pass as a special case in has_invalid_src_region.
>
>
> Yes, I guess that makes sense now that we have this pass. I'll put this
> there.
>
> --Jason
>
>
> + *
> + * We work around this issue by moving any such sources to a temporary
> + * starting at offset 0B. We want to do this after the main optimization
> loop
> + * to prevent copy-propagation and friends to undo the fix.
> + */
> +void
> +fs_visitor::fixup_hf_mad()
> +{
> +   if (devinfo->gen != 8)
> +  return;
> +
> +   bool progress = false;
> +
> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> +  if (inst->opcode != BRW_OPCODE_MAD ||
> +  inst->dst.type != BRW_REGISTER_TYPE_HF ||
> +  inst->exec_size > 8)
> + continue;
> +
> +  for (int i = 0; i < 3; i++) {
> + if (inst->src[i].offset > 0) {
> +assert(inst->src[i].type == BRW_REGISTER_TYPE_HF);
> +const fs_builder ibld =
> +   bld.at(block, inst).exec_all().group(inst->exec_size, 0);
> +fs_reg 

Re: [Mesa-dev] [PATCH] nir: Mark deref UBO and SSBO access as non-scalar

2019-01-21 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Mon, Jan 21, 2019 at 11:38 PM Jason Ekstrand  wrote:
>
> Fixes: 63b9aa2e2574 "spirv: Add support for using derefs for..."
> ---
>  src/compiler/nir/nir_lower_phis_to_scalar.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c 
> b/src/compiler/nir/nir_lower_phis_to_scalar.c
> index 3d7155c04f6..41ae19e8391 100644
> --- a/src/compiler/nir/nir_lower_phis_to_scalar.c
> +++ b/src/compiler/nir/nir_lower_phis_to_scalar.c
> @@ -86,7 +86,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
>case nir_intrinsic_load_deref: {
>   nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
>   return deref->mode == nir_var_shader_in ||
> -deref->mode == nir_var_uniform;
> +deref->mode == nir_var_uniform ||
> +deref->mode == nir_var_mem_ubo ||
> +deref->mode == nir_var_mem_ssbo;
>}
>
>case nir_intrinsic_interp_deref_at_centroid:
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: Mark deref UBO and SSBO access as non-scalar

2019-01-21 Thread Jason Ekstrand
Fixes: 63b9aa2e2574 "spirv: Add support for using derefs for..."
---
 src/compiler/nir/nir_lower_phis_to_scalar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c 
b/src/compiler/nir/nir_lower_phis_to_scalar.c
index 3d7155c04f6..41ae19e8391 100644
--- a/src/compiler/nir/nir_lower_phis_to_scalar.c
+++ b/src/compiler/nir/nir_lower_phis_to_scalar.c
@@ -86,7 +86,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
   case nir_intrinsic_load_deref: {
  nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
  return deref->mode == nir_var_shader_in ||
-deref->mode == nir_var_uniform;
+deref->mode == nir_var_uniform ||
+deref->mode == nir_var_mem_ubo ||
+deref->mode == nir_var_mem_ssbo;
   }
 
   case nir_intrinsic_interp_deref_at_centroid:
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109405] [BISECTED] [REGRESSION] KHR-GL45.enhanced_layouts.glsl_contant_immutablity aborts in second execution

2019-01-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109405

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Timothy Arceri  ---
Yeah I accidentally tested the initial patch with a 32bit build of mesa and
64bit build of piglit. This patch was reverted and the later replaced by the
commit in comment 2.

I'm assuming this is no longer an issue and closing. Please reopen if that is
not the case.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa] configure: EGL requirements only apply if EGL is built

2019-01-21 Thread Eric Engestrom
Fixes: 2c4f6ceeb466cb15df34 "configure.ac: Fail if egl x11 platform
 dependencies are not available"
Signed-off-by: Eric Engestrom 
---
 configure.ac | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure.ac b/configure.ac
index c7473d77eff2345145ff..7530f65ad4a09d9b311c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1877,6 +1877,7 @@ for plat in $platforms; do
 ;;
 
 drm)
+test "x$enable_egl" = "xyes" &&
 test "x$enable_gbm" = "xno" &&
 AC_MSG_ERROR([EGL platform drm needs gbm])
 DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v11 15/20] clover/spirv: Add functions for validating SPIR-V binaries

2019-01-21 Thread Pierre Moreau
Changes since:
* v10:
  - Reuse format_validation_msg in is_valid_spirv.
  - Remove LVL2STR macro in format_validation_msg.
* v9: Add `clover_cpp_std` to the overrides of the `libclspirv` target
  in Meson.
* v7: Add DEFINES to libclspirv and libclover, in autotools, as they
  would otherwise never know whether CLOVER_ALLOW_SPIRV has been
  defined (Dave Airlie)
* v6: Update the dependency name (meson) and the libs variable
  (Makefile) due to the replacement of llvm-spirv to the new
  official SPIRV-LLVM-Translator.
* v5: Changed to match the updated “clover/llvm: Allow translating from
  SPIR-V to LLVM IR” in the v6.

Signed-off-by: Pierre Moreau 
---
 src/gallium/state_trackers/clover/Makefile.am |  17 ++-
 .../state_trackers/clover/Makefile.sources|   4 +
 src/gallium/state_trackers/clover/meson.build |  11 +-
 .../clover/spirv/invocation.cpp   | 131 ++
 .../clover/spirv/invocation.hpp   |  47 +++
 5 files changed, 207 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/state_trackers/clover/spirv/invocation.cpp
 create mode 100644 src/gallium/state_trackers/clover/spirv/invocation.hpp

diff --git a/src/gallium/state_trackers/clover/Makefile.am 
b/src/gallium/state_trackers/clover/Makefile.am
index 2f42011104f..9bc078609fd 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -28,7 +28,7 @@ cl_HEADERS = \
$(top_srcdir)/include/CL/opencl.h
 endif
 
-noinst_LTLIBRARIES = libclover.la libclllvm.la
+noinst_LTLIBRARIES = libclover.la libclllvm.la libclspirv.la
 
 libclllvm_la_CXXFLAGS = \
$(CXX11_CXXFLAGS) \
@@ -47,13 +47,26 @@ libclllvm_la_SOURCES = $(LLVM_SOURCES)
 libclllvm_la_LDFLAGS = \
$(LLVMSPIRVLIB_LIBS)
 
+libclspirv_la_CXXFLAGS = \
+   $(CXX11_CXXFLAGS) \
+   $(CLOVER_STD_OVERRIDE) \
+   $(DEFINES) \
+   $(VISIBILITY_CXXFLAGS) \
+   $(SPIRV_TOOLS_CFLAGS)
+
+libclspirv_la_SOURCES = $(SPIRV_SOURCES)
+
+libclspirv_la_LDFLAGS = \
+   $(SPIRV_TOOLS_LIBS)
+
 libclover_la_CXXFLAGS = \
$(CXX11_CXXFLAGS) \
$(CLOVER_STD_OVERRIDE) \
+   $(DEFINES) \
$(VISIBILITY_CXXFLAGS)
 
 libclover_la_LIBADD = \
-   libclllvm.la
+   libclllvm.la libclspirv.la
 
 libclover_la_SOURCES = $(CPP_SOURCES)
 
diff --git a/src/gallium/state_trackers/clover/Makefile.sources 
b/src/gallium/state_trackers/clover/Makefile.sources
index 5167ca75af4..38f94981fb6 100644
--- a/src/gallium/state_trackers/clover/Makefile.sources
+++ b/src/gallium/state_trackers/clover/Makefile.sources
@@ -62,3 +62,7 @@ LLVM_SOURCES := \
llvm/invocation.hpp \
llvm/metadata.hpp \
llvm/util.hpp
+
+SPIRV_SOURCES := \
+   spirv/invocation.cpp \
+   spirv/invocation.hpp
diff --git a/src/gallium/state_trackers/clover/meson.build 
b/src/gallium/state_trackers/clover/meson.build
index c87fb61c1ab..6773efd39d4 100644
--- a/src/gallium/state_trackers/clover/meson.build
+++ b/src/gallium/state_trackers/clover/meson.build
@@ -52,6 +52,15 @@ libclllvm = static_library(
   override_options : clover_cpp_std,
 )
 
+libclspirv = static_library(
+  'clspirv',
+  files('spirv/invocation.cpp', 'spirv/invocation.hpp'),
+  include_directories : clover_incs,
+  cpp_args : [cpp_vis_args],
+  dependencies : [dep_spirv_tools],
+  override_options : clover_cpp_std,
+)
+
 clover_files = files(
   'api/context.cpp',
   'api/device.cpp',
@@ -112,6 +121,6 @@ libclover = static_library(
   [clover_files, sha1_h],
   include_directories : clover_incs,
   cpp_args : [clover_cpp_args, cpp_vis_args],
-  link_with : [libclllvm],
+  link_with : [libclllvm, libclspirv],
   override_options : clover_cpp_std,
 )
diff --git a/src/gallium/state_trackers/clover/spirv/invocation.cpp 
b/src/gallium/state_trackers/clover/spirv/invocation.cpp
new file mode 100644
index 000..d248986f2f3
--- /dev/null
+++ b/src/gallium/state_trackers/clover/spirv/invocation.cpp
@@ -0,0 +1,131 @@
+//
+// Copyright 2018 Pierre Moreau
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN 

Re: [Mesa-dev] [PATCH v3 21/42] intel/compiler: set correct precision fields for 3-source float instructions

2019-01-21 Thread Jason Ekstrand
On Mon, Jan 21, 2019 at 3:17 AM Iago Toral  wrote:

> On Fri, 2019-01-18 at 06:54 -0600, Jason Ekstrand wrote:
>
>
> On January 18, 2019 04:47:51 Iago Toral  wrote:
>
> On Thu, 2019-01-17 at 14:18 -0600, Jason Ekstrand wrote:
>
> On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga 
> wrote:
>
> Source0 and Destination extract the floating-point precision automatically
> from the SrcType and DstType instruction fields respectively when they are
> set to types :F or :HF. For Source1 and Source2 operands, we use the new
> 1-bit fields Src1Type and Src2Type, where 0 means normal precision and 1
> means half-precision. Since we always use the type of the destination for
> all operands when we emit 3-source instructions, we only need set Src1Type
> and Src2Type to 1 when we are emitting a half-precision instruction.
>
> v2:
>  - Set the bit separately for each source based on its type so we can
>do mixed floating-point mode in the future (Topi).
>
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/compiler/brw_eu_emit.c | 16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu_emit.c
> b/src/intel/compiler/brw_eu_emit.c
> index a785f96b650..2fa89f8a2a3 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -801,6 +801,22 @@ brw_alu3(struct brw_codegen *p, unsigned opcode,
> struct brw_reg dest,
>*/
>   brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
>   brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
> +
> + /* From the Bspec: Instruction types
> +  *
> +  * Three source instructions can use operands with mixed-mode
> +  * precision. When SrcType field is set to :f or :hf it defines
> +  * precision for source 0 only, and fields Src1Type and Src2Type
> +  * define precision for other source operands:
> +  *
> +  *   0b = :f. Single precision Float (32-bit).
> +  *   1b = :hf. Half precision Float (16-bit).
> +  */
> + if (src1.type == BRW_REGISTER_TYPE_HF)
> +brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
>
>
> Maybe worth throwing in an
>
> assert(src0.type == BRW_REGISTER_TYPE_F || src0.type ==
> BRW_REGISTER_TYPE_HF);
>
> just to be sure?
>
>
> If we are going to do this I guess we should also check the same for src2.
>
>
> Yeah, it'd probably be good to have a general assertion that the three
> sources have the same type with the caveat that they can vary to mix half
> and full float.  Maybe that would be better than something specific right
> here.
>
>
> Actually, I don't think that is going to work. There is this comment right
> above this hunk:
>
>  /* Set both the source and destination types based on dest.type,
>   * ignoring the source register types.  The MAD and LRP emitters
> ensure
>   * that all four types are float.  The BFE and BFI2 emitters,
> however,
>   * may send us mixed D and UD types and want us to ignore that
> and use
>   * the destination type.
>   */
>
> I guess we could still formulate a readable assertion using helpers,
> something like this:
>
> assert(all_types_32bit_integer(src0.type, src1.type, src2.type) ||
> all_types_mixed_float(src0.type, src1.type, src2.type) ||
> all_types_64bit_float(src0.type, src1.type, src2.type));
>
> but creating 3 helpers only for one assertion might a bit excessive... so
> maybe just adding the specific asserts for src1 and src2 when they are HF
> is more reasonable?
>

Yeah, I think we're spending way too much time trying to figure out how to
assert.  I think the right thing to do here is just to do it when src1 and
src2 are HF.  Don't over-complicate it.


>
> Either way, this and patch 20 are
>
> Reviewed-by: Jason Ekstrand 
>
>
> +
> + if (src2.type == BRW_REGISTER_TYPE_HF)
> +brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
>}
>
>
> And the same here (for src0 and src1)
>
> }
>
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 02/42] intel/compiler: add a NIR pass to lower conversions

2019-01-21 Thread Jason Ekstrand
On Mon, Jan 21, 2019 at 2:09 AM Iago Toral  wrote:

> On Fri, 2019-01-18 at 06:46 -0600, Jason Ekstrand wrote:
>
> On January 18, 2019 01:48:25 Iago Toral  wrote:
>
> On Thu, 2019-01-17 at 13:42 -0600, Jason Ekstrand wrote:
>
> On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga 
> wrote
>
>
> (...)
>
>
> +   nir_ssa_def *tmp = nir_build_alu(b, op1, src, NULL, NULL, NULL);
> +   nir_ssa_def *res = nir_build_alu(b, op2, tmp, NULL, NULL, NULL);
> +   nir_ssa_def_rewrite_uses(>dest.dest.ssa, nir_src_for_ssa(res));
> +   nir_instr_remove(>instr);
> +}
> +
> +static bool
> +lower_instr(nir_builder *b, nir_alu_instr *alu)
> +{
> +   unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
> +   nir_alu_type src_type = nir_op_infos[alu->op].input_types[0];
> +   nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size);
> +
> +   unsigned dst_bit_size = nir_dest_bit_size(alu->dest.dest);
> +   nir_alu_type dst_type = nir_op_infos[alu->op].output_type;
> +   nir_alu_type dst_full_type = (nir_alu_type) (dst_type | dst_bit_size);
>
>
> The nir_op_infos[*].output_type will already be a full type.  Maybe just
> delete the dst_type temporary so that no one gets confused and thinks it's
> a base type.
>
>
> Right, will do that.
>
>
> Actually, we want the base type too, since that is what we pass to the
> get_conversion_op() helper, I'll just pick it from the dst_full_type with
> the nir_alu_type_get_base_type() helper.
>

Sounds like the right thing to do.


> +
> +   /* BDW PRM, vol02, Command Reference Instructions, mov - MOVE:
> +*
> +*   "There is no direct conversion from HF to DF or DF to HF.
> +*Use two instructions and F (Float) as an intermediate type.
> +*
> +*There is no direct conversion from HF to Q/UQ or Q/UQ to HF.
> +*Use two instructions and F (Float) or a word integer type
> +*or a DWord integer type as an intermediate type."
> +*/
> +   if ((src_full_type == nir_type_float16 && dst_bit_size == 64) ||
> +   (src_bit_size == 64 && dst_full_type == nir_type_float16)) {
> +  nir_op op1 = get_conversion_op(src_type, src_bit_size, src_type, 32,
> + nir_rounding_mode_undef);
> +  nir_op op2 = get_conversion_op(src_type, 32, dst_type, dst_bit_size,
> + get_opcode_rounding_mode(alu->op));
> +  split_conversion(b, alu, op1, op2);
> +  return true;
> +   }
> +
> +   /* SKL PRM, vol 02a, Command Reference: Instructions, Move:
> +*
> +*   "There is no direct conversion from B/UB to DF or DF to B/UB. Use
> +*two instructions and a word or DWord intermediate type."
> +*
> +*   "There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB.
> +*Use two instructions and a word or DWord intermediate integer
> +*type."
> +*/
> +   if ((src_bit_size == 8 && dst_bit_size == 64) ||
> +   (src_bit_size == 64 && dst_bit_size == 8)) {
> +  nir_op op1 = get_conversion_op(src_type, src_bit_size, src_type, 32,
> + nir_rounding_mode_undef);
> +  nir_op op2 = get_conversion_op(src_type, 32, dst_type, dst_bit_size,
> + nir_rounding_mode_undef);
> +  split_conversion(b, alu, op1, op2);
> +  return true;
> +   }
> +
> +   return false;
> +}
> +
> +static bool
> +lower_impl(nir_function_impl *impl)
> +{
> +   nir_builder b;
> +   nir_builder_init(, impl);
> +   bool progress = false;
> +
> +   nir_foreach_block(block, impl) {
> +  nir_foreach_instr_safe(instr, block) {
> + if (instr->type != nir_instr_type_alu)
> +continue;
> +
> + nir_alu_instr *alu = nir_instr_as_alu(instr);
> + assert(alu->dest.dest.is_ssa);
> +
> + if (nir_op_infos[alu->op].num_inputs > 1)
>
>
> I don't think this is a reliable way to detect conversion opcodes.  There
> are many unary operations that aren't conversions.  You're only getting
> lucky because there are non which do float16 <-> 64=bit or 8-bit <->
> 64-bit.  I've thought many times about throwing a boolean in nir_op_infos
> for is_conversion.  Maybe now is a good time to do it?
>
>
> Yes, I knew that this was not going to only let conversions through and
> was relying on what you say above. Adding an is_conversion field souns good
> to me, alternatively I guess we could add a is_conversion() helper, but I
> guess having the field in nir_op_infos maye be a bit easier to maintain if
> we want to add more opcodes in the future.
>
>
> Thinking about this, I guess we only want to set this to True for the
> opcodes that are explicit numeric conversions, that is, the ones we
> generate right after "# Generate all of the numeric conversion opcodes". I
> am saying this because even if we have unop_convert(), that is also used
> for things like unpack opcodes, but_count, etc that are not real
> conversions. I was thinking about creating a new unop_numeric_convert()
> helper 

[Mesa-dev] [Bug 109405] [BISECTED] [REGRESSION] KHR-GL45.enhanced_layouts.glsl_contant_immutablity aborts in second execution

2019-01-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109405

--- Comment #1 from tele4...@hotmail.com ---
Hello, this commit was backed out and replaced by
https://gitlab.freedesktop.org/mesa/mesa/commit/6ca652faf368427e3e6d57ef5456f78203b8207e
. Did your bisect account for this?

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109405] [BISECTED] [REGRESSION] KHR-GL45.enhanced_layouts.glsl_contant_immutablity aborts in second execution

2019-01-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109405

Bug ID: 109405
   Summary: [BISECTED] [REGRESSION]
KHR-GL45.enhanced_layouts.glsl_contant_immutablity
aborts in second execution
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: glsl-compiler
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: ago...@igalia.com
QA Contact: intel-3d-b...@lists.freedesktop.org
CC: t_arc...@yahoo.com.au

After:

--

commit 64b8c86d37ebb1e1d286c69d642d52b7bcf051d3 (danger/master)
Author: Timothy Arceri 
Date:   Thu Jan 17 17:16:29 2019 +1100

glsl: be much more aggressive when skipping shader compilation

Currently we only add a cache key for a shader once it is linked.
However games like Team Fortress 2 compile a whole bunch of shaders
which are never actually linked. These compiled shaders can take
up a bunch of memory.

This patch changes things so that we add the key for the shader to
the cache as soon as it is compiled. This means on a warm cache we
can avoid the wasted memory from these shaders. Worst case scenario
is we need to compile the shaders at link time but this can happen
anyway if the shader has been evicted from the cache.

Reduces memory use in Team Fortress 2 from 1.3GB -> 770MB on a
warm cache from start up to the game menu.

Acked-by: Marek Olšák 

--

The following VK-GL-CTS test is failing in a second execution with the i965
driver, shader cache activated, and the x11_egl target:

--

local@5207cd755898:~/vk-gl-cts/build/external/openglcts/modules$ ./glcts
--deqp-case="KHR-GL45.enhanced_layouts.glsl_contant_immutablity"
Writing test log into TestResults.qpa
dEQP Core git-117ce3699c7e2d3d04f1760b25399aedd5dc90fa (0x117ce369) starting..
  target implementation = 'X11 EGL'
ATTENTION: default value of option vblank_mode overridden by environment.

Test case 'KHR-GL45.enhanced_layouts.glsl_contant_immutablity'..
  Pass (Pass)

DONE!

Test run totals:
  Passed:1/1 (100.0%)
  Failed:0/1 (0.0%)
  Not supported: 0/1 (0.0%)
  Warnings:  0/1 (0.0%)
local@5207cd755898:~/vk-gl-cts/build/external/openglcts/modules$ ./glcts
--deqp-case="KHR-GL45.enhanced_layouts.glsl_contant_immutablity"
Writing test log into TestResults.qpa
dEQP Core git-117ce3699c7e2d3d04f1760b25399aedd5dc90fa (0x117ce369) starting..
  target implementation = 'X11 EGL'
ATTENTION: default value of option vblank_mode overridden by environment.

Test case 'KHR-GL45.enhanced_layouts.glsl_contant_immutablity'..
Segmentation fault (core dumped)

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Double free error on etnaviv driver.

2019-01-21 Thread Nazarov Sergey
Hi, Lucas!
Yes, it works. So, it's not a Mesa problem, it's buildroot problem.
Thanks, Sergey.

18.01.2019, 18:26, "Lucas Stach" :
> Hi Sergey,
>
> I've just been made aware of a peculiarity of the buildroot toolchain
> configuration, which might well explain why you are seeing the obvious
> crash, but no one else is complaining about this.
>
> Can you try if [1] also solves this issue for you? If it does, I think
> we should not try to work around this in Mesa.
>
> Regards,
> Lucas
>
> [1] http://lists.busybox.net/pipermail/buildroot/2018-November/235923.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Add extension EXT_sRGB_write_control

2019-01-21 Thread Gert Wollny
Dear all, 

I'd like to push this series [1] to enable the extension 
EXT_sRGB_write control  by the end of this week if there are no
objections. 

I ran it trough the intel-ci [2] with only some unrelated unstable
tests. 

many thanks for any reviews, 
Gert 
  

[1] https://gitlab.freedesktop.org/mesa/mesa/merge_requests/14
[2] https://mesa-ci.01.org/gerddie/builds/6/group/63a9f0ea7bb98050796b6
49e85481845
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 00/42] intel: VK_KHR_shader_float16_int8 implementation

2019-01-21 Thread Iago Toral
I think you covered everithing at this point, thanks Jason!I am
addressing all your review feedback and will send a v4 when I have
everything ready.
On Fri, 2019-01-18 at 14:23 -0600, Jason Ekstrand wrote:
> I think I've gotten through everything at this point.  If I've missed
> anything, please let me know.
> 
> On Fri, Jan 18, 2019 at 5:37 AM Iago Toral  wrote:
> > Thanks a lot of for all the review work! When you're done reviewing
> > all the patches I'll prepare a v4 with all the changes.
> > On Thu, 2019-01-17 at 18:24 -0600, Jason Ekstrand wrote:
> > > I'm done for the day but I've read through most of the patches. 
> > > I think I've got 4 or 5 tricky ones left.  By and large, I think
> > > things are looking really good.  I don't know that we'll make
> > > 19.0 but there's a possibility.  If not, it'll likely land
> > > shortly after.
> > > 
> > > On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga <
> > > ito...@igalia.com> wrote:
> > > > The changes in this version address review feedback to v2 and,
> > > > most importantly,
> > > > 
> > > > rebase on top of relevant changes in master, specifically
> > > > Curro's regioning
> > > > 
> > > > lowering pass. This new regioning pass simplifies some of the
> > > > NIR translation
> > > > 
> > > > code (specifically the code for translating regioning
> > > > restrictions on
> > > > 
> > > > conversions for atom platforms) making some of the previous
> > > > work in this series
> > > > 
> > > > unnecessary. The regioning restrictions for conversions between
> > > > integer and
> > > > 
> > > > half-float added with this series are are now implemented as
> > > > part of this
> > > > 
> > > > framework instead of doing it at NIR translation time. This
> > > > version of the
> > > > 
> > > > series also dropped the SPIR-V compiler patches that have
> > > > already been merged.
> > > > 
> > > > 
> > > > 
> > > > As always, a branch for with these patches is available for
> > > > testing in the
> > > > 
> > > > itoral/VK_KHR_shader_float16_int8 branch of the Igalia Mesa
> > > > repository at
> > > > 
> > > > https://github.com/Igalia/mesa.
> > > > 
> > > > 
> > > > 
> > > > Iago Toral Quiroga (42):
> > > > 
> > > >   intel/compiler: handle conversions between int and half-float 
> > > > on atom
> > > > 
> > > >   intel/compiler: add a NIR pass to lower conversions
> > > > 
> > > >   intel/compiler: split float to 64-bit opcodes from int to 64-
> > > > bit
> > > > 
> > > >   intel/compiler: handle b2i/b2f with other integer conversion
> > > > opcodes
> > > > 
> > > >   intel/compiler: assert restrictions on conversions to half-
> > > > float
> > > > 
> > > >   intel/compiler: lower some 16-bit float operations to 32-bit
> > > > 
> > > >   intel/compiler: lower 16-bit extended math to 32-bit prior to
> > > > gen9
> > > > 
> > > >   intel/compiler: implement 16-bit fsign
> > > > 
> > > >   intel/compiler: allow extended math functions with HF
> > > > operands
> > > > 
> > > >   compiler/nir: add lowering option for 16-bit fmod
> > > > 
> > > >   intel/compiler: lower 16-bit fmod
> > > > 
> > > >   compiler/nir: add lowering for 16-bit flrp
> > > > 
> > > >   intel/compiler: lower 16-bit flrp
> > > > 
> > > >   compiler/nir: add lowering for 16-bit ldexp
> > > > 
> > > >   intel/compiler: Extended Math is limited to SIMD8 on half-
> > > > float
> > > > 
> > > >   intel/compiler: add instruction setters for Src1Type and
> > > > Src2Type.
> > > > 
> > > >   intel/compiler: add new half-float register type for 3-src
> > > > 
> > > > instructions
> > > > 
> > > >   intel/compiler: add a helper function to query hardware type
> > > > table
> > > > 
> > > >   intel/compiler: don't compact 3-src instructions with
> > > > Src1Type or
> > > > 
> > > > Src2Type bits
> > > > 
> > > >   intel/compiler: allow half-float on 3-source instructions
> > > > since gen8
> > > > 
> > > >   intel/compiler: set correct precision fields for 3-source
> > > > float
> > > > 
> > > > instructions
> > > > 
> > > >   intel/compiler: don't propagate HF immediates to 3-src
> > > > instructions
> > > > 
> > > >   intel/compiler: fix ddx and ddy for 16-bit float
> > > > 
> > > >   intel/compiler: fix ddy for half-float in gen8
> > > > 
> > > >   intel/compiler: workaround for SIMD8 half-float MAD in gen8
> > > > 
> > > >   intel/compiler: split is_partial_write() into two variants
> > > > 
> > > >   intel/compiler: activate 16-bit bit-size lowerings also for
> > > > 8-bit
> > > > 
> > > >   intel/compiler: handle 64-bit float to 8-bit integer
> > > > conversions
> > > > 
> > > >   intel/compiler: handle conversions between int and half-float 
> > > > on atom
> > > > 
> > > >   intel/compiler: implement isign for int8
> > > > 
> > > >   intel/compiler: ask for an integer type if requesting an 8-
> > > > bit type
> > > > 
> > > >   intel/eu: force stride of 2 on NULL register for Byte
> > > > instructions
> > > > 
> > > >   compiler/spirv: add support for Float16 and Int8 capabilities

Re: [Mesa-dev] [PATCH v3 26/42] intel/compiler: split is_partial_write() into two variants

2019-01-21 Thread Iago Toral
On Fri, 2019-01-18 at 14:23 -0600, Jason Ekstrand wrote:
> Ugh... I really don't like this...  But I also don't have a better
> idea off-hand.  The unfortunate reality is that this IR really isn't
> designed to be able to handle this sort of thing.  My #1 concern here
> is that I don't think it does good things when we have instructions
> with exec_size < dispatch_width such as split instructions in
> SIMD32.  

Right I guess this is the same case as SIMD16 instructions split to
2xSIMD8 really. I think this would consider each split instruction
a  partial write, since that is what each chunk of the instruction is
really doing (writing a subset of the channels of a variable), but of
course, the original unsplit instruction mihgt not be a partial write
which is where we would be losing optimization opportunities, we just
don't have that information available once the instruction has been
split.
Would it make sense to have fs_inst contain information about whether
this is an instruction that has been split from a larger instruction
that was doing a  full variable write? I guess we do have all that info
available when we decide to split instructions in the SIMD
width  lowering pass.
> I think it's *mostly* a no-op there.  I'll have to think on this one
> a bit more.  Don't wait to re-send the v4 until I've come up with
> something.
> 
> On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga  > wrote:
> > This function is used in two different scenarios that for 32-bit
> > 
> > instructions are the same, but for 16-bit instructions are not.
> > 
> > 
> > 
> > One scenario is that in which we are working at a SIMD8 register
> > 
> > level and we need to know if a register is fully defined or
> > written.
> > 
> > This is useful, for example, in the context of liveness analysis or
> > 
> > register allocation, where we work with units of registers.
> > 
> > 
> > 
> > The other scenario is that in which we want to know if an
> > instruction
> > 
> > is writing a full scalar component or just some subset of it. This
> > is
> > 
> > useful, for example, in the context of some optimization passes
> > 
> > like copy propagation.
> > 
> > 
> > 
> > For 32-bit instructions (or larger), a SIMD8 dispatch will always
> > write
> > 
> > at least a full SIMD8 register (32B) if the write is not partial.
> > The
> > 
> > function is_partial_write() checks this to determine if we have a
> > partial
> > 
> > write. However, when we deal with 16-bit instructions, that logic
> > disables
> > 
> > some optimizations that should be safe. For example, a SIMD8 16-bit 
> > MOV will
> > 
> > only update half of a SIMD register, but it is still a complete
> > write of the
> > 
> > variable for a SIMD8 dispatch, so we should not prevent copy
> > propagation in
> > 
> > this scenario because we don't write all 32 bytes in the SIMD
> > register
> > 
> > or because the write starts at offset 16B (wehere we pack
> > components Y or
> > 
> > W of 16-bit vectors).
> > 
> > 
> > 
> > This is a problem for SIMD8 executions (VS, TCS, TES, GS) of 16-bit
> > 
> > instructions, which lose a number of optimizations because of this,
> > most
> > 
> > important of which is copy-propagation.
> > 
> > 
> > 
> > This patch splits is_partial_write() into is_partial_reg_write(),
> > which
> > 
> > represents the current is_partial_write(), useful for things like
> > 
> > liveness analysis, and is_partial_var_write(), which considers
> > 
> > the dispatch size to check if we are writing a full variable
> > (rather
> > 
> > than a full register) to decide if the write is partial or not,
> > which
> > 
> > is what we really want in many optimization passes.
> > 
> > 
> > 
> > Then the patch goes on and rewrites all uses of is_partial_write()
> > to use
> > 
> > one or the other version. Specifically, we use
> > is_partial_var_write()
> > 
> > in the following places: copy propagation, cmod propagation, common
> > 
> > subexpression elimination, saturate propagation and sel peephole.
> > 
> > 
> > 
> > Notice that the semantics of is_partial_var_write() exactly match
> > the
> > 
> > current implementation of is_partial_write() for anything that is
> > 
> > 32-bit or larger, so no changes are expected for 32-bit
> > instructions.
> > 
> > 
> > 
> > Tested against ~5000 tests involving 16-bit instructions in CTS
> > produced
> > 
> > the following changes in instruction counts:
> > 
> > 
> > 
> > Patched  | Master|%|
> > 
> > 
> > 
> > SIMD8  |621,900  |706,721| -12.00% |
> > 
> > 
> > 
> > SIMD16 | 93,252  | 93,252|   0.00% |
> > 
> > 
> > 
> > 
> > 
> > As expected, the change only affects SIMD8 dispatches.
> > 
> > 
> > 
> > Reviewed-by: Topi Pohjolainen 
> > 
> > ---
> > 
> >  src/intel/compiler/brw_fs.cpp | 31
> > +++
> > 
> >  

Re: [Mesa-dev] [PATCH 2/8] i965: r8stencil_mt/needs_update renamed to shadow_mt/needs_update

2019-01-21 Thread Eleni Maria Stea
On 1/19/19 12:55 AM, Nanley Chery wrote:
> The series I pointed you to earlier has a patch like this, but it's more
> complete. It also modifies the comment above the data structure being
> modified. Do you want to review it?
> 
> https://patchwork.freedesktop.org/patch/253197/
> 
> I think what people usually do in this case is send out their series
> with the other person's patch included (and their rb tacked onto it).


Hi Nanley,

First of all, thank you for taking the time to look at the patches.

I will review your patch and replace mine with it in the fixed series
when I complete the other changes you suggested.

Regards,
Eleni
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 25/42] intel/compiler: workaround for SIMD8 half-float MAD in gen8

2019-01-21 Thread Iago Toral
On Fri, 2019-01-18 at 11:51 -0600, Jason Ekstrand wrote:
> On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga  > wrote:
> > Broadwell hardware has a bug that manifests in SIMD8 executions of
> > 
> > 16-bit MAD instructions when any of the sources is a Y or W
> > component.
> > 
> > We pack these components in the same SIMD register as components X
> > and
> > 
> > Z respectively, but starting at offset 16B (so they live in the
> > second
> > 
> > half of the register). The problem does not exist in SKL or later.
> > 
> > 
> > 
> > We work around this issue by moving any such sources to a temporary
> > 
> > starting at offset 0B. We want to do this after the main
> > optimization loop
> > 
> > to prevent copy-propagation and friends to undo the fix.
> > 
> > 
> > 
> > Reviewed-by: Topi Pohjolainen 
> > 
> > ---
> > 
> >  src/intel/compiler/brw_fs.cpp | 48
> > +++
> > 
> >  src/intel/compiler/brw_fs.h   |  1 +
> > 
> >  2 files changed, 49 insertions(+)
> > 
> > 
> > 
> > diff --git a/src/intel/compiler/brw_fs.cpp
> > b/src/intel/compiler/brw_fs.cpp
> > 
> > index 0b3ec94e2d2..d6096cd667d 100644
> > 
> > --- a/src/intel/compiler/brw_fs.cpp
> > 
> > +++ b/src/intel/compiler/brw_fs.cpp
> > 
> > @@ -6540,6 +6540,48 @@ fs_visitor::optimize()
> > 
> > validate();
> > 
> >  }
> > 
> > 
> > 
> > +/**
> > 
> > + * Broadwell hardware has a bug that manifests in SIMD8 executions
> > of 16-bit
> > 
> > + * MAD instructions when any of the sources is a Y or W component.
> > We pack
> > 
> > + * these components in the same SIMD register as components X and
> > Z
> > 
> > + * respectively, but starting at offset 16B (so they live in the
> > second half
> > 
> > + * of the register).
> 
> What exactly do you mean by a Y or W component?  Is this for the case
> where you have a scalar that happens to land at certain offsets?  Or
> does it apply to regular stride == 1 MADs?  If it applied in the
> stride == 1 case, then I really don't see what this is doing to fix
> it.  It might help if you provided some before and after assembly
> example.

This happens when a source to a half-float MAD instruction starts at
offset 16B, which at the time I wrote this patch,  happened when we
were packing the Y component or the W component of a 16-bit vec2/vec4
into the second half of a SIMD register. I have an old version of that
branch and the CTS tests and was able to reproduce the problem. Here is
a code trace which is not working as expected, making some CTS tests
fail:
send(8) g16<1>UWg19<8,8,1>UD   
 data ( DC byte scattered read, 0, 4) mlen 1 rlen 1 { align1 1Q
};mov(8)  g14.8<1>HF  g16<16,8,2>HF   {
align1 1Q };mad(8)  g18<1>HF-
g17<4,4,1>HF   g14.8<4,4,1>HF  g11<4,4,1>HF { align16 1Q
};mov(8)  g21<2>W g18<8,8,1>W {
align1 1Q };
If we run this pass, we would produce the same code, only that we would
replace the MAD instruction with this:
mov(8)  g22<1>HFg14.8<8,8,1>HF  {
align1 WE_all 1Q };mad(8)  g18<1>HF-
g17<4,4,1>HF   g22<4,4,1>HFg11<4,4,1>HF { align16 1Q };
Which makes the test pass.
It seems our compiler produces different code now than when I found
this and these same tests now pass without this pass because we simply
don't hit that scenario any more. It seems as if our shuffling code
after a load is not attempting to pack 2 16-bit vector components in
each VGRF anymore as we used to do when we implemented 16-bit storage
and therefore we no longer hit this scenario. Independently of whether
this change was intended or a bug, the hardware bug is there so I think
we still want to have code to dea with it.
> Also, this seems like something that should go in the new region
> restrictions pass as a special case in has_invalid_src_region.

Yes, I guess that makes sense now that we have this pass. I'll put this
there.
> --Jason
>  
> > + *
> > 
> > + * We work around this issue by moving any such sources to a
> > temporary
> > 
> > + * starting at offset 0B. We want to do this after the main
> > optimization loop
> > 
> > + * to prevent copy-propagation and friends to undo the fix.
> > 
> > + */
> > 
> > +void
> > 
> > +fs_visitor::fixup_hf_mad()
> > 
> > +{
> > 
> > +   if (devinfo->gen != 8)
> > 
> > +  return;
> > 
> > +
> > 
> > +   bool progress = false;
> > 
> > +
> > 
> > +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> > 
> > +  if (inst->opcode != BRW_OPCODE_MAD ||
> > 
> > +  inst->dst.type != BRW_REGISTER_TYPE_HF ||
> > 
> > +  inst->exec_size > 8)
> > 
> > + continue;
> > 
> > +
> > 
> > +  for (int i = 0; i < 3; i++) {
> > 
> > + if (inst->src[i].offset > 0) {
> > 
> > +assert(inst->src[i].type == BRW_REGISTER_TYPE_HF);
> > 
> > +const fs_builder ibld =
> > 
> > +   bld.at(block, inst).exec_all().group(inst-
> > 

Re: [Mesa-dev] [PATCH] anv: Only parse pImmutableSamplers if the descriptor has samplers

2019-01-21 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 19/01/2019 19:04, Jason Ekstrand wrote:

---
  src/intel/vulkan/anv_descriptor_set.c | 43 +++
  1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index a308fbf8540..a4e466cf3dd 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -94,7 +94,22 @@ VkResult anv_CreateDescriptorSetLayout(
 uint32_t immutable_sampler_count = 0;
 for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
-  if (pCreateInfo->pBindings[j].pImmutableSamplers)
+
+  /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+   *
+   *"If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+   *VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+   *pImmutableSamplers can be used to initialize a set of immutable
+   *samplers. [...]  If descriptorType is not one of these descriptor
+   *types, then pImmutableSamplers is ignored.
+   *
+   * We need to be careful here and only parse pImmutableSamplers if we
+   * have one of the right descriptor types.
+   */
+  VkDescriptorType desc_type = pCreateInfo->pBindings[j].descriptorType;
+  if ((desc_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+   desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
+  pCreateInfo->pBindings[j].pImmutableSamplers)
   immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
 }
  
@@ -153,6 +168,12 @@ VkResult anv_CreateDescriptorSetLayout(

if (binding == NULL)
   continue;
  
+  /* We temporarily stashed the pointer to the binding in the

+   * immutable_samplers pointer.  Now that we've pulled it back out
+   * again, we reset immutable_samplers to NULL.
+   */
+  set_layout->binding[b].immutable_samplers = NULL;
+
if (binding->descriptorCount == 0)
   continue;
  
@@ -170,6 +191,15 @@ VkResult anv_CreateDescriptorSetLayout(

  set_layout->binding[b].stage[s].sampler_index = sampler_count[s];
  sampler_count[s] += binding->descriptorCount;
   }
+
+ if (binding->pImmutableSamplers) {
+set_layout->binding[b].immutable_samplers = samplers;
+samplers += binding->descriptorCount;
+
+for (uint32_t i = 0; i < binding->descriptorCount; i++)
+   set_layout->binding[b].immutable_samplers[i] =
+  anv_sampler_from_handle(binding->pImmutableSamplers[i]);
+ }
   break;
default:
   break;
@@ -221,17 +251,6 @@ VkResult anv_CreateDescriptorSetLayout(
   break;
}
  
-  if (binding->pImmutableSamplers) {

- set_layout->binding[b].immutable_samplers = samplers;
- samplers += binding->descriptorCount;
-
- for (uint32_t i = 0; i < binding->descriptorCount; i++)
-set_layout->binding[b].immutable_samplers[i] =
-   anv_sampler_from_handle(binding->pImmutableSamplers[i]);
-  } else {
- set_layout->binding[b].immutable_samplers = NULL;
-  }
-
set_layout->shader_stages |= binding->stageFlags;
 }
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 21/42] intel/compiler: set correct precision fields for 3-source float instructions

2019-01-21 Thread Iago Toral
On Fri, 2019-01-18 at 06:54 -0600, Jason Ekstrand wrote:
> 
> 
> 
> 
> 
> On January 18, 2019 04:47:51 Iago Toral  wrote:
> > On Thu, 2019-01-17 at 14:18 -0600, Jason Ekstrand wrote:
> > > On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga <
> > > ito...@igalia.com> wrote:
> > > > Source0 and Destination extract the floating-point precision
> > > > automatically
> > > > 
> > > > from the SrcType and DstType instruction fields respectively
> > > > when they are
> > > > 
> > > > set to types :F or :HF. For Source1 and Source2 operands, we
> > > > use the new
> > > > 
> > > > 1-bit fields Src1Type and Src2Type, where 0 means normal
> > > > precision and 1
> > > > 
> > > > means half-precision. Since we always use the type of the
> > > > destination for
> > > > 
> > > > all operands when we emit 3-source instructions, we only need
> > > > set Src1Type
> > > > 
> > > > and Src2Type to 1 when we are emitting a half-precision
> > > > instruction.
> > > > 
> > > > 
> > > > 
> > > > v2:
> > > > 
> > > >  - Set the bit separately for each source based on its type so
> > > > we can
> > > > 
> > > >do mixed floating-point mode in the future (Topi).
> > > > 
> > > > 
> > > > 
> > > > Reviewed-by: Topi Pohjolainen 
> > > > 
> > > > ---
> > > > 
> > > >  src/intel/compiler/brw_eu_emit.c | 16 
> > > > 
> > > >  1 file changed, 16 insertions(+)
> > > > 
> > > > 
> > > > 
> > > > diff --git a/src/intel/compiler/brw_eu_emit.c
> > > > b/src/intel/compiler/brw_eu_emit.c
> > > > 
> > > > index a785f96b650..2fa89f8a2a3 100644
> > > > 
> > > > --- a/src/intel/compiler/brw_eu_emit.c
> > > > 
> > > > +++ b/src/intel/compiler/brw_eu_emit.c
> > > > 
> > > > @@ -801,6 +801,22 @@ brw_alu3(struct brw_codegen *p, unsigned
> > > > opcode, struct brw_reg dest,
> > > > 
> > > >*/
> > > > 
> > > >   brw_inst_set_3src_a16_src_type(devinfo, inst,
> > > > dest.type);
> > > > 
> > > >   brw_inst_set_3src_a16_dst_type(devinfo, inst,
> > > > dest.type);
> > > > 
> > > > +
> > > > 
> > > > + /* From the Bspec: Instruction types
> > > > 
> > > > +  *
> > > > 
> > > > +  * Three source instructions can use operands with
> > > > mixed-mode
> > > > 
> > > > +  * precision. When SrcType field is set to :f or :hf
> > > > it defines
> > > > 
> > > > +  * precision for source 0 only, and fields Src1Type
> > > > and Src2Type
> > > > 
> > > > +  * define precision for other source operands:
> > > > 
> > > > +  *
> > > > 
> > > > +  *   0b = :f. Single precision Float (32-bit).
> > > > 
> > > > +  *   1b = :hf. Half precision Float (16-bit).
> > > > 
> > > > +  */
> > > > 
> > > > + if (src1.type == BRW_REGISTER_TYPE_HF)
> > > > 
> > > > +brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
> > > 
> > > Maybe worth throwing in an
> > > 
> > > assert(src0.type == BRW_REGISTER_TYPE_F || src0.type ==
> > > BRW_REGISTER_TYPE_HF);
> > > 
> > > just to be sure? 
> > 
> > If we are going to do this I guess we should also check the same
> > for src2.
> 
> Yeah, it'd probably be good to have a general assertion that the
> three sources have the same type with the caveat that they can vary
> to mix half and full float.  Maybe that would be better than
> something specific right here.

Actually, I don't think that is going to work. There is this comment
right above this hunk:
 /* Set both the source and destination types based on
dest.type,  * ignoring the source register types.  The MAD and
LRP emitters ensure  * that all four types are float.  The BFE
and BFI2 emitters, however,  * may send us mixed D and UD types
and want us to ignore that and use  * the destination
type.  */
I guess we could still formulate a readable assertion using helpers,
something like this:
assert(all_types_32bit_integer(src0.type, src1.type, src2.type)
||   all_types_mixed_float(src0.type, src1.type, src2.type)
||   all_types_64bit_float(src0.type, src1.type, src2.type));
but creating 3 helpers only for one assertion might a bit excessive...
so maybe just adding the specific asserts for src1 and src2 when they
are HF is more reasonable?
> > >  Either way, this and patch 20 are
> > > Reviewed-by: Jason Ekstrand 
> > >  
> > > > +
> > > > 
> > > > + if (src2.type == BRW_REGISTER_TYPE_HF)
> > > > 
> > > > +brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
> > > > 
> > > >}
> > 
> > And the same here (for src0 and src1)
> > > > }
> > > > 
> > > > 
> > > > 
> 
> 
> 
> 
> 
> 
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] etnaviv: hook up linear texture sampling support

2019-01-21 Thread Lucas Stach
Am Montag, den 21.01.2019, 07:50 +0100 schrieb Christian Gmeiner:
> If the GPU supports linear sampling, linear addressing mode
> will be used as default.
> 
> > Signed-off-by: Christian Gmeiner 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_resource.c | 10 +++---
>  src/gallium/drivers/etnaviv/etnaviv_texture.c  |  4 +++-
>  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> index 9a7ebf3064e..7d24b1f03bd 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> @@ -318,9 +318,9 @@ etna_resource_create(struct pipe_screen *pscreen,
>  {
> struct etna_screen *screen = etna_screen(pscreen);
>  
> -   /* Figure out what tiling and address mode to use -- for now, assume that
> -* texture cannot be linear. there is a capability LINEAR_TEXTURE_SUPPORT
> -* (supported on gc880 and gc2000 at least), but not sure how it works.
> +   /* Figure out what tiling and address mode to use.
> +* Textures are TILED or LINEAR. If LINEAR_TEXTURE_SUPPORT capability is
> +* available LINEAR gets prefered.
>  * Buffers always have LINEAR layout.
>  */
> unsigned layout = ETNA_LAYOUT_LINEAR;
> @@ -334,6 +334,10 @@ etna_resource_create(struct pipe_screen *pscreen,
>  
>    if (util_format_is_compressed(templat->format))
>   layout = ETNA_LAYOUT_LINEAR;
> +  else if (VIV_FEATURE(screen, chipMinorFeatures1, 
> LINEAR_TEXTURE_SUPPORT)) {
> + layout = ETNA_LAYOUT_LINEAR;
> + mode = ETNA_ADDRESSING_MODE_LINEAR;
> +  }

Did you do any performance measurements with this change? I don't think
we generally want to prefer linear textures, as in theory they have
much worse texture cache hit rates. Also a lot of the async transfer
stuff currently depends on hitting the RS linear->tiled blit path for
optimal performance on uploads.

There are 2 cases where I think linear textures are useful:

1. Imported external buffers, where we might need to update the
internal tiled copy on each resource update. Getting rid of this blit
should help performance a good bit.

2. 8bpp formats that can't be tiled with the RS and would hit the
software fallback path. The tradeoff software tiling path vs. reduced
texture cache hit rates might still prefer linear textures.

Regards,
Lucas
 

> } else if (templat->target != PIPE_BUFFER) {
>    bool want_multitiled = false;
>    bool want_supertiled = screen->specs.can_supertile;
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_texture.c 
> b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> index 3993e31cec1..b06f20531fd 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_texture.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> @@ -172,7 +172,9 @@ etna_resource_sampler_compatible(struct etna_resource 
> *res)
> if (res->layout == ETNA_LAYOUT_SUPER_TILED && VIV_FEATURE(screen, 
> chipMinorFeatures2, SUPERTILED_TEXTURE))
>    return true;
>  
> -   /* TODO: LINEAR_TEXTURE_SUPPORT */
> +   /* This GPU supports texturing from linear textures? */
> +   if (res->layout == ETNA_LAYOUT_LINEAR && VIV_FEATURE(screen, 
> chipMinorFeatures1, LINEAR_TEXTURE_SUPPORT))
> +  return true;
>  
> /* Otherwise, only support tiled layouts */
> if (res->layout != ETNA_LAYOUT_TILED)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] etnaviv: extend etna_resource with an addressing mode

2019-01-21 Thread Lucas Stach
Hi Christian,

first of all, thanks for figuring this out. This is really nice
to finally know how it works.

Am Montag, den 21.01.2019, 07:49 +0100 schrieb Christian Gmeiner:
> Defines how sampler (and pixel pipes) needs to access the data
> represented with a resource. The used default is mode is
> ETNA_ADDRESSING_MODE_TILED.

Do you see any reason why we need a separate property for this? IMHO
etna_resource is already a bit too fat and from this set of patches I
can't see why we can't infer the addressing mode from the layout. Do
you have something specific in mind, that I don't see right now?

Regards,
Lucas

> 
> > Signed-off-by: Christian Gmeiner 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_resource.c | 17 +++--
>  src/gallium/drivers/etnaviv/etnaviv_resource.h |  9 -
>  src/gallium/drivers/etnaviv/etnaviv_texture.c  |  1 +
>  src/gallium/drivers/etnaviv/etnaviv_transfer.c |  3 ++-
>  4 files changed, 22 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> index c0091288030..9a7ebf3064e 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> @@ -188,7 +188,8 @@ static bool is_rs_align(struct etna_screen *screen,
>  /* Create a new resource object, using the given template info */
>  struct pipe_resource *
>  etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
> -uint64_t modifier, const struct pipe_resource *templat)
> +enum etna_resource_addressing_mode mode, uint64_t 
> modifier,
> +const struct pipe_resource *templat)
>  {
> struct etna_screen *screen = etna_screen(pscreen);
> struct etna_resource *rsc;
> @@ -280,6 +281,7 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned 
> layout,
> rsc->base.nr_samples = nr_samples;
> rsc->layout = layout;
> rsc->halign = halign;
> +   rsc->addressing_mode = mode;
>  
> pipe_reference_init(>base.reference, 1);
> list_inithead(>list);
> @@ -316,12 +318,14 @@ etna_resource_create(struct pipe_screen *pscreen,
>  {
> struct etna_screen *screen = etna_screen(pscreen);
>  
> -   /* Figure out what tiling to use -- for now, assume that texture cannot 
> be linear.
> -* there is a capability LINEAR_TEXTURE_SUPPORT (supported on gc880 and
> -* gc2000 at least), but not sure how it works.
> +   /* Figure out what tiling and address mode to use -- for now, assume that
> +* texture cannot be linear. there is a capability LINEAR_TEXTURE_SUPPORT
> +* (supported on gc880 and gc2000 at least), but not sure how it works.
>  * Buffers always have LINEAR layout.
>  */
> unsigned layout = ETNA_LAYOUT_LINEAR;
> +   enum etna_resource_addressing_mode mode = ETNA_ADDRESSING_MODE_TILED;
> +
> if (etna_resource_sampler_only(templat)) {
>    /* The buffer is only used for texturing, so create something
> * directly compatible with the sampler.  Such a buffer can
> @@ -364,7 +368,7 @@ etna_resource_create(struct pipe_screen *pscreen,
>    layout = ETNA_LAYOUT_LINEAR;
>  
> /* modifier is only used for scanout surfaces, so safe to use LINEAR here 
> */
> -   return etna_resource_alloc(pscreen, layout, DRM_FORMAT_MOD_LINEAR, 
> templat);
> +   return etna_resource_alloc(pscreen, layout, mode, DRM_FORMAT_MOD_LINEAR, 
> templat);
>  }
>  
>  enum modifier_priority {
> @@ -445,7 +449,7 @@ etna_resource_create_modifiers(struct pipe_screen 
> *pscreen,
> tmpl.bind |= PIPE_BIND_SCANOUT;
>  
> return etna_resource_alloc(pscreen, modifier_to_layout(modifier),
> -  modifier, );
> +  ETNA_ADDRESSING_MODE_TILED, modifier, );
>  }
>  
>  static void
> @@ -518,6 +522,7 @@ etna_resource_from_handle(struct pipe_screen *pscreen,
> rsc->seqno = 1;
> rsc->layout = modifier_to_layout(handle->modifier);
> rsc->halign = TEXTURE_HALIGN_FOUR;
> +   rsc->addressing_mode = ETNA_ADDRESSING_MODE_TILED;
>  
>  
> level->width = tmpl->width0;
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.h 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.h
> index 11ccf8f7bcb..75aa80b3d7a 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.h
> @@ -49,6 +49,11 @@ struct etna_resource_level {
> bool ts_valid;
>  };
>  
> +enum etna_resource_addressing_mode {
> +   ETNA_ADDRESSING_MODE_TILED = 0,
> +   ETNA_ADDRESSING_MODE_LINEAR,
> +};
> +
>  /* status of queued up but not flushed reads and write operations.
>   * In _transfer_map() we need to know if queued up rendering needs
>   * to be flushed to preserve the order of cpu and gpu access. */
> @@ -66,6 +71,7 @@ struct etna_resource {
> /* only lod 0 used for non-texture buffers */
> /* Layout for surface (tiled, multitiled, split tiled, ...) */
> enum etna_surface_layout 

Re: [Mesa-dev] [PATCH v3 02/42] intel/compiler: add a NIR pass to lower conversions

2019-01-21 Thread Iago Toral
On Fri, 2019-01-18 at 06:46 -0600, Jason Ekstrand wrote:
> On January 18, 2019 01:48:25 Iago Toral  wrote:
> 
> > On Thu, 2019-01-17 at 13:42 -0600, Jason Ekstrand wrote:
> > > On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga <
> > > ito...@igalia.com> wrote

(...)
> > > > +   nir_ssa_def *tmp = nir_build_alu(b, op1, src, NULL, NULL,
> > > > NULL);
> > > > 
> > > > +   nir_ssa_def *res = nir_build_alu(b, op2, tmp, NULL, NULL,
> > > > NULL);
> > > > 
> > > > +   nir_ssa_def_rewrite_uses(>dest.dest.ssa,
> > > > nir_src_for_ssa(res));
> > > > 
> > > > +   nir_instr_remove(>instr);
> > > > 
> > > > +}
> > > > 
> > > > +
> > > > 
> > > > +static bool
> > > > 
> > > > +lower_instr(nir_builder *b, nir_alu_instr *alu)
> > > > 
> > > > +{
> > > > 
> > > > +   unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
> > > > 
> > > > +   nir_alu_type src_type = nir_op_infos[alu-
> > > > >op].input_types[0];
> > > > 
> > > > +   nir_alu_type src_full_type = (nir_alu_type) (src_type |
> > > > src_bit_size);
> > > > 
> > > > +
> > > > 
> > > > +   unsigned dst_bit_size = nir_dest_bit_size(alu->dest.dest);
> > > > 
> > > > +   nir_alu_type dst_type = nir_op_infos[alu->op].output_type;
> > > > 
> > > > +   nir_alu_type dst_full_type = (nir_alu_type) (dst_type |
> > > > dst_bit_size);
> > > 
> > > The nir_op_infos[*].output_type will already be a full type. 
> > > Maybe just delete the dst_type temporary so that no one gets
> > > confused and thinks it's a base type.
> > 
> > Right, will do that.

Actually, we want the base type too, since that is what we pass to the
get_conversion_op() helper, I'll just pick it from the dst_full_type
with the nir_alu_type_get_base_type() helper.
> > > > +
> > > > 
> > > > +   /* BDW PRM, vol02, Command Reference Instructions, mov -
> > > > MOVE:
> > > > 
> > > > +*
> > > > 
> > > > +*   "There is no direct conversion from HF to DF or DF to
> > > > HF.
> > > > 
> > > > +*Use two instructions and F (Float) as an intermediate
> > > > type.
> > > > 
> > > > +*
> > > > 
> > > > +*There is no direct conversion from HF to Q/UQ or Q/UQ
> > > > to HF.
> > > > 
> > > > +*Use two instructions and F (Float) or a word integer
> > > > type
> > > > 
> > > > +*or a DWord integer type as an intermediate type."
> > > > 
> > > > +*/
> > > > 
> > > > +   if ((src_full_type == nir_type_float16 && dst_bit_size ==
> > > > 64) ||
> > > > 
> > > > +   (src_bit_size == 64 && dst_full_type ==
> > > > nir_type_float16)) {
> > > > 
> > > > +  nir_op op1 = get_conversion_op(src_type, src_bit_size,
> > > > src_type, 32,
> > > > 
> > > > + nir_rounding_mode_undef);
> > > > 
> > > > +  nir_op op2 = get_conversion_op(src_type, 32, dst_type,
> > > > dst_bit_size,
> > > > 
> > > > +   
> > > >  get_opcode_rounding_mode(alu->op));
> > > > 
> > > > +  split_conversion(b, alu, op1, op2);
> > > > 
> > > > +  return true;
> > > > 
> > > > +   }
> > > > 
> > > > +
> > > > 
> > > > +   /* SKL PRM, vol 02a, Command Reference: Instructions, Move:
> > > > 
> > > > +*
> > > > 
> > > > +*   "There is no direct conversion from B/UB to DF or DF
> > > > to B/UB. Use
> > > > 
> > > > +*two instructions and a word or DWord intermediate
> > > > type."
> > > > 
> > > > +*
> > > > 
> > > > +*   "There is no direct conversion from B/UB to Q/UQ or
> > > > Q/UQ to B/UB.
> > > > 
> > > > +*Use two instructions and a word or DWord intermediate
> > > > integer
> > > > 
> > > > +*type."
> > > > 
> > > > +*/
> > > > 
> > > > +   if ((src_bit_size == 8 && dst_bit_size == 64) ||
> > > > 
> > > > +   (src_bit_size == 64 && dst_bit_size == 8)) {
> > > > 
> > > > +  nir_op op1 = get_conversion_op(src_type, src_bit_size,
> > > > src_type, 32,
> > > > 
> > > > + nir_rounding_mode_undef);
> > > > 
> > > > +  nir_op op2 = get_conversion_op(src_type, 32, dst_type,
> > > > dst_bit_size,
> > > > 
> > > > + nir_rounding_mode_undef);
> > > > 
> > > > +  split_conversion(b, alu, op1, op2);
> > > > 
> > > > +  return true;
> > > > 
> > > > +   }
> > > > 
> > > > +
> > > > 
> > > > +   return false;
> > > > 
> > > > +}
> > > > 
> > > > +
> > > > 
> > > > +static bool
> > > > 
> > > > +lower_impl(nir_function_impl *impl)
> > > > 
> > > > +{
> > > > 
> > > > +   nir_builder b;
> > > > 
> > > > +   nir_builder_init(, impl);
> > > > 
> > > > +   bool progress = false;
> > > > 
> > > > +
> > > > 
> > > > +   nir_foreach_block(block, impl) {
> > > > 
> > > > +  nir_foreach_instr_safe(instr, block) {
> > > > 
> > > > + if (instr->type != nir_instr_type_alu)
> > > > 
> > > > +continue;
> > > > 
> > > > +
> > > > 
> > > > + nir_alu_instr *alu = nir_instr_as_alu(instr);
> > > > 
> > > > + assert(alu->dest.dest.is_ssa);
> > > > 
> > > > +
> > > > 
> > > > + if