[Mesa-dev] [PATCH] intel/compiler/fs/icl: Use dummy masked urb write for tess eval

2019-04-24 Thread Topi Pohjolainen
One cannot write the URB arbitrarily and therefore the message
has to be carefully constructed. The clever tricks originate
from Kenneth and Jason, I'm just writing the patch.

Fixes GPU hangs on ICL with Vulkan CTS.

CC: Kenneth Graunke 
CC: Jason Ekstrand 
CC: Anuj Phogat 
CC: Clayton Craft 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/compiler/brw_fs_visitor.cpp | 51 ++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_visitor.cpp 
b/src/intel/compiler/brw_fs_visitor.cpp
index af9f803fb68..6509868f1c3 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -821,7 +821,13 @@ fs_visitor::emit_urb_writes(const fs_reg _vertex_count)
header_size);
 
  fs_inst *inst = abld.emit(opcode, reg_undef, payload);
- inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
+
+ /* For ICL WA 1805992985 one needs additional write in the end. */
+ if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL)
+inst->eot = false;
+ else
+inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
+
  inst->mlen = length + header_size;
  inst->offset = urb_offset;
  urb_offset = starting_urb_offset + slot + 1;
@@ -857,6 +863,49 @@ fs_visitor::emit_urb_writes(const fs_reg _vertex_count)
   inst->mlen = 2;
   inst->offset = 1;
   return;
+   } 
+ 
+   /* ICL WA 1805992985:
+*
+* ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The
+* send cycle, which is a urb write with an eot must be 4 phases long and
+* all 8 lanes must be valid.
+*/
+   if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL) {
+  fs_reg payload = fs_reg(VGRF, alloc.allocate(6), BRW_REGISTER_TYPE_UD);
+
+  /* Workaround requires all 8 channels (lanes) to be valid. This is
+   * understood to mean they all need to be alive. First trick is to find
+   * a live channel and copy its urb handle for all the other channels to
+   * make sure all handles are valid.
+   */
+  bld.exec_all().MOV(payload, bld.emit_uniformize(urb_handle));
+
+  /* Second trick is to use masked URB write where one can tell the HW to
+   * actually write data only for selected channels even though all are
+   * active.
+   * Third trick is to take advantage of the must-be-zero (MBZ) area in
+   * the very beginning of the URB.
+   *
+   * One masks data to be written only for the first channel and uses
+   * offset zero explicitly to land data to the MBZ area avoiding trashing
+   * any other part of the URB.
+   *
+   * Since the WA says that the write needs to be 4 phases long one uses
+   * 4 slots data. All are explicitly zeros in order to keep the MBZ
+   * area written as zeros.
+   */
+  bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0x1u));
+  bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
+  bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
+  bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u));
+  bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u));
+
+  fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
+  reg_undef, payload);
+  inst->eot = true;
+  inst->mlen = 6;
+  inst->offset = 0;
}
 }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] intel/isl: Align clear color buffer to full cacheline

2019-04-17 Thread Topi Pohjolainen
From: Rafael Antognolli 

Fixes MCS fast clear gpu hangs with Vulkan CTS on ICL in CI.

CC: Anuj Phogat 
CC: Kenneth Graunke 
Tested-by: Topi Pohjolainen 
Signed-off-by: Rafael Antognolli 
---
 src/intel/isl/isl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 6b9e6c9e0f0..acfed5119ba 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -122,7 +122,8 @@ isl_device_init(struct isl_device *dev,
dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
dev->ss.align = isl_align(dev->ss.size, 32);
 
-   dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4;
+   dev->ss.clear_color_state_size =
+  isl_align(CLEAR_COLOR_length(info) * 4, 64);
dev->ss.clear_color_state_offset =
   RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] intel/compiler/icl: Use tcs barrier id bits 24:30 instead of 24:27

2019-03-27 Thread Topi Pohjolainen
Similarly to 1cc17fb731466c68586915acbb916586457b19bc

Fixes gpu hangs with dEQP-VK.tessellation.shader_input_output.barrier

CC: Anuj Phogat 
CC: Clayton Craft 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/compiler/brw_fs_nir.cpp | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 747529e72d8..ee8274de65a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2458,15 +2458,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder 
,
   bld.exec_all().MOV(m0, brw_imm_ud(0u));
 
   /* Copy "Barrier ID" from r0.2, bits 16:13 */
-  chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
-  brw_imm_ud(INTEL_MASK(16, 13)));
+  if (devinfo->gen < 11) {
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(INTEL_MASK(16, 13)));
 
-  /* Shift it up to bits 27:24. */
-  chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ /* Shift it up to bits 27:24. */
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
+  } else {
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(INTEL_MASK(30, 24)));
+  }
 
   /* Set the Barrier Count and the enable bit */
-  chanbld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
+  if (devinfo->gen < 11)
+ chanbld.OR(m0_2, m0_2,
+brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
+  else
+ chanbld.OR(m0_2, m0_2,
+brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
 
   bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
   break;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 61/61] nir: Document precision lowering pass

2018-11-05 Thread Topi Pohjolainen
---
 src/compiler/nir/nir_lower_precision.cpp | 106 ++-
 1 file changed, 104 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_lower_precision.cpp 
b/src/compiler/nir/nir_lower_precision.cpp
index 3d05fa2b3c9..9647fb4d6a9 100644
--- a/src/compiler/nir/nir_lower_precision.cpp
+++ b/src/compiler/nir/nir_lower_precision.cpp
@@ -21,8 +21,110 @@
  * IN THE SOFTWARE.
  */
 
-/* TODO: Introduce helpers in C++ space for examining GLSL types and make
- *   this file just C.
+/*
+ * This lowering pass seeks to change precision for float values and for the
+ * expressions producing them. Decision making follows the rules described in
+ * GLES 3.2 Specification, section 4.7.3 Precision Qualifiers. In short, the
+ * idea is that an arithmetic expression can be performed in 16-bit precision
+ * if and only if all its operands are either already fixed to 16-bits or are
+ * such that compiler is free to use either 32 or 16-bit precision.
+ *
+ * First step is to go over the variables as these are the only things that
+ * are marked by the shader author with explicit instructions if high
+ * precision is needed (or lower precision allowed respectively). This
+ * implementation sets precision unconditionally to 16-bits whenever allowed
+ * by the shader author. (This may not produce the most optimal end result
+ * but is a design choice to keep the complexity at bay).
+ *
+ * This is followed by manipulation of instructions themselves. Variable
+ * derefs and intrinsics dealing with derefs are straight-forward. For them
+ * one only needs to consult the variables themselves and adjust the precision
+ * of the instruction in question accordingly.
+ *
+ * At this point things get more complex as the rest are dependent on context.
+ * Precision for texturing return values (sample values) and constant loads 
+ * depend on the needs of consuming expressions. As there may be need for
+ * both 16 and 32-bit precision, one cannot simply just set them as 16-bits.
+ * There may be, for example, two separate multiplications of a sample value
+ * S. One multiplying it with 32-bit value A and the other with 16-bit
+ * value B. (Recall that the rules mandate that if one of the source operands
+ * has full precision then the rest need to have full precision as well).
+ * Hence A * S requires the sample value S with full precision. This in turn
+ * means that texturing needs to return full precision and the sample value
+ * needs to be converted to 16-bits for the other multiplication (B * S).
+ * NOTE: Hardware may have capability for mixed mode instructions and it is
+ *   left for the backend to drop any unnecessary conversions.
+ *
+ * Here the implementation leaves all texturing and constant load operations
+ * to 32-bit precision until all instructions are analysed. Instead it inserts
+ * conversions from 32-bits to 16-bits for expressions that can operate with
+ * lower precision. In the example above, the multiplication of the sample
+ * with 16-bit value B would become C = B * f2f16(S). This is important for
+ * the analysis of the rest of the instructions. Once the pass examines
+ * expressions consuming C the pass can allow these expressions with lower
+ * precision if all the operands are allowed in lower precision. If one had
+ * left C with 32-bit precision it would have prevented the use of 16-bit
+ * precision in the consuming expressions even though all other operands would
+ * have allowed that.
+ *
+ * Once all instructions are examined there is a separate pass that goes
+ * through all the uses of texturing return values. If all are happy with
+ * lower precision, the pass removes the conversions (f2f16) and switches the
+ * texturing itself to directly return 16-bit samples (given that hardware
+ * supports 16-bit sample values of course).
+ *
+ * For input varyings marked with lower precision there is an alternative to
+ * uploading 16-bit values into the shader. One can load them using full
+ * precision but immediately convert them into 16-bits before they are used.
+ * This allows one to perform all calculations based on them in 16-bit
+ * precision but still keep the upload mechanism intact in the backend.
+ *
+ * TODO:
+ *
+ * 1) There is still a major flaw: logic is against the rules as it considers
+ *arithmetic expressions without regard to their consuming expressions.
+ *As alus at nir level are just sub-expressions of larger expressions they
+ *are subject to the uses and shouldn't be examined just based on their
+ *own sources.
+ *One should recursively examine uses until either a fixed search depth
+ *(heuristic to avoid runtime explosion) or it becomes clear which
+ *precision is needed. Naturally there may be both low and high precision
+ *uses. In order to keep things simple one could just force all lower
+ *precision uses to high in case even one high precision use is found or
+ *the search depth boundary is hit.
+ *
+ * 2) 

[Mesa-dev] intel: WIP: Support for using 16-bits for mediump

2018-11-05 Thread Topi Pohjolainen
Here is a version 2 of adding support for 16-bit float instructions in
the shader compiler. Unlike the first version which did all the analysis
at glsl level here one adds the notion of precision to NIR variables and
does the analysis and precision lowering in NIR level.

This lives in: gitlab.freedesktop.org:tpohjola/mesa and branch fp16.

This is now mature enough to be able to use 16-bit precision for all
instructions except a few special cases for gfxbench trex and alu2.
(Unfortunately I'm not seeing any performance benefit. This is not
that surprising as I got to the same point with the glsl-based
solution and was able to measure the performance already back then).
Hence I thought it is time to share it.

While this is still work-in-progress I didn't want to flood the list
with the full set of patches but instead included the very last where
I try to outline the logic and its current shortcomings. There is also
a short list of TODO items.

In addition to those I need to examine a couple of Intel-specific
misrenderings. I haven't gotten that deep yet but it looks like I'm missing
something with 16-bit inot and mad/mac lowered interpolation.
Unfortunately I get corrupted rendering only with hardware while
simulator is happy.

My main concern is how to test all of this properly. I haven't written
any unit tests but that is high on my list. This is mostly because I've
been uncertain about my design choices. So far I've used shader
runner tests that I've written for specific cases. These are useful for
development purposes but don't bring much value for regression testing.

Alejandro Piñeiro (1):
  intel/compiler/fs: Use half_precision data_format on 16-bit fb writes

Jose Maria Casanova Crespo (2):
  intel/compiler/fs: Include support for RT data_format bit
  intel/compiler/disasm: Show half-precision data_format on rt_writes

Topi Pohjolainen (58):
  intel/compiler/fs: Set 16-bit sampler return format
  intel/compiler/disasm: Show half-precision for sampler messages
  intel/compiler/fs: Skip tex-inst early in conversion lowering
  intel/compiler/fs: Support for dumping 16-bit IMM values
  intel/compiler: Allow 16-bit math
  intel/compiler/fs: Add helpers for 16-bit null regs
  intel/compiler/fs: Use two SIMD8 instructions for 16-bit math
  intel/compiler/fs: Use 16-bit null dest with 16-bit math
  intel/compiler/fs: Use 16-bit null dest with 16-bit compare
  intel/compiler/fs: Add 16-bit type support for nir_if
  intel/compiler/eu: Prepare 3-src-op for 16-bit sources
  intel/compiler/eu: Prepare 3-src-op for 16-bit dst
  intel/compiler/eu: Allow 3-src-op with mixed precision (HF/F) sources
  intel/compiler/disasm: Print mixed precision 3-src types correctly
  intel/compiler/disasm: Print 16-bit IMM values
  intel/compiler/fs: Support for combining 16-bit immediates
  intel/compiler/fs: Set tex type for generator to flag fp16
  intel/compiler/fs: Use component_size() instead of open coded
  intel/compiler/fs: Add register padding support
  intel/compiler/fs: Pad 16-bit texture return payloads
  intel/compiler/fs: Pad 16-bit output (store/fb write) payloads
  intel/compiler/fs: Pad 16-bit nir vec* components into full reg
  intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg
  intel/compiler/fs: Pad 16-bit const loads into full regs
  intel/compiler/fs: Pad 16-bit load payload lowering
  nir: Lower also 16-bit lrp() if needed
  intel/compiler: Lower 16-bit lrp()
  nir: Recognize f232(f216(x)) as x
  nir: Recognize f216(f232(x)) as x
  nir: Store variable precision when translating from glsl
  glsl: Set default precision for builtin variables
  i965: Prepare uniform mapping for 16-bit values
  i965: Support for uploading 16-bit uniforms from 32-bit store
  intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms
  intel/compiler: Tell compiler if lower precision is supported
  nir: Add lowering pass for variables marked mediump
  nir: Add pass for deref precision lowering
  nir: Add pass for alu precision lowering
  nir: Add precision conversion for load/store_deref
  nir: Add precision conversion for sources of texturing ops
  nir: Don't set destination size 16 for booleans
  nir: Add precision lowering for texture samples
  nir: Add support for non-fixed precision
  nir: Don't try to alter precision of boolean sources
  nir: Add support for variable sized booleans
  nir: Add support for lowering phi precision
  intel/compiler/fs: Prepare alu dest type for 16-bit booleans
  nir: Add lowering pass setting 16-bit boolean destinations
  nir: Add lowering pass turning b2f(i2i32(x)) into b2f(x)
  nir: Adjust integer precision for alus operating with 16-bit srcs
  nir: Replace b2f(x) with b2f(i2i32(x)) for 16-bit x
  nir: Adjust precision for discard_if
  nir: Allow input varyings to be converted to lower precision
  nir: Replace 16-bit src[0] for bcsel i2i32(src[0])
  nir: Replace 16-bit nir_if condition with i2i32(condition)
  Revert "intel/compiler: fix 16-bit comparisons"
  intel/comp

[Mesa-dev] intel/icl: RFC: Two hardware workarounds

2018-10-29 Thread Topi Pohjolainen
These don't seem to fix anything (hence RFC). Moreover, vertex
combining is not documented to harm anything. I thought it better
to have them on the list anyway.

CC: Anuj Phogat 

Topi Pohjolainen (2):
  intel/icl: Disable combining of vertices from separate instances
  intel/isl/icl: Use halign == 8 instead 4 hw workaround

 src/intel/blorp/blorp_genX_exec.h |  6 
 src/intel/isl/isl_gen8.c  | 35 +++
 src/intel/vulkan/genX_pipeline.c  |  6 
 src/mesa/drivers/dri/i965/genX_state_upload.c |  6 
 4 files changed, 53 insertions(+)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] intel/isl/icl: Use halign == 8 instead 4 hw workaround

2018-10-29 Thread Topi Pohjolainen
CC: Jason Ekstrand 
CC: Nanley Chery 
CC: Anuj Phogat 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/isl/isl_gen8.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/src/intel/isl/isl_gen8.c b/src/intel/isl/isl_gen8.c
index 2199b8d22d..f9a424dd48 100644
--- a/src/intel/isl/isl_gen8.c
+++ b/src/intel/isl/isl_gen8.c
@@ -87,6 +87,38 @@ isl_gen8_choose_msaa_layout(const struct isl_device *dev,
return true;
 }
 
+static void
+gen11_wa_1604596806(const struct isl_surf_init_info *restrict info,
+enum isl_tiling tiling, const uint32_t bpb,
+struct isl_extent3d *align_el)
+
+{
+   /* Don't try to apply the workaround for depth or stencil. See the Ice Lake
+* BSpec: Shared Functions - vol5c Shared Functions - RENDER_SURFACE_STATE:
+*
+* This field is intended to be set to HALIGN_8 only if the surface was
+* rendered as a depth buffer with Z16 format or a stencil buffer. In this
+* case it must be set to HALIGN_8 since these surfaces support only
+* alignment of 8. For Z32 formats it must be set to HALIGN_4. Use of
+* HALIGN_8 for other surfaces is supported, but increases memory usage.
+*/
+   if (info->usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT))
+  return;
+
+   /* See the Ice Lake BSpec:  GEN:BUG:1604596806 : Pixel Corruption in
+* subspan combining (8x4 combining) scenarios if halign=4 
+* 
+* Shared Functions - vol5c Shared Functions - RENDER_SURFACE_STATE:
+*
+* For surface format = 32 bpp, num_multisamples = 1 , MIpcount > 0 and
+* surface walk = TiledY, HALIGN must be programmed to 8
+*/
+   if (tiling == ISL_TILING_Y0 && bpb == 32 && info->samples == 1) {
+  assert(align_el->w == 4);
+  align_el->w = 8;
+   }
+}
+
 void
 isl_gen8_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict 
info,
@@ -174,4 +206,7 @@ isl_gen8_choose_image_alignment_el(const struct isl_device 
*dev,
const uint32_t halign = needs_halign16 ? 16 : 4;
 
*image_align_el = isl_extent3d(halign, valign, 1);
+
+   if (!needs_halign16 && dev->info->gen == 11)
+  gen11_wa_1604596806(info, tiling, fmtl->bpb, image_align_el);
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] intel/icl: Disable combining of vertices from separate instances

2018-10-29 Thread Topi Pohjolainen
This is a new hardware feature, and should be disabled until it is
clear our VS kernels are prepared for it. Thread payload has new
bit (See Bspec: Pipeline Stages - 3D Pipeline Geometry -
Vertex Shader (VS) Stage - Payloads - SIMD8 Payload [BDW+])
that vertex shaders could consult.

CC: Jason Ekstrand 
CC: Kenneth Graunke 
CC: Anuj Phogat 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/blorp/blorp_genX_exec.h | 6 ++
 src/intel/vulkan/genX_pipeline.c  | 6 ++
 src/mesa/drivers/dri/i965/genX_state_upload.c | 6 ++
 3 files changed, 18 insertions(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 50341ab0ec..10865b9c15 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -629,6 +629,12 @@ blorp_emit_vs_config(struct blorp_batch *batch,
 #if GEN_GEN >= 8
  vs.SIMD8DispatchEnable =
 vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
+#endif
+#if GEN_GEN >= 11
+ /* TODO: Disable combining of instances until it is clear VS kernels
+  * are prepared for it.
+  */
+ vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable;
 #endif
   }
}
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 33f1f7832a..9762fc78b5 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1157,6 +1157,12 @@ emit_3dstate_vs(struct anv_pipeline *pipeline)
   vs.SIMD8DispatchEnable  =
  vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
 #endif
+#if GEN_GEN >= 11
+  /* TODO: Disable combining of instances until it is clear VS kernels
+   * are prepared for it.
+   */
+  vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable;
+#endif
 
   assert(!vs_prog_data->base.base.use_alt_mode);
 #if GEN_GEN < 11
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 740cb0c4d2..9198a2953a 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2277,6 +2277,12 @@ genX(upload_vs_state)(struct brw_context *brw)
 
   vs.UserClipDistanceCullTestEnableBitmask =
  vue_prog_data->cull_distance_mask;
+#endif
+#if GEN_GEN >= 11
+  /* TODO: Disable combining of instances until it is clear VS kernels
+   * are prepared for it.
+   */
+  vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable;
 #endif
}
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler/icl: Use invocation id bits 22:16 instead of 23:17

2018-10-16 Thread Topi Pohjolainen
Identifier bits in the dispatch header have changed. See Bspec:

SINGLE_PATCH Payload:

3D Pipeline Stages - 3D Pipeline Geometry -
Hull Shader (HS) Stage IVB+ - Payloads IVB+

Fixes: 
KHR-GL46.tessellation_shader.tessellation_shader_tc_barriers.barrier_guarded_read_write_calls

CC: Anuj Phogat 
CC: Mark Janes 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/compiler/brw_fs.cpp | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 23a25fedca5..757147b01ec 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6593,14 +6593,18 @@ fs_visitor::run_tcs_single_patch()
if (tcs_prog_data->instances == 1) {
   invocation_id = channels_ud;
} else {
+  const unsigned invocation_id_mask = devinfo->gen >= 11 ?
+ INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
+  const unsigned invocation_id_shift = devinfo->gen >= 11 ? 16 : 17;
+
   invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
 
   /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */
   fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
   fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD);
   bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
-  brw_imm_ud(INTEL_MASK(23, 17)));
-  bld.SHR(instance_times_8, t, brw_imm_ud(17 - 3));
+  brw_imm_ud(invocation_id_mask));
+  bld.SHR(instance_times_8, t, brw_imm_ud(invocation_id_shift - 3));
 
   bld.ADD(invocation_id, instance_times_8, channels_ud);
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler/icl: Use barrier id bits 24:30 instead of 24:27, 31

2018-09-21 Thread Topi Pohjolainen
Fixes gpu hangs with Carchase and Manhattan.

Cc: Anuj Phogat 
Signed-off-by: Topi Pohjolainen 
---
 src/intel/compiler/brw_fs_visitor.cpp | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_visitor.cpp 
b/src/intel/compiler/brw_fs_visitor.cpp
index cd2abbb5960..51a0ca2374a 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -791,9 +791,19 @@ fs_visitor::emit_cs_terminate()
 void
 fs_visitor::emit_barrier()
 {
-   assert(devinfo->gen >= 7);
-   const uint32_t barrier_id_mask =
-  devinfo->gen >= 9 ? 0x8f00u : 0x0f00u;
+   uint32_t barrier_id_mask;
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+  barrier_id_mask = 0x0f00u; break;
+   case 9:
+   case 10:
+  barrier_id_mask = 0x8f00u; break;
+   case 11:
+  barrier_id_mask = 0x7f00u; break;
+   default:
+  unreachable("barrier is only available on gen >= 7");
+   }
 
/* We are getting the barrier ID from the compute shader header */
assert(stage == MESA_SHADER_COMPUTE);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/decoder: Use gen_group::dw_length when available

2018-04-23 Thread Topi Pohjolainen
Otherwise gen_group_get_length() will try to use the first fields
of, for example, CC_VIEWPORT and SF_CLIP to determine the
group size. These packets are not present in the state with a
full header but simply as their contents, while the equivalent
state pointers (3DSTATE_VIEWPORT_STATE_POINTERS_CC and
3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP respectively) tell
their starting points.

Before:
---

0xfffef540:  0x7823:  3DSTATE_VIEWPORT_STATE_POINTERS_CC

0xfffef540:  0x7823 : Dword 0
DWord Length: 0
0xfffef544:  0x0180 : Dword 1
CC Viewport Pointer: 0x0180
CC_VIEWPORT 0
0xfffea180:  0x : Dword 0
Minimum Depth: 0.00
0xfffef548:  0x7821:  3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
0xfffef548:  0x7821 : Dword 0
DWord Length: 0
0xfffef54c:  0x01c0 : Dword 1
SF Clip Viewport Pointer: 0x01c0
SF_CLIP_VIEWPORT 0
0xfffea1c0:  0x4100 : Dword 0
Viewport Matrix Element m00: 0.00
0xfffea1c4:  0x3f00 : Dword 1
Viewport Matrix Element m11: 0.50
0xfffef550:  0x7824:  3DSTATE_BLEND_STATE_POINTERS

After:
--

0xfffef540:  0x7823:  3DSTATE_VIEWPORT_STATE_POINTERS_CC

0xfffef540:  0x7823 : Dword 0
DWord Length: 0
0xfffef544:  0x0180 : Dword 1
CC Viewport Pointer: 0x0180
CC_VIEWPORT 0
0xfffea180:  0x : Dword 0
Minimum Depth: 0.00
0xfffea184:  0x3f80 : Dword 1
Maximum Depth: 1.00
0xfffef548:  0x7821:  3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
0xfffef548:  0x7821 : Dword 0
DWord Length: 0
0xfffef54c:  0x01c0 : Dword 1
SF Clip Viewport Pointer: 0x01c0
SF_CLIP_VIEWPORT 0
0xfffea1c0:  0x4100 : Dword 0
Viewport Matrix Element m00: 0.00
0xfffea1c4:  0x3f00 : Dword 1
Viewport Matrix Element m11: 0.50
0xfffea1c8:  0x3f00 : Dword 2
Viewport Matrix Element m22: 0.50
0xfffea1cc:  0x4100 : Dword 3
Viewport Matrix Element m30: 8.00
0xfffea1d0:  0x3f00 : Dword 4
Viewport Matrix Element m31: 0.50
0xfffea1d4:  0x3f00 : Dword 5
Viewport Matrix Element m32: 0.50
0xfffea1d8:  0x : Dword 6
0xfffea1dc:  0x : Dword 7
0xfffea1e0:  0xc500 : Dword 8
X Min Clip Guardband: -2048.00
0xfffea1e4:  0x4500 : Dword 9
X Max Clip Guardband: 2048.00
0xfffea1e8:  0xc700 : Dword 10
Y Min Clip Guardband: -32768.00
0xfffea1ec:  0x4700 : Dword 11
Y Max Clip Guardband: 32768.00
0xfffea1f0:  0x : Dword 12
X Min ViewPort: 0.00
0xfffea1f4:  0x4170 : Dword 13
X Max ViewPort: 15.00
0xfffea1f8:  0x : Dword 14
Y Min ViewPort: 0.00
0xfffea1fc:  0x : Dword 15
Y Max ViewPort: 0.00

CC: Lionel Landwerlin <lionel.g.landwer...@intel.com>
CC: Kenneth Graunke <kenn...@whitecape.org>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/common/gen_decoder.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 1b8123b..cc212cc 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -713,6 +713,9 @@ gen_group_find_field(struct gen_group *group, const char 
*name)
 int
 gen_group_get_length(struct gen_group *group, const uint32_t *p)
 {
+   if (group->dw_length)
+  return group->dw_length;
+
uint32_t h = p[0];
uint32_t type = field_value(h, 29, 31);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/urb/cnl: Apply gen7 CS stall

2018-04-19 Thread Topi Pohjolainen
This didn't actually help the failing tests I'm looking at
but hopefully has teeth elsewhere.

CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Jordan Justen <jordan.l.jus...@intel.com>
CC: Anuj Phogat <anuj.pho...@gmail.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/gen7_urb.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 2e5f8e6..9e12657 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -145,8 +145,15 @@ gen7_emit_push_constant_state(struct brw_context *brw, 
unsigned vs_size,
 * in the ring after this instruction.
 *
 * No such restriction exists for Haswell or Baytrail.
+*
+* From the CNL Bspec, Windower -
+* 3DSTATE_PUSH_CONSTANT_ALLOC_PS/VS/GS/DS/HS:
+* 
+* This command must be followed by a PIPE_CONTROL with CS Stall bit
+* set.
 */
-   if (devinfo->gen < 8 && !devinfo->is_haswell && !devinfo->is_baytrail)
+   if ((devinfo->gen < 8 && !devinfo->is_haswell && !devinfo->is_baytrail) ||
+   devinfo->gen >= 10)
   gen7_emit_cs_stall_flush(brw);
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/miptree: Initialize mcs buffer only until clear color

2018-04-06 Thread Topi Pohjolainen
Otherwise even the clear color gets initialised to 0xFF. This
allows enabling of color fast clears on ICL without regressing
multisampling tests.

CC: Rafael Antognolli <rafael.antogno...@intel.com>
CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Nanley Chery <nanley.g.ch...@intel.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 89074a6..25f901d 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1680,7 +1680,12 @@ intel_miptree_init_mcs(struct brw_context *brw,
   return;
}
void *data = map;
-   memset(data, init_value, mt->mcs_buf->size);
+
+   /* Only initialize until clear color (if present). */
+   const unsigned aux_size = mt->mcs_buf->clear_color_offset ?
+mt->mcs_buf->clear_color_offset :
+mt->mcs_buf->size;
+   memset(data, init_value, aux_size);
brw_bo_unmap(mt->mcs_buf->bo);
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Assert base format before truncating to unsigned short

2018-04-06 Thread Topi Pohjolainen
CID: 1433709
Fixes: ca721b3d8: mesa: use GLenum16 in a few more places
CC: Marek Olšák <marek.ol...@amd.com>
CC: Brian Paul <bri...@vmware.com>

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/main/teximage.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 8f53510..f560512 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -845,6 +845,7 @@ _mesa_init_teximage_fields_ms(struct gl_context *ctx,
 mesa_format format,
 GLuint numSamples, GLboolean fixedSampleLocations)
 {
+   const GLint base_format =_mesa_base_tex_format(ctx, internalFormat);
GLenum target;
assert(img);
assert(width >= 0);
@@ -852,8 +853,8 @@ _mesa_init_teximage_fields_ms(struct gl_context *ctx,
assert(depth >= 0);
 
target = img->TexObject->Target;
-   img->_BaseFormat = _mesa_base_tex_format( ctx, internalFormat );
-   assert(img->_BaseFormat != -1);
+   assert(base_format != -1);
+   img->_BaseFormat = (GLenum16)base_format;
img->InternalFormat = internalFormat;
img->Border = border;
img->Width = width;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/dev: Assert the number of slices is not zero

2018-04-05 Thread Topi Pohjolainen
Fixes: c1900f5b intel: devinfo: add helper functions to fill...
CID: 1433511
CC: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/dev/gen_device_info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index f7cb94f..dfeab6e 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -1047,7 +1047,7 @@ gen_device_info_update_from_topology(struct 
gen_device_info *devinfo,
/* We expect the total number of EUs to be uniformly distributed throughout
 * the subslices.
 */
-   assert((n_eus % n_subslices) == 0);
+   assert(n_subslices && (n_eus % n_subslices) == 0);
devinfo->num_eu_per_subslice = n_eus / n_subslices;
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: Check if u_vector_init() succeeds

2018-04-05 Thread Topi Pohjolainen
However, it only fails when running out of memory. If we are
going to check for that, we should be consistent and check
the allocation of the worklist as well.

On the other hand, there are other places where we don't check
for allocation failures. Therefore I'm not sure we should bother
here either. Coverity complains, but I can just as well mark it
as ignored.

CID: 1433512
Fixes: edb18564c7 nir: Initial implementation of a nir_instr_worklist
CC: Thomas Helland <thomashellan...@gmail.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/nir/nir_worklist.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
index e376908..3fb391f 100644
--- a/src/compiler/nir/nir_worklist.h
+++ b/src/compiler/nir/nir_worklist.h
@@ -105,8 +105,15 @@ typedef struct {
 static inline nir_instr_worklist *
 nir_instr_worklist_create() {
nir_instr_worklist *wl = malloc(sizeof(nir_instr_worklist));
-   u_vector_init(&wl->instr_vec, sizeof(struct nir_instr *),
- sizeof(struct nir_instr *) * 8);
+   if (!wl)
+  return NULL;
+
+   if (!u_vector_init(&wl->instr_vec, sizeof(struct nir_instr *),
+  sizeof(struct nir_instr *) * 8)) {
+  free(wl);
+  return NULL;
+   }
+
return wl;
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/blorp/hiz: Emit CC viewport

2018-04-03 Thread Topi Pohjolainen
Otherwise the simulator for ICL complains that:

B-spec CC_ViewPort Minimum Depth cannot be greater than Maximum Depth

CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Kenneth Graunke <kenn...@whitecape.org>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_genX_exec.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 992bc99..e16d10c 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1570,6 +1570,7 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
 * emit 3DSTATE_MULTISAMPLE.
 */
blorp_emit_3dstate_multisample(batch, params);
+   blorp_emit_cc_viewport(batch);
 
/* If we can't alter the depth stencil config and multiple layers are
 * involved, the HiZ op will fail. This is because the op requires that a
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] intel/isl: Add support for enabling clear color conversion

2018-04-03 Thread Topi Pohjolainen
CC: Rafael Antognolli <rafael.antogno...@intel.com>
CC: Jordan Justen <jordan.l.jus...@intel.com>
CC: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/isl/isl.h   |  6 ++
 src/intel/isl/isl_surface_state.c | 11 +++
 2 files changed, 17 insertions(+)

diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index d65c621..ee89e07 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1316,6 +1316,12 @@ struct isl_surf_fill_state_info {
uint64_t clear_address;
 
/**
+* On gen11+, tells if the hardware should write the given clear color out
+* for sampler and display engine in native format.
+*/
+   bool clear_color_conversion_enable;
+
+   /**
 * Surface write disables for gen4-5
 */
isl_channel_mask_t write_disables;
diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index 77931f2..83a 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -637,6 +637,17 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
 #endif
 
if (info->aux_usage != ISL_AUX_USAGE_NONE) {
+#if GEN_GEN >= 11
+  /* From the Ice Lake BSpec, RENDER_SURFACE_STATE:
+   *
+   *  Enables Pixel backend hw to convert clear values into native format
+   *  and write back to clear address, so that display and sampler can use
+   * the converted value for resolving fast cleared RTs.
+   */
+  s.ClearColorConversionEnable = info->clear_color_conversion_enable;
+#else
+  assert(!info->clear_color_conversion_enable);
+#endif
 #if GEN_GEN >= 10
   s.ClearValueAddressEnable = true;
   s.ClearValueAddress = info->clear_address;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] intel/blorp/icl: Enable clear color conversion when fast clearing

2018-04-03 Thread Topi Pohjolainen
CC: Rafael Antognolli <rafael.antogno...@intel.com>
CC: Jordan Justen <jordan.l.jus...@intel.com>
CC: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_genX_exec.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 721f02a..53c6b2a 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1319,6 +1319,8 @@ blorp_emit_surface_state(struct blorp_batch *batch,
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
.mocs = surface->addr.mocs,
.clear_color = surface->clear_color,
+   .clear_color_conversion_enable =
+   GEN_GEN >= 11 && op == ISL_AUX_OP_FAST_CLEAR,
.write_disables = write_disable_mask);
 
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Don't try to disable render buffers for compute

2018-01-16 Thread Topi Pohjolainen
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104546
CC: xinghua@intel.com
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e8..626cd3fdb70 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -441,8 +441,10 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering)
 tex_obj = intel_texture_object(u->TexObj);
 
 if (tex_obj && tex_obj->mt) {
-   intel_disable_rb_aux_buffer(brw, tex_obj->mt, 0, ~0,
-   "as a shader image");
+   if (rendering) {
+  intel_disable_rb_aux_buffer(brw, tex_obj->mt, 0, ~0,
+  "as a shader image");
+   }
 
intel_miptree_prepare_image(brw, tex_obj->mt);
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [v3 01/11] framework: Check for vulkan availability

2017-12-21 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4259ec832..c90109907 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -173,6 +173,8 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
endif()
 ENDIF()
 
+pkg_check_modules(LIBVULKAN QUIET vulkan)
+
 IF(PIGLIT_HAS_GLX)
option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON)
 ELSE()
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] v3: ext_memory_object: Test sampling memory exported from Vulkan

2017-12-21 Thread Topi Pohjolainen
Here is a revision taking into account feedback from Andres and Fredrik.
Many thanks to both, I hope I didn't miss anything.

CC: Andres Rodriguez <andre...@gmail.com>
CC: Fredrik Hoeglund <fred...@kde.org>
CC: Jason Ekstrand <ja...@jlekstrand.net>

Topi Pohjolainen (11):
  framework: Check for vulkan availability
  framework: HACK: Read glslc path from env
  ext_memory_object: Add script for turning glsl into spirv c-array
  ext_memory_object: Support for setting up vulkan device
  ext_memory_object: Support for drawing with vulkan
  ext_memory_object: Support for setting up vulkan framebuffer
  ext_memory_object: Add tex layout command line
  ext_memory_object: Support for importing vulkan memory
  ext_memory_object: Support for creating simple vulkan pipelines
  ext_memory_object: Add helper for image type support
  ext_memory_object: Test render with vulkan and sample with gl

 CMakeLists.txt |   3 +
 tests/spec/ext_memory_object/CMakeLists.gl.txt |  18 +
 tests/spec/ext_memory_object/common.c  | 167 +
 tests/spec/ext_memory_object/common.h  |  51 ++
 .../compile_and_dump_glsl_as_spirv.py  | 139 +
 tests/spec/ext_memory_object/vk_common.c   | 670 +
 tests/spec/ext_memory_object/vk_common.h   | 176 ++
 .../ext_memory_object/vk_export_image_as_tex.c | 219 +++
 tests/spec/ext_memory_object/vk_fb.c   | 346 +++
 tests/spec/ext_memory_object/vk_fragcoord.fs   |   7 +
 tests/spec/ext_memory_object/vk_fragcoord.vs   |   8 +
 11 files changed, 1804 insertions(+)
 create mode 100644 tests/spec/ext_memory_object/common.c
 create mode 100644 tests/spec/ext_memory_object/common.h
 create mode 100644 
tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
 create mode 100644 tests/spec/ext_memory_object/vk_common.c
 create mode 100644 tests/spec/ext_memory_object/vk_common.h
 create mode 100644 tests/spec/ext_memory_object/vk_export_image_as_tex.c
 create mode 100644 tests/spec/ext_memory_object/vk_fb.c
 create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.fs
 create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.vs

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [v3 03/11] ext_memory_object: Add script for turning glsl into spirv c-array

2017-12-21 Thread Topi Pohjolainen
This is a stripped-down version of glsl_scraper.py found in crucible.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 .../compile_and_dump_glsl_as_spirv.py  | 139 +
 1 file changed, 139 insertions(+)
 create mode 100644 
tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py

diff --git a/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py 
b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
new file mode 100644
index 0..b7fdeafe2
--- /dev/null
+++ b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
@@ -0,0 +1,139 @@
+#! /usr/bin/env python3
+
+import argparse
+import io
+import os
+import re
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+from textwrap import dedent
+
+class ShaderCompileError(RuntimeError):
+def __init__(self, *args):
+super(ShaderCompileError, self).__init__(*args)
+
+class Shader:
+def __init__(self, stage, infname):
+self.stage = stage
+self.infname = infname
+self.dwords = None
+self.var_prefix = os.path.basename(infname).replace('.', '_')
+
+def __run_glslc(self, extra_args=[]):
+stage_flag = '-fshader-stage=' + self.stage
+
+with subprocess.Popen([glslc] + extra_args +
+  [stage_flag, '-std=430core', '-o', '-',
+   self.infname],
+  stdout = subprocess.PIPE,
+  stderr = subprocess.PIPE,
+  stdin = subprocess.PIPE) as proc:
+
+out, err = proc.communicate(timeout=30)
+
+if proc.returncode != 0:
+# Unfortunately, glslang dumps errors to standard out.
+# However, since we don't really want to count on that,
+# we'll grab the output of both
+message = out.decode('utf-8') + '\n' + err.decode('utf-8')
+raise ShaderCompileError(message.strip())
+
+return out
+
+def compile(self):
+def dwords(f):
+while True:
+dword_str = f.read(4)
+if not dword_str:
+return
+assert len(dword_str) == 4
+yield struct.unpack('I', dword_str)[0]
+
+spirv = self.__run_glslc()
+self.dwords = list(dwords(io.BytesIO(spirv)))
+self.assembly = str(self.__run_glslc(['-S']), 'utf-8')
+
+def _dump_glsl_code(self, f, var_name):
+# First dump the GLSL source as strings
+f.write('static const char {0}[] ='.format(var_name))
+f.write('\n"#version 330\\n"')
+
+infile = open_file(self.infname, 'r')
+for line in infile:
+f.write('\n"{0}\\n"'.format(line.strip('\n')))
+f.write(';\n\n')
+
+def _dump_spirv_code(self, f, var_name):
+f.write('/* SPIR-V Assembly:\n')
+f.write(' *\n')
+for line in self.assembly.splitlines():
+f.write(' * ' + line + '\n')
+f.write(' */\n')
+
+f.write('static const uint32_t {0}[] = {{'.format(var_name))
+line_start = 0
+while line_start < len(self.dwords):
+f.write('\n')
+for i in range(line_start, min(line_start + 6, len(self.dwords))):
+f.write(' 0x{:08x},'.format(self.dwords[i]))
+line_start += 6
+f.write('\n};\n')
+
+def dump_c_code(self, f):
+self._dump_glsl_code(f, self.var_prefix + '_glsl_src')
+self._dump_spirv_code(f, self.var_prefix + '_spir_v_src')
+
+def parse_args():
+description = dedent("""\
+This program compiles the given glsl source file into SPIR-V and
+writes it to another C file as an array of 32-bit words.
+
+If '-' is passed as the input file or output file, stdin or stdout
+will be used instead of a file on disc.""")
+
+p = argparse.ArgumentParser(
+description=description,
+formatter_class=argparse.RawDescriptionHelpFormatter)
+p.add_argument('-o', '--outfile', default='-',
+help='Output to the given file (default: stdout).')
+p.add_argument('--with-glslc', metavar='PATH',
+default='glslc',
+dest='glslc',
+help='Full path to the glslc shader compiler.')
+p.add_argument('--stage', dest='stage')
+p.add_argument('infile', metavar='INFILE')
+
+return p.parse_args()
+
+def open_file(name, mode):
+if name == '-':
+if mode == 'w':
+return sys.stdout
+elif mode == 'r':
+return sys.stdin
+else:
+assert False
+else:
+return open(name, mode)
+
+args = parse_args()
+outfname = args.outfile
+glslc = args.glslc
+
+shader = Shader(args.stage, args.infile)
+shader.compile()
+
+with ope

[Mesa-dev] [v3 02/11] framework: HACK: Read glslc path from env

2017-12-21 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c90109907..767b90add 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -174,6 +174,7 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
 ENDIF()
 
 pkg_check_modules(LIBVULKAN QUIET vulkan)
+set(GLSLC $ENV{GLSLC})
 
 IF(PIGLIT_HAS_GLX)
option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-04 Thread Topi Pohjolainen
This is just a draft of some thoughts and is only compile-tested.

CC: "Rogovin, Kevin" 
---
 src/mesa/drivers/dri/i965/brw_blorp.c   |  8 +
 src/mesa/drivers/dri/i965/brw_context.h | 10 ++
 src/mesa/drivers/dri/i965/brw_draw.c| 54 -
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 680121b6ab..b3f84ab8ca 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -186,11 +186,19 @@ blorp_surf_for_miptree(struct brw_context *brw,
  surf->aux_addr.buffer = mt->hiz_buf->bo;
  surf->aux_addr.offset = mt->hiz_buf->offset;
   }
+
+  if (!is_render_target && brw->screen->devinfo.gen == 9)
+ gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
} else {
   surf->aux_addr = (struct blorp_address) {
  .buffer = NULL,
   };
   memset(&surf->clear_color, 0, sizeof(surf->clear_color));
+
+  if (!is_render_target && brw->screen->devinfo.gen == 9 &&
+  (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+   mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5))
+ gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
}
assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
   (surf->aux_addr.buffer == NULL));
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0670483806..44602c23c0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,11 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum gen9_astc5x5_wa_tex_type {
+   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+   GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1262,6 +1267,8 @@ struct brw_context
 */
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
 
+   enum gen9_astc5x5_wa_tex_type gen9_sampler_wa_tex_mask;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1286,6 +1293,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
 __DRIdrawable *drawable);
 void intel_prepare_render(struct brw_context *brw);
 
+void gen9_astc5x5_sampler_wa(struct brw_context *brw,
+ enum gen9_astc5x5_wa_tex_type curr_mask);
+
 void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering);
 
 void intel_resolve_for_dri2_flush(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..929f806eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -371,6 +371,50 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
return found;
 }
 
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_get_tex_mask(const struct brw_context *brw)
+{
+   enum gen9_astc5x5_wa_tex_type mask = 0;
+   const struct gl_context *ctx = &brw->ctx;
+   const struct intel_texture_object *tex_obj;
+
+   const int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+   for (int i = 0; i <= maxEnabledUnit; i++) {
+  if (!ctx->Texture.Unit[i]._Current)
+continue;
+  tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+  if (!tex_obj || !tex_obj->mt)
+continue;
+
+  if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+  if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+ mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+   }
+
+   return mask;
+}
+
+/* TODO: Do we actually need this both ways: astc5x5 followed by aux
+ *   and vice-versa? Or is only one direction problematic?
+ */
+void
+gen9_astc5x5_sampler_wa(struct brw_context *brw,
+enum gen9_astc5x5_wa_tex_type curr_mask)
+{
+   if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+   (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX))
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+   if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+   (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+   brw->gen9_sampler_wa_tex_mask = curr_mask;
+}
+
 /**
  * \brief Resolve buffers before drawing.
  *
@@ -383,6 +427,12 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering)
struct gl_context *ctx = &brw->ctx;
struct intel_texture_object *tex_obj;
 
+   const enum gen9_astc5x5_wa_tex_type curr_wa_mask =
+  (brw->screen->devinfo.gen == 9) ? gen9_astc5x5_wa_get_tex_mask(brw) : 0;
+
+   if (brw->screen->devinfo.gen == 9)
+  gen9_astc5x5_sampler_wa(brw, 

[Mesa-dev] [PATCH 44/51] glsl: WIP: Add lowering pass for treating mediump as float16

2017-11-24 Thread Topi Pohjolainen
At least the following need more thought:

1) Converting the right-hand side of assignments from 16 bits to 32 bits
   - The more correct thing to do is to treat the rhs as 32 bits at the
     latest in the expression producing the value

2) Texture arguments except coordinates are not handled at all
   - Moreover, coordinates are always converted into 32-bits due to
 logic missing in the Intel compiler backend.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/Makefile.sources   |   1 +
 src/compiler/glsl/ir_optimization.h |   1 +
 src/compiler/glsl/lower_mediump.cpp | 273 
 3 files changed, 275 insertions(+)
 create mode 100644 src/compiler/glsl/lower_mediump.cpp

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 2ab8e163a2..47bde4fb78 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -94,6 +94,7 @@ LIBGLSL_FILES = \
glsl/lower_int64.cpp \
glsl/lower_jumps.cpp \
glsl/lower_mat_op_to_vec.cpp \
+   glsl/lower_mediump.cpp \
glsl/lower_noise.cpp \
glsl/lower_offset_array.cpp \
glsl/lower_packed_varyings.cpp \
diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 2b8c195151..09c4d664e0 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -132,6 +132,7 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
 bool lower_discard(exec_list *instructions);
 void lower_discard_flow(exec_list *instructions);
 bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
+bool lower_mediump(struct gl_linked_shader *shader);
 bool lower_noise(exec_list *instructions);
 bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
 exec_list *instructions, bool lower_input, bool lower_output,
diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
new file mode 100644
index 00..89eed8b294
--- /dev/null
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_mediump.cpp
+ *
+ */
+
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+#include "ast.h"
+
+static const glsl_type *
+get_mediump(const glsl_type *highp)
+{
+   if (highp->is_float())
+  return glsl_type::get_instance(GLSL_TYPE_FLOAT16,
+ highp->vector_elements, 
+ highp->matrix_columns);
+
+   if (highp->is_array() && highp->fields.array->is_float())
+  return glsl_type::get_array_instance(
+glsl_type::get_instance(GLSL_TYPE_FLOAT16,
+highp->fields.array->vector_elements, 
+highp->fields.array->matrix_columns),
+highp->length);
+
+   return highp;
+}
+
+static bool
+is_16_bit(const ir_rvalue *ir)
+{
+   return ir->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16;
+}
+
+static bool
+refers_16_bit_float(const ir_rvalue *ir)
+{
+   ir_variable *var = ir->variable_referenced();
+
+   /* Only variables have the mediump property, constants need conversion. */
+   if (!var)
+  return false;
+
+   return var->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16;
+}
+
+static ir_rvalue *
+convert(ir_rvalue *ir, enum ir_expression_operation op)
+{
+   if (ir->ir_type == ir_type_constant) {
+  assert(op == ir_unop_f2h);
+  ir->type = get_mediump(ir->type);
+  return ir;
+   }
+
+   void *ctx = ralloc_parent(ir);
+   return new(ctx) ir_expression(op, ir);
+}
+
+class lower_mediump_visitor : public ir_rvalu

[Mesa-dev] [PATCH 29/51] intel/compiler/fs: Add register padding support

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp  |  3 ++-
 src/intel/compiler/brw_fs.h|  3 ++-
 src/intel/compiler/brw_fs_builder.h| 25 ++---
 src/intel/compiler/brw_fs_copy_propagation.cpp |  1 +
 src/intel/compiler/brw_fs_nir.cpp  |  9 +++--
 src/intel/compiler/brw_ir_fs.h |  3 +++
 6 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index cedfde5096..9c3410b698 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -440,6 +440,7 @@ fs_reg::fs_reg(struct ::brw_reg reg) :
 {
this->offset = 0;
this->stride = 1;
+   this->pad_per_component = 0;
if (this->file == IMM &&
(this->type != BRW_REGISTER_TYPE_V &&
 this->type != BRW_REGISTER_TYPE_UV &&
@@ -467,7 +468,7 @@ fs_reg::component_size(unsigned width) const
const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
 hstride == 0 ? 0 :
 1 << (hstride - 1));
-   return MAX2(width * stride, 1) * type_sz(type);
+   return (MAX2(width * stride, 1) * (type_sz(type)) + pad_per_component);
 }
 
 /**
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 30557324d5..d9c4f737e6 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -231,7 +231,8 @@ public:
   nir_jump_instr *instr);
fs_reg get_nir_src(const nir_src );
fs_reg get_nir_src_imm(const nir_src );
-   fs_reg get_nir_dest(const nir_dest );
+   fs_reg get_nir_dest(const nir_dest ,
+   bool pad_components_to_full_registers = false);
fs_reg get_nir_image_deref(const nir_deref_var *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder , const fs_inst ,
diff --git a/src/intel/compiler/brw_fs_builder.h 
b/src/intel/compiler/brw_fs_builder.h
index 633086c64b..804d52e5df 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -182,17 +182,28 @@ namespace brw {
* component in this IR).
*/
   dst_reg
-  vgrf(enum brw_reg_type type, unsigned n = 1) const
+  vgrf(enum brw_reg_type type,
+   unsigned n = 1,
+   bool pad_components_to_full_registers = false) const
   {
  assert(dispatch_width() <= 32);
 
- if (n > 0)
-return dst_reg(VGRF, shader->alloc.allocate(
-  DIV_ROUND_UP(n * type_sz(type) * 
dispatch_width(),
-   REG_SIZE)),
-   type);
- else
+ if (n == 0)
 return retype(null_reg_ud(), type);
+
+ const unsigned pad_per_component =
+(pad_components_to_full_registers &&
+ type_sz(type) == 2 &&
+ dispatch_width() == 8) ? (REG_SIZE / 2) : 0;
+ const unsigned size =
+n * ((type_sz(type) * dispatch_width()) + pad_per_component);
+ const unsigned nr = shader->alloc.allocate(
+DIV_ROUND_UP(size, REG_SIZE));
+
+ dst_reg dst = dst_reg(VGRF, nr, type);
+ dst.pad_per_component = pad_per_component;
+
+ return dst;
   }
 
   /**
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index ed2511ecfa..637a1de6ae 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -447,6 +447,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
inst->src[arg].file = entry->src.file;
inst->src[arg].nr = entry->src.nr;
inst->src[arg].stride *= entry->src.stride;
+   inst->src[arg].pad_per_component = entry->src.pad_per_component;
inst->saturate = inst->saturate || entry->saturate;
 
/* Compute the offset of inst->src[arg] relative to entry->dst */
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 16e8dfc186..35e78b134a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -357,6 +357,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
   unsigned size = array_elems * reg->num_components;
   const brw_reg_type reg_type =
  brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F);
+
+  /* TODO: Consider if 16-bit component padding is needed. */
+
   nir_locals[reg->index] = bld.vgrf(reg_type, size);
}
 
@@ -1602,13 +1605,15 @@ fs_visitor::get_nir_src_imm(const nir_src )
 }
 
 fs_reg
-fs_visitor::get_nir_dest(const nir_dest )
+fs_visitor::get_nir_dest(co

[Mesa-dev] [PATCH 45/51] glsl: Use 16-bit constants if operation is otherwise 16-bit

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 43 -
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index 89eed8b294..0276e74d6e 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -67,6 +67,25 @@ refers_16_bit_float(const ir_rvalue *ir)
return var->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16;
 }
 
+static bool
+is_constant(const ir_rvalue *ir)
+{
+   if (ir->ir_type == ir_type_constant)
+  return true;
+
+   if (ir->ir_type != ir_type_expression)
+  return false;
+
+   const ir_expression *expr = (const ir_expression *)ir;
+
+   for (unsigned i = 0; i < expr->num_operands; i++) {
+  if (!is_constant(expr->operands[i]))
+ return false;
+   }
+
+   return true;
+}
+
 static ir_rvalue *
 convert(ir_rvalue *ir, enum ir_expression_operation op)
 {
@@ -99,6 +118,7 @@ private:
bool can_be_lowered(const ir_variable *var) const;
 
void retype_to_float16(const glsl_type **t);
+   void retype_to_float16(ir_rvalue *ir);
 };
 
 bool
@@ -119,6 +139,22 @@ lower_mediump_visitor::retype_to_float16(const glsl_type 
**t)
*t = mediump;
 }
 
+void
+lower_mediump_visitor::retype_to_float16(ir_rvalue *ir)
+{
+   retype_to_float16(&ir->type);
+
+   if (ir->ir_type != ir_type_expression)
+  return;
+
+   const ir_expression *expr = (const ir_expression *)ir;
+
+   for (unsigned i = 0; i < expr->num_operands; i++) {
+  assert(is_constant(expr->operands[i]));
+  retype_to_float16(&expr->operands[i]->type);
+   }
+}
+
 ir_visitor_status
 lower_mediump_visitor::visit(ir_variable *ir)
 {
@@ -228,7 +264,7 @@ lower_mediump_visitor::visit_leave(ir_expression *ir)
for (unsigned i = 0; i < ir->num_operands; i++) {
   if (is_16_bit(ir->operands[i]))
  has_16_bit_src = true;
-  else
+  else if (!is_constant(ir->operands[i]))
  has_32_bit_src = true;
}
 
@@ -240,6 +276,11 @@ lower_mediump_visitor::visit_leave(ir_expression *ir)
 */
if (!has_32_bit_src &&
ir->operation != ir_triop_lrp) {
+  for (unsigned i = 0; i < ir->num_operands; i++) {
+ if (is_constant(ir->operands[i]))
+retype_to_float16(ir->operands[i]);
+  }
+
   retype_to_float16(>type);
   return visit_continue;
}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 41/51] intel/compiler/eu: Take stride into account in 16-bit ops

2017-11-24 Thread Topi Pohjolainen
This is needed when converting from F -> HF.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_eu_validate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/compiler/brw_eu_validate.c 
b/src/intel/compiler/brw_eu_validate.c
index 6ee6b4ffbe..735ea6 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -459,6 +459,9 @@ general_restrictions_based_on_operand_types(const struct 
gen_device_info *devinf
exec_type_size == 8 && dst_type_size == 4)
   dst_type_size = 8;
 
+   if (exec_type_size == 4 && dst_type_size == 2 && dst_stride == 2)
+  dst_type_size = 4;
+
if (exec_type_size > dst_type_size) {
   ERROR_IF(dst_stride * dst_type_size != exec_type_size,
"Destination stride must be equal to the ratio of the sizes of "
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/51] intel/compiler: Add support for loading 16-bit constants

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index a973c18203..65a5bfa49a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1515,6 +1515,11 @@ fs_visitor::nir_emit_load_const(const fs_builder ,
fs_reg reg = bld.vgrf(reg_type, instr->def.num_components);
 
switch (instr->def.bit_size) {
+   case 16:
+  for (unsigned i = 0; i < instr->def.num_components; i++)
+ bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value.i16[i]));
+  break;
+
case 32:
   for (unsigned i = 0; i < instr->def.num_components; i++)
  bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 51/51] i965/fs: Lower gles mediump floats into 16-bits

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index d18521e792..89ccbb06b5 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -134,6 +134,9 @@ process_glsl_ir(struct brw_context *brw,
lower_noise(shader->ir);
lower_quadop_vector(shader->ir, false);
 
+   if (shader_prog->IsES && shader->Stage == MESA_SHADER_FRAGMENT)
+  lower_mediump(shader);
+
validate_ir_tree(shader->ir);
 
/* Now that we've finished altering the linked IR, reparent any live IR back
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 48/51] glsl: HACK: Treat input varyings as 16-bits by conversion

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index 094ab4e743..45cf75b53c 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -92,6 +92,20 @@ refers_16_bit_float(const ir_rvalue *ir)
 }
 
 static bool
+defers_input_varying(const ir_rvalue *ir)
+{
+   ir_variable *var = ir->variable_referenced();
+   if (!var)
+  return false;
+
+   if (var->data.mode != ir_var_shader_in)
+  return false;
+
+   return var->data.precision == ast_precision_low ||
+  var->data.precision == ast_precision_medium;
+}
+
+static bool
 is_constant(const ir_rvalue *ir)
 {
if (ir->ir_type == ir_type_constant)
@@ -152,6 +166,13 @@ lower_mediump_visitor::can_be_lowered(const ir_variable 
*var) const
if (!var->type->get_scalar_type()->is_float())
   return false;
 
+   /* TODO: Intel compiler backend isn't prepared for interpolated 16-bit
+*   varyings. Input varyings are instead converted to 16-bits before
+*   use.
+*/
+   if (var->data.mode == ir_var_shader_in)
+  return false;
+
return var->data.precision == ast_precision_low ||
   var->data.precision == ast_precision_medium;
 }
@@ -309,7 +330,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir)
for (unsigned i = 0; i < ir->num_operands; i++) {
   if (is_16_bit(ir->operands[i]))
  has_16_bit_src = true;
-  else if (!is_constant(ir->operands[i]))
+  else if (!is_constant(ir->operands[i]) &&
+   !defers_input_varying(ir->operands[i]))
  has_32_bit_src = true;
}
 
@@ -324,6 +346,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir)
   for (unsigned i = 0; i < ir->num_operands; i++) {
  if (is_constant(ir->operands[i]))
 retype_to_float16(ir->operands[i]);
+ else if (defers_input_varying(ir->operands[i]))
+ir->operands[i] = convert(ir->operands[i], ir_unop_f2h);
   }
 
   retype_to_float16(>type);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 33/51] intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index cbb1c118d2..64243312b9 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3881,7 +3881,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
 {
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
-  dest = get_nir_dest(instr->dest);
+  dest = get_nir_dest(instr->dest, true /* pad components to full regs */);
 
switch (instr->intrinsic) {
case nir_intrinsic_image_load:
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/51] intel/compiler/disasm: Print fp16 also for sampler messages

2017-11-24 Thread Topi Pohjolainen
This is what render target write does.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_disasm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index da2a5d78dd..fbb18b0f26 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -1621,6 +1621,11 @@ brw_disassemble_inst(FILE *file, const struct 
gen_device_info *devinfo,
   brw_inst_sampler_msg_type(devinfo, inst), 
);
err |= control(file, "sampler simd mode", 
gen5_sampler_simd_mode,
   brw_inst_sampler_simd_mode(devinfo, inst), 
);
+   if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
+   brw_inst_data_format(devinfo, inst)) {
+  string(file, " HP");
+   }
+
format(file, " Surface = %"PRIu64" Sampler = %"PRIu64,
   brw_inst_binding_table_index(devinfo, inst),
   brw_inst_sampler(devinfo, inst));
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 37/51] intel/compiler/fs: Consider original sizes when retyping alu ops

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index baa84b0f3c..d28ed57eca 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -655,6 +655,26 @@ emit_find_msb_using_lzd(const fs_builder ,
inst->src[0].negate = true;
 }
 
+static enum brw_reg_type
+get_nir_alu_dest_type(const struct gen_device_info *devinfo,
+  const nir_alu_instr *instr, unsigned size)
+{
+   brw_reg_type res = brw_type_for_nir_type(devinfo,
+  (nir_alu_type)(nir_op_infos[instr->op].output_type |
+ nir_dest_bit_size(instr->dest.dest)));
+   return brw_reg_type_from_bit_size(size * 8, res);
+}
+
+static enum brw_reg_type
+get_nir_alu_src_type(const struct gen_device_info *devinfo,
+ const nir_alu_instr *instr, unsigned i, unsigned size)
+{
+   brw_reg_type res = brw_type_for_nir_type(devinfo,
+  (nir_alu_type)(nir_op_infos[instr->op].input_types[i] |
+ nir_src_bit_size(instr->src[i].src)));
+   return brw_reg_type_from_bit_size(size * 8, res);
+}
+
 void
 fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr)
 {
@@ -662,16 +682,14 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
fs_inst *inst;
 
fs_reg result = get_nir_alu_dest(instr);
-   result.type = brw_type_for_nir_type(devinfo,
-  (nir_alu_type)(nir_op_infos[instr->op].output_type |
- nir_dest_bit_size(instr->dest.dest)));
+   result.type = get_nir_alu_dest_type(devinfo, instr,
+   brw_reg_type_to_size(result.type));
 
fs_reg op[4];
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
   op[i] = get_nir_src(instr->src[i].src);
-  op[i].type = brw_type_for_nir_type(devinfo,
- (nir_alu_type)(nir_op_infos[instr->op].input_types[i] |
-nir_src_bit_size(instr->src[i].src)));
+  op[i].type = get_nir_alu_src_type(devinfo, instr, i,
+brw_reg_type_to_size(op[i].type));
   op[i].abs = instr->src[i].abs;
   op[i].negate = instr->src[i].negate;
}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 30/51] intel/compiler/fs: Pad 16-bit texture return payloads

2017-11-24 Thread Topi Pohjolainen
This is to tell offset and read/write calculators enough to
work correctly with 16-bit texture payloads.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 35e78b134a..6d9b272a57 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4949,7 +4949,22 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
   }
}
 
-   fs_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4);
+   const enum brw_reg_type dst_type =
+  brw_type_for_nir_type(devinfo, instr->dest_type);
+
+   /* In case of 16-bit return format one needs to prepare for 4 registers
+* regardless of the dispatch width:
+*
+* From SKL PRM Vol. 7 Page 131, Return Format = 16-bit:
+*
+* A SIMD8* writeback message with Return Format of 16-bit consists of 
+* up to 4 destination registers).
+*
+* Therefore tell builder to give full register per component even in
+* case of 16-bit size and SIMD8.
+*/
+   const bool pad_components_to_full_registers = true;
+   fs_reg dst = bld.vgrf(dst_type, 4, pad_components_to_full_registers);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
inst->offset = header_bits;
 
@@ -4987,7 +5002,9 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
   bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
}
 
-   bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
+   bld.LOAD_PAYLOAD(get_nir_dest(instr->dest,
+ pad_components_to_full_registers),
+nir_dest, dest_size, 0);
 }
 
 void
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 39/51] intel/compiler/fs: Consider logic ops on 16-bit booleans

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 70 ++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 2a32b1449a..aff592c354 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1662,7 +1662,75 @@ fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr)
 * one component per register.
 */
const bool pad_components_to_full_register = true;
-   return get_nir_dest(instr->dest.dest, pad_components_to_full_register);
+
+   switch (instr->op) {
+   case nir_op_flt:
+   case nir_op_fge:
+   case nir_op_feq:
+   case nir_op_fne: {
+  assert(instr->dest.dest.is_ssa);
+   
+  if (nir_src_bit_size(instr->src[0].src) > 16)
+ return get_nir_dest(instr->dest.dest);
+
+  assert(nir_src_bit_size(instr->src[0].src) == 16 &&
+ nir_src_bit_size(instr->src[1].src) == 16);
+
+  /* Destination type for comparison operations is boolean which NIR
+   * treats as having 32-bit size. If, however, sources are 16-bit
+   * hardware will produce 16-bit result (0x0000/0xffff). Therefore set
+   * the destination type accordingly.
+   */
+  nir_ssa_values[instr->dest.dest.ssa.index] =
+ bld.vgrf(BRW_REGISTER_TYPE_HF,
+  instr->dest.dest.ssa.num_components,
+  pad_components_to_full_register);
+  return nir_ssa_values[instr->dest.dest.ssa.index];
+   }
+   case nir_op_inot:
+   case nir_op_ixor:
+   case nir_op_ior:
+   case nir_op_iand: {
+  assert(instr->dest.dest.is_ssa);
+   
+  const fs_reg src0 = get_nir_src(instr->src[0].src);
+  const fs_reg src1 = get_nir_src(instr->src[0].src);
+
+  /* TODO: This specifically prepares for mixed precision operations which
+   *   in principle shouldn't happen. There is, however, a corner case
+   *   when this is possible. As NIR doesn't consider how booleans
+   *   are produced, we may end up here with one source operand
+   *   produced from an operation with 32-bit sources and another from
+   *   16-bits.
+   *   This is handled by marking this operation as producing 16-bits
+   *   and relying on nir_emit_alu() to adjust the 32-bit source
+   *   operand to 16-bits with stride == 2. Recall that 32-bit
+   *   booleans are just 0x00000000/0xffffffff and it suffices to read
+   *   only the lower 16-bits.
+   * WARN: This blindly assumes that mixed precision integer source
+   *   operands represent boolean values. There is no way of checking
+   *   if that holds.
+   */   
+  if (brw_reg_type_to_size(src0.type) > 2 &&
+  brw_reg_type_to_size(src1.type) > 2)
+ return get_nir_dest(instr->dest.dest);
+
+  /* Translation from GLSL to NIR produces logical operations with
+   * integer operands even when operands are booleans. See handling
+   * of ir_binop_bit_*.
+   * As hardware will produce 16-bit results when the sources are 16-bit
+   * set the destination type accordingly.
+   */
+  nir_ssa_values[instr->dest.dest.ssa.index] =
+ bld.vgrf(BRW_REGISTER_TYPE_W,
+  instr->dest.dest.ssa.num_components,
+  pad_components_to_full_register);
+  return nir_ssa_values[instr->dest.dest.ssa.index];
+   }
+   default:
+  return get_nir_dest(instr->dest.dest,
+  pad_components_to_full_register);
+   }
 }
 
 fs_reg
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 35/51] intel/compiler/fs: Pad 16-bit payload lowering

2017-11-24 Thread Topi Pohjolainen
Otherwise copy propagation fails when write sizes differ.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp  |  5 -
 src/intel/compiler/brw_ir_fs.h | 13 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9c3410b698..8e77248470 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3450,7 +3450,10 @@ fs_visitor::lower_load_payload()
 
   for (uint8_t i = inst->header_size; i < inst->sources; i++) {
  if (inst->src[i].file != BAD_FILE)
-ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
+ibld.MOV(retype_pad_to_full_register(
+dst, dispatch_width, inst->src[i].type),
+ inst->src[i]);
+
  if (type_sz(inst->src[i].type) == 2)
 dst = byte_offset(dst, REG_SIZE);
  else
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index b4a1d7ef5a..fe7f7c4be7 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -72,6 +72,19 @@ retype(fs_reg reg, enum brw_reg_type type)
 }
 
 static inline fs_reg
+retype_pad_to_full_register(fs_reg reg, unsigned dispatch_width,
+enum brw_reg_type type)
+{
+   reg.type = type;
+
+   assert(reg.pad_per_component == 0);
+   if (dispatch_width == 8 && type_sz(reg.type) == 2)
+  reg.pad_per_component = REG_SIZE / 2;
+
+   return reg;
+}
+
+static inline fs_reg
 byte_offset(fs_reg reg, unsigned delta)
 {
switch (reg.file) {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 32/51] intel/compiler/fs: Pad 16-bit nir vec* components into full reg

2017-11-24 Thread Topi Pohjolainen
This allows quite a bit of infra to be kept as is, such as
liveness analysis, copy propagation and dead code elimination.

Here one deals with virtual register space, and this doesn't prevent
more than one component from being packed into one hardware register
later on. That is entirely a matter of the register allocator working
with sub-registers.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.h   |  1 +
 src/intel/compiler/brw_fs_nir.cpp | 19 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index d9c4f737e6..b23d2b1733 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -233,6 +233,7 @@ public:
fs_reg get_nir_src_imm(const nir_src );
fs_reg get_nir_dest(const nir_dest ,
bool pad_components_to_full_registers = false);
+   fs_reg get_nir_alu_dest(const nir_alu_instr *instr);
fs_reg get_nir_image_deref(const nir_deref_var *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder , const fs_inst ,
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index d3125d7dcd..cbb1c118d2 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -656,7 +656,7 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
fs_inst *inst;
 
-   fs_reg result = get_nir_dest(instr->dest.dest);
+   fs_reg result = get_nir_alu_dest(instr);
result.type = brw_type_for_nir_type(devinfo,
   (nir_alu_type)(nir_op_infos[instr->op].output_type |
  nir_dest_bit_size(instr->dest.dest)));
@@ -1624,6 +1624,23 @@ fs_visitor::get_nir_dest(const nir_dest ,
 }
 
 fs_reg
+fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr)
+{
+   /* With data type size <= 16 bits one can fit two or more components
+* into one register. In virtual register space this doesn't really add
+* any value but requires things such as liveness analysis,
+* copy propagation and dead code elimination to be updated to work with
+* sub-register regions.
+*
+* Therefore instead allocate full padded registers per component. This
+* doesn't prevent final hardware register allocator from packing more than
+* one component per register.
+*/
+   const bool pad_components_to_full_register = true;
+   return get_nir_dest(instr->dest.dest, pad_components_to_full_register);
+}
+
+fs_reg
 fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
 {
fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/51] intel/compiler/fs: Set tex type for generator to flag fp16

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5751bb0ad7..0d415e2393 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2601,7 +2601,15 @@ fs_visitor::opt_sampler_eot()
 
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
-   tex_inst->dst = ibld.null_reg_ud();
+
+   /* Set the null destination type explicitly so that the generator knows
+* to set the half-precision flag.
+*/
+   if (tex_inst->dst.type == BRW_REGISTER_TYPE_HF)
+  tex_inst->dst = ibld.null_reg_hf();
+   else
+  tex_inst->dst = ibld.null_reg_ud();
+
tex_inst->size_written = 0;
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 46/51] glsl: Lower float conversions to mediump

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index 0276e74d6e..07f1f1ba9d 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -55,6 +55,30 @@ is_16_bit(const ir_rvalue *ir)
return ir->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16;
 }
 
+static void
+retype_x2f_x2f16(ir_rvalue *ir)
+{
+   if (ir->ir_type != ir_type_expression)
+  return;
+
+   ir_expression *expr = (ir_expression *)ir;
+   switch (expr->operation) {
+   case ir_unop_i2f:
+  expr->operation = ir_unop_i2h;
+  break;
+   case ir_unop_b2f:
+  expr->operation = ir_unop_b2h;
+  break;
+   case ir_unop_u2f:
+  expr->operation = ir_unop_u2h;
+  break;
+   default:
+  return;
+   }
+
+   ir->type = get_mediump(ir->type);
+}
+
 static bool
 refers_16_bit_float(const ir_rvalue *ir)
 {
@@ -259,6 +283,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir)
 {
ir_rvalue_visitor::visit_leave(ir);
 
+   retype_x2f_x2f16(ir);
+
bool has_32_bit_src = false;
bool has_16_bit_src = false;
for (unsigned i = 0; i < ir->num_operands; i++) {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 50/51] glsl: HACK: Lower all temporary float variables to 16-bits

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index bae18c9bfb..73b8aa577c 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -184,6 +184,17 @@ lower_mediump_visitor::can_be_lowered(const ir_variable 
*var) const
var->data.how_declared == ir_var_declared_implicitly)
   return true;
 
+   /* Like builtins, temporary variables don't have precision
+* qualifiers either. Lower them by default.
+*
+* TODO: Surrounding expressions should really be examined to tell if
+*   full precision needed. Moreover, these can be referred from
+*   multiple locations. If any requires full precision, then all
+*   expressions involved would need to operate on full precision?
+*/
+   if (var->data.mode == ir_var_temporary)
+  return true;
+
return var->data.precision == ast_precision_low ||
   var->data.precision == ast_precision_medium;
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 43/51] intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms

2017-11-24 Thread Topi Pohjolainen
---
 src/intel/compiler/brw_fs_nir.cpp | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 2060a3139d..631bbf7f92 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4164,7 +4164,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
  src.offset = const_offset->u32[0];
 
  for (unsigned j = 0; j < instr->num_components; j++) {
-bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+/* Currently 16-bit uniforms occupy 32-bit slot. */
+const unsigned src_offset =
+   src.type == BRW_REGISTER_TYPE_HF ? 2 * j : j;
+
+bld.MOV(offset(dest, bld, j), offset(src, bld, src_offset));
  }
   } else {
  fs_reg indirect = retype(get_nir_src(instr->src[0]),
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/51] intel/compiler: Prepare for glsl mediump float uniforms

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_shader.cpp   | 13 +
 src/mesa/drivers/dri/i965/brw_program.c | 10 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index 234b5a11c1..cc9297772b 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -78,6 +78,19 @@ type_size_scalar(const struct glsl_type *type)
return 0;
 }
 
+/* Variant of type_size_scalar() taking into account that GL core and api
+ * don't deal with 16-bit uniforms but with 32-bit. Only compiler backend can
+ * work with reduced precision if desired.
+ */
+extern "C" int
+uniform_storage_type_size_scalar(const struct glsl_type *type)
+{
+   if (type->base_type == GLSL_TYPE_FLOAT16)
+  return type->components();
+
+   return type_size_scalar(type);
+}
+
 enum brw_reg_type
 brw_type_for_base_type(const struct glsl_type *type)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 755d4973cc..4573d9d303 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -47,12 +47,20 @@
 #include "brw_defines.h"
 #include "intel_batchbuffer.h"
 
+int uniform_storage_type_size_scalar(const struct glsl_type *type);
+
+static int
+uniform_storage_type_size_scalar_bytes(const struct glsl_type *type)
+{
+   return uniform_storage_type_size_scalar(type) * 4;
+}
+
 static bool
 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
 {
if (is_scalar) {
   nir_assign_var_locations(>uniforms, >num_uniforms,
-   type_size_scalar_bytes);
+   uniform_storage_type_size_scalar_bytes);
   return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
} else {
   nir_assign_var_locations(>uniforms, >num_uniforms,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 49/51] glsl: HACK: Lower builtin float outputs to 16-bits by default

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index 45cf75b53c..bae18c9bfb 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -173,6 +173,17 @@ lower_mediump_visitor::can_be_lowered(const ir_variable 
*var) const
if (var->data.mode == ir_var_shader_in)
   return false;
 
+   /* Builtin outputs such as gl_FragColor don't have precision qualifier.
+* Lower them by default.
+* 
+* TODO: If this gets assigned with full precision value, output would
+*   need to be in full precision instead of the value being converted
+*   to 16-bits?
+*/
+   if (var->data.mode == ir_var_shader_out &&
+   var->data.how_declared == ir_var_declared_implicitly)
+  return true;
+
return var->data.precision == ast_precision_low ||
   var->data.precision == ast_precision_medium;
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 38/51] intel/compiler/fs: Use original reg size when retyping nir src

2017-11-24 Thread Topi Pohjolainen
Boolean-typed values may be given in 16 bits, whereas NIR
unconditionally regards them as 32-bit.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index d28ed57eca..2a32b1449a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1604,8 +1604,9 @@ fs_visitor::get_nir_src(const nir_src )
* default to an integer type - instructions that need floating point
* semantics will set this to F if they need to
*/
-  reg.type = brw_reg_type_from_bit_size(nir_src_bit_size(src),
-BRW_REGISTER_TYPE_D);
+  reg.type = brw_reg_type_from_bit_size(
+brw_reg_type_to_size(reg.type) * 8,
+BRW_REGISTER_TYPE_D);
}
 
return reg;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/51] intel/compiler/fs: Set 16-bit sampler return format

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_generator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 20d018e1fe..610a545cd8 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1051,6 +1051,9 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
   brw_inst_set_eot(p->devinfo, brw_last_inst, true);
   brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
}
+
+   if (dst.type == BRW_REGISTER_TYPE_HF)
+  brw_inst_set_data_format(p->devinfo, brw_last_inst, 1);
 }
 
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/51] intel/compiler/fs: Support for dumping 16-bit IMM values

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 694fcc1919..1b972972c1 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -39,6 +39,7 @@
 #include "compiler/glsl_types.h"
 #include "compiler/nir/nir_builder.h"
 #include "program/prog_parameter.h"
+#include "util/half_float.h"
 
 using namespace brw;
 
@@ -5532,6 +5533,10 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst, FILE *file)
  break;
   case IMM:
  switch (inst->src[i].type) {
+ case BRW_REGISTER_TYPE_HF:
+fprintf(file, "%-gHF",
+_mesa_half_to_float((uint16_t)inst->src[i].ud));
+break;
  case BRW_REGISTER_TYPE_F:
 fprintf(file, "%-gf", inst->src[i].f);
 break;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 31/51] intel/compiler/fs: Pad 16-bit output (store/fb write) payloads

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 6d9b272a57..d3125d7dcd 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3254,7 +3254,7 @@ alloc_temporary(const fs_builder , unsigned size, 
fs_reg *regs, unsigned n,
} else {
   const brw_reg_type type =
  is_16bit ? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_F;
-  const fs_reg tmp = bld.vgrf(type, size);
+  const fs_reg tmp = bld.vgrf(type, size, is_16bit);
 
   for (unsigned i = 0; i < n; i++)
  regs[i] = tmp;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 42/51] i965: WIP: Support for uploading 16-bit uniforms from 32-bit store

2017-11-24 Thread Topi Pohjolainen
At this point 16-bit uniforms still take full 32-bit slots in the
pull/push constant buffers and in shader deployment payload.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_compiler.h   |  9 +
 src/intel/compiler/brw_fs.cpp   | 12 
 src/intel/compiler/brw_fs_nir.cpp   |  2 ++
 src/intel/compiler/brw_fs_visitor.cpp   |  1 +
 src/intel/compiler/brw_vec4.cpp |  8 
 src/intel/compiler/brw_vec4_gs_visitor.cpp  |  8 
 src/intel/compiler/brw_vec4_visitor.cpp |  4 
 src/mesa/drivers/dri/i965/brw_cs.c  |  2 ++
 src/mesa/drivers/dri/i965/brw_curbe.c   |  2 ++
 src/mesa/drivers/dri/i965/brw_disk_cache.c  | 14 ++
 src/mesa/drivers/dri/i965/brw_gs.c  |  2 ++
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp  | 10 ++
 src/mesa/drivers/dri/i965/brw_program.c |  2 ++
 src/mesa/drivers/dri/i965/brw_state.h   |  1 +
 src/mesa/drivers/dri/i965/brw_tcs.c |  2 ++
 src/mesa/drivers/dri/i965/brw_tes.c |  2 ++
 src/mesa/drivers/dri/i965/brw_vs.c  |  2 ++
 src/mesa/drivers/dri/i965/brw_wm.c  |  2 ++
 src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 -
 19 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index cdd61aae6c..7b43c4a135 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -613,6 +613,12 @@ struct brw_stage_prog_data {
 */
uint32_t *param;
uint32_t *pull_param;
+
+   /* Tells for GLSL backend if conversion from 32-bit store to, for example,
+* 16-bits is required.
+*/
+   unsigned char *param_type;  /* enum glsl_base_type */
+   unsigned char *pull_param_type; /* enum glsl_base_type */
 };
 
 static inline uint32_t *
@@ -621,6 +627,9 @@ brw_stage_prog_data_add_params(struct brw_stage_prog_data 
*prog_data,
 {
unsigned old_nr_params = prog_data->nr_params;
prog_data->nr_params += nr_new_params;
+   prog_data->param_type = reralloc(ralloc_parent(prog_data->param_type),
+prog_data->param_type, unsigned char,
+prog_data->nr_params);
prog_data->param = reralloc(ralloc_parent(prog_data->param),
prog_data->param, uint32_t,
prog_data->nr_params);
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 8e77248470..3ca1d4cbc7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2102,19 +2102,26 @@ fs_visitor::assign_constant_locations()
 * create two new arrays for push/pull params.
 */
uint32_t *param = stage_prog_data->param;
+   unsigned char *param_type = stage_prog_data->param_type;
stage_prog_data->nr_params = num_push_constants;
if (num_push_constants) {
   stage_prog_data->param = ralloc_array(mem_ctx, uint32_t,
 num_push_constants);
+  stage_prog_data->param_type = ralloc_array(mem_ctx, unsigned char,
+ num_push_constants);
} else {
   stage_prog_data->param = NULL;
+  stage_prog_data->param_type = NULL;
}
assert(stage_prog_data->nr_pull_params == 0);
assert(stage_prog_data->pull_param == NULL);
+   assert(stage_prog_data->pull_param_type == NULL);
if (num_pull_constants > 0) {
   stage_prog_data->nr_pull_params = num_pull_constants;
   stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
  num_pull_constants);
+  stage_prog_data->pull_param_type = ralloc_array(NULL, unsigned char,
+  num_pull_constants);
}
 
/* Now that we know how many regular uniforms we'll push, reduce the
@@ -2143,11 +2150,16 @@ fs_visitor::assign_constant_locations()
   uint32_t value = param[i];
   if (pull_constant_loc[i] != -1) {
  stage_prog_data->pull_param[pull_constant_loc[i]] = value;
+ stage_prog_data->pull_param_type[pull_constant_loc[i]] = 
+param_type[i];
   } else if (push_constant_loc[i] != -1) {
  stage_prog_data->param[push_constant_loc[i]] = value;
+ stage_prog_data->param_type[push_constant_loc[i]] =
+param_type[i];
   }
}
ralloc_free(param);
+   ralloc_free(param_type);
 }
 
 bool
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 43127e00e8..2060a3139d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -120,9 +120,11 @@ fs_visitor::nir_setup_uniforms()
* on the list.
 

[Mesa-dev] [PATCH 47/51] glsl: HACK: Force texture return into 16-bits

2017-11-24 Thread Topi Pohjolainen
and convert coordinates unconditionally to 32-bits.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/lower_mediump.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/compiler/glsl/lower_mediump.cpp 
b/src/compiler/glsl/lower_mediump.cpp
index 07f1f1ba9d..094ab4e743 100644
--- a/src/compiler/glsl/lower_mediump.cpp
+++ b/src/compiler/glsl/lower_mediump.cpp
@@ -132,6 +132,7 @@ public:
 
virtual ir_visitor_status visit_leave(ir_assignment *ir);
virtual ir_visitor_status visit_leave(ir_expression *ir);
+   virtual ir_visitor_status visit_leave(ir_texture *ir);
virtual ir_visitor_status visit_leave(ir_swizzle *ir);
 
virtual void handle_rvalue(ir_rvalue **rvalue);
@@ -238,6 +239,24 @@ lower_mediump_visitor::visit_leave(ir_assignment *ir)
 }
 
 ir_visitor_status
+lower_mediump_visitor::visit_leave(ir_texture *ir)
+{
+   ir_rvalue_visitor::visit_leave(ir);
+
+   /* HACK: Intel compiler backend isn't prepared for 16-bit texture
+*   arguments.
+* TODO: Convert the rest of the operands.
+*/
+   if (is_16_bit(ir->coordinate))
+  ir->coordinate = convert(ir->coordinate, ir_unop_h2f);
+
+   if (ir->type->is_float())
+  retype_to_float16(>type);
+
+   return visit_continue;
+}
+
+ir_visitor_status
 lower_mediump_visitor::visit_leave(ir_swizzle *ir)
 {
ir_rvalue_visitor::visit_leave(ir);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 34/51] intel/compiler/fs: Pad 16-bit const loads into full regs

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 64243312b9..c455fa4e27 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1519,7 +1519,8 @@ fs_visitor::nir_emit_load_const(const fs_builder ,
 {
const brw_reg_type reg_type =
   brw_reg_type_from_bit_size(instr->def.bit_size, BRW_REGISTER_TYPE_D);
-   fs_reg reg = bld.vgrf(reg_type, instr->def.num_components);
+   fs_reg reg = bld.vgrf(reg_type, instr->def.num_components,
+ true /* pad components to full regs */);
 
switch (instr->def.bit_size) {
case 16:
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/51] intel/compiler/fs: Use component_size() instead of open coded

2017-11-24 Thread Topi Pohjolainen
This prepares for the following patch, which will add 16-bit tex/fb
write payload padding support.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp  | 2 +-
 src/intel/compiler/brw_fs_copy_propagation.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0d415e2393..cedfde5096 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -639,7 +639,7 @@ bool
 fs_inst::is_partial_write() const
 {
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
-   (this->exec_size * type_sz(this->dst.type)) < 32 ||
+   dst.component_size(exec_size) < 32 ||
!this->dst.is_contiguous() ||
this->dst.offset % REG_SIZE != 0);
 }
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 470eaeec4f..ed2511ecfa 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -801,8 +801,8 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, 
bblock_t *block,
  for (int i = 0; i < inst->sources; i++) {
 int effective_width = i < inst->header_size ? 8 : inst->exec_size;
 assert(effective_width * MAX2(4, type_sz(inst->src[i].type)) % 
REG_SIZE == 0);
-const unsigned size_written = effective_width *
-  type_sz(inst->src[i].type);
+const unsigned size_written =
+   inst->src[i].component_size(effective_width);
 if (inst->src[i].file == VGRF) {
acp_entry *entry = rzalloc(copy_prop_ctx, acp_entry);
entry->dst = byte_offset(inst->dst, offset);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/51] intel/compiler: Add support for negating 16-bit floats

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_shader.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index cc9297772b..3a83f55f28 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -653,7 +653,8 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg 
*reg)
case BRW_REGISTER_TYPE_V:
   assert(!"unimplemented: negate UV/V immediate");
case BRW_REGISTER_TYPE_HF:
-  assert(!"unimplemented: negate HF immediate");
+  reg->ud ^= 0x8000;
+  return true;
}
 
return false;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/51] intel/compiler: Allow 16-bit math

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_eu_emit.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 1507968e6c..87b144e871 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1921,8 +1921,10 @@ void gen6_math(struct brw_codegen *p,
   assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
  (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
} else {
-  assert(src0.type == BRW_REGISTER_TYPE_F);
-  assert(src1.type == BRW_REGISTER_TYPE_F);
+  assert(src0.type == BRW_REGISTER_TYPE_F ||
+ src0.type == BRW_REGISTER_TYPE_HF);
+  assert(src1.type == BRW_REGISTER_TYPE_F ||
+ src1.type == BRW_REGISTER_TYPE_HF);
}
 
/* Source modifiers are ignored for extended math instructions on Gen6. */
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 40/51] intel/compiler/fs: Prepare 16-bit and/or/xor for 32-bit src

2017-11-24 Thread Topi Pohjolainen
In GLSL->NIR translation logic operations with boolean typed operands
are treated as operating with integer operands.

The values of the operands can therefore be 0xFFFFFFFF/0x00000000 in
case they are produced with a 32-bit execution type, or 0xFFFF/0x0000
in case of 16-bit.

This patch allows 16-bit logic operations to use 32-bit boolean types
as sources.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index aff592c354..43127e00e8 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1127,6 +1127,13 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
   break;
case nir_op_ixor:
   if (devinfo->gen >= 8) {
+ if (brw_reg_type_to_size(result.type) == 2) {
+op[0] = subscript(op[0],
+  brw_reg_type_from_bit_size(16, op[0].type), 0);
+op[1] = subscript(op[1],
+  brw_reg_type_from_bit_size(16, op[1].type), 0);
+ }
+
  op[0] = resolve_source_modifiers(op[0]);
  op[1] = resolve_source_modifiers(op[1]);
   }
@@ -1134,6 +1141,13 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
   break;
case nir_op_ior:
   if (devinfo->gen >= 8) {
+ if (brw_reg_type_to_size(result.type) == 2) {
+op[0] = subscript(op[0],
+  brw_reg_type_from_bit_size(16, op[0].type), 0);
+op[1] = subscript(op[1],
+  brw_reg_type_from_bit_size(16, op[1].type), 0);
+ }
+
  op[0] = resolve_source_modifiers(op[0]);
  op[1] = resolve_source_modifiers(op[1]);
   }
@@ -1141,6 +1155,13 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
   break;
case nir_op_iand:
   if (devinfo->gen >= 8) {
+ if (brw_reg_type_to_size(result.type) == 2) {
+op[0] = subscript(op[0],
+  brw_reg_type_from_bit_size(16, op[0].type), 0);
+op[1] = subscript(op[1],
+  brw_reg_type_from_bit_size(16, op[1].type), 0);
+ }
+
  op[0] = resolve_source_modifiers(op[0]);
  op[1] = resolve_source_modifiers(op[1]);
   }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/51] intel/compiler/fs: Use 16-bit null dest with 16-bit math

2017-11-24 Thread Topi Pohjolainen
Even though this doesn't seem to alter anything other than dumping,
it is more consistent.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_generator.cpp | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 03fd34c00a..20d018e1fe 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1918,8 +1918,13 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
 if (devinfo->gen >= 6) {
 assert(inst->mlen == 0);
 assert(devinfo->gen >= 7 || inst->exec_size == 8);
+
+struct brw_reg null_reg = brw_null_reg();
+if (brw_reg_type_to_size(dst.type) == 2)
+   null_reg = retype(null_reg, BRW_REGISTER_TYPE_HF);
+
 gen6_math(p, dst, brw_math_function(inst->opcode),
-  src[0], brw_null_reg());
+  src[0], null_reg);
 } else {
 assert(inst->mlen >= 1);
 assert(devinfo->gen == 5 || devinfo->is_g4x || inst->exec_size == 
8);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 36/51] intel/compiler/fs: Prepare nir_emit_if() for 16-bit sources

2017-11-24 Thread Topi Pohjolainen
Comparison operations using 16-bit sources produce 16-bit results
(0xFFFF/0x0000) instead of 32-bit results (0xFFFFFFFF/0x00000000).

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index c455fa4e27..baa84b0f3c 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -396,10 +396,15 @@ fs_visitor::nir_emit_cf_list(exec_list *list)
 void
 fs_visitor::nir_emit_if(nir_if *if_stmt)
 {
+   const fs_reg src = get_nir_src(if_stmt->condition);
+   fs_inst *inst;
+
/* first, put the condition into f0 */
-   fs_inst *inst = bld.MOV(bld.null_reg_d(),
-retype(get_nir_src(if_stmt->condition),
-   BRW_REGISTER_TYPE_D));
+   if (brw_reg_type_to_size(src.type) == 2)
+  inst = bld.MOV(bld.null_reg_w(), retype(src, BRW_REGISTER_TYPE_W));
+   else
+  inst = bld.MOV(bld.null_reg_d(), retype(src, BRW_REGISTER_TYPE_D));
+
inst->conditional_mod = BRW_CONDITIONAL_NZ;
 
bld.IF(BRW_PREDICATE_NORMAL);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/51] intel/compiler/disasm: Print 16-bit IMM values

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_disasm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index c752e15331..da2a5d78dd 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -1286,7 +1286,8 @@ imm(FILE *file, const struct gen_device_info *devinfo, 
enum brw_reg_type type,
   format(file, "%-gDF", brw_inst_imm_df(devinfo, inst));
   break;
case BRW_REGISTER_TYPE_HF:
-  string(file, "Half Float IMM");
+  format(file, "%-gHF",
+ _mesa_half_to_float((uint16_t) brw_inst_imm_ud(devinfo, inst)));
   break;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/51] intel/compiler/fs: Use 16-bit null dest with 16-bit compare

2017-11-24 Thread Topi Pohjolainen
Otherwise the EU emitter will deduce the wrong execution size when
examining source types and finding a 32-bit wide register.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 65a5bfa49a..16e8dfc186 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -25,6 +25,7 @@
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
+#include "util/half_float.h"
 
 using namespace brw;
 using namespace brw::surface_access;
@@ -1446,7 +1447,10 @@ fs_visitor::nir_emit_alu(const fs_builder , 
nir_alu_instr *instr)
   if (optimize_frontfacing_ternary(instr, result))
  return;
 
-  bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
+  if (brw_reg_type_to_size(op[0].type) == 2)
+ bld.CMP(bld.null_reg_w(), op[0], brw_imm_w(0), BRW_CONDITIONAL_NZ);
+  else
+ bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
   inst = bld.SEL(result, op[1], op[2]);
   inst->predicate = BRW_PREDICATE_NORMAL;
   break;
@@ -3410,8 +3414,14 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder ,
*/
   fs_inst *cmp;
   if (instr->intrinsic == nir_intrinsic_discard_if) {
- cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
-   brw_imm_d(0), BRW_CONDITIONAL_Z);
+ const fs_reg src = get_nir_src(instr->src[0]);
+
+ if (brw_reg_type_to_size(src.type) == 2)
+cmp = bld.CMP(bld.null_reg_hf(), get_nir_src(instr->src[0]),
+  brw_imm_w(0), BRW_CONDITIONAL_Z);
+ else
+cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
+  brw_imm_d(0), BRW_CONDITIONAL_Z);
   } else {
  fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/51] glsl: Enable 16-bit texturing in nir-conversion

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index c0adf744e0..b16efa6555 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -2057,6 +2057,9 @@ nir_visitor::visit(ir_texture *ir)
case GLSL_TYPE_FLOAT:
   instr->dest_type = nir_type_float;
   break;
+   case GLSL_TYPE_FLOAT16:
+  instr->dest_type = nir_type_float16;
+  break;
case GLSL_TYPE_INT:
   instr->dest_type = nir_type_int;
   break;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/51] intel/compiler/fs: Support for combining 16-bit immediates

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_combine_constants.cpp | 84 +
 1 file changed, 71 insertions(+), 13 deletions(-)

diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp 
b/src/intel/compiler/brw_fs_combine_constants.cpp
index e0c95d379b..5772ffb94a 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -36,6 +36,7 @@
 
 #include "brw_fs.h"
 #include "brw_cfg.h"
+#include "util/half_float.h"
 
 using namespace brw;
 
@@ -95,6 +96,15 @@ link(void *mem_ctx, fs_reg *reg)
return >link;
 }
 
+union imm_val {
+   double df;
+   uint64_t u64;
+   int64_t d64;
+   float f;
+   int   d;
+   unsigned ud;
+};
+
 /**
  * Information about an immediate value.
  */
@@ -114,8 +124,10 @@ struct imm {
 */
exec_list *uses;
 
-   /** The immediate value.  We currently only handle floats. */
-   float val;
+   enum brw_reg_type type;
+
+   /** The immediate value.  We currently handle floats and half floats. */
+   union imm_val val;
 
/**
 * The GRF register and subregister number where we've decided to store the
@@ -145,10 +157,10 @@ struct table {
 };
 
 static struct imm *
-find_imm(struct table *table, float val)
+find_imm(struct table *table, enum brw_reg_type type, union imm_val val)
 {
for (int i = 0; i < table->len; i++) {
-  if (table->imm[i].val == val) {
+  if (table->imm[i].val.u64 == val.u64 && table->imm[i].type == type) {
  return >imm[i];
   }
}
@@ -190,6 +202,33 @@ compare(const void *_a, const void *_b)
return a->first_use_ip - b->first_use_ip;
 }
 
+static uint16_t
+fabs_f16(uint16_t hf)
+{
+   return _mesa_float_to_half(fabs(_mesa_half_to_float(hf)));
+}
+
+static union imm_val
+get_val(const struct gen_device_info *devinfo, fs_inst *inst, unsigned i)
+{
+   union imm_val res = { 0 };
+
+   switch (inst->src[i].type) {
+   case BRW_REGISTER_TYPE_F:
+  res.f = !inst->can_do_source_mods(devinfo) ?
+  inst->src[i].f : fabs(inst->src[i].f);
+  break;
+   case BRW_REGISTER_TYPE_HF:
+  res.ud = !inst->can_do_source_mods(devinfo) ?
+   inst->src[i].ud : fabs_f16(inst->src[i].ud);
+  break;
+   default:
+  unreachable("unsupported immediate type");
+   }
+
+   return res;
+}
+
 bool
 fs_visitor::opt_combine_constants()
 {
@@ -215,12 +254,12 @@ fs_visitor::opt_combine_constants()
 
   for (int i = 0; i < inst->sources; i++) {
  if (inst->src[i].file != IMM ||
- inst->src[i].type != BRW_REGISTER_TYPE_F)
+ (inst->src[i].type != BRW_REGISTER_TYPE_F &&
+  inst->src[i].type != BRW_REGISTER_TYPE_HF))
 continue;
 
- float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
- fabs(inst->src[i].f);
- struct imm *imm = find_imm(, val);
+ union imm_val val = get_val(devinfo, inst, i);
+ struct imm *imm = find_imm(, inst->src[i].type, val);
 
  if (imm) {
 bblock_t *intersection = cfg_t::intersect(block, imm->block);
@@ -238,6 +277,7 @@ fs_visitor::opt_combine_constants()
 imm->uses = new(const_ctx) exec_list();
 imm->uses->push_tail(link(const_ctx, >src[i]));
 imm->val = val;
+imm->type = inst->src[i].type;
 imm->uses_by_coissue = could_coissue(devinfo, inst);
 imm->must_promote = must_promote_imm(devinfo, inst);
 imm->first_use_ip = ip;
@@ -278,7 +318,14 @@ fs_visitor::opt_combine_constants()
   imm->block->last_non_control_flow_inst()->next);
   const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
 
-  ibld.MOV(reg, brw_imm_f(imm->val));
+  if (imm->type == BRW_REGISTER_TYPE_F)
+ ibld.MOV(reg, brw_imm_f(imm->val.f));
+  else if (imm->type == BRW_REGISTER_TYPE_HF) {
+ ibld.MOV(retype(reg, BRW_REGISTER_TYPE_HF),
+  retype(brw_imm_ud(imm->val.ud), BRW_REGISTER_TYPE_HF));
+  } else
+ unreachable("unsupported immediate type");
+
   imm->nr = reg.nr;
   imm->subreg_offset = reg.offset;
 
@@ -298,9 +345,19 @@ fs_visitor::opt_combine_constants()
  reg->nr = table.imm[i].nr;
  reg->offset = table.imm[i].subreg_offset;
  reg->stride = 0;
- reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
- assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
-fabsf(reg->f) == fabs(table.imm[i].val));
+ reg->negate = signbit(reg->f) != signbit(table.imm[i].val.f);
+
+ switch (table.imm[i].type) {
+ case BRW_REGISTER_TYPE_F:
+  

[Mesa-dev] [PATCH 23/51] intel/compiler: Prepare for 16-bit 3-src ops

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_eu_emit.c  | 21 +
 src/intel/compiler/brw_inst.h |  4 
 src/intel/compiler/brw_reg_type.c |  2 ++
 3 files changed, 27 insertions(+)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 87b144e871..fb8d5b5513 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -810,6 +810,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
brw_reg dest,
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.type == BRW_REGISTER_TYPE_F  ||
+ dest.type == BRW_REGISTER_TYPE_HF ||
  dest.type == BRW_REGISTER_TYPE_DF ||
  dest.type == BRW_REGISTER_TYPE_D  ||
  dest.type == BRW_REGISTER_TYPE_UD);
@@ -857,6 +858,21 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
brw_reg dest,
   */
  brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
  brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
+
+ if (dest.type == BRW_REGISTER_TYPE_HF) {
+/* From the Bspec: Instruction types
+ *
+ * Three source instructions can use operands with mixed-mode
+ * precision. When SrcType field is set to :f or :hf it defines
+ * precision for source 0 only, and fields Src1Type and Src2Type
+ * define precision for other source operands:
+ *
+ *   0b = :f. Single precision Float (32-bit).
+ *   1b = :hf. Half precision Float (16-bit).
+ */
+brw_inst_set_3src_src1_type(devinfo, inst, 1);
+brw_inst_set_3src_src2_type(devinfo, inst, 1);
+ }
   }
}
 
@@ -902,11 +918,16 @@ brw_inst *brw_##OP(struct brw_codegen *p, \
  struct brw_reg src2)   \
 {   \
assert(dest.type == BRW_REGISTER_TYPE_F ||   \
+  dest.type == BRW_REGISTER_TYPE_HF ||  \
   dest.type == BRW_REGISTER_TYPE_DF);   \
if (dest.type == BRW_REGISTER_TYPE_F) {  \
   assert(src0.type == BRW_REGISTER_TYPE_F); \
   assert(src1.type == BRW_REGISTER_TYPE_F); \
   assert(src2.type == BRW_REGISTER_TYPE_F); \
+   } else if (dest.type == BRW_REGISTER_TYPE_HF) {  \
+  assert(src0.type == BRW_REGISTER_TYPE_HF);\
+  assert(src1.type == BRW_REGISTER_TYPE_HF);\
+  assert(src2.type == BRW_REGISTER_TYPE_HF);\
} else if (dest.type == BRW_REGISTER_TYPE_DF) {  \
   assert(src0.type == BRW_REGISTER_TYPE_DF);\
   assert(src1.type == BRW_REGISTER_TYPE_DF);\
diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h
index 2501d6adff..c295a2b3ff 100644
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@@ -222,6 +222,10 @@ F8(3src_src1_negate,39, 39, 40, 40)
 F8(3src_src1_abs,   38, 38, 39, 39)
 F8(3src_src0_negate,37, 37, 38, 38)
 F8(3src_src0_abs,   36, 36, 37, 37)
+
+F(3src_src2_type,   36, 36)
+F(3src_src1_type,   35, 35)
+
 F8(3src_a16_flag_reg_nr,34, 34, 33, 33)
 F8(3src_a16_flag_subreg_nr, 33, 33, 32, 32)
 FF(3src_a16_dst_reg_file,
diff --git a/src/intel/compiler/brw_reg_type.c 
b/src/intel/compiler/brw_reg_type.c
index b7fff0867f..55956ef563 100644
--- a/src/intel/compiler/brw_reg_type.c
+++ b/src/intel/compiler/brw_reg_type.c
@@ -93,6 +93,7 @@ enum hw_3src_reg_type {
GEN7_3SRC_TYPE_D  = 1,
GEN7_3SRC_TYPE_UD = 2,
GEN7_3SRC_TYPE_DF = 3,
+   GEN7_3SRC_TYPE_HF = 4,
 
/** When ExecutionDatatype is 1: @{ */
GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
@@ -120,6 +121,7 @@ static const struct hw_3src_type {
[BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
[BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
[BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
+   [BRW_REGISTER_TYPE_HF] = { GEN7_3SRC_TYPE_HF },
 }, gen10_hw_3src_align1_type[] = {
 #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/51] glsl: Allow 16-bit neg() and dot()

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/ir_validate.cpp | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/ir_validate.cpp 
b/src/compiler/glsl/ir_validate.cpp
index a20f52e527..735e862141 100644
--- a/src/compiler/glsl/ir_validate.cpp
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -263,7 +263,8 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
  ir->operands[0]->type->is_float() ||
  ir->operands[0]->type->is_double() ||
- ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
+ ir->operands[0]->type->base_type == GLSL_TYPE_INT64 ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
   assert(ir->type == ir->operands[0]->type);
   break;
 
@@ -742,9 +743,11 @@ ir_validate::visit_leave(ir_expression *ir)
 
case ir_binop_dot:
   assert(ir->type == glsl_type::float_type ||
- ir->type == glsl_type::double_type);
+ ir->type == glsl_type::double_type ||
+ ir->type->base_type == GLSL_TYPE_FLOAT16);
   assert(ir->operands[0]->type->is_float() ||
- ir->operands[0]->type->is_double());
+ ir->operands[0]->type->is_double() ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
   assert(ir->operands[0]->type->is_vector());
   assert(ir->operands[0]->type == ir->operands[1]->type);
   break;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/51] intel/compiler/fs: Add helpers for 16-bit null regs

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs_builder.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/intel/compiler/brw_fs_builder.h 
b/src/intel/compiler/brw_fs_builder.h
index 87394bc17b..633086c64b 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -205,6 +205,12 @@ namespace brw {
   }
 
   dst_reg
+  null_reg_hf() const
+  {
+ return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_HF));
+  }
+
+  dst_reg
   null_reg_df() const
   {
  return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
@@ -219,6 +225,12 @@ namespace brw {
  return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }
 
+  dst_reg
+  null_reg_w() const
+  {
+ return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_W));
+  }
+
   /**
* Create a null register of unsigned integer type.
*/
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/51] intel/compiler/fs: Use two SIMD8 instructions for 16-bit math

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3c70231be8..5751bb0ad7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4903,6 +4903,15 @@ get_lowered_simd_width(const struct gen_device_info 
*devinfo,
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
+  /* From the SKL PRM Vol 2, math - Extended Math Function:
+   *
+   * The execution size must be no more than 8 when half-floats are used
+   * in source or destination operand.
+   */
+  if (inst->src[0].type == BRW_REGISTER_TYPE_HF ||
+  inst->dst.type == BRW_REGISTER_TYPE_HF)
+ return MIN2(8, inst->exec_size);
+
   /* Unary extended math instructions are limited to SIMD8 on Gen4 and
* Gen6.
*/
@@ -4911,6 +4920,15 @@ get_lowered_simd_width(const struct gen_device_info 
*devinfo,
   MIN2(8, inst->exec_size));
 
case SHADER_OPCODE_POW:
+  /* From the SKL PRM Vol 2, math - Extended Math Function:
+   *
+   * The execution size must be no more than 8 when half-floats are used
+   * in source or destination operand.
+   */
+  if (inst->src[0].type == BRW_REGISTER_TYPE_HF ||
+  inst->dst.type == BRW_REGISTER_TYPE_HF)
+ return MIN2(8, inst->exec_size);
+
   /* SIMD16 is only allowed on Gen7+. */
   return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
   MIN2(8, inst->exec_size));
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/51] intel/compiler: Move type_size_scalar() into brw_shader.cpp

2017-11-24 Thread Topi Pohjolainen
Next patch will add another variant, and in order not to make
brw_fs.cpp any bigger than it already is, add both to brw_shader.cpp
instead.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp | 48 ---
 src/intel/compiler/brw_shader.cpp | 48 +++
 2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 1b972972c1..3c70231be8 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -470,54 +470,6 @@ fs_reg::component_size(unsigned width) const
return MAX2(width * stride, 1) * type_sz(type);
 }
 
-extern "C" int
-type_size_scalar(const struct glsl_type *type)
-{
-   unsigned int size, i;
-
-   switch (type->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_FLOAT:
-   case GLSL_TYPE_BOOL:
-  return type->components();
-   case GLSL_TYPE_UINT16:
-   case GLSL_TYPE_INT16:
-   case GLSL_TYPE_FLOAT16:
-  return DIV_ROUND_UP(type->components(), 2);
-   case GLSL_TYPE_DOUBLE:
-   case GLSL_TYPE_UINT64:
-   case GLSL_TYPE_INT64:
-  return type->components() * 2;
-   case GLSL_TYPE_ARRAY:
-  return type_size_scalar(type->fields.array) * type->length;
-   case GLSL_TYPE_STRUCT:
-  size = 0;
-  for (i = 0; i < type->length; i++) {
-size += type_size_scalar(type->fields.structure[i].type);
-  }
-  return size;
-   case GLSL_TYPE_SAMPLER:
-  /* Samplers take up no register space, since they're baked in at
-   * link time.
-   */
-  return 0;
-   case GLSL_TYPE_ATOMIC_UINT:
-  return 0;
-   case GLSL_TYPE_SUBROUTINE:
-  return 1;
-   case GLSL_TYPE_IMAGE:
-  return BRW_IMAGE_PARAM_SIZE;
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   case GLSL_TYPE_INTERFACE:
-   case GLSL_TYPE_FUNCTION:
-  unreachable("not reached");
-   }
-
-   return 0;
-}
-
 /**
  * Create a MOV to read the timestamp register.
  *
diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index 74b52976d7..234b5a11c1 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -30,6 +30,54 @@
 #include "main/uniforms.h"
 #include "util/macros.h"
 
+extern "C" int
+type_size_scalar(const struct glsl_type *type)
+{
+   unsigned int size, i;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+  return type->components();
+   case GLSL_TYPE_UINT16:
+   case GLSL_TYPE_INT16:
+   case GLSL_TYPE_FLOAT16:
+  return DIV_ROUND_UP(type->components(), 2);
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_UINT64:
+   case GLSL_TYPE_INT64:
+  return type->components() * 2;
+   case GLSL_TYPE_ARRAY:
+  return type_size_scalar(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+  size = 0;
+  for (i = 0; i < type->length; i++) {
+ size += type_size_scalar(type->fields.structure[i].type);
+  }
+  return size;
+   case GLSL_TYPE_SAMPLER:
+  /* Samplers take up no register space, since they're baked in at
+   * link time.
+   */
+  return 0;
+   case GLSL_TYPE_ATOMIC_UINT:
+  return 0;
+   case GLSL_TYPE_SUBROUTINE:
+  return 1;
+   case GLSL_TYPE_IMAGE:
+  return BRW_IMAGE_PARAM_SIZE;
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_FUNCTION:
+  unreachable("not reached");
+   }
+
+   return 0;
+}
+
 enum brw_reg_type
 brw_type_for_base_type(const struct glsl_type *type)
 {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/51] glsl: Allow 16-bit math

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/ir_validate.cpp | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ir_validate.cpp 
b/src/compiler/glsl/ir_validate.cpp
index 735e862141..d246af866d 100644
--- a/src/compiler/glsl/ir_validate.cpp
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -272,7 +272,8 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_rsq:
case ir_unop_sqrt:
   assert(ir->type->is_float() ||
- ir->type->is_double());
+ ir->type->is_double() ||
+ ir->type->base_type == GLSL_TYPE_FLOAT16);
   assert(ir->type == ir->operands[0]->type);
   break;
 
@@ -281,7 +282,9 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_exp2:
case ir_unop_log2:
case ir_unop_saturate:
-  assert(ir->operands[0]->type->is_float());
+  assert(ir->operands[0]->type->is_float() ||
+ (ir->operands[0]->type->get_scalar_type()->base_type ==
+  GLSL_TYPE_FLOAT16));
   assert(ir->type == ir->operands[0]->type);
   break;
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/51] nir: Add 16-bit float support into algebraic opts

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/nir/nir_search.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index dec56fee74..3b28da4a3f 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -27,6 +27,7 @@
 
 #include 
 #include "nir_search.h"
+#include "util/half_float.h"
 
 struct match_state {
bool inexact_match;
@@ -194,6 +195,9 @@ match_value(const nir_search_value *value, nir_alu_instr 
*instr, unsigned src,
  for (unsigned i = 0; i < num_components; ++i) {
 double val;
 switch (load->def.bit_size) {
+case 16:
+   val = _mesa_half_to_float(load->value.u16[new_swizzle[i]]);
+   break;
 case 32:
val = load->value.f32[new_swizzle[i]];
break;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/51] glsl: Add conversion ops to/from 16-bit floats

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/glsl_to_nir.cpp| 2 ++
 src/compiler/glsl/ir.cpp | 8 
 src/compiler/glsl/ir_expression_operation.py | 5 +
 src/compiler/glsl/ir_validate.cpp| 8 
 src/mesa/program/ir_to_mesa.cpp  | 2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 3 +++
 6 files changed, 28 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 289f8be031..14c358465b 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1561,6 +1561,8 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_d2b:
case ir_unop_i2d:
case ir_unop_u2d:
+   case ir_unop_h2f:
+   case ir_unop_f2h:
case ir_unop_i642i:
case ir_unop_i642u:
case ir_unop_i642f:
diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index 2c61dd9d64..a901ec5683 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -281,6 +281,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_i2f:
case ir_unop_u2f:
case ir_unop_d2f:
+   case ir_unop_h2f:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
case ir_unop_i642f:
@@ -334,6 +335,13 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
   op0->type->vector_elements, 1);
   break;
+
+   case ir_unop_f2h:
+  this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
+  op0->type->vector_elements, 1);
+  break;
+
+
case ir_unop_noise:
   this->type = glsl_type::float_type;
   break;
diff --git a/src/compiler/glsl/ir_expression_operation.py 
b/src/compiler/glsl/ir_expression_operation.py
index d8542925a0..3158533c02 100644
--- a/src/compiler/glsl/ir_expression_operation.py
+++ b/src/compiler/glsl/ir_expression_operation.py
@@ -82,6 +82,7 @@ int_type = type("int", "i", "GLSL_TYPE_INT")
 uint64_type = type("uint64_t", "u64", "GLSL_TYPE_UINT64")
 int64_type = type("int64_t", "i64", "GLSL_TYPE_INT64")
 float_type = type("float", "f", "GLSL_TYPE_FLOAT")
+float16_t_type = type("float16_t_type", "f", "GLSL_TYPE_FLOAT16")
 double_type = type("double", "d", "GLSL_TYPE_DOUBLE")
 bool_type = type("bool", "b", "GLSL_TYPE_BOOL")
 
@@ -460,6 +461,10 @@ ir_expression_operation = [
operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, 
c_expression="{src0}"),
# Double-to-boolean conversion.
operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, 
c_expression="{src0} != 0.0"),
+   # hafl-to-float conversion.
+   operation("h2f", 1, source_types=(float16_t_type,), dest_type=float_type, 
c_expression="{src0}"),
+   # hafl-to-float conversion.
+   operation("f2h", 1, source_types=(float_type,), dest_type=float16_t_type, 
c_expression="{src0}"),
# 'Bit-identical int-to-float "conversion"
operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, 
c_expression="bitcast_u2f({src0})"),
# 'Bit-identical float-to-int "conversion"
diff --git a/src/compiler/glsl/ir_validate.cpp 
b/src/compiler/glsl/ir_validate.cpp
index aa07f8aea6..29e3cda865 100644
--- a/src/compiler/glsl/ir_validate.cpp
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -595,6 +595,14 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type->is_double());
   assert(ir->type->is_boolean());
   break;
+   case ir_unop_h2f:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
+  assert(ir->type->is_float());
+  break;
+   case ir_unop_f2h:
+  assert(ir->operands[0]->type->is_float());
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
+  break;
 
case ir_unop_frexp_sig:
   assert(ir->operands[0]->type->is_float() ||
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ac12389f70..d57e50366e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1313,6 +1313,8 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_d2u:
case ir_unop_u2d:
case ir_unop_d2b:
+   case ir_unop_h2f:
+   case ir_unop_f2h:
case ir_unop_frexp_sig:
case ir_unop_frexp_exp:
   assert(!"not supported");
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0772b73627..f8cb94c7dc 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/s

[Mesa-dev] i965: Kicking off fp16 glsl support

2017-11-24 Thread Topi Pohjolainen
lues there were 16-bit, backend would still need to
   know types.

   My feeling is that we just need to rewrite a fair amount of the
   Intel push/pull constant setup.

5) Patches 44-50 are all about the GLSL lowering pass. This is
   really work-in-progress. What I have here is crude attempt to
   do everything in one pass. It also has several hacks working
   around shortcomings in the Intel backend.

   Short story is that there are quite a few things which don't
   have precision and compiler needs to analyze expressions
   recursively in order to know what precision to use.

   Take, for example, variables that don't have precision but are
   referred to from multiple locations. These require the compiler
   to examine all the expressions involved and use full precision
   for the variable if even one of the expressions requires it. This
   in turn alters the requirements in the other expressions -
   compiler would need to emit conversions for them. And I don't
   think this can be done cleanly in one pass.

   I also realized that there may be cases where the compiler
   would need to use full precision instead of half in order to
   submit the most optimal code. Such shaders sound just evil and
   I don't even want to think about that now. There is more than
   enough work to get even the rules covered...

This series doesn't touch hardware register allocator - it still
allocates one full register per 16-bit float component even in
case of SIMD8.

Patches can be found in (it is rebased on current master and
Igalia's work):

git://people.freedesktop.org/~tpohjola/mesa:16_bit_gles

There are also some simple shader runner tests I wrote along
the way:

git://people.freedesktop.org/~tpohjola/piglit:fp16


All feedback is very welcome. I'm prepared to keep on working on
this if people find it useful. Personally I'd be curious to add
fp16 for pln() and lrp() and see if 16-bits could beat 32-bits
performance wise. Proper push/pull constant support is another
thing on the list. Hardware register allocator with sub-register
support sounds both interesting and scary.


CC: Jose Maria Casanova Crespo <jmcasan...@igalia.com>
CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Kenneth Graunke <kenn...@whitecape.org>
CC: Matt Turner <matts...@gmail.com>
CC: Ian Romanick <i...@freedesktop.org>
CC: Francisco Jerez <curroje...@riseup.net>

Topi Pohjolainen (51):
  nir: Prepare constant folding for 16-bits
  nir: Prepare constant lowering for 16-bits constants
  nir: Add 16-bit float support into algebraic opts
  glsl: Print 16-bit constants
  nir: Print 16-bit constants
  glsl: Add support for 16-bit float constants in nir-conversion
  glsl: Add conversion ops to/from 16-bit floats
  glsl: Add more conversion ops to/from 16-bit floats
  glsl: Allow 16-bit neg() and dot()
  glsl: Allow 16-bit math
  glsl: Enable 16-bit texturing in nir-conversion
  intel/compiler/disasm: Print 16-bit IMM values
  intel/compiler/disasm: Print fp16 also for sampler messages
  intel/compiler/fs: Support for dumping 16-bit IMM values
  intel/compiler: Add support for loading 16-bit constants
  intel/compiler: Move type_size_scalar() into brw_shader.cpp
  intel/compiler: Prepare for glsl mediump float uniforms
  intel/compiler: Allow 16-bit math
  intel/compiler/fs: Add helpers for 16-bit null regs
  intel/compiler/fs: Use two SIMD8 instructions for 16-bit math
  intel/compiler/fs: Use 16-bit null dest with 16-bit math
  intel/compiler/fs: Use 16-bit null dest with 16-bit compare
  intel/compiler: Prepare for 16-bit 3-src ops
  intel/compiler: Add support for negating 16-bit floats
  intel/compiler/fs: Support for combining 16-bit immediates
  intel/compiler/fs: Set 16-bit sampler return format
  intel/compiler/fs: Set tex type for generator to flag fp16
  intel/compiler/fs: Use component_size() instead of open coded
  intel/compiler/fs: Add register padding support
  intel/compiler/fs: Pad 16-bit texture return payloads
  intel/compiler/fs: Pad 16-bit output (store/fb write) payloads
  intel/compiler/fs: Pad 16-bit nir vec* components into full reg
  intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg
  intel/compiler/fs: Pad 16-bit const loads into full regs
  intel/compiler/fs: Pad 16-bit payload lowering
  intel/compiler/fs: Prepare nir_emit_if() for 16-bit sources
  intel/compiler/fs: Consider original sizes when retyping alu ops
  intel/compiler/fs: Use original reg size when retyping nir src
  intel/compiler/fs: Consider logic ops on 16-bit booleans
  intel/compiler/fs: Prepare 16-bit and/or/xor for 32-bit src
  intel/compiler/eu: Take stride into account in 16-bit ops
  i965: WIP: Support for uploading 16-bit uniforms from 32-bit store
  intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms
  glsl: WIP: Add lowering pass for treating mediump as float16
  glsl: Use 16-bit constants if operation is otherwise 16-bit
  glsl: Lower float conversions to mediump
  glsl: HACK: Force texture re

[Mesa-dev] [PATCH 05/51] nir: Print 16-bit constants

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/nir/nir_print.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index fcc8025346..9ed23a74bb 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -27,6 +27,7 @@
 
 #include "nir.h"
 #include "compiler/shader_enums.h"
+#include "util/half_float.h"
 #include 
 #include 
 #include <inttypes.h> /* for PRIx64 macro */
@@ -842,6 +843,10 @@ print_load_const_instr(nir_load_const_instr *instr, 
print_state *state)
   if (instr->def.bit_size == 64)
  fprintf(fp, "0x%16" PRIx64 " /* %f */", instr->value.u64[i],
  instr->value.f64[i]);
+  else if (instr->def.bit_size == 16)
+ fprintf(fp, "0x%04x /* %f */",
+ instr->value.u16[i],
+ _mesa_half_to_float(instr->value.u16[i]));
   else
  fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], 
instr->value.f32[i]);
}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/51] glsl: Add support for 16-bit float constants in nir-conversion

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/glsl_to_nir.cpp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 1e636225c1..289f8be031 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -32,6 +32,7 @@
 #include "compiler/nir/nir_control_flow.h"
 #include "compiler/nir/nir_builder.h"
 #include "main/imports.h"
+#include "util/half_float.h"
 
 /*
  * pass to lower GLSL IR to NIR
@@ -245,6 +246,14 @@ constant_copy(ir_constant *ir, void *mem_ctx)
 
   break;
 
+   case GLSL_TYPE_FLOAT16:
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++)
+ret->values[c].u16[r] =
+   _mesa_float_to_half(ir->value.f[c * rows + r]);
+  }
+  break;
+
case GLSL_TYPE_FLOAT:
   for (unsigned c = 0; c < cols; c++) {
  for (unsigned r = 0; r < rows; r++)
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/51] glsl: Add more conversion ops to/from 16-bit floats

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/glsl/glsl_to_nir.cpp|  6 ++
 src/compiler/glsl/ir_expression_operation.py | 16 ++--
 src/compiler/glsl/ir_validate.cpp| 24 
 src/mesa/program/ir_to_mesa.cpp  |  6 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  6 ++
 5 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 14c358465b..c0adf744e0 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1563,6 +1563,12 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_u2d:
case ir_unop_h2f:
case ir_unop_f2h:
+   case ir_unop_h2u:
+   case ir_unop_u2h:
+   case ir_unop_h2i:
+   case ir_unop_i2h:
+   case ir_unop_h2b:
+   case ir_unop_b2h:
case ir_unop_i642i:
case ir_unop_i642u:
case ir_unop_i642f:
diff --git a/src/compiler/glsl/ir_expression_operation.py 
b/src/compiler/glsl/ir_expression_operation.py
index 3158533c02..0316d1a82d 100644
--- a/src/compiler/glsl/ir_expression_operation.py
+++ b/src/compiler/glsl/ir_expression_operation.py
@@ -461,10 +461,22 @@ ir_expression_operation = [
operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, 
c_expression="{src0}"),
# Double-to-boolean conversion.
operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, 
c_expression="{src0} != 0.0"),
-   # hafl-to-float conversion.
+   # half-to-float conversion.
operation("h2f", 1, source_types=(float16_t_type,), dest_type=float_type, 
c_expression="{src0}"),
-   # hafl-to-float conversion.
+   # float-to-half conversion.
operation("f2h", 1, source_types=(float_type,), dest_type=float16_t_type, 
c_expression="{src0}"),
+   # half-to-unsigned conversion.
+   operation("h2u", 1, source_types=(float16_t_type,), dest_type=uint_type, 
c_expression="{src0}"),
+   # unsigned-to-half conversion.
+   operation("u2h", 1, source_types=(uint_type,), dest_type=float16_t_type, 
c_expression="{src0}"),
+   # half-to-integer conversion.
+   operation("h2i", 1, source_types=(float16_t_type,), dest_type=int_type, 
c_expression="{src0}"),
+   # integer-to-half conversion.
+   operation("i2h", 1, source_types=(int_type,), dest_type=float16_t_type, 
c_expression="{src0}"),
+   # half-to-boolean conversion.
+   operation("h2b", 1, source_types=(float16_t_type,), dest_type=bool_type, 
c_expression="{src0} != 0.0"),
+   # boolean-to-half conversion.
+   operation("b2h", 1, source_types=(bool_type,), dest_type=float16_t_type, 
c_expression="{src0} ? 1.0F : 0.0F"),
# 'Bit-identical int-to-float "conversion"
operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, 
c_expression="bitcast_u2f({src0})"),
# 'Bit-identical float-to-int "conversion"
diff --git a/src/compiler/glsl/ir_validate.cpp 
b/src/compiler/glsl/ir_validate.cpp
index 29e3cda865..a20f52e527 100644
--- a/src/compiler/glsl/ir_validate.cpp
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -603,6 +603,30 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type->is_float());
   assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
   break;
+   case ir_unop_h2u:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
+  assert(ir->type->base_type == GLSL_TYPE_UINT);
+  break;
+   case ir_unop_u2h:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
+  break;
+   case ir_unop_h2i:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
+  assert(ir->type->base_type == GLSL_TYPE_INT);
+  break;
+   case ir_unop_i2h:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
+  break;
+   case ir_unop_h2b:
+  assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16);
+  assert(ir->type->is_boolean());
+  break;
+   case ir_unop_b2h:
+  assert(ir->operands[0]->type->is_boolean());
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT16);
+  break;
 
case ir_unop_frexp_sig:
   assert(ir->operands[0]->type->is_float() ||
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d57e50366e..286b9e07bf 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1315,6 +1315,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_d2b:
case ir_unop_h2f:
case ir_unop_f2h:
+   case ir_unop_h2u:
+   c

[Mesa-dev] [PATCH 01/51] nir: Prepare constant folding for 16-bits

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/nir/nir_opt_constant_folding.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_constant_folding.c 
b/src/compiler/nir/nir_opt_constant_folding.c
index d6be807b3d..b63660ea4d 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -78,6 +78,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
j++) {
  if (load_const->def.bit_size == 64)
 src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
+ else if (load_const->def.bit_size == 16)
+src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
  else
 src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
   }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/51] glsl: Print 16-bit constants

2017-11-24 Thread Topi Pohjolainen
---
 src/compiler/glsl/ir_print_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
b/src/compiler/glsl/ir_print_visitor.cpp
index ea14cdeb6c..ab9a35d73f 100644
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -482,6 +482,7 @@ void ir_print_visitor::visit(ir_constant *ir)
 case GLSL_TYPE_UINT:  fprintf(f, "%u", ir->value.u[i]); break;
 case GLSL_TYPE_INT:   fprintf(f, "%d", ir->value.i[i]); break;
 case GLSL_TYPE_FLOAT:
+case GLSL_TYPE_FLOAT16:
 if (ir->value.f[i] == 0.0f)
/* 0.0 == -0.0, so print with %f to get the proper sign. */
fprintf(f, "%f", ir->value.f[i]);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/51] nir: Prepare constant lowering for 16-bits constants

2017-11-24 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/compiler/nir/nir_lower_load_const_to_scalar.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c 
b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index e494facfd2..76eb1d3a12 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -52,9 +52,13 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
  nir_load_const_instr_create(b.shader, 1, lower->def.bit_size);
   if (lower->def.bit_size == 64)
  load_comp->value.f64[0] = lower->value.f64[i];
+  else if (lower->def.bit_size == 16)
+ load_comp->value.u16[0] = lower->value.u16[i];
   else
  load_comp->value.u32[0] = lower->value.u32[i];
-  assert(lower->def.bit_size == 64 || lower->def.bit_size == 32);
+  assert(lower->def.bit_size == 64 ||
+ lower->def.bit_size == 32 ||
+ lower->def.bit_size == 16);
   nir_builder_instr_insert(&b, &load_comp->instr);
   loads[i] = &load_comp->def;
}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround

2017-10-25 Thread Topi Pohjolainen
Fixes intermittent GPU hangs on Broxton with an Intel internal
test case.

There are plenty of similar fragment shaders in piglit that do
not use any varyings or uniforms. According to the
documentation special timing is needed between pipeline stages.
Apparently we just don't hit that with piglit. Even with the
failing test case one doesn't always get the hang.

Moreover, according to the error states the hang happens
significantly later than the execution of the problematic shader.
There are multiple render cycles (primitive submissions) in between.
I've also seen error states where the ACTHD points outside the
batch. Almost as if the hardware writes somewhere that gets used
later on. That would also explain why piglit doesn't suffer from
this - most tests kick off one render cycle and any corruption
is left unseen.

v2 (Ken): Instead of enabling push constants, enable one of the
  inputs (PSIZ).
v3 (Ken, Jason): Use LAYER instead, making Vulkan's emit_3dstate_sbe()
 happy.

CC: Kenneth Graunke <kenn...@whitecape.org>
CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Eero Tamminen <eero.t.tammi...@intel.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 30e8841242..2c6dc1e5a0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6164,6 +6164,31 @@ fs_visitor::run_gs()
return !failed;
 }
 
+/* From the SKL PRM, Volume 16, Workarounds:
+ *
+ *   0877  3D   Pixel Shader Hang possible when pixel shader dispatched with
+ *  only header phases (R0-R2)
+ *
+ *   WA: Enable a non-header phase (e.g. push constant) when dispatch would
+ *   have been header only.
+ *
+ * Instead of enabling push constants one can alternatively enable one of the
+ * inputs. Here one simply chooses the layer slot, which shouldn't impose much
+ * overhead.
+ */
+static void
+gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
+{
+   if (wm_prog_data->num_varying_inputs)
+  return;
+
+   if (wm_prog_data->base.curb_read_length)
+  return;
+
+   wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
+   wm_prog_data->num_varying_inputs = 1;
+}
+
 bool
 fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
@@ -6227,6 +6252,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
   optimize();
 
   assign_curb_setup();
+
+  if (devinfo->gen >= 9)
+ gen9_ps_header_only_workaround(wm_prog_data);
+
   assign_urb_setup();
 
   fixup_3src_null_dest();
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround

2017-09-25 Thread Topi Pohjolainen
Fixes intermittent GPU hangs on Broxton with an Intel internal
test case.

There are plenty of similar fragment shaders in piglit that do
not use any varyings or uniforms. According to the
documentation special timing is needed between pipeline stages.
Apparently we just don't hit that with piglit. Even with the
failing test case one doesn't always get the hang.

Moreover, according to the error states the hang happens
significantly later than the execution of the problematic shader.
There are multiple render cycles (primitive submissions) in between.
I've also seen error states where the ACTHD points outside the
batch. Almost as if the hardware writes somewhere that gets used
later on. That would also explain why piglit doesn't suffer from
this - most tests kick off one render cycle and any corruption
is left unseen.

For clarity I chose to make the decision in the compiler only and
mark it with a boolean. In principle, constant loaders could make
the same decision by examining num_varying_inputs along with push
constant details.

Alternatively tweaking nr_params in compiler would allow GL driver
to be kept as is if one did, for example:

   static const gl_constant_value zero = { 0 };
   wm_prog_data->base.param[0] = &zero;
   wm_prog_data->base.nr_params = 1;

This, however, doesn't work for Vulkan which would still need
some logic to be added in anv_cmd_buffer_push_constants().

In the end I thought future debugging is probably easier when
the explicit boolean tells about this corner case.

CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Eero Tamminen <eero.t.tammi...@intel.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_compiler.h   |  7 
 src/intel/compiler/brw_fs.cpp   | 46 +
 src/intel/vulkan/anv_cmd_buffer.c   | 22 ++--
 src/intel/vulkan/genX_pipeline.c|  6 +++-
 src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 +++--
 src/mesa/drivers/dri/i965/genX_state_upload.c   |  3 +-
 6 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 6753a8daf0..8a1c8c85ac 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -622,6 +622,13 @@ struct brw_wm_prog_data {
bool contains_noperspective_varying;
 
/**
+* Tell constant uploaders, gen6_upload_push_constants() and
+* anv_cmd_buffer_push_constants(), that workaround is needed.
+* See gen9_ps_header_only_workaround().
+*/
+   bool needs_gen9_ps_header_only_workaround;
+
+   /**
 * Mask of which interpolation modes are required by the fragment shader.
 * Used in hardware setup on gen6+.
 */
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index eb9b4c3890..5f4271fb59 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6159,6 +6159,48 @@ fs_visitor::run_gs()
return !failed;
 }
 
+/* From the SKL PRM, Volume 16, Workarounds:
+ *
+ *   0877  3D   Pixel Shader Hang possible when pixel shader dispatched with
+ *  only header phases (R0-R2)
+ *
+ *   WA: Enable a non-header phase (e.g. push constant) when dispatch would
+ *   have been header only.
+ *
+ * Additionally from the SKL PRM, Volume 2a, Command Reference,
+ * 3DSTATE_PS and Push Constant Enable:
+ *
+ *   This field must be enabled if the sum of the PS Constant Buffer [3:0]
+ *   Read Length fields in 3DSTATE_CONSTANT_PS is nonzero, and must be
+ *   disabled if the sum is zero. 
+ *
+ * Therefore one needs to prepare register space for minimum amount of
+ * constants to be uploaded.
+ *
+ * Here it is assumed that assign_curb_setup() has determined the total amount
+ * of constants (uniforms + ubos) and therefore it is safe to examine if the
+ * workaround is needed.
+ */
+static void
+gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data,
+   int *first_non_payload_grf)
+{
+   if (wm_prog_data->num_varying_inputs)
+  return;
+
+   if (wm_prog_data->base.curb_read_length)
+  return;
+
+   assert(wm_prog_data->base.nr_params == 0);
+
+   wm_prog_data->needs_gen9_ps_header_only_workaround = true;
+
+   const unsigned wa_upload_size = DIV_ROUND_UP(1, 8);
+
+   wm_prog_data->base.curb_read_length = wa_upload_size;
+   *first_non_payload_grf += wa_upload_size;
+}
+
 bool
 fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
@@ -6222,6 +6264,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
   optimize();
 
   assign_curb_setup();
+
+  if (devinfo->gen >= 9)
+ gen9_ps_header_only_workaround(wm_prog_data, &first_non_payload_grf);
+
   assign_urb_setup();
 
   fixup_3src_null_dest();
diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 3b59af8f6f..07d45bd5d4 100644
--- 

[Mesa-dev] [PATCH] i965/screen: Check that given format is valid

2017-09-21 Thread Topi Pohjolainen
CID: 1418110
Fixes: 939b53d3325 "i965/screen: Implement queryDmaBufFormatModifierAttirbs"

CC: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_screen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index bc2bba00b6..f85d1ba51d 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -830,6 +830,8 @@ intel_query_format_modifier_attribs(__DRIscreen *dri_screen,
 {
struct intel_screen *screen = dri_screen->driverPrivate;
const struct intel_image_format *f = intel_image_format_lookup(fourcc);
+   if (f == NULL)
+  return false;
 
if (!modifier_is_supported(&screen->devinfo, f, 0, modifier))
   return false;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] i965: Two possible bug fixes

2017-09-11 Thread Topi Pohjolainen
While debugging one internal workload I've been trying various
things. Here are two of those. I'm not aware of them actually
fixing anything but...

CC: Mark Janes <mark.a.ja...@intel.com>

Topi Pohjolainen (3):
  i965/gen8: Remove unused gen8_emit_3dstate_multisample()
  intel/blorp/hiz: Always set sample number
  i965: Disable stencil cache optimization combining two 4x2 blocks

 src/intel/blorp/blorp_genX_exec.h  | 11 +++
 src/mesa/drivers/dri/i965/brw_context.h|  1 -
 src/mesa/drivers/dri/i965/brw_defines.h|  5 -
 src/mesa/drivers/dri/i965/brw_state_upload.c   |  1 +
 src/mesa/drivers/dri/i965/gen8_multisample_state.c | 16 
 5 files changed, 16 insertions(+), 18 deletions(-)

-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] i965/gen8: Remove unused gen8_emit_3dstate_multisample()

2017-09-11 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.h|  1 -
 src/mesa/drivers/dri/i965/gen8_multisample_state.c | 16 
 2 files changed, 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 92fc16de13..bd56ffc819 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1510,7 +1510,6 @@ void
 gen6_set_sample_maps(struct gl_context *ctx);
 
 /* gen8_multisample_state.c */
-void gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samp);
 void gen8_emit_3dstate_sample_pattern(struct brw_context *brw);
 
 /* gen7_urb.c */
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c 
b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 7a31a5df4a..3afa586275 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -28,22 +28,6 @@
 #include "brw_multisample_state.h"
 
 /**
- * 3DSTATE_MULTISAMPLE
- */
-void
-gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples)
-{
-   assert(num_samples <= 16);
-
-   unsigned log2_samples = ffs(MAX2(num_samples, 1)) - 1;
-
-   BEGIN_BATCH(2);
-   OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE << 16 | (2 - 2));
-   OUT_BATCH(MS_PIXEL_LOCATION_CENTER | log2_samples << 1);
-   ADVANCE_BATCH();
-}
-
-/**
  * 3DSTATE_SAMPLE_PATTERN
  */
 void
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: Disable stencil cache optimization combining two 4x2 blocks

2017-09-11 Thread Topi Pohjolainen
From the BDW PRM, Volume 15, Workarounds:

KMD Wa4x4STCOptimizationDisable HIZ/STC hang in hawx frames.

W/A: Disable 4x4 RCPFE STC optimization and therefore only send one
 valid 4x4 to STC on 4x4 interface. This will require setting bit
 6 of reg. 0x7004. Must be done at boot and all save/restore paths.

From the SKL PRM, Volume 16, Workarounds:

0556 KMD Wa4x4STCOptimizationDisable HIZ/STC hang in hawx frames.

W/A: Disable 4 x4 RCPFE STC optimization and therefore only send
 one valid 4x4 to STC on 4x4 interface.  This will require setting
 bit 6 of reg. 0x7004. Must be done at boot and all save/restore
 paths.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 5 -
 src/mesa/drivers/dri/i965/brw_state_upload.c | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 4abb790612..248512e01a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1611,11 +1611,14 @@ enum brw_pixel_shader_coverage_mask_mode {
 
 #define GEN7_CACHE_MODE_1   0x7004
 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
+# define GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE (1 << 6)
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
 # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
 # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
 # define GEN8_HIZ_PMA_MASK_BITS \
-   REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
+   REG_MASK(GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE | \
+GEN8_HIZ_NP_PMA_FIX_ENABLE | \
+GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
 
 #define GEN7_GT_MODE0x7008
 # define GEN9_SUBSLICE_HASHING_8x8  (0 << 8)
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7b31aad170..4149a3d5d4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -71,6 +71,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
   OUT_BATCH(GEN7_CACHE_MODE_1);
   OUT_BATCH(REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
 REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
+REG_MASK(GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE) |
 GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   ADVANCE_BATCH();
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] intel/blorp/hiz: Always set sample number

2017-09-11 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_genX_exec.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 5f9a8ab4a5..5389262098 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1454,6 +1454,17 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
if (params->stencil.enabled)
   assert(params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR);
 
+   /* From the BDW PRM Volume 2, 3DSTATE_WM_HZ_OP:
+*
+* 3DSTATE_MULTISAMPLE packet must be used prior to this packet to change
+* the Number of Multisamples. This packet must not be used to change
+* Number of Multisamples in a rendering sequence.
+*
+* Since HIZ may be the first thing in a batch buffer, play safe and always
+* emit 3DSTATE_MULTISAMPLE.
+*/
+   blorp_emit_3dstate_multisample(batch, params);
+
/* If we can't alter the depth stencil config and multiple layers are
 * involved, the HiZ op will fail. This is because the op requires that a
 * new config is emitted for each additional layer.
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler: Cast reg types explicitly

2017-08-25 Thread Topi Pohjolainen
Makes coverity happier.

CC: Matt Turner <matts...@gmail.com>
CID: 1416799
Fixes: c1ac1a3d25 (i965: Add a brw_hw_type_to_reg_type() function)

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_reg_type.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_reg_type.c 
b/src/intel/compiler/brw_reg_type.c
index a0f674f0d7..98c4cf7234 100644
--- a/src/intel/compiler/brw_reg_type.c
+++ b/src/intel/compiler/brw_reg_type.c
@@ -111,13 +111,13 @@ brw_hw_type_to_reg_type(const struct gen_device_info 
*devinfo,
 {
if (file == BRW_IMMEDIATE_VALUE) {
   for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
- if (gen4_hw_type[i].imm_type == hw_type) {
+ if (gen4_hw_type[i].imm_type == (enum hw_imm_type)hw_type) {
 return i;
  }
   }
} else {
   for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
- if (gen4_hw_type[i].reg_type == hw_type) {
+ if (gen4_hw_type[i].reg_type == (enum hw_reg_type)hw_type) {
 return i;
  }
   }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [v2] intel/blorp: Adjust intra-tile x when faking rgb with red-only

2017-08-19 Thread Topi Pohjolainen
v2 (Jason): Adjust directly in surf_fake_rgb_with_red()

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101910

CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Mark Janes <mark.a.ja...@intel.com>
CC: mesa-sta...@lists.freedesktop.org

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_blit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index ed00516373..35008cbbb0 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1549,6 +1549,7 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev,
 
info->surf.logical_level0_px.width *= 3;
info->surf.phys_level0_sa.width *= 3;
+   info->tile_x_sa *= 3;
*x *= 3;
*width *= 3;
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/blorp: Adjust intra-tile x when faking rgb with red-only

2017-08-19 Thread Topi Pohjolainen
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101910

CC: Jason Ekstrand <ja...@jlekstrand.net>
CC: Mark Janes <mark.a.ja...@intel.com>
CC: mesa-sta...@lists.freedesktop.org

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_blit.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index ed00516373..db93d0f585 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1839,6 +1839,12 @@ try_blorp_blit(struct blorp_batch *batch,
   surf_get_intratile_offset_px(&params->dst,
&params->wm_inputs.dst_offset.x,
&params->wm_inputs.dst_offset.y);
+
+  if (wm_prog_key->dst_rgb) {
+ /* See surf_fake_rgb_with_red() */
+ params->wm_inputs.dst_offset.x *= 3;
+  }
+
   params->x0 += params->wm_inputs.dst_offset.x;
   params->y0 += params->wm_inputs.dst_offset.y;
   params->x1 += params->wm_inputs.dst_offset.x;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] i965/miptree: Use isl_image_offset

2017-07-26 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_blorp.c|  2 +-
 src/mesa/drivers/dri/i965/brw_context.c  |  1 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 49 +++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 19 +
 src/mesa/drivers/dri/i965/intel_pixel_draw.c |  3 +-
 src/mesa/drivers/dri/i965/intel_pixel_read.c |  2 +-
 src/mesa/drivers/dri/i965/intel_tex.c|  3 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c  |  2 +-
 9 files changed, 36 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index b2987ca4fa..ebe4a051f4 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -149,7 +149,7 @@ blorp_surf_for_miptree(struct brw_context *brw,
surf->surf = &mt->surf;
surf->addr = (struct blorp_address) {
   .buffer = mt->bo,
-  .offset = mt->offset,
+  .offset = mt->offset.tile_aligned_byte_offset,
   .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER :
  I915_GEM_DOMAIN_SAMPLER,
   .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index d0b22d4342..ddd50a16fc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1513,7 +1513,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
   intel_miptree_create_for_bo(brw,
   bo,
   intel_rb_format(rb),
-  0,
   drawable->w,
   drawable->h,
   1,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 2da0984c0f..86e903888c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -135,12 +135,7 @@ brw_emit_surface_state(struct brw_context *brw,
uint32_t mocs, uint32_t *surf_offset, int surf_index,
unsigned read_domains, unsigned write_domains)
 {
-   struct isl_image_offset offset = {
-  .tile_aligned_byte_offset = mt->offset,
-  .intra_tile_x = mt->level[0].level_x,
-  .intra_tile_y = mt->level[0].level_y
-   };
-
+   struct isl_image_offset offset = mt->offset;
struct isl_surf surf;
 
get_isl_surf(brw, mt, target, &view, &offset, &surf);
@@ -1648,8 +1643,10 @@ update_image_surface(struct brw_context *brw,
 
  if (format == ISL_FORMAT_RAW) {
 brw_emit_buffer_surface_state(
-   brw, surf_offset, mt->bo, mt->offset,
-   format, mt->bo->size - mt->offset, 1 /* pitch */,
+   brw, surf_offset, mt->bo, mt->offset.tile_aligned_byte_offset,
+   format,
+   mt->bo->size - mt->offset.tile_aligned_byte_offset,
+   1 /* pitch */,
access != GL_READ_ONLY);
 
  } else {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index cab888f04d..d0546851b4 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -757,8 +757,6 @@ intel_miptree_create(struct brw_context *brw,
if (!mt)
   return NULL;
 
-   mt->offset = 0;
-
if (!intel_miptree_alloc_aux(brw, mt)) {
   intel_miptree_release(&mt);
   return NULL;
@@ -771,7 +769,6 @@ struct intel_mipmap_tree *
 intel_miptree_create_for_bo(struct brw_context *brw,
 struct brw_bo *bo,
 mesa_format format,
-uint32_t offset,
 uint32_t width,
 uint32_t height,
 uint32_t depth,
@@ -817,12 +814,6 @@ intel_miptree_create_for_bo(struct brw_context *brw,
 
brw_bo_get_tiling(bo, &tiling, &swizzle);
 
-   /* Nothing will be able to use this miptree with the BO if the offset isn't
-* aligned.
-*/
-   if (tiling != I915_TILING_NONE)
-  assert(offset % 4096 == 0);
-
/* miptrees can't handle negative pitch.  If you need flipping of images,
 * that's outside of the scope of the mt.
 */
@@ -845,7 +836,6 @@ intel_miptree_create_for_bo(struct brw_context *brw,
 
brw_bo_reference(bo);
mt->bo = bo;
-   mt->offset = offset;
 
if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
   intel_miptree_choose_aux_usage(brw, mt);
@@ -874,13 +864,13 @@ miptree_create_for_planar_image(struct brw_context *brw,
*/
   struct

[Mesa-dev] [PATCH 1/6] i965/miptree: Take import tile offset along with intra-tile x, y

2017-07-26 Thread Topi Pohjolainen
Imported miptrees represent single images in buffer objects that
themselves may contain multiple images (full mipmaps or arrays).
In such case there may be an offset which consists of pointer
to a tile and x,y coordinates giving the start position within that
tile.

Until now callers got only the intra tile x,y offsets but applied
the tile aligned byte offsets directly themselves. This patch
drops applying the byte offset separately and returns it from
intel_miptree_get_tile_offsets() along with the intra tile
offsets.

Note that intel_renderbuffer_get_tile_offsets() calls
intel_miptree_get_tile_offsets().

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 17 +
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index a0ca6ddf98..abf1d29678 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -99,11 +99,11 @@ get_isl_surf(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
 */
assert(brw->has_surface_tile_offset);
assert(view->levels == 1 && view->array_len == 1);
-   assert(*tile_x == 0 && *tile_y == 0);
+   assert(*tile_x == 0 && *tile_y == 0 && *offset == 0);
 
-   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
- view->base_array_layer,
- tile_x, tile_y);
+   *offset = intel_miptree_get_tile_offsets(mt, view->base_level,
+view->base_array_layer,
+tile_x, tile_y);
 
/* Minify the logical dimensions of the texture. */
const unsigned l = view->base_level - mt->first_level;
@@ -976,9 +976,8 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
  format << BRW_SURFACE_FORMAT_SHIFT);
 
/* reloc */
-   assert(mt->offset % mt->cpp == 0);
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
- mt->bo->offset64 + mt->offset);
+   surf[1] = intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
+mt->bo->offset64;
 
surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
  (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index ed7cb8e215..1b42edd285 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1343,6 +1343,23 @@ intel_miptree_get_tile_offsets(const struct 
intel_mipmap_tree *mt,
uint32_t *tile_x,
uint32_t *tile_y)
 {
+   /* First consider the special case where caller wants the very first slice.
+* In such case there is only possible import offset to consider. This
+* consists of tile aligned byte offset and intra tile x,y coordinates.
+*/
+   if (level == 0 && slice == 0) {
+  *tile_x = mt->level[0].level_x;
+  *tile_y = mt->level[0].level_y;
+  return mt->offset;
+   }
+
+   /* Only single slices can be imported - mipmapped and arrayed always
+* start from the beginning of the underlying buffer object.
+*/
+   assert(mt->offset == 0);
+   assert(mt->level[0].level_x == 0);
+   assert(mt->level[0].level_y == 0);
+
uint32_t x, y;
uint32_t mask_x, mask_y;
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] i965/miptree: Use isl_image_offset in get_tile_offsets()

2017-07-26 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 54 ++--
 src/mesa/drivers/dri/i965/intel_fbo.h| 14 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 28 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  8 ++--
 src/mesa/drivers/dri/i965/intel_screen.c |  9 ++--
 5 files changed, 61 insertions(+), 52 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index abf1d29678..2da0984c0f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -77,8 +77,7 @@ uint32_t rb_mocs[] = {
 static void
 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
  GLenum target, struct isl_view *view,
- uint32_t *tile_x, uint32_t *tile_y,
- uint32_t *offset, struct isl_surf *surf)
+ struct isl_image_offset *surf_offset, struct isl_surf *surf)
 {
*surf = mt->surf;
 
@@ -99,11 +98,12 @@ get_isl_surf(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
 */
assert(brw->has_surface_tile_offset);
assert(view->levels == 1 && view->array_len == 1);
-   assert(*tile_x == 0 && *tile_y == 0 && *offset == 0);
+   assert(surf_offset->intra_tile_x == 0 &&
+  surf_offset->intra_tile_y == 0 &&
+  surf_offset->tile_aligned_byte_offset == 0);
 
-   *offset = intel_miptree_get_tile_offsets(mt, view->base_level,
-view->base_array_layer,
-tile_x, tile_y);
+   intel_miptree_get_tile_offsets(mt, view->base_level,
+  view->base_array_layer, surf_offset);
 
/* Minify the logical dimensions of the texture. */
const unsigned l = view->base_level - mt->first_level;
@@ -135,13 +135,15 @@ brw_emit_surface_state(struct brw_context *brw,
uint32_t mocs, uint32_t *surf_offset, int surf_index,
unsigned read_domains, unsigned write_domains)
 {
-   uint32_t tile_x = mt->level[0].level_x;
-   uint32_t tile_y = mt->level[0].level_y;
-   uint32_t offset = mt->offset;
+   struct isl_image_offset offset = {
+  .tile_aligned_byte_offset = mt->offset,
+  .intra_tile_x = mt->level[0].level_x,
+  .intra_tile_y = mt->level[0].level_y
+   };
 
struct isl_surf surf;
 
-   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
+   get_isl_surf(brw, mt, target, &view, &offset, &surf);
 
union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
 
@@ -180,14 +182,17 @@ brw_emit_surface_state(struct brw_context *brw,
  surf_offset);
 
isl_surf_fill_state(>isl_dev, state, .surf = >surf, .view = ,
-   .address = mt->bo->offset64 + offset,
+   .address = mt->bo->offset64 +
+  offset.tile_aligned_byte_offset,
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
.mocs = mocs, .clear_color = clear_color,
-   .x_offset_sa = tile_x, .y_offset_sa = tile_y);
+   .x_offset_sa = offset.intra_tile_x,
+   .y_offset_sa = offset.intra_tile_y);
 
brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
-  mt->bo, offset, read_domains, write_domains);
+  mt->bo, offset.tile_aligned_byte_offset,
+  read_domains, write_domains);
 
if (aux_surf) {
   /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
@@ -938,7 +943,7 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
uint32_t *surf;
-   uint32_t tile_x, tile_y;
+   struct isl_image_offset image_offset;
enum isl_format format;
uint32_t offset;
/* _NEW_BUFFERS */
@@ -949,9 +954,9 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
 
if (rb->TexImage && !brw->has_surface_tile_offset) {
-  intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
+  intel_renderbuffer_get_tile_offsets(irb, &image_offset);
 
-  if (tile_x != 0 || tile_y != 0) {
+  if (image_offset.intra_tile_x != 0 || image_offset.intra_tile_y != 0) {
 /* Original gen4 hardware couldn't draw to a non-tile-aligned
  * destination in a miptree unless you actually setup your renderbuffer
  * as a miptree and used the fragile lod/array_index/etc. controls to
@@ -975,9 +980,10 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
surf[

[Mesa-dev] [PATCH 5/6] i965/blit: Let _intratile_offset_el() resolve image offset

2017-07-26 Thread Topi Pohjolainen
Image offset is really a triple: aligned byte offset, intra-tile
x and intra-tile y. Taking intra-tile offsets into account in the
caller side of emit_miptree_blit() and then applying tile-aligned
byte offset in emit_miptree_blit() is confusing. Now both are
handled in single location: get_blit_intratile_offset_el().

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 106 ++---
 1 file changed, 71 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
b/src/mesa/drivers/dri/i965/intel_blit.c
index e7338bdf46..d308cfb416 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -42,6 +42,7 @@
 static void
 intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
+   unsigned level, unsigned layer,
int x, int y, int width, int height);
 
 static GLuint translate_raster_op(GLenum logicop)
@@ -164,15 +165,68 @@ intel_miptree_blit_compatible_formats(mesa_format src, 
mesa_format dst)
 
 static void
 get_blit_intratile_offset_el(const struct brw_context *brw,
- struct intel_mipmap_tree *mt,
+ const struct intel_mipmap_tree *mt,
+ unsigned level, unsigned layer, 
  uint32_t total_x_offset_el,
  uint32_t total_y_offset_el,
  struct isl_image_offset *image_offset)
 {
+   struct isl_image_offset base_offset;
+   intel_miptree_get_tile_offsets(mt, level, layer, &base_offset);
+   
+   /* Given offsets are relative to the start of the slice and we need the
+* offset that is relative to the beginning of the buffer.
+*
+* Image offset is really a triple: aligned byte offset, intra-tile
+* x and intra-tile y. In order to simply add the page aligned offset of
+* the start of the image and the page aligned offset of the given position
+* (total_x_offset_el, total_y_offset_el) within the image, we need to
+* first augment the position within the image with the intra-tile start
+* position (x,y) of the image itself. Otherwise we might get intra-tile
+* offsets that don't actually fit into one page:
+*
+* +-+ page N
+*/ \  | |
+*   d_1   |   | |
+*\ /  | |
+* + + start of the image
+* | |
+* | |
+* +-+ page N + 1
+* | |
+* . .
+* . .
+* +-+ page N + M
+*/ \  | |
+*   d_2   |   | |
+*\ /  | |
+* + + (total_x_offset_el,
+* | |  total_y_offset_el)
+* | |
+* +-+ page N + M + 1
+*
+* Consider a case where d_1 + d_2 > sizeof(page). If one calculates the
+* triple separately for the start of the image and for the position
+* within the image, one gets page aligned of N + M and d_1 + d_2. If in
+* turn one takes d_1 into account as offsetting the position _within_ the
+* image, one gets page aligned of N + M + 1 and d_1 + d_2 - sizeof(page)
+* where 0 <= d_1 + d_2 - sizeof(page) < sizeof(page).
+*/
+   total_x_offset_el += base_offset.intra_tile_x;
+   total_y_offset_el += base_offset.intra_tile_y;
+
isl_tiling_get_intratile_offset_el(mt->surf.tiling,
   mt->cpp * 8, mt->surf.row_pitch,
   total_x_offset_el, total_y_offset_el,
   image_offset);
+ 
+   /* Finally add the byte offset of the page aligned start of the image to
+* the page aligned offset within the image giving page aligned offset
+* relative to the beginning of the buffer.
+*/
+   image_offset->tile_aligned_byte_offset +=
+  base_offset.tile_aligned_byte_offset;
+
if (mt->surf.tiling == ISL_TILING_LINEAR) {
   /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
*
@@ -197,8 +251,10 @@ get_blit_intratile_offset_el(const struct brw_context *brw,
 static 

[Mesa-dev] [PATCH 4/6] i965/miptree: Use isl instead of local offset calculator

2017-07-26 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_misc_state.c| 20 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 38 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 ---
 3 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1e3be784c5..983fc0c736 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -189,10 +189,22 @@ rebase_depth_stencil(struct brw_context *brw, struct 
intel_renderbuffer *irb,
 
brw->depthstencil.tile_x = tile_x;
brw->depthstencil.tile_y = tile_y;
-   brw->depthstencil.depth_offset = intel_miptree_get_aligned_offset(
-   irb->mt,
-   irb->draw_x & ~tile_mask_x,
-   irb->draw_y & ~tile_mask_y);
+
+   struct isl_image_offset image_offset;
+   isl_tiling_get_intratile_offset_el(irb->mt->surf.tiling,
+  irb->mt->cpp * 8,
+  irb->mt->surf.row_pitch,
+  irb->draw_x & ~tile_mask_x,
+  irb->draw_y & ~tile_mask_y,
+  &image_offset);
+
+   brw->depthstencil.depth_offset = image_offset.tile_aligned_byte_offset;
+
+   /* Given x and y were already masked to provide aligned offset. Therefore
+* there should be no intra tile offset.
+*/
+   assert(image_offset.intra_tile_x == 0);
+   assert(image_offset.intra_tile_y == 0);
 
return false;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 37024c011d..cab888f04d 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1300,34 +1300,6 @@ intel_get_tile_masks(enum isl_tiling tiling, uint32_t 
cpp,
 }
 
 /**
- * Compute the offset (in bytes) from the start of the BO to the given x
- * and y coordinate.  For tiled BOs, caller must ensure that x and y are
- * multiples of the tile size.
- */
-uint32_t
-intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
- uint32_t x, uint32_t y)
-{
-   int cpp = mt->cpp;
-   uint32_t pitch = mt->surf.row_pitch;
-
-   switch (mt->surf.tiling) {
-   default:
-  unreachable("not reached");
-   case ISL_TILING_LINEAR:
-  return y * pitch + x * cpp;
-   case ISL_TILING_X:
-  assert((x % (512 / cpp)) == 0);
-  assert((y % 8) == 0);
-  return y * pitch + x / (512 / cpp) * 4096;
-   case ISL_TILING_Y0:
-  assert((x % (128 / cpp)) == 0);
-  assert((y % 32) == 0);
-  return y * pitch + x / (128 / cpp) * 4096;
-   }
-}
-
-/**
  * Rendering with tiled buffers requires that the base address of the buffer
  * be aligned to a page boundary.  For renderbuffers, and sometimes with
  * textures, we may want the surface to point at a texture image level that
@@ -1361,15 +1333,11 @@ intel_miptree_get_tile_offsets(const struct 
intel_mipmap_tree *mt,
assert(mt->level[0].level_y == 0);
 
uint32_t x, y;
-   uint32_t mask_x, mask_y;
-
-   intel_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
intel_miptree_get_image_offset(mt, level, slice, &x, &y);
 
-   image_offset->intra_tile_x = x & mask_x;
-   image_offset->intra_tile_y = y & mask_y;
-   image_offset->tile_aligned_byte_offset =
-  intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
+   isl_tiling_get_intratile_offset_el(mt->surf.tiling,
+  mt->cpp * 8, mt->surf.row_pitch,
+  x, y, image_offset);
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index b8d36b35e0..d9d2ce9ee2 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -468,10 +468,6 @@ intel_miptree_get_tile_offsets(const struct 
intel_mipmap_tree *mt,
unsigned level, unsigned slice,
struct isl_image_offset *image_offset);
 
-uint32_t
-intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
- uint32_t x, uint32_t y);
-
 void
 intel_miptree_copy_slice(struct brw_context *brw,
  struct intel_mipmap_tree *src_mt,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] intel/isl: Introduce tiled image offset

2017-07-26 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/blorp/blorp_blit.c   | 19 --
 src/intel/isl/isl.c| 44 +++
 src/intel/isl/isl.h| 29 +++--
 src/mesa/drivers/dri/i965/intel_blit.c | 47 +++---
 4 files changed, 73 insertions(+), 66 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index ed00516373..db675dc1e4 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1405,12 +1405,14 @@ blorp_surf_convert_to_single_slice(const struct 
isl_device *isl_dev,
else
   layer = info->view.base_array_layer;
 
-   uint32_t byte_offset;
+   struct isl_image_offset image_offset;
isl_surf_get_image_surf(isl_dev, >surf,
info->view.base_level, layer, z,
>surf,
-   _offset, >tile_x_sa, >tile_y_sa);
-   info->addr.offset += byte_offset;
+   _offset);
+   info->addr.offset += image_offset.tile_aligned_byte_offset;
+   info->tile_x_sa = image_offset.intra_tile_x;
+   info->tile_y_sa = image_offset.intra_tile_y;
 
uint32_t tile_x_px, tile_y_px;
surf_get_intratile_offset_px(info, &tile_x_px, &tile_y_px);
@@ -1905,7 +1907,8 @@ shrink_surface_params(const struct isl_device *dev,
   struct brw_blorp_surface_info *info,
   double *x0, double *x1, double *y0, double *y1)
 {
-   uint32_t byte_offset, x_offset_sa, y_offset_sa, size;
+   uint32_t x_offset_sa, y_offset_sa, size;
+   struct isl_image_offset image_offset;
struct isl_extent2d px_size_sa;
int adjust;
 
@@ -1922,10 +1925,10 @@ shrink_surface_params(const struct isl_device *dev,
isl_tiling_get_intratile_offset_sa(info->surf.tiling,
   info->surf.format, info->surf.row_pitch,
   x_offset_sa, y_offset_sa,
-  _offset,
-  >tile_x_sa, >tile_y_sa);
-
-   info->addr.offset += byte_offset;
+  _offset);
+   info->addr.offset += image_offset.tile_aligned_byte_offset;
+   info->tile_x_sa = image_offset.intra_tile_x;
+   info->tile_y_sa = image_offset.intra_tile_y;
 
adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0;
*x0 += adjust;
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 5e3d279b0b..8431d18639 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -2295,25 +2295,25 @@ isl_surf_get_image_offset_B_tile_sa(const struct 
isl_surf *surf,
 &total_x_offset_el,
 &total_y_offset_el);
 
-   uint32_t x_offset_el, y_offset_el;
+   struct isl_image_offset image_offset;
isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
   surf->row_pitch,
   total_x_offset_el,
   total_y_offset_el,
-  offset_B,
-  _offset_el,
-  _offset_el);
+  _offset);
+
+   *offset_B = image_offset.tile_aligned_byte_offset;
 
if (x_offset_sa) {
-  *x_offset_sa = x_offset_el * fmtl->bw;
+  *x_offset_sa = image_offset.intra_tile_x * fmtl->bw;
} else {
-  assert(x_offset_el == 0);
+  assert(image_offset.intra_tile_x == 0);
}
 
if (y_offset_sa) {
-  *y_offset_sa = y_offset_el * fmtl->bh;
+  *y_offset_sa = image_offset.intra_tile_y * fmtl->bh;
} else {
-  assert(y_offset_el == 0);
+  assert(image_offset.intra_tile_y == 0);
}
 }
 
@@ -2324,17 +2324,15 @@ isl_surf_get_image_surf(const struct isl_device *dev,
 uint32_t logical_array_layer,
 uint32_t logical_z_offset_px,
 struct isl_surf *image_surf,
-uint32_t *offset_B,
-uint32_t *x_offset_sa,
-uint32_t *y_offset_sa)
+struct isl_image_offset *offset)
 {
isl_surf_get_image_offset_B_tile_sa(surf,
level,
logical_array_layer,
logical_z_offset_px,
-   offset_B,
-   x_offset_sa,
-   y_offset_sa);
+   >tile_aligned_byte_offset,
+   >intra_tile_x,
+   >intra_tile_y);
 
/* Even for cube maps there will be

[Mesa-dev] i965/miptree: Rework import offsets

2017-07-26 Thread Topi Pohjolainen
Offsets to tiled images consist of two parts: tile offset and
intra-tile x,y coordinates giving the start position within
the tile.

Until now these have been split into different parts of
miptree: intel_mipmap_tree::offset giving the tile offset and
intel_mipmap_tree::level[0].level_x/y giving the intra tile
coordinates. Moreover, logic in various places had been split
in similar fashion.

This patch set brings the two parts closer to each other.

CC: Jason Ekstrand <ja...@jlekstrand.net>

Topi Pohjolainen (6):
  i965/miptree: Take import tile offset along with intra-tile x,y
  intel/isl: Introduce tiled image offset
  i965/miptree: Use isl_image_offset in get_tile_offsets()
  i965/miptree: Use isl instead of local offset calculator
  i965/blit: Let _intratile_offset_el() resolve image offset
  i965/miptree: Use isl_image_offset

 src/intel/blorp/blorp_blit.c |  19 +--
 src/intel/isl/isl.c  |  44 ---
 src/intel/isl/isl.h  |  29 +++--
 src/mesa/drivers/dri/i965/brw_blorp.c|   2 +-
 src/mesa/drivers/dri/i965/brw_context.c  |   1 -
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  20 +++-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  58 -
 src/mesa/drivers/dri/i965/intel_blit.c   | 145 +++
 src/mesa/drivers/dri/i965/intel_fbo.h|  14 +--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 106 +++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  29 +
 src/mesa/drivers/dri/i965/intel_pixel_draw.c |   3 +-
 src/mesa/drivers/dri/i965/intel_pixel_read.c |   2 +-
 src/mesa/drivers/dri/i965/intel_screen.c |   9 +-
 src/mesa/drivers/dri/i965/intel_tex.c|   3 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c  |   2 +-
 16 files changed, 254 insertions(+), 232 deletions(-)

-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/17] squash: i965/gen4: Force x-tiling for color surfaces

2017-07-21 Thread Topi Pohjolainen
This is what brw_miptree_choose_tiling() currently does even
though blorp is available.

Before enabling Y-tiling, one needs to fix, for example, batch
wrapping caused by mipmap offsets no longer being tile aligned,
and intel_renderbuffer_move_to_temp() kicking in where it didn't
before because X-tiling met the alignment constraints.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 7b2f98cc1b..af5d37bc47 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -953,8 +953,13 @@ miptree_create(struct brw_context *brw,
if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
   alloc_flags |= BO_ALLOC_FOR_RENDER;
 
-   const isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
+   isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ?
   ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK;
+
+   /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
+   if (brw->gen < 6)
+  tiling_flags &= ~ISL_TILING_Y0_BIT;
+
struct intel_mipmap_tree *mt = make_surface(
  brw, target, format,
  first_level, last_level,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/17] i965/miptree: Check tex image allocation failures

2017-07-21 Thread Topi Pohjolainen
This allows a graceful failure instead of a crash on an assert later on.

This can be hit, for example, on SNB when trying to allocate
8kx8k CUBE_MAP against isl: x-tiled buffer size becomes
2421161984 exceeding the maximum of 1 << 31 == 2147483648.

Another way to hit this on SNB is multisampling with formats
wider than 64 bits.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_tex.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex.c 
b/src/mesa/drivers/dri/i965/intel_tex.c
index 82e25fc5ea..7ce2ceb9a2 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -95,6 +95,8 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
   intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj,
   intel_image,
   1 /* samples */);
+  if (!intel_image->mt)
+ return false;
 
   /* Even if the object currently has a mipmap tree associated
* with it, this one is a more likely candidate to represent the
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] main/teximage: Even on failure use valid format for init()

2017-07-21 Thread Topi Pohjolainen
Otherwise init_teximage_fields_ms() (called by
_mesa_init_teximage_fields()) will always assert as it can't
find a valid base format.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/main/teximage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 5e13025ed1..2132aaee76 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -5772,7 +5772,7 @@ texture_image_multisample(struct gl_context *ctx, GLuint 
dims,
  * like, but being tidy is good.
  */
 _mesa_init_teximage_fields(ctx, texImage,
-  0, 0, 0, 0, GL_NONE, MESA_FORMAT_NONE);
+  0, 0, 0, 0, internalformat, texFormat);
  }
   }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/17] intel/isl/gen7: Allow msaa with 128-bit formats

2017-07-21 Thread Topi Pohjolainen
These formats are already allowed by the i965 GL driver, and the
feature seems to work just fine.

There are tests for multisampled rendering in piglit:
tests/spec/ext_framebuffer_multisample which can be patched to
try GL_RGBA16F/32F/16I/16UI/32I/32UI in addition to GL_RGBA/8I.
IvyBridge passed all tests with all sample numbers and even
with 128-bit formats.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/isl/isl_format.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c
index a9f9c6be73..435b0d003a 100644
--- a/src/intel/isl/isl_format.c
+++ b/src/intel/isl/isl_format.c
@@ -554,16 +554,19 @@ isl_format_supports_multisampling(const struct 
gen_device_info *devinfo,
 *   - any compressed texture format (BC*)
 *   - any YCRCB* format
 *
-* The restriction on the format's size is removed on Broadwell.  Also,
-* there is an exception for HiZ which we treat as a compressed format and
-* is allowed to be multisampled on Broadwell and earlier.
+* The restriction on the format's size is removed on Broadwell. Moreover,
+* empirically it looks that even IvyBridge can handle multisampled surfaces
+* with format sizes all the way to 128-bits (RGBA32F, RGBA32I, RGBA32UI).
+*
+* Also, there is an exception for HiZ which we treat as a compressed
+* format and is allowed to be multisampled on Broadwell and earlier.
 */
if (format == ISL_FORMAT_HIZ) {
   /* On SKL+, HiZ is always single-sampled even when the primary surface
* is multisampled.  See also isl_surf_get_hiz_surf().
*/
   return devinfo->gen <= 8;
-   } else if (devinfo->gen < 8 && isl_format_get_layout(format)->bpb > 64) {
+   } else if (devinfo->gen < 7 && isl_format_get_layout(format)->bpb > 64) {
   return false;
} else if (isl_format_is_compressed(format)) {
   return false;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/17] i965/miptree: Drop miptree_array_layout in get_isl_dim_layout()

2017-07-21 Thread Topi Pohjolainen
This was only needed for checking gen6 stencil which is already
using isl. One could delete GEN6_HIZ_STENCIL layout altogether
but that will be gone with the rest after a while anyway.

The dim_layout converter is needed even after transition to isl
when setting up surface states - see brw_emit_surface_state().
Hence dropping the unneeded argument separately.

Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 +++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 11 ---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  3 +--
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 45ac106f3f..e9a50b89eb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -88,9 +88,10 @@ get_isl_surf(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
   surf->dim = get_isl_surf_dim(target);
}
 
+   assert(mt->array_layout != GEN6_HIZ_STENCIL);
+
const enum isl_dim_layout dim_layout =
-  get_isl_dim_layout(>screen->devinfo, mt->surf.tiling, target,
- mt->array_layout);
+  get_isl_dim_layout(>screen->devinfo, mt->surf.tiling, target);
 
if (surf->dim_layout == dim_layout)
   return;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 73637b0fc5..3a2395b030 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3819,12 +3819,8 @@ get_isl_surf_dim(GLenum target)
 
 enum isl_dim_layout
 get_isl_dim_layout(const struct gen_device_info *devinfo,
-   enum isl_tiling tiling, GLenum target,
-   enum miptree_array_layout array_layout)
+   enum isl_tiling tiling, GLenum target)
 {
-   if (array_layout == GEN6_HIZ_STENCIL)
-  return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
-
switch (target) {
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
@@ -3865,10 +3861,11 @@ intel_miptree_get_isl_surf(struct brw_context *brw,
const struct intel_mipmap_tree *mt,
struct isl_surf *surf)
 {
+   assert(mt->array_layout != GEN6_HIZ_STENCIL);
+
surf->dim = get_isl_surf_dim(mt->target);
surf->dim_layout = get_isl_dim_layout(>screen->devinfo,
- mt->surf.tiling, mt->target,
- mt->array_layout);
+ mt->surf.tiling, mt->target);
surf->msaa_layout = mt->surf.msaa_layout;
surf->tiling = intel_miptree_get_isl_tiling(mt);
surf->row_pitch = mt->surf.row_pitch;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index e7872ff96c..7de7f86eee 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -668,8 +668,7 @@ get_isl_surf_dim(GLenum target);
 
 enum isl_dim_layout
 get_isl_dim_layout(const struct gen_device_info *devinfo,
-   enum isl_tiling tiling,
-   GLenum target, enum miptree_array_layout array_layout);
+   enum isl_tiling tiling, GLenum target);
 
 enum isl_tiling
 intel_miptree_get_isl_tiling(const struct intel_mipmap_tree *mt);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/17] i965/miptree: Clean-up unused

2017-07-21 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 -
 src/mesa/drivers/dri/i965/brw_blorp.c|   8 +-
 src/mesa/drivers/dri/i965/brw_tex_layout.c   | 735 ---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  23 +-
 src/mesa/drivers/dri/i965/gen6_depth_state.c |   8 +-
 src/mesa/drivers/dri/i965/gen7_misc_state.c  |   8 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c |   8 +-
 src/mesa/drivers/dri/i965/intel_blit.c   |  33 +-
 src/mesa/drivers/dri/i965/intel_fbo.c|  44 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 583 ++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 241 
 src/mesa/drivers/dri/i965/intel_screen.c |  13 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c  |  29 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c   |   8 +-
 14 files changed, 96 insertions(+), 1646 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_layout.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 431712f76e..425c883de8 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -51,7 +51,6 @@ i965_FILES = \
brw_tcs_surface_state.c \
brw_tes.c \
brw_tes_surface_state.c \
-   brw_tex_layout.c \
brw_urb.c \
brw_util.c \
brw_util.h \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 474dfc61c1..e50173d442 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -147,13 +147,7 @@ blorp_surf_for_miptree(struct brw_context *brw,
  intel_miptree_check_level_layer(mt, *level, start_layer + i);
}
 
-   if (mt->surf.size > 0) {
-  surf->surf = >surf;
-   } else {
-  intel_miptree_get_isl_surf(brw, mt, _surfs[0]);
-  surf->surf = _surfs[0];
-   }
-
+   surf->surf = >surf;
surf->addr = (struct blorp_address) {
   .buffer = mt->bo,
   .offset = mt->offset,
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
deleted file mode 100644
index f3b5a17c88..00
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ /dev/null
@@ -1,735 +0,0 @@
-/*
- * Copyright 2006 VMware, Inc.
- * Copyright © 2006 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_tex_layout.cpp
- *
- * Code to lay out images in a mipmap tree.
- *
- * \author Keith Whitwell <kei...@vmware.com>
- * \author Michel Dänzer <daen...@vmware.com>
- */
-
-#include "intel_mipmap_tree.h"
-#include "brw_context.h"
-#include "main/macros.h"
-#include "main/glformats.h"
-
-#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-
-static unsigned int
-intel_horizontal_texture_alignment_unit(struct brw_context *brw,
-struct intel_mipmap_tree *mt,
-uint32_t layout_flags)
-{
-   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
-  return 16;
-
-   /**
-* +--+
-* || alignment unit width  ("i") |
-* | Surface Property   |-|
-* || 915 | 965 | ILK | SNB | IVB |
-* +--+
-* | YUV 4:2:2 format   |  8  |  4  |  4  |  4  |  4  |
-* | BC1-5 compressed format (DXTn/S3TC)|  4  |  4  |  4  |  4  |  4  |
-* | FXT1  compressed format|  8

  1   2   3   4   5   6   7   8   9   10   >