[Freedreno] [PATCH] freedreno/a4xx: add stencil texturing support

2017-11-19 Thread Ilia Mirkin
Copied from a5xx, should be identical.

Signed-off-by: Ilia Mirkin 
---
 docs/features.txt|  6 ++---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c|  2 ++
 src/gallium/drivers/freedreno/a4xx/fd4_format.c  | 11 +---
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 34 ++--
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 99fb1715e0b..2d6e0b20fb5 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -180,7 +180,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, radeonsi
   GL_ARB_robust_buffer_access_behavior  DONE (i965)
   GL_ARB_shader_image_size  DONE (freedreno/a5xx, 
i965, r600, softpipe)
   GL_ARB_shader_storage_buffer_object   DONE (freedreno/a5xx, 
i965, softpipe)
-  GL_ARB_stencil_texturing  DONE (freedreno/a5xx, 
i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
+  GL_ARB_stencil_texturing  DONE (freedreno, 
i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
   GL_ARB_texture_buffer_range   DONE (freedreno, nv50, 
i965, r600, llvmpipe)
   GL_ARB_texture_query_levels   DONE (all drivers that 
support GLSL 1.30)
   GL_ARB_texture_storage_multisampleDONE (all drivers that 
support GL_ARB_texture_multisample)
@@ -203,7 +203,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, radeonsi
   GL_ARB_multi_bind DONE (all drivers)
   GL_ARB_query_buffer_objectDONE (i965/hsw+)
   GL_ARB_texture_mirror_clamp_to_edge   DONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
-  GL_ARB_texture_stencil8   DONE (freedreno/a5xx, 
i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
+  GL_ARB_texture_stencil8   DONE (freedreno, 
i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
   GL_ARB_vertex_type_10f_11f_11f_revDONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
 
 GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi
@@ -252,7 +252,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
   GL_ARB_shader_storage_buffer_object   DONE (freedreno/a5xx, 
i965/gen7+, softpipe)
   GL_ARB_shading_language_packing   DONE (all drivers)
   GL_ARB_separate_shader_objectsDONE (all drivers)
-  GL_ARB_stencil_texturing  DONE (freedreno/a5xx, 
nv50, r600, llvmpipe, softpipe, swr)
+  GL_ARB_stencil_texturing  DONE (freedreno, nv50, 
r600, llvmpipe, softpipe, swr)
   GL_ARB_texture_multisample (Multisample textures) DONE (i965/gen7+, 
nv50, r600, llvmpipe, softpipe)
   GL_ARB_texture_storage_multisampleDONE (all drivers that 
support GL_ARB_texture_multisample)
   GL_ARB_vertex_attrib_binding  DONE (all drivers)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 0f7c6470330..8262b45daad 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -190,6 +190,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
OUT_RING(ring, view->texconst3);
if (view->base.texture) {
struct fd_resource *rsc = 
fd_resource(view->base.texture);
+   if (view->base.format == 
PIPE_FORMAT_X32_S8X24_UINT)
+   rsc = rsc->stencil;
OUT_RELOC(ring, rsc->bo, view->offset, 
view->texconst4, 0);
} else {
OUT_RING(ring, 0x);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 3e1dc277850..75d24126149 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -211,10 +211,13 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT,  9_9_9_E5_FLOAT, NONE,WZYX),
 
-   _T(Z24X8_UNORM,   X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
-   _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
-   _T(Z32_FLOAT, 32_FLOAT,   R8G8B8A8_UNORM, WZYX),
-   _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX),
+   _T(Z16_UNORM,16_UNORM, R16_UNORM,  WZYX),
+   _T(Z24X8_UNORM,  X8Z24_UNORM,  R8G8B8A8_UNORM, WZYX),
+   _T(X24S8_UINT,   8_8_8_8_UINT, R8G8B8A8_UINT,  XYZW),
+   _T(Z24_UNORM_S8_UINT,X8Z24_UNORM,  R8G8B8A8_UNORM, WZYX),
+   _T(Z32_FLOAT,32_FLOAT,

[Freedreno] [PATCH 1/2] nir: allow texture offsets with cube maps

2017-11-19 Thread Ilia Mirkin
GL doesn't have this, but some hardware supports it. This is convenient
for lowering tg4 to plain texture calls, which is necessary on Adreno
A4xx hardware.

Signed-off-by: Ilia Mirkin 
---
 src/compiler/nir/nir.h | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f46f6147110..64965ae16d6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1364,8 +1364,7 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, 
unsigned src)
if (instr->src[src].src_type == nir_tex_src_ms_mcs)
   return 4;
 
-   if (instr->src[src].src_type == nir_tex_src_offset ||
-   instr->src[src].src_type == nir_tex_src_ddx ||
+   if (instr->src[src].src_type == nir_tex_src_ddx ||
instr->src[src].src_type == nir_tex_src_ddy) {
   if (instr->is_array)
  return instr->coord_components - 1;
@@ -1373,6 +1372,18 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, 
unsigned src)
  return instr->coord_components;
}
 
+   /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
+* the offset, since a cube maps to a single face.
+*/
+   if (instr->src[src].src_type == nir_tex_src_offset) {
+  unsigned ret = instr->coord_components;
+  if (instr->is_array)
+ ret--;
+  if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+ ret--;
+  return ret;
+   }
+
return 1;
 }
 
-- 
2.13.6

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 2/2] freedreno/ir3: add a pass to lower tg4 to txl, enable gather on a4xx

2017-11-19 Thread Ilia Mirkin
Unfortunately Adreno A4xx hardware returns incorrect results with the
GATHER4 opcodes. As a result, we have to lower to 4 individual texture
calls (txl since we have to force lod to 0). We achieve this using
offsets, including on cube maps which normally never have offsets.

Signed-off-by: Ilia Mirkin 
---

This pass relies on the hw doing the "right thing": working with non-constant
offsets, and not enforcing the usual offset limits (since the gather offset
will in effect be shifted by another 1).

It fails two tests out of all the gather ones:

bin/zero-tex-coord textureGather
tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-textureGatherOffset-uniform-array-offset.shader_test

We haven't fully investigated why yet, but this is a good start.

Note that the blob does this differently: it modifies the source coordinate.
However, this seems unnecessary given that the hw can be made to use the
offsets.

Also, please note that my knowledge of nir is minimal. Please carefully check
that I used the right helpers, etc. The code is largely the result of seeing
what doesn't trigger assertions.

 docs/features.txt  |   4 +-
 src/gallium/drivers/freedreno/Makefile.sources |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |   2 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c   |   7 +-
 src/gallium/drivers/freedreno/ir3/ir3_nir.c|   2 +
 src/gallium/drivers/freedreno/ir3/ir3_nir.h|   1 +
 .../freedreno/ir3/ir3_nir_lower_tg4_to_tex.c   | 139 +
 src/gallium/drivers/freedreno/meson.build  |   1 +
 8 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c

diff --git a/docs/features.txt b/docs/features.txt
index 633d2593738..99fb1715e0b 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -130,7 +130,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, 
radeonsi
   GL_ARB_tessellation_shaderDONE (i965/gen7+)
   GL_ARB_texture_buffer_object_rgb32DONE (freedreno, 
i965/gen6+, llvmpipe, softpipe, swr)
   GL_ARB_texture_cube_map_array DONE (i965/gen6+, 
nv50, llvmpipe, softpipe)
-  GL_ARB_texture_gather DONE (freedreno/a5xx, 
i965/gen6+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_texture_gather DONE (freedreno, 
i965/gen6+, nv50, llvmpipe, softpipe, swr)
   GL_ARB_texture_query_lod  DONE (freedreno, i965, 
nv50, llvmpipe, softpipe)
   GL_ARB_transform_feedback2DONE (i965/gen6+, 
nv50, llvmpipe, softpipe, swr)
   GL_ARB_transform_feedback3DONE (i965/gen7+, 
llvmpipe, softpipe, swr)
@@ -256,7 +256,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
   GL_ARB_texture_multisample (Multisample textures) DONE (i965/gen7+, 
nv50, r600, llvmpipe, softpipe)
   GL_ARB_texture_storage_multisampleDONE (all drivers that 
support GL_ARB_texture_multisample)
   GL_ARB_vertex_attrib_binding  DONE (all drivers)
-  GS5 Enhanced textureGatherDONE (i965/gen7+, r600)
+  GS5 Enhanced textureGatherDONE (freedreno, 
i965/gen7+, r600)
   GS5 Packing/bitfield/conversion functions DONE (i965/gen6+, r600)
   GL_EXT_shader_integer_mix DONE (all drivers that 
support GLSL)
 
diff --git a/src/gallium/drivers/freedreno/Makefile.sources 
b/src/gallium/drivers/freedreno/Makefile.sources
index b109a5a7a21..40c2eff0455 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -168,6 +168,7 @@ ir3_SOURCES := \
ir3/ir3_nir.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
+   ir3/ir3_nir_lower_tg4_to_tex.c \
ir3/ir3_print.c \
ir3/ir3_ra.c \
ir3/ir3_sched.c \
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index e61344fd104..62e4a574b90 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -264,7 +264,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
return 0;
 
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-   if (is_a5xx(screen))
+   if (is_a4xx(screen) || is_a5xx(screen))
return 4;
return 0;
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index da4aeaa7acb..c97df4f1d63 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2399,9 +2399,12 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)