Re: [Mesa-dev] [PATCH 30/59] intel/compiler: document MAD algebraic optimization

2018-12-07 Thread Pohjolainen, Topi
On Tue, Dec 04, 2018 at 08:16:54AM +0100, Iago Toral Quiroga wrote:
> This optimization depends on two other optimization passes: the
> constant propagation pass, which allows immediate propagation
> on MAD/LRP instructions even though the hardware can't do it,
> and the combine constants pass to fix this up afterwards for the
> cases that we could not optimize here.
> 
> Also, the optimization can generate cases for MUL/ADD that we
> should not find otherwise, which are then implemented building
> on that assumption, so better documenting these is useful.

Reviewed-by: Topi Pohjolainen 

> ---
>  src/intel/compiler/brw_fs.cpp | 22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 509c6febf38..a9ddafc05d1 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2461,6 +2461,11 @@ fs_visitor::opt_algebraic()
>   }
>  
>   if (inst->src[0].file == IMM) {
> +/* We produce these from the MAD optimization below, which
> + * should only be happening for 32-bit float because we
> + * prevent constant propagation to MAD sources for other
> + * bit-sizes.
> + */
>  assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
>  inst->opcode = BRW_OPCODE_MOV;
>  inst->src[0].f *= inst->src[1].f;
> @@ -2482,6 +2487,11 @@ fs_visitor::opt_algebraic()
>   }
>  
>   if (inst->src[0].file == IMM) {
> +/* We produce these from the MAD optimization below, which
> + * should only be happening for 32-bit float because we
> + * prevent constant propagation to MAD sources for other
> + * bit-sizes.
> + */
>  assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
>  inst->opcode = BRW_OPCODE_MOV;
>  inst->src[0].f += inst->src[1].f;
> @@ -2565,6 +2575,11 @@ fs_visitor::opt_algebraic()
>   }
>   break;
>case BRW_OPCODE_MAD:
> + /* Align16 MAD can't do immediate sources, however we allow constant
> +  * propagation to these instructions to enable these algebraic
> +  * optimizations. For the cases that we can't optimize here, we
> +  * rely on the combine constants pass to fix it up later.
> +  */
>   if (inst->src[1].is_zero() || inst->src[2].is_zero()) {
>  inst->opcode = BRW_OPCODE_MOV;
>  inst->src[1] = reg_undef;
> @@ -2585,6 +2600,13 @@ fs_visitor::opt_algebraic()
>  inst->src[2] = reg_undef;
>  progress = true;
>   } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
> +/* We should not be getting here for anything other than 32-bit
> + * float since we prevent constant-propagation to MAD 
> instructions
> + * for everything else.
> + */
> +assert(inst->src[1].type == inst->src[2].type &&
> +   inst->src[1].type == BRW_REGISTER_TYPE_F);
> +
>  inst->opcode = BRW_OPCODE_ADD;
>  inst->src[1].f *= inst->src[2].f;
>  inst->src[2] = reg_undef;
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nv50/ir: fix use-after-free in ConstantFolding::visit

2018-12-07 Thread Ilia Mirkin
On Fri, Dec 7, 2018 at 3:57 AM Karol Herbst  wrote:
>
> opnd() might delete the passed in instruction, but it's used through
> i->srcExists() later in visit
>
> Signed-off-by: Karol Herbst 
> ---
>  .../nouveau/codegen/nv50_ir_peephole.cpp  | 71 +++
>  1 file changed, 43 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 9524ba63654..d69ceaafd73 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -370,7 +370,8 @@ private:
>
> void expr(Instruction *, ImmediateValue&, ImmediateValue&);
> void expr(Instruction *, ImmediateValue&, ImmediateValue&, 
> ImmediateValue&);
> -   void opnd(Instruction *, ImmediateValue&, int s);
> +   /* true if i was deleted */
> +   bool opnd(Instruction *i, ImmediateValue&, int s);
> void opnd3(Instruction *, ImmediateValue&);
>
> void unary(Instruction *, const ImmediateValue&);
> @@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
>i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
>   expr(i, src0, src1);
>else
> -  if (i->srcExists(0) && i->src(0).getImmediate(src0))
> - opnd(i, src0, 0);
> -  else
> +  if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
> + if (opnd(i, src0, 0))
> +return true;
> +  } else
>if (i->srcExists(1) && i->src(1).getImmediate(src1))
> - opnd(i, src1, 1);
> + if (opnd(i, src1, 1))
> +return true;
>if (i->srcExists(2) && i->src(2).getImmediate(src2))
>   opnd3(i, src2);

Is there a reason not to do this under an else instead? You're
aborting the whole BB loop here whenever we delete an instruction,
which presumably happens with some frequency.

> }
> @@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, 
> Value *a, int64_t b, Value *
> return false;
>  }
>
> -void
> +bool
>  ConstantFolding::opnd(Instruction *i, ImmediateValue , int s)
>  {
> const int t = !s;
> const operation op = i->op;
> Instruction *newi = i;
> +   bool deleted = false;
>
> switch (i->op) {
> case OP_SPLIT: {
> @@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>   val >>= bitsize;
>}
>delete_Instruction(prog, i);
> +  deleted = true;
>break;
> }
> case OP_MUL:
> @@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>  newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
>   TYPE_S32, i->getSrc(t), bld.mkImm(0));
>  delete_Instruction(prog, i);
> +deleted = true;
>   } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
>  // The high bits can't be set in this case (either mul by 0 or
>  // unsigned by 1)
> @@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>if (!isFloatType(i->dType) && !i->src(t).mod) {
>   bld.setPosition(i, false);
>   int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : 
> imm0.reg.data.s32;
> - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
> + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
>  delete_Instruction(prog, i);
> +deleted = true;
> + }
>} else
>if (i->postFactor && i->sType == TYPE_F32) {
>   /* Can't emit a postfactor with an immediate, have to fold it in */
> @@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && 
> !i->src(2).mod) {
>   bld.setPosition(i, false);
>   int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : 
> imm0.reg.data.s32;
> - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, 
> i->getSrc(2)))
> + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, 
> i->getSrc(2))) {
>  delete_Instruction(prog, i);
> +deleted = true;
> + }
>}
>break;
> case OP_SUB:
> @@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>  bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
>
>   delete_Instruction(prog, i);
> + deleted = true;
>} else
>if (imm0.reg.data.s32 == -1) {
>   i->op = OP_NEG;
> @@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>  bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
>
>   delete_Instruction(prog, i);
> + deleted = true;
>}
>break;
>
> @@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
> 

[Mesa-dev] [Bug 108530] [Tracker] Mesa 18.3 Release Tracker

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108530

Emil Velikov  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Emil Velikov  ---
Mesa 18.3.0 is out

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nv50/ir: fix use-after-free in ConstantFolding::visit

2018-12-07 Thread Karol Herbst
On Fri, Dec 7, 2018 at 1:58 PM Ilia Mirkin  wrote:
>
> On Fri, Dec 7, 2018 at 3:57 AM Karol Herbst  wrote:
> >
> > opnd() might delete the passed in instruction, but it's used through
> > i->srcExists() later in visit
> >
> > Signed-off-by: Karol Herbst 
> > ---
> >  .../nouveau/codegen/nv50_ir_peephole.cpp  | 71 +++
> >  1 file changed, 43 insertions(+), 28 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > index 9524ba63654..d69ceaafd73 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > @@ -370,7 +370,8 @@ private:
> >
> > void expr(Instruction *, ImmediateValue&, ImmediateValue&);
> > void expr(Instruction *, ImmediateValue&, ImmediateValue&, 
> > ImmediateValue&);
> > -   void opnd(Instruction *, ImmediateValue&, int s);
> > +   /* true if i was deleted */
> > +   bool opnd(Instruction *i, ImmediateValue&, int s);
> > void opnd3(Instruction *, ImmediateValue&);
> >
> > void unary(Instruction *, const ImmediateValue&);
> > @@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
> >i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
> >   expr(i, src0, src1);
> >else
> > -  if (i->srcExists(0) && i->src(0).getImmediate(src0))
> > - opnd(i, src0, 0);
> > -  else
> > +  if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
> > + if (opnd(i, src0, 0))
> > +return true;
> > +  } else
> >if (i->srcExists(1) && i->src(1).getImmediate(src1))
> > - opnd(i, src1, 1);
> > + if (opnd(i, src1, 1))
> > +return true;
> >if (i->srcExists(2) && i->src(2).getImmediate(src2))
> >   opnd3(i, src2);
>
> Is there a reason not to do this under an else instead? You're
> aborting the whole BB loop here whenever we delete an instruction,
> which presumably happens with some frequency.
>

I think using continue instead should be better.. I simply didn't see
the loop at all

> > }
> > @@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, 
> > Value *a, int64_t b, Value *
> > return false;
> >  }
> >
> > -void
> > +bool
> >  ConstantFolding::opnd(Instruction *i, ImmediateValue , int s)
> >  {
> > const int t = !s;
> > const operation op = i->op;
> > Instruction *newi = i;
> > +   bool deleted = false;
> >
> > switch (i->op) {
> > case OP_SPLIT: {
> > @@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >   val >>= bitsize;
> >}
> >delete_Instruction(prog, i);
> > +  deleted = true;
> >break;
> > }
> > case OP_MUL:
> > @@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >  newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
> >   TYPE_S32, i->getSrc(t), bld.mkImm(0));
> >  delete_Instruction(prog, i);
> > +deleted = true;
> >   } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
> >  // The high bits can't be set in this case (either mul by 0 or
> >  // unsigned by 1)
> > @@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >if (!isFloatType(i->dType) && !i->src(t).mod) {
> >   bld.setPosition(i, false);
> >   int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : 
> > imm0.reg.data.s32;
> > - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
> > + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
> >  delete_Instruction(prog, i);
> > +deleted = true;
> > + }
> >} else
> >if (i->postFactor && i->sType == TYPE_F32) {
> >   /* Can't emit a postfactor with an immediate, have to fold it in 
> > */
> > @@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && 
> > !i->src(2).mod) {
> >   bld.setPosition(i, false);
> >   int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : 
> > imm0.reg.data.s32;
> > - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, 
> > i->getSrc(2)))
> > + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, 
> > i->getSrc(2))) {
> >  delete_Instruction(prog, i);
> > +deleted = true;
> > + }
> >}
> >break;
> > case OP_SUB:
> > @@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >  bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
> >
> >   delete_Instruction(prog, i);
> > + deleted = true;
> >} else
> >if (imm0.reg.data.s32 == 

Re: [Mesa-dev] [PATCH] nir: Fixup algebraic test for variable-sized conversions

2018-12-07 Thread Jason Ekstrand

Ack

On December 7, 2018 03:54:21 Connor Abbott  wrote:


b2i can now take any size boolean in preparation for 1-bit booleans, so
the error message printed is slightly different.

Fixes: dca6cd9ce65 ("nir: Make boolean conversions sized just like the others")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108961
Cc: Jason Ekstrand 
---
src/compiler/nir/tests/algebraic_parser_test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/tests/algebraic_parser_test.py 
b/src/compiler/nir/tests/algebraic_parser_test.py

index 492a09ec7db..d96da7db519 100644
--- a/src/compiler/nir/tests/algebraic_parser_test.py
+++ b/src/compiler/nir/tests/algebraic_parser_test.py
@@ -67,7 +67,7 @@ class ValidatorTests(unittest.TestCase):

def test_replace_src_bitsize(self):
self.common((('iadd', a, ('b2i', b)), ('iadd', a, b)),
-"Sources a (bit size of a) and b (bit size of 32) " \
+"Sources a (bit size of a) and b (bit size of b) " \
"of ('iadd', 'a', 'b') may not have the same bit size " \
"when building the replacement expression.")

--
2.17.2




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 18.3.0

2018-12-07 Thread Emil Velikov
Mesa 18.3.0 is now available.

This release consists of approximately 1700 commits from 120
developers.

Huge thanks to all the developers, testers and users for their
ongoing work and support shaping up the 18.3.0 release.

The top highlights include:

 - GL_AMD_depth_clamp_separate on r600, radeonsi.
 - GL_AMD_framebuffer_multisample_advanced on radeonsi.
 - GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.
 - GL_AMD_multi_draw_indirect on all GL 4.x drivers.
 - GL_AMD_query_buffer_object on i965, nvc0, r600, radeonsi.
 - GL_EXT_disjoint_timer_query on radeonsi and most other Gallium drivers (ES 
extension)
 - GL_EXT_texture_compression_s3tc on all drivers (ES extension)
 - GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
 - GL_EXT_window_rectangles on radeonsi.
 - GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
 - GL_NV_fragment_shader_interlock on i965.
 - EGL_EXT_device_base for all drivers.
 - EGL_EXT_device_drm for all drivers.
 - EGL_MESA_device_software for all drivers.


Additional features:

ANV
 - VK_EXT_calibrated_timestamps
 - VK_EXT_pci_bus_info
 - VK_EXT_sampler_filter_minmax
 - VK_EXT_vertex_attribute_divisor v3
 - VK_GOOGLE_decorate_string
 - VK_GOOGLE_hlsl_functionality1
 - VK_KHR_driver_properties
 
RADV 
 - VK_EXT_calibrated_timestamps
 - VK_EXT_conservative_rasterization
 - VK_EXT_pci_bus_info
 - VK_EXT_transform_feedback
 - VK_EXT_vertex_attribute_divisor v3
 - VK_GOOGLE_decorate_string
 - VK_GOOGLE_hlsl_functionality1
 - VK_KHR_driver_properties

 
For the full log see:
   git log 18.2-branchpoint..mesa-18.3.0


Changes since rc6:

Emil Velikov (2):
  Update version to 18.3.0 (final)
  docs: update 18.3.0 release notes

git tag: mesa-18.3.0

https://mesa.freedesktop.org/archive/mesa-18.3.0.tar.gz
MD5:  13c2af753d1ad536a2035167433e683c  mesa-18.3.0.tar.gz
SHA1: efc81aaa24a1c7c8f10aebfc26a212a7b1979c3c  mesa-18.3.0.tar.gz
SHA256: 17a124d4dbc712505d22a7815c9b0cee22214c96c8abb91539a2b1351e38a000  
mesa-18.3.0.tar.gz
SHA512: 
7ac062f1b6be1f5a61bbe560c8348645a8e1abbc8651de918134ae857fd8e4bae7b5163fdce5bdf0c19de03d57cedbe86bcf165e3814b1a7b5e0c431fdf83fc9
  mesa-18.3.0.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-18.3.0.tar.xz
MD5:  2d69eff8fe0c6e89bb793d4fd69b750d  mesa-18.3.0.tar.xz
SHA1: d4ddc4e7aa8e11a41d35b5d51476f867e81056ca  mesa-18.3.0.tar.xz
SHA256: b63f947e735d6ef3dfaa30c789a9adfbae18aea671191eaacde95a18c17fc38a  
mesa-18.3.0.tar.xz
SHA512: 
6643d8a100c50efee7178fe950e7cccad24c3a98538d3a13c7c6570add30a56776b4e5f279e2b0a20c4038e682e9461ca5a4fd4ac23a7f60d2f4a0c972525c42
  mesa-18.3.0.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0.tar.xz.sig



signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108967] DRM : eglCreatePbufferSurface failed with error EGL_BAD_MATCH

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108967

Bug ID: 108967
   Summary: DRM : eglCreatePbufferSurface failed with error
EGL_BAD_MATCH
   Product: Mesa
   Version: 17.3
  Hardware: ARM
OS: Linux (All)
Status: NEW
  Severity: major
  Priority: medium
 Component: EGL
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: vishwa...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

We use qtwebengine, which currently fails to initialize with the following
error:

gl_surface_qt.cpp(480)] eglCreatePbufferSurface failed with error EGL_BAD_MATCH
gl_surface_qt.cpp(480)] eglCreatePbufferSurface failed with error EGL_BAD_MATCH
---

mesa is configured with the drm and wayland platforms (see the config below).
It seems mesa egl doesn't support "PbufferSurface" on either the wayland or drm
platform.
But qt is expecting to have pBuffers.
Is it possible to have any alternative to fix this issue?

Hardware:imx6qdl
Linux kernel version: 4.14
QT Version: 5.9
mesa Version:17.3.8

mesa-17.3.8- configure: 

prefix:  /usr
exec_prefix: /usr
libdir:  /usr/lib
includedir:  /usr/include

OpenGL:  yes (ES1: yes ES2: yes)

OSMesa:  no

DRI platform:drm
DRI drivers: swrast 
DRI driver dir:  ${libdir}/dri
GLX: no

EGL: yes
EGL drivers: builtin:egl_dri2
GBM: yes
EGL/Vulkan/VL platforms:   wayland drm

Vulkan drivers:  no

llvm:no

Gallium drivers: etnaviv imx
Gallium st:  mesa

HUD extra stats: no
HUD lmsensors:   no


Shared libs: yes
Static libs: no
Shared-glapi:yes

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: remove unused float coord wrapping for aos sampling

2018-12-07 Thread Jose Fonseca

On 07/12/2018 01:28, srol...@vmware.com wrote:

From: Roland Scheidegger 

AoS sampling tries to use integers for coord wrapping when possible,
as it should be faster. However, for AVX, this was suboptimal, because
only floats can use 8x32bit vectors, whereas integers have to be split
into 4x32bit vectors. (I believe part of why it was slower was also
that at least earlier llvm versions had trouble optimizing it properly,
since you can still do simple bit ops with 8x32bit vectors, so a
sequence of int add / and / int add / and with such vectors would
actually end up doing 128bit inserts/extracts between the operations
instead of just doing the cheap 128bit ands.)
Hence, a special float coord wrapping path was added to AoS sampling.
But this path was actually disabled for a long time already, since we
found that just splitting everything before entering the AoS path was
still slightly faster usually, so none of this float coord wrapping
code was used anymore (AoS sampling code, when avx2 isn't supported,
never sees vectors with length > 4). I thought it might be useful some
day again, but I'm not interested anymore in optimizing for very weird
instruction sets which have support for 256bit vectors for floats but
not for ints, so just drop it.
---
  .../auxiliary/gallivm/lp_bld_sample_aos.c | 530 +-
  1 file changed, 23 insertions(+), 507 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index c46749dbac8..ad3a9e4a4ca 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -131,68 +131,6 @@ lp_build_sample_wrap_nearest_int(struct 
lp_build_sample_context *bld,
  }
  
  
-/**

- * Build LLVM code for texture coord wrapping, for nearest filtering,
- * for float texcoords.
- * \param coord  the incoming texcoord (s,t or r)
- * \param length  the texture size along one dimension
- * \param offset  the texel offset along the coord axis
- * \param is_pot  if TRUE, length is a power of two
- * \param wrap_mode  one of PIPE_TEX_WRAP_x
- * \param icoord  the texcoord after wrapping, as int
- */
-static void
-lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
-   LLVMValueRef coord,
-   LLVMValueRef length,
-   LLVMValueRef offset,
-   boolean is_pot,
-   unsigned wrap_mode,
-   LLVMValueRef *icoord)
-{
-   struct lp_build_context *coord_bld = >coord_bld;
-   LLVMValueRef length_minus_one;
-
-   switch(wrap_mode) {
-   case PIPE_TEX_WRAP_REPEAT:
-  if (offset) {
- /* this is definitely not ideal for POT case */
- offset = lp_build_int_to_float(coord_bld, offset);
- offset = lp_build_div(coord_bld, offset, length);
- coord = lp_build_add(coord_bld, coord, offset);
-  }
-  /* take fraction, unnormalize */
-  coord = lp_build_fract_safe(coord_bld, coord);
-  coord = lp_build_mul(coord_bld, coord, length);
-  *icoord = lp_build_itrunc(coord_bld, coord);
-  break;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-  length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
-  if (bld->static_sampler_state->normalized_coords) {
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length);
-  }
-  if (offset) {
- offset = lp_build_int_to_float(coord_bld, offset);
- coord = lp_build_add(coord_bld, coord, offset);
-  }
-  coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
- length_minus_one);
-  *icoord = lp_build_itrunc(coord_bld, coord);
-  break;
-
-   case PIPE_TEX_WRAP_CLAMP:
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-   default:
-  assert(0);
-   }
-}
-
-
  /**
   * Helper to compute the first coord and the weight for
   * linear wrap repeat npot textures
@@ -424,129 +362,6 @@ lp_build_sample_wrap_linear_int(struct 
lp_build_sample_context *bld,
  }
  
  
-/**

- * Build LLVM code for texture coord wrapping, for linear filtering,
- * for float texcoords.
- * \param block_length  is the length of the pixel block along the
- *  coordinate axis
- * \param coord  the incoming texcoord (s,t or r)
- * \param length  the texture size along one dimension
- * \param offset  the texel offset along the coord axis
- * \param is_pot  if TRUE, length is a power of two
- * \param wrap_mode  one of PIPE_TEX_WRAP_x
- * \param coord0  the first texcoord after wrapping, as int
- * \param coord1  the second texcoord after wrapping, as int
- * \param weight  the filter weight as int (0-255)
- * \param 

Re: [Mesa-dev] [PATCH 34/59] intel/compiler: fix ddy for half-float in gen8

2018-12-07 Thread Pohjolainen, Topi
On Tue, Dec 04, 2018 at 08:16:58AM +0100, Iago Toral Quiroga wrote:
> We use Align16 mode for this, since it is more convenient, but the PRM
> for Broadwell states in Volume 3D Media GPGPU, Chapter 'Register region
> restrictions', Section '1. Special Restrictions':
> 
>"In Align16 mode, the channel selects and channel enables apply to a
> pair of half-floats, because these parameters are defined for DWord
> elements ONLY. This is applicable when both source and destination
> are half-floats."
> 
> This means that we cannot select individual HF elements using swizzles
> like we do with 32-bit floats so we can't implement the required
> regioning for this.
> 
> Use the gen11 path for this instead, which uses Align1 mode.
> 
> The restriction is not present in gen9 of gen10, where the Align16

 or?

> implementation seems to work just fine.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index d8e4bae17e0..ba7ed07e692 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1281,8 +1281,14 @@ fs_generator::generate_ddy(const fs_inst *inst,
> const uint32_t type_size = type_sz(src.type);
>  
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
> -  /* produce accurate derivatives */
> -  if (devinfo->gen >= 11) {
> +  /* produce accurate derivatives. We can do this easily in Align16
> +   * but this is not supported in gen11+ and gen8 Align16 swizzles
> +   * for Half-Float operands work in units of 32-bit and always
> +   * select pairs of consecutive half-float elements, so we can't use
> +   * it for this.
> +   */
> +  if (devinfo->gen >= 11 ||
> +  (devinfo->gen == 8 && src.type == BRW_REGISTER_TYPE_HF)) {
>   src = stride(src, 0, 2, 1);
>   struct brw_reg src_0  = byte_offset(src,  0 * type_size);
>   struct brw_reg src_2  = byte_offset(src,  2 * type_size);
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107524] Broken packDouble2x32 at llvmpipe

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107524

Emil Velikov  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #9 from Emil Velikov  ---
Should be fixed with the following commit. Feel free to reopen otherwise.

commit bb17ae49ee2591a4a35479ed6e48cb3c18422e2a
Author: Dave Airlie 
Date:   Mon Aug 27 02:03:41 2018 +0100

gallivm: allow to pass two swizzles into fetches.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] nv50/ir: initialize relDegree staticly

2018-12-07 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 
On Fri, Dec 7, 2018 at 3:57 AM Karol Herbst  wrote:
>
> this race condition is pretty harmless, but also pretty trivial to fix
>
> Signed-off-by: Karol Herbst 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 23 +--
>  1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 23bd07af33a..5d1a96f8d71 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -803,7 +803,21 @@ private:
> Function *func;
> Program *prog;
>
> -   static uint8_t relDegree[17][17];
> +   struct RelDegree {
> +  uint8_t data[17][17];
> +
> +  RelDegree() {
> + for (int i = 1; i <= 16; ++i)
> +for (int j = 1; j <= 16; ++j)
> +   data[i][j] = j * ((i + j - 1) / j);
> +  }
> +
> +  const uint8_t* operator[](std::size_t i) const {
> + return data[i];
> +  }
> +   };
> +
> +   static const RelDegree relDegree;
>
> RegisterSet regs;
>
> @@ -815,7 +829,7 @@ private:
> std::list mustSpill;
>  };
>
> -uint8_t GCRA::relDegree[17][17];
> +const GCRA::RelDegree GCRA::relDegree;
>
>  GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
>  {
> @@ -1172,11 +1186,6 @@ GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
> spill(spill)
>  {
> prog = func->getProgram();
> -
> -   // initialize relative degrees array - i takes away from j
> -   for (int i = 1; i <= 16; ++i)
> -  for (int j = 1; j <= 16; ++j)
> - relDegree[i][j] = j * ((i + j - 1) / j);
>  }
>
>  GCRA::~GCRA()
> --
> 2.19.2
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107971] SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107971

Emil Velikov  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #6 from Emil Velikov  ---
Should be resolved with the following commit. Feel free to reopen otherwise.

commit ca4e465f7d018f8702ddb5332bf1c892b1808366
Author: Jason Ekstrand 
Date:   Sat Oct 13 08:46:20 2018 -0500

anv,radv: Trivially expose two new VK_GOOGLE extensions

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: add missing PIPE_CAP_SURFACE_SAMPLE_COUNT default value

2018-12-07 Thread Michel Dänzer
On 2018-12-07 10:08 a.m., Samuel Pitoiset wrote:
> Fixes: 2710c40e3c8 ("gallium: Add new PIPE_CAP_SURFACE_SAMPLE_COUNT")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/auxiliary/util/u_screen.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/util/u_screen.c 
> b/src/gallium/auxiliary/util/u_screen.c
> index 73dbbee94a9..aef21bc46ae 100644
> --- a/src/gallium/auxiliary/util/u_screen.c
> +++ b/src/gallium/auxiliary/util/u_screen.c
> @@ -326,6 +326,9 @@ u_pipe_screen_get_param_defaults(struct pipe_screen 
> *pscreen,
> case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
>return 2047;
>  
> +   case PIPE_CAP_SURFACE_SAMPLE_COUNT:
> +  return 0;
> +
> default:
>unreachable("bad PIPE_CAP_*");
> }
> 

Tested-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 41/59] intel/compiler: split is_partial_write() into two variants

2018-12-07 Thread Pohjolainen, Topi
On Tue, Dec 04, 2018 at 08:17:05AM +0100, Iago Toral Quiroga wrote:
> This function is used in two different scenarios that for 32-bit
> instructions are the same, but for 16-bit instructions are not.
> 
> One scenario is that in which we are working at a SIMD8 register
> level and we need to know if a register is fully defined or written.
> This is useful, for example, in the context of liveness analysis or
> register allocation, where we work with units of registers.
> 
> The other scenario is that in which we want to know if an instruction
> is writing a full scalar component or just some subset of it. This is
> useful, for example, in the context of some optimization passes
> like copy propagation.
> 
> For 32-bit instructions (or larger), a SIMD8 dispatch will always write
> at least a full SIMD8 register (32B) if the write is not partial. The
> function is_partial_write() checks this to determine if we have a partial
> write. However, when we deal with 16-bit instructions, that logic disables
> some optimizations that should be safe. For example, a SIMD8 16-bit MOV will
> only update half of a SIMD register, but it is still a complete write of the
> variable for a SIMD8 dispatch, so we should not prevent copy propagation in
> this scenario because we don't write all 32 bytes in the SIMD register
> or because the write starts at offset 16B (where we pack components Y or
> W of 16-bit vectors).
> 
> This is a problem for SIMD8 executions (VS, TCS, TES, GS) of 16-bit
> instructions, which lose a number of optimizations because of this, most
> important of which is copy-propagation.
> 
> This patch splits is_partial_write() into is_partial_reg_write(), which
> represents the current is_partial_write(), useful for things like
> liveness analysis, and is_partial_var_write(), which considers
> the dispatch size to check if we are writing a full variable (rather
> than a full register) to decide if the write is partial or not, which
> is what we really want in many optimization passes.
> 
> Then the patch goes on and rewrites all uses of is_partial_write() to use
> one or the other version. Specifically, we use is_partial_var_write()
> in the following places: copy propagation, cmod propagation, common
> subexpression elimination, saturate propagation and sel peephole.
> 
> Notice that the semantics of is_partial_var_write() exactly match the
> current implementation of is_partial_write() for anything that is
> 32-bit or larger, so no changes are expected for 32-bit instructions.
> 
> Tested against ~5000 tests involving 16-bit instructions in CTS produced
> the following changes in instruction counts:
> 
> Patched  | Master|%|
> 
> SIMD8  |621,900  |706,721| -12.00% |
> 
> SIMD16 | 93,252  | 93,252|   0.00% |
> 
> 
> As expected, the change only affects SIMD8 dispatches.

I like this. But I think I want to try and rebase my fp16 work on top to see
if there are any differences in the final assembly between this and my
"register padding" scheme.

> ---
>  src/intel/compiler/brw_fs.cpp | 31 +++
>  .../compiler/brw_fs_cmod_propagation.cpp  | 20 ++--
>  .../compiler/brw_fs_copy_propagation.cpp  |  8 ++---
>  src/intel/compiler/brw_fs_cse.cpp |  3 +-
>  .../compiler/brw_fs_dead_code_eliminate.cpp   |  2 +-
>  src/intel/compiler/brw_fs_live_variables.cpp  |  2 +-
>  src/intel/compiler/brw_fs_reg_allocate.cpp|  2 +-
>  .../compiler/brw_fs_register_coalesce.cpp |  2 +-
>  .../compiler/brw_fs_saturate_propagation.cpp  |  7 +++--
>  src/intel/compiler/brw_fs_sel_peephole.cpp|  4 +--
>  src/intel/compiler/brw_ir_fs.h|  3 +-
>  11 files changed, 54 insertions(+), 30 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 1d5d1dd0d22..9ea67975e1e 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -698,14 +698,33 @@ fs_visitor::limit_dispatch_width(unsigned n, const char 
> *msg)
>   * it.
>   */
>  bool
> -fs_inst::is_partial_write() const
> +fs_inst::is_partial_reg_write() const
>  {
> return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
> -   (this->exec_size * type_sz(this->dst.type)) < 32 ||
> !this->dst.is_contiguous() ||
> +   (this->exec_size * type_sz(this->dst.type)) < REG_SIZE ||
> this->dst.offset % REG_SIZE != 0);
>  }
>  
> +/**
> + * Returns true if the instruction has a flag that means it won't
> + * update an entire variable for the given dispatch width.
> + *
> + * This is only different from is_partial_reg_write() for SIMD8
> + * dispatches of 16-bit (or smaller) instructions.
> + */
> +bool
> +fs_inst::is_partial_var_write(uint32_t dispatch_width) const
> +{
> +   const uint32_t type_size 

Re: [Mesa-dev] [PATCH 34/59] intel/compiler: fix ddy for half-float in gen8

2018-12-07 Thread Iago Toral
On Fri, 2018-12-07 at 15:06 +0200, Pohjolainen, Topi wrote:
> On Tue, Dec 04, 2018 at 08:16:58AM +0100, Iago Toral Quiroga wrote:
> > We use Align16 mode for this, since it is more convenient, but the
> > PRM
> > for Broadwell states in Volume 3D Media GPGPU, Chapter 'Register
> > region
> > restrictions', Section '1. Special Restrictions':
> > 
> >"In Align16 mode, the channel selects and channel enables apply
> > to a
> > pair of half-floats, because these parameters are defined for
> > DWord
> > elements ONLY. This is applicable when both source and
> > destination
> > are half-floats."
> > 
> > This means that we cannot select individual HF elements using
> > swizzles
> > like we do with 32-bit floats so we can't implement the required
> > regioning for this.
> > 
> > Use the gen11 path for this instead, which uses Align1 mode.
> > 
> > The restriction is not present in gen9 of gen10, where the Align16
> 
>  or?

Right, the issue is exclusive to gen8.

Iago

> > implementation seems to work just fine.
> > ---
> >  src/intel/compiler/brw_fs_generator.cpp | 10 --
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> > b/src/intel/compiler/brw_fs_generator.cpp
> > index d8e4bae17e0..ba7ed07e692 100644
> > --- a/src/intel/compiler/brw_fs_generator.cpp
> > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > @@ -1281,8 +1281,14 @@ fs_generator::generate_ddy(const fs_inst
> > *inst,
> > const uint32_t type_size = type_sz(src.type);
> >  
> > if (inst->opcode == FS_OPCODE_DDY_FINE) {
> > -  /* produce accurate derivatives */
> > -  if (devinfo->gen >= 11) {
> > +  /* produce accurate derivatives. We can do this easily in
> > Align16
> > +   * but this is not supported in gen11+ and gen8 Align16
> > swizzles
> > +   * for Half-Float operands work in units of 32-bit and
> > always
> > +   * select pairs of consecutive half-float elements, so we
> > can't use
> > +   * use it for this.
> > +   */
> > +  if (devinfo->gen >= 11 ||
> > +  (devinfo->gen == 8 && src.type == BRW_REGISTER_TYPE_HF))
> > {
> >   src = stride(src, 0, 2, 1);
> >   struct brw_reg src_0  = byte_offset(src,  0 * type_size);
> >   struct brw_reg src_2  = byte_offset(src,  2 * type_size);
> > -- 
> > 2.17.1
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/25] amd/common: scan/reduce across waves of a workgroup

2018-12-07 Thread Haehnle, Nicolai
On 06.12.18 15:20, Connor Abbott wrote:
> Is this going to be used by an extension? If you don't have a use for
> it yet, it would probably be better to wait.

Well, I have been using it quite extensively in a branch I've been 
working on, but that's not quite ready yet.

Cheers,
Nicolai


> On Thu, Dec 6, 2018 at 3:01 PM Nicolai Hähnle  wrote:
>>
>> From: Nicolai Hähnle 
>>
>> Order-aware scan/reduce can trade-off LDS traffic for external atomics
>> memory traffic in producer/consumer compute shaders.
>> ---
>>   src/amd/common/ac_llvm_build.c | 195 -
>>   src/amd/common/ac_llvm_build.h |  36 ++
>>   2 files changed, 227 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
>> index 68c8bad9e83..932f4bbdeef 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -3345,68 +3345,88 @@ ac_build_alu_op(struct ac_llvm_context *ctx, 
>> LLVMValueRef lhs, LLVMValueRef rhs,
>>  _64bit ? ctx->f64 : ctx->f32,
>>  (LLVMValueRef[]){lhs, rhs}, 2, 
>> AC_FUNC_ATTR_READNONE);
>>  case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
>>  case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, "");
>>  case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, "");
>>  default:
>>  unreachable("bad reduction intrinsic");
>>  }
>>   }
>>
>> -/* TODO: add inclusive and excluse scan functions for SI chip class.  */
>> +/**
>> + * \param maxprefix specifies that the result only needs to be correct for a
>> + * prefix of this many threads
>> + *
>> + * TODO: add inclusive and excluse scan functions for SI chip class.
>> + */
>>   static LLVMValueRef
>> -ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, 
>> LLVMValueRef identity)
>> +ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, 
>> LLVMValueRef identity,
>> + unsigned maxprefix)
>>   {
>>  LLVMValueRef result, tmp;
>>  result = src;
>> +   if (maxprefix <= 1)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, 
>> false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 2)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, 
>> false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 3)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, 
>> false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 4)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, 
>> false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 8)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, 
>> false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 16)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 
>> 0xf, false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>> +   if (maxprefix <= 32)
>> +   return result;
>>  tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 
>> 0xf, false);
>>  result = ac_build_alu_op(ctx, result, tmp, op);
>>  return result;
>>   }
>>
>>   LLVMValueRef
>>   ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, 
>> nir_op op)
>>   {
>>  ac_build_optimization_barrier(ctx, );
>>  LLVMValueRef result;
>>  LLVMValueRef identity =
>>  get_reduction_identity(ctx, op, 
>> ac_get_type_size(LLVMTypeOf(src)));
>>  result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, 
>> src, identity),
>>LLVMTypeOf(identity), "");
>> -   result = ac_build_scan(ctx, op, result, identity);
>> +   result = ac_build_scan(ctx, op, result, identity, 64);
>>
>>  return ac_build_wwm(ctx, result);
>>   }
>>
>>   LLVMValueRef
>>   ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, 
>> nir_op op)
>>   {
>>  ac_build_optimization_barrier(ctx, );
>>  LLVMValueRef result;
>>  LLVMValueRef identity =
>>  get_reduction_identity(ctx, op, 
>> ac_get_type_size(LLVMTypeOf(src)));
>>  result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, 
>> src, identity),
>>LLVMTypeOf(identity), "");
>>  result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, 
>> false);
>> -   result = 

[Mesa-dev] [PATCH] meson: Add nir_algebraic_parser_test to suites

2018-12-07 Thread Dylan Baker
Just to make it easier to run the nir tests together.

Fixes: a0ae12ca91a45f81897e774019cde9bd081f03a0
   ("nir/algebraic: Add unit tests for bitsize validation")
---
 src/compiler/nir/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index e86c97b8864..b0ca27cb700 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -268,5 +268,6 @@ if with_tests
 args : [
   join_paths(meson.current_source_dir(), 'tests/algebraic_parser_test.py')
 ],
+suite : ['compiler', 'nir'],
   )
 endif
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/59] intel/compiler: implement 16-bit fsign

2018-12-07 Thread Jason Ekstrand
I think it's probably less code to just make a separate 16-bit case.

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/compiler/brw_fs_nir.cpp | 27 +--
>  1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 3eba8a478f5..559b55a0f84 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -938,14 +938,29 @@ fs_visitor::nir_emit_alu(const fs_builder ,
> nir_alu_instr *instr)
>* Predicated OR ORs 1.0 (0x3f80) with the sign bit if val
> is not
>* zero.
>*/
> - bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f),
> BRW_CONDITIONAL_NZ);
> + fs_reg zero, one_mask, sign_mask;
> + brw_reg_type reg_type;
> + if (type_sz(op[0].type) == 4) {
> +zero = brw_imm_f(0.0f);
> +one_mask = brw_imm_ud(0x3f80);
> +sign_mask = brw_imm_ud(0x8000);
> +reg_type = BRW_REGISTER_TYPE_UD;
> + } else {
> +assert(type_sz(op[0].type) == 2);
> +zero = retype(brw_imm_uw(0), BRW_REGISTER_TYPE_HF);
> +one_mask = brw_imm_uw(0x3c00);
> +sign_mask = brw_imm_uw(0x8000);
> +reg_type = BRW_REGISTER_TYPE_UW;
> + }
> +
> + bld.CMP(bld.null_reg_f(), op[0], zero, BRW_CONDITIONAL_NZ);
>
> - fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
> - op[0].type = BRW_REGISTER_TYPE_UD;
> - result.type = BRW_REGISTER_TYPE_UD;
> - bld.AND(result_int, op[0], brw_imm_ud(0x8000u));
> + fs_reg result_int = retype(result, reg_type);
> + op[0].type = reg_type;
> + result.type = reg_type;
> + bld.AND(result_int, op[0], sign_mask);
>
> - inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f80u));
> + inst = bld.OR(result_int, result_int, one_mask);
>   inst->predicate = BRW_PREDICATE_NORMAL;
>} else {
>   /* For doubles we do the same but we need to consider:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 23/59] intel/compiler: Extended Math is limited to SIMD8 on half-float

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> From the Skylake PRM, Extended Math Function:
>
>   "The execution size must be no more than 8 when half-floats
>are used in source or destination operand."
>
> Earlier generations do not support Extended Math with half-float.
> ---
>  src/intel/compiler/brw_fs.cpp | 30 +++---
>  1 file changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 43b920ae33d..509c6febf38 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -5386,18 +5386,34 @@ get_lowered_simd_width(const struct
> gen_device_info *devinfo,
> case SHADER_OPCODE_EXP2:
> case SHADER_OPCODE_LOG2:
> case SHADER_OPCODE_SIN:
> -   case SHADER_OPCODE_COS:
> +   case SHADER_OPCODE_COS: {
>/* Unary extended math instructions are limited to SIMD8 on Gen4 and
> * Gen6.
> */
> -  return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
> -  devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16,
> inst->exec_size) :
> -  MIN2(8, inst->exec_size));
> +  unsigned max_width =
> + (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
> +  devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16,
> inst->exec_size) :
> +  MIN2(8, inst->exec_size));
>

Curro went to a lot of work to structure this as a nested ternary.  I agree
that that isn't really holding up anymore but if we're going to break with
it, let's break with it in a more readable way:

if (devinfo->gen == 6 || (devinfo->gen == 4 && !devinfo->is_g4x))
   return MIN2(8, inst->exec_size);
if (inst->dst.type == BRW_REGISTER_TYPE_HF)
   return MIN2(8, inst->exec_size);
else
   return MIN2(16, inst->exec_size);


> -   case SHADER_OPCODE_POW:
> +  /* Extended Math Function is limited to SIMD8 with half-float */
> +  if (inst->dst.type == BRW_REGISTER_TYPE_HF)
> + max_width = MIN2(max_width, 8);
> +
> +  return max_width;
> +   }
> +
> +   case SHADER_OPCODE_POW: {
>/* SIMD16 is only allowed on Gen7+. */
> -  return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
> -  MIN2(8, inst->exec_size));
> +  unsigned max_width =
> +  (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
> +   MIN2(8, inst->exec_size));
> +
> +  /* Extended Math Function is limited to SIMD8 with half-float */
> +  if (inst->dst.type == BRW_REGISTER_TYPE_HF)
> + max_width = MIN2(max_width, 8);
> +
> +  return max_width;
> +   }
>
> case SHADER_OPCODE_INT_QUOTIENT:
> case SHADER_OPCODE_INT_REMAINDER:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Add nir_algebraic_parser_test to suites

2018-12-07 Thread Eric Engestrom
On Friday, 2018-12-07 09:16:38 -0800, Dylan Baker wrote:
> Just to make it easier to run the nir tests together.
> 
> Fixes: a0ae12ca91a45f81897e774019cde9bd081f03a0
>("nir/algebraic: Add unit tests for bitsize validation")

Reviewed-by: Eric Engestrom 

> ---
>  src/compiler/nir/meson.build | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index e86c97b8864..b0ca27cb700 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -268,5 +268,6 @@ if with_tests
>  args : [
>join_paths(meson.current_source_dir(), 
> 'tests/algebraic_parser_test.py')
>  ],
> +suite : ['compiler', 'nir'],
>)
>  endif
> -- 
> 2.19.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/59] compiler/spirv: implement 16-bit frexp

2018-12-07 Thread Jason Ekstrand
Looks the same as what we do for the others.

Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/compiler/spirv/vtn_glsl450.c | 48 ++--
>  1 file changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_glsl450.c
> b/src/compiler/spirv/vtn_glsl450.c
> index 85851755aab..bb340c87416 100644
> --- a/src/compiler/spirv/vtn_glsl450.c
> +++ b/src/compiler/spirv/vtn_glsl450.c
> @@ -389,6 +389,45 @@ build_atan2(nir_builder *b, nir_ssa_def *y,
> nir_ssa_def *x)
>  nir_fneg(b, arc), arc);
>  }
>
> +static nir_ssa_def *
> +build_frexp16(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
> +{
> +   assert(x->bit_size == 16);
> +
> +   nir_ssa_def *abs_x = nir_fabs(b, x);
> +   nir_ssa_def *zero = nir_imm_floatN_t(b, 0, 16);
> +
> +   /* Half-precision floating-point values are stored as
> +*   1 sign bit;
> +*   5 exponent bits;
> +*   10 mantissa bits.
> +*
> +* An exponent shift of 10 will shift the mantissa out, leaving only
> the
> +* exponent and sign bit (which itself may be zero, if the absolute
> value
> +* was taken before the bitcast and shift).
> +*/
> +   nir_ssa_def *exponent_shift = nir_imm_int(b, 10);
> +   nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16);
> +
> +   nir_ssa_def *sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16);
> +
> +   /* Exponent of floating-point values in the range [0.5, 1.0). */
> +   nir_ssa_def *exponent_value = nir_imm_intN_t(b, 0x3800u, 16);
> +
> +   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
> +
> +   /* Significand return must be of the same type as the input, but the
> +* exponent must be a 32-bit integer.
> +*/
> +   *exponent =
> +  nir_i2i32(b,
> +nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
> +nir_bcsel(b, is_not_zero, exponent_bias,
> zero)));
> +
> +   return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
> + nir_bcsel(b, is_not_zero, exponent_value, zero));
> +}
> +
>  static nir_ssa_def *
>  build_frexp32(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
>  {
> @@ -751,8 +790,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>nir_ssa_def *exponent;
>if (src[0]->bit_size == 64)
>   val->ssa->def = build_frexp64(nb, src[0], );
> -  else
> +  else if (src[0]->bit_size == 32)
>   val->ssa->def = build_frexp32(nb, src[0], );
> +  else
> + val->ssa->def = build_frexp16(nb, src[0], );
>nir_store_deref(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
>return;
> }
> @@ -762,9 +803,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>if (src[0]->bit_size == 64)
>   val->ssa->elems[0]->def = build_frexp64(nb, src[0],
>
> >ssa->elems[1]->def);
> -  else
> +  else if (src[0]->bit_size == 32)
>   val->ssa->elems[0]->def = build_frexp32(nb, src[0],
>
> >ssa->elems[1]->def);
> +  else
> + val->ssa->elems[0]->def = build_frexp16(nb, src[0],
> +
>  >ssa->elems[1]->def);
>return;
> }
>
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/59] intel/compiler: simplify f2*64 opcodes

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> Now that this case only handles 64-bit destinations we can simplify
> a bit the code.
>

"the code a bit".  Sorry, English is hard


> ---
>  src/intel/compiler/brw_fs_nir.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 6c765fc2661..3eba8a478f5 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -833,7 +833,7 @@ fs_visitor::nir_emit_alu(const fs_builder ,
> nir_alu_instr *instr)
> * 64-bit need to have the source data elements aligned to 64-bit.
> * This restriction does not apply to BDW and later.
> */
> -  if (type_sz(result.type) == 8 && type_sz(op[0].type) < 8 &&
> +  if (type_sz(op[0].type) < 8 &&
>(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
>   fs_reg tmp = bld.vgrf(result.type, 1);
>   tmp = subscript(tmp, op[0].type, 0);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/28] util: added float to float16 conversions with RTZ and RTNE

2018-12-07 Thread Roland Scheidegger
Am 07.12.18 um 05:22 schrieb Matt Turner:
> On Thu, Dec 6, 2018 at 7:22 PM Roland Scheidegger  wrote:
>>
>> Am 07.12.18 um 03:20 schrieb Matt Turner:
>>> Since this is for an extension that will be BDW+ can we use the
>>> _cvtss_sh() intrinsic instead? It corresponds to an IVB+ instruction
>>> and even takes the rounding mode directly as an immediate argument.
>>
>> Not saying trying to use it isn't a good idea, but you'd need the right
>> compile flags, and you can't assume it's present, since even the latest
>> pentiums don't support avx (and by extension, f16c). (The same is true
>> for atoms too, of course).
> 
> I'm not sure that AVX and F16C are related, but from a quick glance it
> seems that you're right that Atoms ("little core") doesn't support
> F16C. I had no idea :(
> 
> As far as I can tell all "big cores" have F16C. That's what
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgcc.gnu.org%2Fonlinedocs%2Fgcc%2Fx86-Options.html&data=02%7C01%7Csroland%40vmware.com%7Ca977fe6f49144fb22be608d65bfbb280%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636797533925838415&sdata=oyAmOqL3xyDJ4pWo7jpduH4XawLuSKJf432K7X31094%3D&reserved=0
>  indicates.
That also indicates SNB and up all have AVX. Despite that,
Pentiums/Celerons from those families definitely do not. (I suppose that
means cputype=ivybridge etc. can't be used if you target the
pentiums/celerons, at least not for gcc. I know this was a recurring
problem for llvm with autodetection of the cpu type, when it would recognize
a newer core and then try to use avx / avx2 on pentiums, dying in a fire.)
That f16c is tied implicitly to avx seems obvious without a doubt, since
the instructions (VCVTPH2PS, VCVTPS2PH) only exist with VEX encoding.
You cannot issue VEX-encoded instructions without AVX (VEX-encoding _is_
AVX, regardless if you use the 128bit or 256bit variants).
If you don't like that pentiums don't support those, complain to intel
(as it's just disabled, of course). IMHO it's a bit silly nowadays...

> 
> If we've got to have the code, we might as well use it and not
> complicate it by using _cvtss_sh() then. Dang.
> 
> (Unfortunately there seems to be bad information out there confusing
> things though... see 
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fcommunities.intel.com%2Fthread%2F121635&data=02%7C01%7Csroland%40vmware.com%7Ca977fe6f49144fb22be608d65bfbb280%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636797533925838415&sdata=KOCiTY%2BLWFc1eu7iMPWPm2PALY7Bl%2FNaEoVk%2FP%2BAvaw%3D&reserved=0)

Quite sure this is blatantly false. Seems even intel is confused about
it :-).

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/38] ac/nir: implement 16-bit shifts

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 517da7ba9b..aac3330c0d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -672,20 +672,17 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
case nir_op_ishl:
result = LLVMBuildShl(ctx->ac.builder, src[0],
- LLVMBuildZExt(ctx->ac.builder, src[1],
-   LLVMTypeOf(src[0]), ""),
+ ac_build_ui_cast(>ac, src[1], 
LLVMTypeOf(src[0])),
  "");
break;
case nir_op_ishr:
result = LLVMBuildAShr(ctx->ac.builder, src[0],
-  LLVMBuildZExt(ctx->ac.builder, src[1],
-LLVMTypeOf(src[0]), ""),
+  ac_build_ui_cast(>ac, src[1], 
LLVMTypeOf(src[0])),
   "");
break;
case nir_op_ushr:
result = LLVMBuildLShr(ctx->ac.builder, src[0],
-  LLVMBuildZExt(ctx->ac.builder, src[1],
-LLVMTypeOf(src[0]), ""),
+  ac_build_ui_cast(>ac, src[1], 
LLVMTypeOf(src[0])),
   "");
break;
case nir_op_ilt:
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 37/38] ac/nir: have nir_op_f2f16 round to zero

2018-12-07 Thread Rhys Perry
In the hope that one day LLVM will then be able to generate code with
vectorized v_cvt_pkrtz_f16_f32 instructions.

Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 92b773981b..88b26e019f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -884,6 +884,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], 
ac_to_float_type(>ac, def_type), "");
break;
case nir_op_f2f16_rtz:
+   case nir_op_f2f16:
src[0] = ac_to_float(>ac, src[0]);
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], 
ctx->ac.f32, "");
@@ -894,7 +895,6 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i16, 
"");
break;
case nir_op_f2f16_rtne:
-   case nir_op_f2f16:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(>ac, src[0]);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 30/38] ac/nir: add 8-bit and 16-bit types to glsl_base_to_llvm_type

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d69135cc25..e4ae85a1ae 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3961,11 +3961,19 @@ glsl_base_to_llvm_type(struct ac_llvm_context *ac,
   enum glsl_base_type type)
 {
switch (type) {
+   case GLSL_TYPE_INT8:
+   case GLSL_TYPE_UINT8:
+   return ac->i8;
+   case GLSL_TYPE_INT16:
+   case GLSL_TYPE_UINT16:
+   return ac->i16;
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SUBROUTINE:
return ac->i32;
+   case GLSL_TYPE_FLOAT16:
+   return ac->f16;
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
return ac->f32;
case GLSL_TYPE_INT64:
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/38] compiler/nir: add lowering option for 16-bit ffma

2018-12-07 Thread Rhys Perry
The lowering needs to be disabled for sufficient precision to pass
deqp-vk's 16-bit fma test on radv.

Signed-off-by: Rhys Perry 
---
 src/broadcom/compiler/nir_to_vir.c| 1 +
 src/compiler/nir/nir.h| 1 +
 src/compiler/nir/nir_opt_algebraic.py | 4 +++-
 src/gallium/drivers/radeonsi/si_get.c | 1 +
 src/gallium/drivers/vc4/vc4_program.c | 1 +
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c 
b/src/broadcom/compiler/nir_to_vir.c
index 57be43d724..ec73ed269d 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1975,6 +1975,7 @@ const nir_shader_compiler_options v3d_nir_options = {
 .lower_fdiv = true,
 .lower_find_lsb = true,
 .lower_ffma = true,
+.lower_ffma16 = true,
 .lower_flrp32 = true,
 .lower_fpow = true,
 .lower_fsat = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e731653afc..0e40d6a97d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2072,6 +2072,7 @@ typedef struct nir_function {
 
 typedef struct nir_shader_compiler_options {
bool lower_fdiv;
+   bool lower_ffma16;
bool lower_ffma;
bool fuse_ffma;
bool lower_flrp16;
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index e3821320e8..c2a8b61d92 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -133,7 +133,9 @@ optimizations = [
(('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a, ('bcsel', c, 
b, a), 'options->lower_flrp32'),
(('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a, ('flrp', 
a, b, c), '!options->lower_flrp32'),
(('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a, ('flrp', 
a, b, c), '!options->lower_flrp64'),
-   (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 
'options->lower_ffma16'),
+   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
 
(('fdot4', ('vec4', a, b,   c,   1.0), d), ('fdph',  ('vec3', a, b, c), d)),
diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index 91f38329d5..d295821d65 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -497,6 +497,7 @@ static const struct nir_shader_compiler_options nir_options 
= {
.lower_fdiv = true,
.lower_sub = true,
.lower_ffma = true,
+   .lower_ffma16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_2x16 = true,
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index b98baca30c..9e8c6607cc 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2234,6 +2234,7 @@ static const nir_shader_compiler_options nir_options = {
 .lower_extract_word = true,
 .lower_fdiv = true,
 .lower_ffma = true,
+.lower_ffma16 = true,
 .lower_flrp32 = true,
 .lower_fpow = true,
 .lower_fsat = true,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 35/38] ac,radv: run LLVM's SLP vectorizer

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_util.c | 9 ++---
 src/amd/common/ac_llvm_util.h | 1 +
 src/amd/vulkan/radv_shader.c  | 3 +++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index dc9b684e9d..3219126188 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -33,6 +33,7 @@
 #if HAVE_LLVM >= 0x0700
 #include 
 #endif
+#include 
 #include "c11/threads.h"
 #include "gallivm/lp_bld_misc.h"
 #include "util/u_math.h"
@@ -177,7 +178,7 @@ static LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family,
 }
 
 static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef 
target_library_info,
-   bool check_ir)
+   enum ac_target_machine_options 
tm_options)
 {
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
if (!passmgr)
@@ -187,7 +188,7 @@ static LLVMPassManagerRef 
ac_create_passmgr(LLVMTargetLibraryInfoRef target_libr
LLVMAddTargetLibraryInfo(target_library_info,
 passmgr);
 
-   if (check_ir)
+   if (tm_options & AC_TM_CHECK_IR)
LLVMAddVerifierPass(passmgr);
LLVMAddAlwaysInlinerPass(passmgr);
/* Normally, the pass manager runs all passes on one function before
@@ -203,6 +204,8 @@ static LLVMPassManagerRef 
ac_create_passmgr(LLVMTargetLibraryInfoRef target_libr
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
+   if (tm_options & AC_TM_SLP_VECTORIZE)
+   LLVMAddSLPVectorizePass(passmgr);
/* This is recommended by the instruction combining pass. */
LLVMAddEarlyCSEMemSSAPass(passmgr);
LLVMAddInstructionCombiningPass(passmgr);
@@ -332,7 +335,7 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
}
 
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
- tm_options & AC_TM_CHECK_IR);
+ tm_options);
if (!compiler->passmgr)
goto fail;
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index eaf5f21876..35b12c6c6c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
AC_TM_CHECK_IR = (1 << 5),
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
AC_TM_CREATE_LOW_OPT = (1 << 7),
+   AC_TM_SLP_VECTORIZE = (1 << 8),
 };
 
 enum ac_float_mode {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 9ba20ac72e..a2ddf17680 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -598,6 +598,9 @@ shader_variant_create(struct radv_device *device,
tm_options |= AC_TM_SISCHED;
if (options->check_ir)
tm_options |= AC_TM_CHECK_IR;
+   /* vectorization is disabled on pre-GFX9 because it's not very useful 
there */
+   if (device->physical_device->rad_info.chip_class >= GFX9)
+   tm_options |= AC_TM_SLP_VECTORIZE;
 
thread_compiler = !(device->instance->debug_flags & 
RADV_DEBUG_NOTHREADLLVM);
radv_init_llvm_once();
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/38] nir: make bitfield_reverse and ifind_msb work with all integers

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/compiler/nir/nir_opcodes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 4ef4ecc6f2..962971c650 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -318,7 +318,7 @@ unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, 
"src0 >> 32")
 # Bit operations, part of ARB_gpu_shader5.
 
 
-unop("bitfield_reverse", tuint32, """
+unop("bitfield_reverse", tuint, """
 /* we're not winning any awards for speed here, but that's ok */
 dst = 0;
 for (unsigned bit = 0; bit < 32; bit++)
@@ -342,7 +342,7 @@ for (int bit = bit_size - 1; bit >= 0; bit--) {
 }
 """)
 
-unop("ifind_msb", tint32, """
+unop_convert("ifind_msb", tint32, tint, """
 dst = -1;
 for (int bit = 31; bit >= 0; bit--) {
/* If src0 < 0, we're looking for the first 0 bit.
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 33/38] radv: store all fragment shader inputs as f32

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index e5e4637f0d..3d367c1378 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2093,7 +2093,6 @@ static void interp_fs_input(struct radv_shader_context 
*ctx,
LLVMValueRef attr_number;
unsigned chan;
LLVMValueRef i, j;
-   bool interp = !LLVMIsUndef(interp_param);
 
attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
 
@@ -2107,7 +2106,7 @@ static void interp_fs_input(struct radv_shader_context 
*ctx,
 * fs.interp cannot be used on integers, because they can be equal
 * to NaN.
 */
-   if (interp) {
+   if (interp_param) {
interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
ctx->ac.v2f32, "");
 
@@ -2120,7 +2119,7 @@ static void interp_fs_input(struct radv_shader_context 
*ctx,
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
-   if (interp) {
+   if (interp_param) {
result[chan] = ac_build_fs_interp(&ctx->ac,
  llvm_chan,
  attr_number,
@@ -2132,7 +2131,6 @@ static void interp_fs_input(struct radv_shader_context 
*ctx,
  attr_number,
  prim_mask);
result[chan] = LLVMBuildBitCast(ctx->ac.builder, 
result[chan], ctx->ac.i32, "");
-   result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, 
result[chan], LLVMTypeOf(interp_param), "");
}
}
 }
@@ -2160,10 +2158,6 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 
interp = lookup_interp_param(&ctx->abi, 
variable->data.interpolation, interp_type);
}
-   bool is_16bit = glsl_type_is_16bit(variable->type);
-   LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
-   if (interp == NULL)
-   interp = LLVMGetUndef(type);
 
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
@@ -2224,7 +2218,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (ctx->shader_info->info.ps.uses_input_attachments ||
ctx->shader_info->info.needs_multiview_view_index) {
ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
-   ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = 
LLVMGetUndef(ctx->ac.i32);
+   ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = 
NULL;
}
 
for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
@@ -2240,7 +2234,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
interp_fs_input(ctx, index, interp_param, 
ctx->abi.prim_mask,
inputs);
 
-   if (LLVMIsUndef(interp_param))
+   if (!interp_param)
ctx->shader_info->fs.flat_shaded_mask |= 1u << 
index;
++index;
} else if (i == VARYING_SLOT_CLIP_DIST0) {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/38] ac/nir: implement 8 and 16 bit ac_build_imsb

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0123f3e31d..2172d81f8b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1640,6 +1640,10 @@ ac_build_imsb(struct ac_llvm_context *ctx,
  LLVMValueRef arg,
  LLVMTypeRef dst_type)
 {
+   //TODO: support 64-bit integers
+   if (LLVMTypeOf(arg) != ctx->i32)
+   arg = LLVMBuildSExt(ctx->builder, arg, ctx->i32, "");
+
LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
  dst_type, &arg, 1,
  AC_FUNC_ATTR_READNONE);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/38] ac/nir: make ac_build_umsb work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 38 +++---
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 754ceda89b..0123f3e31d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1664,36 +1664,12 @@ ac_build_umsb(struct ac_llvm_context *ctx,
  LLVMValueRef arg,
  LLVMTypeRef dst_type)
 {
-   const char *intrin_name;
-   LLVMTypeRef type;
-   LLVMValueRef highest_bit;
-   LLVMValueRef zero;
-   unsigned bitsize;
-
-   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
-   switch (bitsize) {
-   case 64:
-   intrin_name = "llvm.ctlz.i64";
-   type = ctx->i64;
-   highest_bit = LLVMConstInt(ctx->i64, 63, false);
-   zero = ctx->i64_0;
-   break;
-   case 32:
-   intrin_name = "llvm.ctlz.i32";
-   type = ctx->i32;
-   highest_bit = LLVMConstInt(ctx->i32, 31, false);
-   zero = ctx->i32_0;
-   break;
-   case 16:
-   intrin_name = "llvm.ctlz.i16";
-   type = ctx->i16;
-   highest_bit = LLVMConstInt(ctx->i16, 15, false);
-   zero = ctx->i16_0;
-   break;
-   default:
-   unreachable(!"invalid bitsize");
-   break;
-   }
+   unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+   LLVMTypeRef type = ac_int_of_size(ctx, bitsize);
+   LLVMValueRef highest_bit = LLVMConstInt(type, bitsize - 1, false);
+   LLVMValueRef zero = ac_get_zero(ctx, type);
+   char intrin_name[64];
+   snprintf(intrin_name, sizeof(intrin_name), "llvm.ctlz.i%d", bitsize);
 
LLVMValueRef params[2] = {
arg,
@@ -1707,7 +1683,7 @@ ac_build_umsb(struct ac_llvm_context *ctx,
/* The HW returns the last bit index from MSB, but TGSI/NIR wants
 * the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
-   msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
+   msb = ac_build_ui_cast(ctx, msb, dst_type);
 
/* check for zero */
return LLVMBuildSelect(ctx->builder,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/38] ac/nir: implement 8 and 16 bit ac_build_readlane

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index f394d16bc9..6266058b77 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -3149,9 +3149,15 @@ ac_build_readlane(struct ac_llvm_context *ctx, 
LLVMValueRef src, LLVMValueRef la
 {
LLVMTypeRef src_type = LLVMTypeOf(src);
src = ac_to_integer(ctx, src);
-   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   unsigned src_bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   unsigned bits = src_bits;
LLVMValueRef ret;
 
+   if (bits < 32) {
+   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+   bits = 32;
+   }
+
if (bits == 32) {
ret = _ac_build_readlane(ctx, src, lane);
} else {
@@ -3168,6 +3174,10 @@ ac_build_readlane(struct ac_llvm_context *ctx, 
LLVMValueRef src, LLVMValueRef la
LLVMConstInt(ctx->i32, i, 0), 
"");
}
}
+
+   if (src_bits < 32)
+   ret = LLVMBuildTrunc(ctx->builder, ret, ac_int_of_size(ctx, 
src_bits), "");
+
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
 }
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/38] ac/nir: make ac_build_bit_count work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 33 +++--
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2172d81f8b..3990a1f56d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2362,35 +2362,16 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context 
*ctx, LLVMValueRef src0,
 
 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
 {
-   LLVMValueRef result;
-   unsigned bitsize;
+   unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
 
-   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+   char name[64];
+   snprintf(name, sizeof(name), "llvm.ctpop.i%d", bitsize);
 
-   switch (bitsize) {
-   case 64:
-   result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
-   (LLVMValueRef []) { src0 }, 1,
-   AC_FUNC_ATTR_READNONE);
-
-   result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
-   break;
-   case 32:
-   result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
-   (LLVMValueRef []) { src0 }, 1,
-   AC_FUNC_ATTR_READNONE);
-   break;
-   case 16:
-   result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
-   (LLVMValueRef []) { src0 }, 1,
-   AC_FUNC_ATTR_READNONE);
-   break;
-   default:
-   unreachable(!"invalid bitsize");
-   break;
-   }
+   LLVMValueRef result = ac_build_intrinsic(ctx, name, LLVMTypeOf(src0),
+(LLVMValueRef []) { src0 }, 1,
+AC_FUNC_ATTR_READNONE);
 
-   return result;
+   return ac_build_ui_cast(ctx, result, ctx->i32);
 }
 
 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 32/38] ac/nir: store all outputs as f32

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c   | 13 -
 src/amd/vulkan/radv_nir_to_llvm.c | 22 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index fa7b8c70f0..b4418af50a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2051,7 +2051,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context 
*ctx,
unreachable("unhandle variable mode");
}
ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
-   return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, 
&instr->dest.ssa), "");
+   return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, 
&instr->dest.ssa));
 }
 
 static void
@@ -2063,7 +2063,7 @@ visit_store_var(struct ac_nir_context *ctx,
LLVMValueRef temp_ptr, value;
int idx = var->data.driver_location;
unsigned comp = var->data.location_frac;
-   LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
+   LLVMValueRef src = get_src(ctx, instr->src[1]);
int writemask = instr->const_index[0];
LLVMValueRef indir_index;
unsigned const_index;
@@ -2082,6 +2082,11 @@ visit_store_var(struct ac_nir_context *ctx,
 
writemask = writemask << comp;
 
+   LLVMTypeRef type = ctx->ac.f32;
+   if (LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMVectorTypeKind)
+   type = LLVMVectorType(ctx->ac.f32, 
LLVMGetVectorSize(LLVMTypeOf(src)));
+   src = ac_build_reinterpret(&ctx->ac, src, type);
+
switch (var->data.mode) {
case nir_var_shader_out:
 
@@ -4008,12 +4013,10 @@ ac_handle_shader_output_decl(struct ac_llvm_context 
*ctx,
}
}
 
-   bool is_16bit = glsl_type_is_16bit(variable->type);
-   LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
for (unsigned i = 0; i < attrib_count; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
abi->outputs[ac_llvm_reg_index_soa(output_loc + i, 
chan)] =
-  ac_build_alloca_undef(ctx, type, "");
+  ac_build_alloca_undef(ctx, ctx->f32, "");
}
}
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index f114a86018..e5e4637f0d 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2340,6 +2340,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 
bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
+   bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->options->key.fs.col_format >> (4 * 
index)) & 0xf;
bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
@@ -2456,16 +2457,8 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
return;
}
 
-   if (is_16bit) {
-   for (unsigned chan = 0; chan < 4; chan++) {
-   values[chan] = LLVMBuildBitCast(ctx->ac.builder, 
values[chan], ctx->ac.i16, "");
-   args->out[chan] = LLVMBuildZExt(ctx->ac.builder, 
values[chan], ctx->ac.i32, "");
-   }
-   } else
-   memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-
-   for (unsigned i = 0; i < 4; ++i)
-   args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
+   for (unsigned chan = 0; chan < 4; chan++)
+   args->out[chan] = ac_build_reinterpret(&ctx->ac, values[chan], 
ctx->ac.f32);
 }
 
 static void
@@ -3172,9 +3165,12 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
if (i < FRAG_RESULT_DATA0)
continue;
 
-   for (unsigned j = 0; j < 4; j++)
-   values[j] = ac_to_float(&ctx->ac,
-   radv_load_output(ctx, i, j));
+   for (unsigned j = 0; j < 4; j++) {
+   values[j] = radv_load_output(ctx, i, j);
+   unsigned index = ac_llvm_reg_index_soa(i, 0);
+   LLVMTypeRef new_type = ctx->abi.output_types[index];
+   values[j] = ac_build_reinterpret(&ctx->ac, values[j], 
new_type);
+   }
 
bool ret = si_export_mrt_color(ctx, values,
   i - FRAG_RESULT_DATA0,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 34/38] radv: handle all fragment output types

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 55 ---
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 3d367c1378..342b79274a 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2332,9 +2332,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
if (!values)
return;
 
-   bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
-   bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->options->key.fs.col_format >> (4 * 
index)) & 0xf;
bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
@@ -2345,6 +2343,28 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef 
args[2],
  unsigned bits, bool hi) = NULL;
 
+   if (LLVMTypeOf(values[0]) == ctx->ac.f16 &&
+   col_format != V_028714_SPI_SHADER_FP16_ABGR) {
+   for (unsigned chan = 0; chan < 4; chan++)
+   values[chan] = LLVMBuildFPExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.f32, "");
+   }
+
+   if (LLVMTypeOf(values[0]) == ctx->ac.i16 || 
LLVMTypeOf(values[0]) == ctx->ac.i8) {
+   if (col_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+   for (unsigned chan = 0; chan < 4; chan++)
+   values[chan] = 
LLVMBuildSExt(ctx->ac.builder,
+
values[chan],
+
ctx->ac.i32, "");
+   } else {
+   for (unsigned chan = 0; chan < 4; chan++)
+   values[chan] = 
LLVMBuildZExt(ctx->ac.builder,
+
values[chan],
+
ctx->ac.i32, "");
+   }
+   }
+
switch(col_format) {
case V_028714_SPI_SHADER_ZERO:
args->enabled_channels = 0; /* writemask */
@@ -2370,12 +2390,16 @@ si_llvm_init_export_args(struct radv_shader_context 
*ctx,
 
case V_028714_SPI_SHADER_FP16_ABGR:
args->enabled_channels = 0x5;
-   packf = ac_build_cvt_pkrtz_f16;
-   if (is_16bit) {
-   for (unsigned chan = 0; chan < 4; chan++)
-   values[chan] = 
LLVMBuildFPExt(ctx->ac.builder,
- 
values[chan],
- 
ctx->ac.f32, "");
+   if (LLVMTypeOf(values[0]) == ctx->ac.f16) {
+   packi = ac_build_cvt_pk_u16;
+   for (unsigned chan = 0; chan < 4; chan++) {
+   values[chan] = ac_to_integer(&ctx->ac, 
values[chan]);
+   values[chan] = 
LLVMBuildZExt(ctx->ac.builder,
+
values[chan],
+
ctx->ac.i32, "");
+   }
+   } else {
+   packf = ac_build_cvt_pkrtz_f16;
}
break;
 
@@ -2392,23 +2416,11 @@ si_llvm_init_export_args(struct radv_shader_context 
*ctx,
case V_028714_SPI_SHADER_UINT16_ABGR:
args->enabled_channels = 0x5;
packi = ac_build_cvt_pk_u16;
-   if (is_16bit) {
-   for (unsigned chan = 0; chan < 4; chan++)
-   values[chan] = 
LLVMBuildZExt(ctx->ac.builder,
- 
ac_to_integer(&ctx->ac, values[chan]),
- 
ctx->ac.i32, "");
-   }
break;
 
case V_028714_SPI_SHADER_SINT16_ABGR:
args->enabled_channels = 0x5;
packi = ac_build_cvt_pk_i16;
-   if (is_16bit) {
-   for (unsigned chan = 

[Mesa-dev] [PATCH 28/38] ac/nir: make ac_build_bitfield_reverse work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 26 ++
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 3990a1f56d..68ea6078d3 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2377,28 +2377,14 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context 
*ctx, LLVMValueRef src0)
 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
   LLVMValueRef src0)
 {
-   LLVMValueRef result;
-   unsigned bitsize;
-
-   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+   unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
 
-   switch (bitsize) {
-   case 32:
-   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", 
ctx->i32,
-   (LLVMValueRef []) { src0 }, 1,
-   AC_FUNC_ATTR_READNONE);
-   break;
-   case 16:
-   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", 
ctx->i16,
-   (LLVMValueRef []) { src0 }, 1,
-   AC_FUNC_ATTR_READNONE);
-   break;
-   default:
-   unreachable(!"invalid bitsize");
-   break;
-   }
+   char name[64];
+   snprintf(name, sizeof(name), "llvm.bitreverse.i%d", bitsize);
 
-   return result;
+   return ac_build_intrinsic(ctx, name, LLVMTypeOf(src0),
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
 }
 
 #define AC_EXP_TARGET  0
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 29/38] ac/nir: implement 16-bit pack/unpack opcodes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index aac3330c0d..d69135cc25 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1011,6 +1011,30 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
}
 
+   case nir_op_pack_32_2x16_split: {
+   LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+   result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, 
"");
+   break;
+   }
+
+   case nir_op_unpack_32_2x16_split_x: {
+   LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+   ctx->ac.v2i16,
+   "");
+   result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ctx->ac.i32_0, "");
+   break;
+   }
+
+   case nir_op_unpack_32_2x16_split_y: {
+   LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+   ctx->ac.v2i16,
+   "");
+   result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ctx->ac.i32_1, "");
+   break;
+   }
+
case nir_op_cube_face_coord: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/38] ac/nir: make ac_find_lsb work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 31 +--
 1 file changed, 5 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6266058b77..754ceda89b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2752,29 +2752,10 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 LLVMValueRef src0)
 {
unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-   const char *intrin_name;
-   LLVMTypeRef type;
-   LLVMValueRef zero;
-
-   switch (src0_bitsize) {
-   case 64:
-   intrin_name = "llvm.cttz.i64";
-   type = ctx->i64;
-   zero = ctx->i64_0;
-   break;
-   case 32:
-   intrin_name = "llvm.cttz.i32";
-   type = ctx->i32;
-   zero = ctx->i32_0;
-   break;
-   case 16:
-   intrin_name = "llvm.cttz.i16";
-   type = ctx->i16;
-   zero = ctx->i16_0;
-   break;
-   default:
-   unreachable(!"invalid bitsize");
-   }
+   char intrin_name[64];
+   LLVMTypeRef type = ac_int_of_size(ctx, src0_bitsize);
+   LLVMValueRef zero = ac_get_zero(ctx, type);
+   snprintf(intrin_name, sizeof(intrin_name), "llvm.cttz.i%d", 
src0_bitsize);
 
LLVMValueRef params[2] = {
src0,
@@ -2795,9 +2776,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
  params, 2,
  AC_FUNC_ATTR_READNONE);
 
-   if (src0_bitsize == 64) {
-   lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
-   }
+   lsb = ac_build_ui_cast(ctx, lsb, ctx->i32);
 
/* TODO: We need an intrinsic to skip this conditional. */
/* Check for zero: */
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 38/38] radv: expose float16, int16 and int8 features and extensions

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/vulkan/radv_device.c  | 17 +
 src/amd/vulkan/radv_extensions.py |  4 
 src/amd/vulkan/radv_shader.c  |  3 +++
 3 files changed, 24 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ad057a8750..8444651a84 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -848,6 +848,23 @@ void radv_GetPhysicalDeviceFeatures2(
features->geometryStreams = true;
break;
}
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
+   VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
+   (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
+   bool enabled = pdevice->rad_info.chip_class >= VI;
+   features->shaderFloat16 = enabled && HAVE_LLVM >= 
0x0800;
+   features->shaderInt8 = enabled;
+   break;
+   }
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
+   VkPhysicalDevice8BitStorageFeaturesKHR *features =
+   (VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
+   bool enabled = pdevice->rad_info.chip_class >= VI;
+   features->storageBuffer8BitAccess = enabled;
+   features->uniformAndStorageBuffer8BitAccess = enabled;
+   features->storagePushConstant8 = enabled;
+   break;
+   }
default:
break;
}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 6bdf988d11..62c58e98af 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -91,6 +91,8 @@ EXTENSIONS = [
 Extension('VK_KHR_xlib_surface',  6, 
'VK_USE_PLATFORM_XLIB_KHR'),
 Extension('VK_KHR_multiview', 1, True),
 Extension('VK_KHR_display',  23, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
+Extension('VK_KHR_shader_float16_int8',   1, 
'device->rad_info.chip_class >= VI'),
+Extension('VK_KHR_8bit_storage',  1, 
'device->rad_info.chip_class >= VI'),
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
 Extension('VK_EXT_calibrated_timestamps', 1, True),
@@ -117,6 +119,8 @@ EXTENSIONS = [
 Extension('VK_AMD_shader_core_properties',1, True),
 Extension('VK_AMD_shader_info',   1, True),
 Extension('VK_AMD_shader_trinary_minmax', 1, True),
+Extension('VK_AMD_gpu_shader_half_float', 1, 
'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
+Extension('VK_AMD_gpu_shader_int16',  1, 
'device->rad_info.chip_class >= VI'),
 Extension('VK_GOOGLE_decorate_string',1, True),
 Extension('VK_GOOGLE_hlsl_functionality1',1, True),
 ]
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index a2ddf17680..921b9669f0 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -246,6 +246,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
.storage_16bit = true,
.geometry_streams = true,
.transform_feedback = true,
+   .float16 = true,
+   .storage_8bit = true,
+   .int8 = true,
},
};
entry_point = spirv_to_nir(spirv, module->size / 4,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 31/38] ac/nir, radv: create an array of varying output types

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c   | 68 +++
 src/amd/common/ac_shader_abi.h|  1 +
 src/amd/vulkan/radv_nir_to_llvm.c |  3 ++
 3 files changed, 72 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e4ae85a1ae..fa7b8c70f0 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3917,6 +3917,68 @@ static void visit_cf_list(struct ac_nir_context *ctx,
}
 }
 
+static unsigned traverse_var_component_slots(struct ac_llvm_context *ctx, bool 
vs_in,
+struct nir_variable *var, unsigned 
cur_offset,
+const struct glsl_type *cur_type,
+void (*cb)(struct ac_llvm_context 
*, unsigned, enum glsl_base_type, void *),
+void *cbdata)
+{
+   if (glsl_type_is_struct(cur_type)) {
+   for (unsigned i = 0; i < glsl_get_length(cur_type); i++) {
+   const struct glsl_type *ft = 
glsl_get_struct_field(cur_type, i);
+   cur_offset = traverse_var_component_slots(ctx, vs_in, 
var, cur_offset, ft, cb, cbdata);
+   }
+   return (cur_offset + 3) / 4 * 4;
+   }
+
+   enum glsl_base_type base_type = 
glsl_get_base_type(glsl_without_array_or_matrix(cur_type));
+
+   unsigned stride = 
glsl_get_component_slots(glsl_without_array_or_matrix(cur_type));
+   if (!var->data.compact)
+   stride = (stride + 3) / 4 * 4;
+   unsigned arr_len = MAX2(glsl_get_matrix_columns(cur_type), 1);
+   if (glsl_type_is_array(cur_type))
+   arr_len *= glsl_get_aoa_size(cur_type);
+   for (unsigned i = 0; i < arr_len; i++) {
+   for (unsigned j = 0; j < 
glsl_get_component_slots(glsl_without_array_or_matrix(cur_type)); j++) {
+   cb(ctx, cur_offset + var->data.location_frac + j, 
base_type, cbdata);
+   }
+   cur_offset += stride;
+   }
+   return cur_offset;
+}
+
+static void setup_output_type(struct ac_llvm_context *ctx, unsigned index, 
enum glsl_base_type base, void *output_types)
+{
+   LLVMTypeRef type;
+   switch (base) {
+   case GLSL_TYPE_INT8:
+   case GLSL_TYPE_UINT8:
+   type = ctx->i8;
+   break;
+   case GLSL_TYPE_INT16:
+   case GLSL_TYPE_UINT16:
+   type = ctx->i16;
+   break;
+   case GLSL_TYPE_FLOAT16:
+   type = ctx->f16;
+   break;
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_INT64:
+   case GLSL_TYPE_UINT64:
+   type = ctx->i32;
+   break;
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   default:
+   type = ctx->f32;
+   break;
+   }
+   ((LLVMTypeRef*)output_types)[index] = type;
+}
+
 void
 ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
 struct ac_shader_abi *abi,
@@ -3954,6 +4016,9 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
   ac_build_alloca_undef(ctx, type, "");
}
}
+
+   traverse_var_component_slots(ctx, false, variable, output_loc * 4,
+variable->type, &setup_output_type, 
abi->output_types);
 }
 
 static LLVMTypeRef
@@ -4077,6 +4142,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
 
ctx.main_function = 
LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
 
+   for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS * 4; i++)
+   ctx.abi->output_types[i] = ac->i32;
+
nir_foreach_variable(variable, &nir->outputs)
ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
 ctx.stage);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 6b9a91c92a..1d078fc42d 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -69,6 +69,7 @@ struct ac_shader_abi {
LLVMValueRef view_index;
 
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
+   LLVMTypeRef output_types[AC_LLVM_MAX_OUTPUTS * 4];
 
/* For VS and PS: pre-loaded shader inputs.
 *
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 90bcc8dbfe..f114a86018 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3945,6 +3945,9 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler 
*ac_llvm,
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
ac_setup_rings();
 
+   for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS * 4; i++)
+   ctx.abi.output_types[i] = ctx.ac.i32;
+

[Mesa-dev] [PATCH 21/38] ac/nir: implement 16-bit ac_build_ddxy

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index e85c178f78..f394d16bc9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1519,6 +1519,11 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
 
+   int size = ac_get_type_size(LLVMTypeOf(val));
+
+   if (size == 2)
+   val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+
if (HAVE_LLVM >= 0x0700) {
unsigned tl_lanes[4], trbl_lanes[4];
 
@@ -1600,13 +1605,19 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_CONVERGENT);
}
 
-   tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
-   trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
+   if (size == 2) {
+   tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
+   trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
+   }
+
+   LLVMTypeRef type = ac_float_of_size(ctx, size * 8);
+   tl = LLVMBuildBitCast(ctx->builder, tl, type, "");
+   trbl = LLVMBuildBitCast(ctx->builder, trbl, type, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
 
if (HAVE_LLVM >= 0x0700) {
result = ac_build_intrinsic(ctx,
-   "llvm.amdgcn.wqm.f32", ctx->f32,
+   LLVMTypeOf(val) == ctx->f32 ? "llvm.amdgcn.wqm.f32" : 
"llvm.amdgcn.wqm.f16", type,
, 1, 0);
}
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 36/38] ac/nir: generate better code for nir_op_f2f16_rtz

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b4418af50a..92b773981b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -889,7 +889,9 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], 
ctx->ac.f32, "");
LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
result = ac_build_cvt_pkrtz_f16(>ac, param);
-   result = LLVMBuildExtractElement(ctx->ac.builder, result, 
ctx->ac.i32_0, "");
+   // generates better code than an extractelement with slp 
vectorization
+   result = LLVMBuildBitCast(ctx->ac.builder, result, ctx->ac.i32, 
"");
+   result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i16, 
"");
break;
case nir_op_f2f16_rtne:
case nir_op_f2f16:
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/59] intel/compiler: implement conversions from 16-bit float to 64-bit

2018-12-07 Thread Jason Ekstrand
Would it be easier to split it into two instructions in NIR and just
implement the two conversions in the back-end?  I suppose structuring
things this way, it's probably fairly easy to just do it in the back-end.
I guess that's ok.

On Tue, Dec 4, 2018 at 1:17 AM Iago Toral Quiroga  wrote:

> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 41 +++
>  1 file changed, 41 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 6eb68794f58..7294f49ddc0 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -796,6 +796,47 @@ fs_visitor::nir_emit_alu(const fs_builder ,
> nir_alu_instr *instr)
> case nir_op_f2f64:
> case nir_op_f2i64:
> case nir_op_f2u64:
> +  /* BDW PRM, vol02, Command Reference Instructions, mov - MOVE:
> +   *
> +   *   "There is no direct conversion from HF to DF or DF to HF.
> +   *Use two instructions and F (Float) as an intermediate type.
> +   *
> +   *There is no direct conversion from HF to Q/UQ or Q/UQ to HF.
> +   *Use two instructions and F (Float) or a word integer type
> +   *or a DWord integer type as an intermediate type."
> +   */
> +  if (nir_src_bit_size(instr->src[0].src) == 16) {
> + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 1);
> + inst = bld.MOV(tmp, op[0]);
> + inst->saturate = instr->dest.saturate;
> + op[0] = tmp;
> +  }
> +
> +  /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region
> Restrictions:
> +   *
> +   *"When source or destination is 64b (...), regioning in Align1
> +   * must follow these rules:
> +   *
> +   * 1. Source and destination horizontal stride must be aligned
> to
> +   *the same qword.
> +   * (...)"
> +   *
> +   * This means that conversions from bit-sizes smaller than 64-bit to
> +   * 64-bit need to have the source data elements aligned to 64-bit.
> +   * This restriction does not apply to BDW and later.
> +   */
> +  if (type_sz(result.type) == 8 && type_sz(op[0].type) < 8 &&
> +  (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
> + fs_reg tmp = bld.vgrf(result.type, 1);
> + tmp = subscript(tmp, op[0].type, 0);
> + inst = bld.MOV(tmp, op[0]);
> + op[0] = tmp;
> +  }
> +
> +  inst = bld.MOV(result, op[0]);
> +  inst->saturate = instr->dest.saturate;
> +  break;
> +
> case nir_op_i2f64:
> case nir_op_i2i64:
> case nir_op_u2f64:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: implement VK_EXT_sample_locations

2018-12-07 Thread Samuel Pitoiset
Basically, this extension allows applications to use custom
sample locations. This only implements the bare minimum.
It doesn't support variable sample locations during a subpass.

Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS tests now pass.

Only enabled on VI+ because it's untested on older chips.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 177 +-
 src/amd/vulkan/radv_device.c  |  27 +
 src/amd/vulkan/radv_extensions.py |   1 +
 src/amd/vulkan/radv_pipeline.c|  30 +
 src/amd/vulkan/radv_private.h |  26 +++--
 5 files changed, 253 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
dest->viewport.count = src->viewport.count;
dest->scissor.count = src->scissor.count;
dest->discard_rectangle.count = src->discard_rectangle.count;
+   dest->sample_location.count = src->sample_location.count;
 
if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
if (memcmp(>viewport.viewports, >viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
}
}
 
+   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+   if (dest->sample_location.per_pixel != 
src->sample_location.per_pixel ||
+   dest->sample_location.grid_size.width != 
src->sample_location.grid_size.width ||
+   dest->sample_location.grid_size.height != 
src->sample_location.grid_size.height ||
+   memcmp(>sample_location.locations,
+  >sample_location.locations,
+  src->sample_location.count * 
sizeof(VkSampleLocationEXT))) {
+   dest->sample_location.per_pixel = 
src->sample_location.per_pixel;
+   dest->sample_location.grid_size = 
src->sample_location.grid_size;
+   typed_memcpy(dest->sample_location.locations,
+src->sample_location.locations,
+src->sample_location.count);
+   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+   }
+   }
+
cmd_buffer->state.dirty |= dest_mask;
 }
 
@@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
*cmd_buffer,
}
 }
 
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+ uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+   uint32_t x_offset = x % state->grid_size.width;
+   uint32_t y_offset = y % state->grid_size.height;
+   uint32_t num_samples = (uint32_t)state->per_pixel;
+   VkSampleLocationEXT *user_locs;
+   uint32_t pixel_offset;
+
+   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
num_samples;
+
+   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+   user_locs = >locations[pixel_offset];
+
+   for (uint32_t i = 0; i < num_samples; i++) {
+   float shifted_pos_x = user_locs[i].x - 0.5;
+   float shifted_pos_y = user_locs[i].y - 0.5;
+
+   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+   }
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+  uint32_t *sample_locs_pixel)
+{
+   for (uint32_t i = 0; i < num_samples; ++i) {
+   uint32_t sample_reg_idx = i / 4;
+   uint32_t sample_loc_idx = i % 4;
+   int32_t pos_x = sample_locs[i].x;
+   int32_t pos_y = sample_locs[i].y;
+
+   uint32_t shift_x = 8 * sample_loc_idx;
+   uint32_t shift_y = shift_x + 4;
+
+   sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+   sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+   }
+}
+
+/**
+ * Emit the sample locations that are specified with VK_EXT_sample_locations.
+ */
+static void
+radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+   struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   struct radv_multisample_state *ms = >graphics.ms;
+   struct radv_sample_locations_state *sample_location =
+   

[Mesa-dev] [Bug 107565] TypeError: __init__() got an unexpected keyword argument 'future_imports'

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107565

Emil Velikov  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Emil Velikov  ---
Should be fixed with the following commit. Feel free to reopen otherwise.

commit c782168751ec6373c28ebb4b4c39a8f3ae06a075
Author: Dylan Baker 
Date:   Tue Aug 14 10:32:12 2018 -0700

scons: Check for mako 0.8.0

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/59] intel/compiler: lower some 16-bit float operations to 32-bit

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> The hardware doesn't support half-float for these.
> ---
>  src/intel/compiler/brw_nir.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index aa6788b9fe5..e0027f5179c 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -620,6 +620,11 @@ lower_bit_size_callback(const nir_alu_instr *alu,
> UNUSED void *data)
> case nir_op_irem:
> case nir_op_udiv:
> case nir_op_umod:
> +   case nir_op_fceil:
> +   case nir_op_ffloor:
> +   case nir_op_ffract:
> +   case nir_op_fround_even:
> +   case nir_op_ftrunc:
>return 32;
> default:
>return 0;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/59] intel/compiler: handle b2i/b2f with other integer conversion opcodes

2018-12-07 Thread Jason Ekstrand
This'll have to be rebased on dca6cd9ce6510 but otherwise looks fine.  I've
been a bit annoyed by this myself.

Reviewed-by: Jason Ekstrand 

Incidentally, this could also be lowered in NIR.  Not sure if we want to,
but there it is.

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> Since we handle booleans as integers this makes more sense.
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 9f3d3bf9762..6c765fc2661 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -801,11 +801,6 @@ fs_visitor::nir_emit_alu(const fs_builder ,
> nir_alu_instr *instr)
>inst->saturate = instr->dest.saturate;
>break;
>
> -   case nir_op_b2i:
> -   case nir_op_b2f:
> -  op[0].type = BRW_REGISTER_TYPE_D;
> -  op[0].negate = !op[0].negate;
> -  /* fallthrough */
> case nir_op_f2f64:
> case nir_op_f2i64:
> case nir_op_f2u64:
> @@ -850,6 +845,11 @@ fs_visitor::nir_emit_alu(const fs_builder ,
> nir_alu_instr *instr)
>inst->saturate = instr->dest.saturate;
>break;
>
> +   case nir_op_b2i:
> +   case nir_op_b2f:
> +  op[0].type = BRW_REGISTER_TYPE_D;
> +  op[0].negate = !op[0].negate;
> +  /* fallthrough */
> case nir_op_i2f64:
> case nir_op_i2i64:
> case nir_op_u2f64:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102597] [Regression] mpv, high rendering times (two to three times higher)

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102597

Emil Velikov  changed:

   What|Removed |Added

 Status|NEEDINFO|RESOLVED
 Resolution|--- |FIXED

--- Comment #11 from Emil Velikov  ---
The following commit reverts to the original behaviour, so we can close this.
Feel free to reopen if it doesn't help.


commit ea9f95e2a67eca90bb84eea24e7b4b804b3b1345
Author: Marek Olšák 
Date:   Tue Nov 13 16:19:42 2018 -0500

radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/38] ac/nir: make ac_build_clamp work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index cc7c6da5a4..1ef28323d1 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1707,16 +1707,20 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
   LLVMValueRef b)
 {
+   char intr[64];
+   snprintf(intr, sizeof(intr), "llvm.minnum.f%d", ac_get_elem_bits(ctx, 
LLVMTypeOf(a)));
LLVMValueRef args[2] = {a, b};
-   return ac_build_intrinsic(ctx, "llvm.minnum.f32", ctx->f32, args, 2,
+   return ac_build_intrinsic(ctx, intr, LLVMTypeOf(a), args, 2,
  AC_FUNC_ATTR_READNONE);
 }
 
 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
   LLVMValueRef b)
 {
+   char intr[64];
+   snprintf(intr, sizeof(intr), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, 
LLVMTypeOf(a)));
LLVMValueRef args[2] = {a, b};
-   return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2,
+   return ac_build_intrinsic(ctx, intr, LLVMTypeOf(a), args, 2,
  AC_FUNC_ATTR_READNONE);
 }
 
@@ -1743,8 +1747,9 @@ LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, 
LLVMValueRef a,
 
 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
 {
-   return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ctx->f32_0),
-ctx->f32_1);
+   LLVMTypeRef t = LLVMTypeOf(value);
+   return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ac_get_zerof(ctx, 
t)),
+ac_get_onef(ctx, t));
 }
 
 void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/38] ac/nir: implement half-float nir_op_ldexp

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b447da092f..bb7c421606 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -829,8 +829,10 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
case nir_op_ldexp:
src[0] = ac_to_float(>ac, src[0]);
-   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32)
+   if (ac_get_elem_bits(>ac, def_type) == 32)
result = ac_build_intrinsic(>ac, 
"llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
+   else if (ac_get_elem_bits(>ac, def_type) == 16)
+   result = ac_build_intrinsic(>ac, 
"llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
else
result = ac_build_intrinsic(>ac, 
"llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
break;
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/38] ac/nir: make ac_build_isign work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 27 ---
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0a1987c65b..fa5c68d1b6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2342,30 +2342,11 @@ LLVMValueRef ac_build_fract(struct ac_llvm_context 
*ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
 {
-   LLVMValueRef cmp, val, zero, one;
-   LLVMTypeRef type;
-
-   switch (bitsize) {
-   case 64:
-   type = ctx->i64;
-   zero = ctx->i64_0;
-   one = ctx->i64_1;
-   break;
-   case 32:
-   type = ctx->i32;
-   zero = ctx->i32_0;
-   one = ctx->i32_1;
-   break;
-   case 16:
-   type = ctx->i16;
-   zero = ctx->i16_0;
-   one = ctx->i16_1;
-   break;
-   default:
-   unreachable(!"invalid bitsize");
-   break;
-   }
+   LLVMTypeRef type = ac_int_of_size(ctx, bitsize);
+   LLVMValueRef zero = ac_get_zero(ctx, type);
+   LLVMValueRef one = ac_get_one(ctx, type);
 
+   LLVMValueRef cmp, val;
cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/38] ac/nir: make ac_build_fsign work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index fa5c68d1b6..c85f9a214e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2357,19 +2357,11 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context 
*ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
 {
-   LLVMValueRef cmp, val, zero, one;
-   LLVMTypeRef type;
-
-   if (bitsize == 32) {
-   type = ctx->f32;
-   zero = ctx->f32_0;
-   one = ctx->f32_1;
-   } else {
-   type = ctx->f64;
-   zero = ctx->f64_0;
-   one = ctx->f64_1;
-   }
+   LLVMTypeRef type = ac_float_of_size(ctx, bitsize);
+   LLVMValueRef zero = ac_get_zerof(ctx, type);
+   LLVMValueRef one = ac_get_onef(ctx, type);
 
+   LLVMValueRef cmp, val;
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/38] ac: add various helpers for float16/int16/int8

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c  | 123 ++--
 src/amd/common/ac_llvm_build.h  |  22 +-
 src/amd/common/ac_nir_to_llvm.c |  30 
 3 files changed, 154 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 154cc696a2..cc7c6da5a4 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -87,12 +87,16 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
+   ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
+   ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+   ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
+   ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
@@ -201,7 +205,9 @@ ac_get_type_size(LLVMTypeRef type)
 
 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
LLVMTypeRef t)
 {
-   if (t == ctx->f16 || t == ctx->i16)
+   if (t == ctx->i8)
+   return ctx->i8;
+   else if (t == ctx->f16 || t == ctx->i16)
return ctx->i16;
else if (t == ctx->f32 || t == ctx->i32)
return ctx->i32;
@@ -268,6 +274,110 @@ ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), 
"");
 }
 
+LLVMValueRef ac_get_zerof(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+   if (t == ctx->f16)
+   return ctx->f16_0;
+   else if (t == ctx->f32)
+   return ctx->f32_0;
+   else if (t == ctx->f64)
+   return ctx->f64_0;
+   else
+   unreachable("Unhandled float size");
+}
+
+LLVMValueRef ac_get_onef(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+   if (t == ctx->f16)
+   return ctx->f16_1;
+   else if (t == ctx->f32)
+   return ctx->f32_1;
+   else if (t == ctx->f64)
+   return ctx->f64_1;
+   else
+   unreachable("Unhandled float size");
+}
+
+LLVMValueRef ac_get_zero(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+   if (t == ctx->i8)
+   return ctx->i8_0;
+   else if (t == ctx->i16)
+   return ctx->i16_0;
+   else if (t == ctx->i32)
+   return ctx->i32_0;
+   else if (t == ctx->i64)
+   return ctx->i64_0;
+   else
+   unreachable("Unhandled bit size");
+}
+
+LLVMValueRef ac_get_one(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+   if (t == ctx->i8)
+   return ctx->i8_1;
+   else if (t == ctx->i16)
+   return ctx->i16_1;
+   else if (t == ctx->i32)
+   return ctx->i32_1;
+   else if (t == ctx->i64)
+   return ctx->i64_1;
+   else
+   unreachable("Unhandled bit size");
+}
+
+LLVMTypeRef ac_float_of_size(struct ac_llvm_context *ctx, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 16:
+   return ctx->f16;
+   case 32:
+   return ctx->f32;
+   case 64:
+   return ctx->f64;
+   default:
+   unreachable("Unhandled bit size");
+   }
+}
+
+LLVMTypeRef ac_int_of_size(struct ac_llvm_context *ctx, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 8:
+   return ctx->i8;
+   case 16:
+   return ctx->i16;
+   case 32:
+   return ctx->i32;
+   case 64:
+   return ctx->i64;
+   default:
+   unreachable("Unhandled bit size");
+   }
+}
+
+LLVMValueRef ac_build_ui_cast(struct ac_llvm_context *ctx, LLVMValueRef v, 
LLVMTypeRef t)
+{
+   unsigned new_bit_size = ac_get_elem_bits(ctx, t);
+   unsigned old_bit_size = ac_get_elem_bits(ctx, LLVMTypeOf(v));
+   if (new_bit_size > old_bit_size)
+   return LLVMBuildZExt(ctx->builder, v, t, "");
+   else if (new_bit_size < old_bit_size)
+   return LLVMBuildTrunc(ctx->builder, v, t, "");
+   else
+   return v;
+}
+
+LLVMValueRef ac_build_reinterpret(struct ac_llvm_context *ctx, LLVMValueRef v, 
LLVMTypeRef t)
+{
+   if (LLVMTypeOf(v) == t)
+   return v;
+
+   v = ac_to_integer(ctx, v);
+   v = ac_build_ui_cast(ctx, v, ac_to_integer_type(ctx, t));
+   return LLVMBuildBitCast(ctx->builder, v, t, "");
+}
+
 
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
@@ -1309,15 

[Mesa-dev] [PATCH 13/38] ac/nir: implement half-float nir_op_frcp

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ef850d6d22..7084b390d2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -657,8 +657,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
case nir_op_frcp:
src[0] = ac_to_float(>ac, src[0]);
-   result = ac_build_fdiv(>ac, instr->dest.dest.ssa.bit_size 
== 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
-  src[0]);
+   result = ac_build_fdiv(>ac, ac_get_onef(>ac, 
LLVMTypeOf(src[0])), src[0]);
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/38] ac/nir: implement 8-bit nir_load_const_instr

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 535a47d790..6d0d2cbd55 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1110,6 +1110,10 @@ static void visit_load_const(struct ac_nir_context *ctx,
 
for (unsigned i = 0; i < instr->def.num_components; ++i) {
switch (instr->def.bit_size) {
+   case 8:
+   values[i] = LLVMConstInt(element_type,
+instr->value.u8[i], false);
+   break;
case 16:
values[i] = LLVMConstInt(element_type,
 instr->value.u16[i], false);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/38] ac/nir: fix 64-bit nir_op_f2f16_rtz

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7c827b443b..ef850d6d22 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -886,6 +886,8 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
break;
case nir_op_f2f16_rtz:
src[0] = ac_to_float(>ac, src[0]);
+   if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+   src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], 
ctx->ac.f32, "");
LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
result = ac_build_cvt_pkrtz_f16(>ac, param);
result = LLVMBuildExtractElement(ctx->ac.builder, result, 
ctx->ac.i32_0, "");
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/38] ac/nir: make ac_build_fract work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1ef28323d1..0a1987c65b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2327,16 +2327,9 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, 
unsigned simm16)
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
 {
-   LLVMTypeRef type;
-   char *intr;
-
-   if (bitsize == 32) {
-   intr = "llvm.floor.f32";
-   type = ctx->f32;
-   } else {
-   intr = "llvm.floor.f64";
-   type = ctx->f64;
-   }
+   LLVMTypeRef type = ac_float_of_size(ctx, bitsize);
+   char intr[64];
+   snprintf(intr, sizeof(intr), "llvm.floor.f%d", bitsize);
 
LLVMValueRef params[] = {
src0,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/38] radv: lower 16-bit flrp

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/vulkan/radv_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 456c462a23..9ba20ac72e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -53,6 +53,7 @@
 static const struct nir_shader_compiler_options nir_options = {
.vertex_id_zero_based = true,
.lower_scmp = true,
+   .lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_device_index_to_zero = true,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/38] radv, ac: 16-bit and 8-bit arithmetic and 8-bit storage

2018-12-07 Thread Rhys Perry
This series adds support for:
- VK_KHR_shader_float16_int8
- VK_AMD_gpu_shader_half_float
- VK_AMD_gpu_shader_int16
- VK_KHR_8bit_storage
on VI+. Half floats are currently disabled on LLVM 7 because of a bug
causing large memory usage and long (or unbounded) compilation times with
some tests.

It depends on the following patch series:
- https://patchwork.freedesktop.org/series/53454/
- https://patchwork.freedesktop.org/series/53602/
- https://patchwork.freedesktop.org/series/53660/

An older version was tested on my Polaris card, but due to hardware issues
I currently can't test the latest version of the series.

deqp-vk has no regressions and none of the newly enabled tests fail.

Rhys Perry (38):
  ac: add various helpers for float16/int16/int8
  ac/nir: implement 8-bit push constant, ssbo and ubo loads
  ac/nir: implement 8-bit ssbo stores
  ac/nir: fix 16-bit ssbo stores
  ac/nir: implement 8-bit nir_load_const_instr
  ac/nir: implement 8-bit conversions
  ac/nir: fix 64-bit nir_op_f2f16_rtz
  ac/nir: make ac_build_clamp work on all bit sizes
  ac/nir: make ac_build_fract work on all bit sizes
  ac/nir: make ac_build_isign work on all bit sizes
  ac/nir: make ac_build_fsign work on all bit sizes
  ac/nir: make ac_build_fdiv support 16-bit floats
  ac/nir: implement half-float nir_op_frcp
  ac/nir: implement half-float nir_op_frsq
  ac/nir: implement half-float nir_op_ldexp
  radv: lower 16-bit flrp
  ac/nir: support half floats in emit_b2f
  ac/nir: make emit_b2i work on all bit sizes
  ac/nir: implement 16-bit shifts
  compiler/nir: add lowering option for 16-bit ffma
  ac/nir: implement 16-bit ac_build_ddxy
  ac/nir: implement 8 and 16 bit ac_build_readlane
  nir: make bitfield_reverse and ifind_msb work with all integers
  ac/nir: make ac_find_lsb work on all bit sizes
  ac/nir: make ac_build_umsb work on all bit sizes
  ac/nir: implement 8 and 16 bit ac_build_imsb
  ac/nir: make ac_build_bit_count work on all bit sizes
  ac/nir: make ac_build_bitfield_reverse work on all bit sizes
  ac/nir: implement 16-bit pack/unpack opcodes
  ac/nir: add 8-bit and 16-bit types to glsl_base_to_llvm_type
  ac/nir,radv: create an array of varying output types
  ac/nir: store all outputs as f32
  radv: store all fragment shader inputs as f32
  radv: handle all fragment output types
  ac,radv: run LLVM's SLP vectorizer
  ac/nir: generate better code for nir_op_f2f16_rtz
  ac/nir: have nir_op_f2f16 round to zero
  radv: expose float16, int16 and int8 features and extensions

 src/amd/common/ac_llvm_build.c| 355 ++
 src/amd/common/ac_llvm_build.h|  22 +-
 src/amd/common/ac_llvm_util.c |   9 +-
 src/amd/common/ac_llvm_util.h |   1 +
 src/amd/common/ac_nir_to_llvm.c   | 258 +++
 src/amd/common/ac_shader_abi.h|   1 +
 src/amd/vulkan/radv_device.c  |  17 ++
 src/amd/vulkan/radv_extensions.py |   4 +
 src/amd/vulkan/radv_nir_to_llvm.c |  92 ---
 src/amd/vulkan/radv_shader.c  |   7 +
 src/broadcom/compiler/nir_to_vir.c|   1 +
 src/compiler/nir/nir.h|   1 +
 src/compiler/nir/nir_opcodes.py   |   4 +-
 src/compiler/nir/nir_opt_algebraic.py |   4 +-
 src/gallium/drivers/radeonsi/si_get.c |   1 +
 src/gallium/drivers/vc4/vc4_program.c |   1 +
 16 files changed, 516 insertions(+), 262 deletions(-)

-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/38] ac/nir: implement 8-bit ssbo stores

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8910dabb3d..31fb77290c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1497,7 +1497,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[1]), true);
-   LLVMValueRef base_data = ac_to_float(>ac, src_data);
+   LLVMValueRef base_data = src_data;
base_data = ac_trim_vector(>ac, base_data, instr->num_components);
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
 
@@ -1538,7 +1538,25 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
  LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
}
-   if (num_bytes == 2) {
+   if (num_bytes == 1) {
+   store_name = "llvm.amdgcn.tbuffer.store.i32";
+   data_type = ctx->ac.i32;
+   data = LLVMBuildZExt(ctx->ac.builder, data, data_type, 
"");
+   LLVMValueRef tbuffer_params[] = {
+   data,
+   rsrc,
+   ctx->ac.i32_0, /* vindex */
+   offset,/* voffset */
+   ctx->ac.i32_0,
+   ctx->ac.i32_0,
+   LLVMConstInt(ctx->ac.i32, 1, false), // dfmt (= 
8bit)
+   LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= 
uint)
+   glc,
+   ctx->ac.i1false,
+   };
+   ac_build_intrinsic(>ac, store_name,
+  ctx->ac.voidt, tbuffer_params, 10, 
0);
+   } else if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
LLVMValueRef tbuffer_params[] = {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/38] ac/nir: implement 8-bit push constant, ssbo and ubo loads

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 37 +++--
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2e9fd7b689..8910dabb3d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1395,7 +1395,30 @@ static LLVMValueRef visit_load_push_constant(struct 
ac_nir_context *ctx,
 
ptr = ac_build_gep0(>ac, ctx->abi->push_constants, addr);
 
-   if (instr->dest.ssa.bit_size == 16) {
+   if (instr->dest.ssa.bit_size == 8) {
+   unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 
1;
+   LLVMTypeRef vec_type = 
LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+   ptr = ac_cast_ptr(>ac, ptr, vec_type);
+   LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+
+   LLVMValueRef params[3];
+   if (load_dwords > 1) {
+   LLVMValueRef res_vec = 
LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+   params[0] = LLVMBuildExtractElement(ctx->ac.builder, 
res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
+   params[1] = LLVMBuildExtractElement(ctx->ac.builder, 
res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
+   } else {
+   res = LLVMBuildBitCast(ctx->ac.builder, res, 
ctx->ac.i32, "");
+   params[0] = ctx->ac.i32_0;
+   params[1] = res;
+   }
+   params[2] = addr;
+   res = ac_build_intrinsic(>ac, "llvm.amdgcn.alignbyte", 
ctx->ac.i32, params, 3, 0);
+
+   res = LLVMBuildTrunc(ctx->ac.builder, res, 
LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
+   if (instr->dest.ssa.num_components > 1)
+   res = LLVMBuildBitCast(ctx->ac.builder, res, 
LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 
instr->dest.ssa.num_components), "");
+   return res;
+   } else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
LLVMTypeRef vec_type = 
LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
ptr = ac_cast_ptr(>ac, ptr, vec_type);
@@ -1651,7 +1674,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * 
elem_size_bytes, false);
 
LLVMValueRef ret;
-   if (load_bytes == 2) {
+   if (load_bytes <= 2) {
ret = ac_build_tbuffer_load_short_byte(>ac,
   rsrc,
   vindex,
@@ -1659,7 +1682,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
   ctx->ac.i32_0,
   immoffset,
   glc,
-  2);
+  load_bytes);
} else {
const char *load_name;
LLVMTypeRef data_type;
@@ -1675,6 +1698,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
data_type = ctx->ac.v2f32;
break;
case 4:
+   case 3:
load_name = "llvm.amdgcn.buffer.load.f32";
data_type = ctx->ac.f32;
break;
@@ -1721,7 +1745,8 @@ static LLVMValueRef visit_load_ubo_buffer(struct 
ac_nir_context *ctx,
if (instr->dest.ssa.bit_size == 64)
num_components *= 2;
 
-   if (instr->dest.ssa.bit_size == 16) {
+   if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
+   unsigned size = instr->dest.ssa.bit_size / 8;
LLVMValueRef results[num_components];
for (unsigned i = 0; i < num_components; ++i) {
results[i] = ac_build_tbuffer_load_short_byte(>ac,
@@ -1729,9 +1754,9 @@ static LLVMValueRef visit_load_ubo_buffer(struct 
ac_nir_context *ctx,
  
ctx->ac.i32_0,
  offset,
  
ctx->ac.i32_0,
- 
LLVMConstInt(ctx->ac.i32, 2 * i, 0),
+

[Mesa-dev] [PATCH 06/38] ac/nir: implement 8-bit conversions

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6d0d2cbd55..7c827b443b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -858,12 +858,14 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
src[i] = ac_to_integer(>ac, src[i]);
result = ac_build_gather_values(>ac, src, num_components);
break;
+   case nir_op_f2i8:
case nir_op_f2i16:
case nir_op_f2i32:
case nir_op_f2i64:
src[0] = ac_to_float(>ac, src[0]);
result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
break;
+   case nir_op_f2u8:
case nir_op_f2u16:
case nir_op_f2u32:
case nir_op_f2u64:
@@ -898,15 +900,14 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
else
result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], 
ac_to_float_type(>ac, def_type), "");
break;
+   case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
case nir_op_u2u64:
src[0] = ac_to_integer(>ac, src[0]);
-   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) < 
ac_get_elem_bits(>ac, def_type))
-   result = LLVMBuildZExt(ctx->ac.builder, src[0], 
def_type, "");
-   else
-   result = LLVMBuildTrunc(ctx->ac.builder, src[0], 
def_type, "");
+   result = ac_build_ui_cast(>ac, src[0], def_type);
break;
+   case nir_op_i2i8:
case nir_op_i2i16:
case nir_op_i2i32:
case nir_op_i2i64:
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/38] ac/nir: make ac_build_fdiv support 16-bit floats

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c85f9a214e..e85c178f78 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -716,7 +716,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 * If we do (num * (1 / den)), LLVM does:
 *return num * v_rcp_f32(den);
 */
-   LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : 
ctx->f32_1;
+   LLVMValueRef one = ac_get_onef(ctx, LLVMTypeOf(num));
LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
 
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/38] ac/nir: make emit_b2i work on all bit sizes

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 459d9c119c..517da7ba9b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -347,11 +347,7 @@ static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
 unsigned bitsize)
 {
LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
-
-   if (bitsize == 32)
-   return result;
-
-   return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
+   return ac_build_ui_cast(ctx, result, ac_int_of_size(ctx, bitsize));
 }
 
 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/38] ac/nir: support half floats in emit_b2f

2018-12-07 Thread Rhys Perry
This seems to generate fine code, even though the IR is a bit ugly.

Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index bb7c421606..459d9c119c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -316,14 +316,20 @@ static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
 unsigned bitsize)
 {
LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
-  LLVMBuildBitCast(ctx->builder, 
LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
+  LLVMBuildBitCast(ctx->builder, 
ctx->f32_1, ctx->i32, ""),
   "");
result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
 
-   if (bitsize == 32)
+   switch (bitsize) {
+   case 16:
+   return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
+   case 32:
return result;
-
-   return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+   case 64:
+   return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+   default:
+   unreachable("Unsupported bit size.");
+   }
 }
 
 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/38] ac/nir: fix 16-bit ssbo stores

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 31fb77290c..535a47d790 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1559,6 +1559,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
} else if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
+   data = LLVMBuildBitCast(ctx->ac.builder, data, 
ctx->ac.i16, "");
+   data = LLVMBuildZExt(ctx->ac.builder, data, data_type, 
"");
LLVMValueRef tbuffer_params[] = {
data,
rsrc,
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/38] ac/nir: implement half-float nir_op_frsq

2018-12-07 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_nir_to_llvm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7084b390d2..b447da092f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -788,8 +788,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
case nir_op_frsq:
result = emit_intrin_1f_param(>ac, "llvm.sqrt",
  ac_to_float_type(>ac, 
def_type), src[0]);
-   result = ac_build_fdiv(>ac, instr->dest.dest.ssa.bit_size 
== 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
-  result);
+   result = ac_build_fdiv(>ac, ac_get_onef(>ac, 
LLVMTypeOf(result)), result);
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(>ac, src[0]);
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/59] compiler/spirv: use 32-bit polynomial approximation for 16-bit asin()

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> The 16-bit polynomial execution doesn't meet Khronos precision
> requirements.
> Also, the half-float denorm range starts at 2^(-14) and with asin taking
> input
> values in the range [0, 1], polynomial approximations can lead to flushing
> relatively easily.
>
> An alternative is to use the atan2 formula to compute asin, which is the
> reference taken by Khronos to determine precision requirements, but that
> ends up generating too many additional instructions when compared to the
> polynomial approximation. Specifically, for the Intel case, doing this
> adds +41 instructions to the program for each asin/acos call, which looks
> like an undesirable trade off.
>
> So for now we take the easy way out and fallback to using the 32-bit
> polynomial approximation, which is better (faster) than the 16-bit atan2
> implementation and gives us better precision that matches Khronos
> requirements.
> ---
>  src/compiler/spirv/vtn_glsl450.c | 21 +++--
>  1 file changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_glsl450.c
> b/src/compiler/spirv/vtn_glsl450.c
> index bb340c87416..64a1431ae14 100644
> --- a/src/compiler/spirv/vtn_glsl450.c
> +++ b/src/compiler/spirv/vtn_glsl450.c
> @@ -201,8 +201,20 @@ build_log(nir_builder *b, nir_ssa_def *x)
>   * in each case.
>   */
>  static nir_ssa_def *
> -build_asin(nir_builder *b, nir_ssa_def *x, float _p0, float _p1)
> +build_asin(nir_builder *b, nir_ssa_def *_x, float _p0, float _p1)
>  {
> +   /* The polynomial approximation isn't precise enough to meet half-float
> +* precision requirements. Alternatively, we could implement this using
> +* the formula:
>

This isn't surprising.  It's possible we could restructure the
floating-point calculation to be more stable but just doing 32-bit seems
reasonable.


> +*
> +* asin(x) = atan2(x, sqrt(1 - x*x))
> +*
> +* But that is very expensive, so instead we just do the polynomial
> +* approximation in 32-bit math and then we convert the result back to
> +* 16-bit.
> +*/
> +   nir_ssa_def *x = _x->bit_size == 16 ? nir_f2f32(b, _x) : _x;
>

Mind restructuring this as follows?

if (x->bit_size == 16) {
   /* Comment goes here */
   return f2f16(b, build_asin(b, f2f32(b, x), p0, p1));
}

I find a bit of recursion easier to read than having two bits at the
beginning and end.


> +
> nir_ssa_def *p0 = nir_imm_floatN_t(b, _p0, x->bit_size);
> nir_ssa_def *p1 = nir_imm_floatN_t(b, _p1, x->bit_size);
> nir_ssa_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
> @@ -210,7 +222,8 @@ build_asin(nir_builder *b, nir_ssa_def *x, float _p0,
> float _p1)
> nir_ssa_def *m_pi_4_minus_one =
>nir_imm_floatN_t(b, M_PI_4f - 1.0f, x->bit_size);
> nir_ssa_def *abs_x = nir_fabs(b, x);
> -   return nir_fmul(b, nir_fsign(b, x),
> +   nir_ssa_def *result =
> +   nir_fmul(b, nir_fsign(b, x),
> nir_fsub(b, m_pi_2,
>  nir_fmul(b, nir_fsqrt(b, nir_fsub(b, one,
> abs_x)),
>   nir_fadd(b, m_pi_2,
> @@ -220,6 +233,10 @@ build_asin(nir_builder *b, nir_ssa_def *x, float _p0,
> float _p1)
>
> nir_fadd(b, p0,
>
>  nir_fmul(b, abs_x,
>
>   p1);
> +   if (_x->bit_size == 16)
> +  result = nir_f2f16(b, result);
> +
> +   return result;
>  }
>
>  /**
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 94957] dEQP failures on llvmpipe

2018-12-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=94957

--- Comment #12 from Emil Velikov  ---
The commit below allows us to disable the perf. optimisations (for release
builds), thus fixing the functional.texture tests.

Should we close this bug, or keep it open as all the failing tests have a
solution/workaround?

commit 8f77156c268356baf9df8490c52cc5d8475b9db8
Author: Gert Wollny 
Date:   Fri Oct 5 15:08:51 2018 +0200

gallivm: Make it possible to disable some optimization shortcuts in release
builds

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] docs: Document GitLab merge request process (email alternative)

2018-12-07 Thread Eric Engestrom
On Wednesday, 2018-12-05 15:32:05 -0800, Jordan Justen wrote:
> This documents a process for using GitLab Merge Requests as a second
> way to submit code changes for Mesa. Only one of the two methods is
> allowed for each patch series.
> 
> We will *not* require all patches to be emailed. Some code changes may
> be reviewed and merged without any discussion on the mesa-dev email
> list.
> 
> v2:
>  * No longer require email. Allow submitter to choose email or a
>GitLab merge request.
>  * Various feedback from Brian, Daniel, Dylan, Eric, Erik, Jason,
>Matt, Michel and Rob.
> 
> Signed-off-by: Jordan Justen 

Reviewed-by: Eric Engestrom 

> ---
>  docs/submittingpatches.html | 76 ++---
>  1 file changed, 71 insertions(+), 5 deletions(-)
> 
> diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html
> index 92d954a2d09..21175988d0b 100644
> --- a/docs/submittingpatches.html
> +++ b/docs/submittingpatches.html
> @@ -21,7 +21,7 @@
>  Basic guidelines
>  Patch formatting
>  Testing Patches
> -Mailing Patches
> +Submitting Patches
>  Reviewing Patches
>  Nominating a commit for a stable branch
>  Criteria for accepting patches to the stable 
> branch
> @@ -42,8 +42,10 @@ components.
>  git bisect.)
>  Patches should be properly formatted.
>  Patches should be sufficiently tested before 
> submitting.
> -Patches should be submitted to mesa-dev
> -for review using git send-email.
> +Patches should be submitted
> +to mesa-dev or with
> +a merge request
> +for review.
>  
>  
>  
> @@ -180,10 +182,19 @@ run.
>  
>  
>  
> -Mailing Patches
> +Submitting Patches
>  
>  
> -Patches should be sent to the mesa-dev mailing list for review:
> +Patches may be submitted to the Mesa project by
> +email or with a
> +GitLab merge request. To prevent
> +duplicate code review, only use one method to submit your changes.
> +
> +
> +Mailing Patches
> +
> +
> +Patches may be sent to the mesa-dev mailing list for review:
>  https://lists.freedesktop.org/mailman/listinfo/mesa-dev;>
>  mesa-dev@lists.freedesktop.org.
>  When submitting a patch make sure to use
> @@ -217,8 +228,63 @@ disabled before sending your patches. (Note that you may 
> need to contact
>  your email administrator for this.)
>  
>  
> +GitLab Merge Requests
> +
> +
> +  https://gitlab.freedesktop.org/mesa/mesa;>GitLab Merge
> +  Requests (MR) can also be used to submit patches for Mesa.
> +
> +
> +
> +  If the MR may have interest for most of the Mesa community, you can
> +  send an email to the mesa-dev email list including a link to the MR.
> +  Don't send the patch to mesa-dev, just the MR link.
> +
> +
> +  Add labels to your MR to help reviewers find it. For example:
> +  
> +Mesa changes affecting all drivers: mesa
> +Hardware vendor specific code: amd, intel, nvidia, ...
> +Driver specific code: anvil, freedreno, i965, iris, radeonsi,
> +  radv, vc4, ...
> +Other tag examples: gallium, util
> +  
> +
> +
> +  If you revise your patches based on code review and push an update
> +  to your branch, you should maintain a clean history
> +  in your patches. There should not be "fixup" patches in the history.
> +  The series should be buildable and functional after every commit
> +  whenever you push the branch.
> +
> +
> +  It is your responsibility to keep the MR alive and making progress,
> +  as there are no guarantees that a Mesa dev will independently take
> +  interest in it.
> +
> +
> +  Some other notes:
> +  
> +Make changes and update your branch based on feedback
> +Old, stale MR may be closed, but you can reopen it if you
> +  still want to pursue the changes
> +You should periodically check to see if your MR needs to be
> +  rebased
> +Make sure your MR is closed if your patches get pushed outside
> +  of GitLab
> +  
> +
> +
>  Reviewing Patches
>  
> +
> +  To participate in code review, you should monitor the
> +  https://lists.freedesktop.org/mailman/listinfo/mesa-dev;>
> +  mesa-dev email list and the GitLab
> +  Mesa  href="https://gitlab.freedesktop.org/mesa/mesa/merge_requests;>Merge
> +  Requests page.
> +
> +
>  
>  When you've reviewed a patch on the mailing list, please be unambiguous
>  about your review.  That is, state either
> -- 
> 2.20.0.rc2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 22/59] compiler/nir: add lowering for 16-bit ldexp

2018-12-07 Thread Jason Ekstrand
On Wed, Dec 5, 2018 at 5:32 AM Pohjolainen, Topi 
wrote:

> On Wed, Dec 05, 2018 at 12:26:06PM +0100, Iago Toral wrote:
> > On Wed, 2018-12-05 at 13:20 +0200, Pohjolainen, Topi wrote:
> > > On Wed, Dec 05, 2018 at 11:53:44AM +0100, Iago Toral wrote:
> > > > On Wed, 2018-12-05 at 11:39 +0200, Pohjolainen, Topi wrote:
> > > > > I remember people preferring to order things 16, 32, 64 before.
> > > > > Should
> > > > > we follow that here as well?
> > > >
> > > > Yes, it makes sense. I'll change that.
>

Agreed.


> > > >
> > > > > On Tue, Dec 04, 2018 at 08:16:46AM +0100, Iago Toral Quiroga
> > > > > wrote:
> > > > > > ---
> > > > > >  src/compiler/nir/nir_opt_algebraic.py | 5 +
> > > > > >  1 file changed, 5 insertions(+)
> > > > > >
> > > > > > diff --git a/src/compiler/nir/nir_opt_algebraic.py
> > > > > > b/src/compiler/nir/nir_opt_algebraic.py
> > > > > > index 6c3b77c9b6e..747f1751086 100644
> > > > > > --- a/src/compiler/nir/nir_opt_algebraic.py
> > > > > > +++ b/src/compiler/nir/nir_opt_algebraic.py
> > > > > > @@ -778,6 +778,8 @@ def fexp2i(exp, bits):
> > > > > >return ('ishl', ('iadd', exp, 127), 23)
> > > > > > elif bits == 64:
> > > > > >return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp,
> > > > > > 1023), 20))
> > > > > > +   elif bits == 16:
> > > > > > +  return ('i2i16', ('ishl', ('iadd', exp, 15), 10))
> > > > > > else:
> > > > > >assert False
> > > > > >
> > > > > > @@ -796,6 +798,8 @@ def ldexp(f, exp, bits):
> > > > > >exp = ('imin', ('imax', exp, -252), 254)
> > > > > > elif bits == 64:
> > > > > >exp = ('imin', ('imax', exp, -2044), 2046)
> > > > > > +   elif bits == 16:
> > > > > > +  exp = ('imin', ('imax', exp, -30), 30)
> > > > >
> > > > > I expected this to be:
> > > > >
> > > > >  exp = ('imin', ('imax', exp, -29), 30)
> > > >
> > > > Actually, I think this should be -28, since the minimum exponent
> > > > value
> > > > is -14.
> > >
> > > I kept wondering about that. The offset is 15 and -14 - 15 yields -29. But
> > > -28
> > > in turn would be more in line with the 32- and 64-bit cases.
> >
> > I think the idea is to have this be 2x the minimum (and maximum)
> > exponents we can represent, since below we are dividing it by two and
> > emitting two exponentials, each with half that exponent. That way we
> > ensure that when we divide the exponent by 2 we still produce a
> > representable exponent for the bit-size.
>
> Ah, right. I should have checked the context, -28 makes sense now.
>
> Reviewed-by: Topi Pohjolainen 
>

With things in the right order and [-28, 30],

Reviewed-by: Jason Ekstrand 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/59] compiler/spirv: implement 16-bit hyperbolic trigonometric functions

2018-12-07 Thread Jason Ekstrand
My comment earlier, I think, applies to all of the first 7.  Let's just add
nir_fadd_imm and nir_fmul_imm and rewrite them to use those.  That'll make
them handle doubles as well if we ever need it.

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/compiler/spirv/vtn_glsl450.c | 29 +++--
>  1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_glsl450.c
> b/src/compiler/spirv/vtn_glsl450.c
> index 8bdef9db822..85851755aab 100644
> --- a/src/compiler/spirv/vtn_glsl450.c
> +++ b/src/compiler/spirv/vtn_glsl450.c
> @@ -672,7 +672,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
> case GLSLstd450Sinh:
>/* 0.5 * (e^x - e^(-x)) */
>val->ssa->def =
> - nir_fmul(nb, nir_imm_float(nb, 0.5f),
> + nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size),
>nir_fsub(nb, build_exp(nb, src[0]),
> build_exp(nb, nir_fneg(nb, src[0];
>return;
> @@ -680,7 +680,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
> case GLSLstd450Cosh:
>/* 0.5 * (e^x + e^(-x)) */
>val->ssa->def =
> - nir_fmul(nb, nir_imm_float(nb, 0.5f),
> + nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f, src[0]->bit_size),
>nir_fadd(nb, build_exp(nb, src[0]),
> build_exp(nb, nir_fneg(nb, src[0];
>return;
> @@ -693,11 +693,20 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
> * We clamp x to (-inf, +10] to avoid precision problems.  When x >
> 10,
> * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in
> the
> * computation e^2x +/- 1 so it can be ignored.
> +   *
> +   * For 16-bit precision we clamp x to (-inf, +4.2] since the maximum
> +   * representable number is only 65,504 and e^(2*6) exceeds that.
> Also,
> +   * if x > 4.2, tanh(x) will return 1.0 in fp16.
> */
> -  nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
> -  nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x,
> nir_imm_float(nb, 2)));
> -  val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb,
> 1)),
> -   nir_fadd(nb, exp2x, nir_imm_float(nb,
> 1)));
> +  const uint32_t bit_size = src[0]->bit_size;
> +  const double clamped_x = bit_size > 16 ? 10.0 : 4.2;
> +  nir_ssa_def *x = nir_fmin(nb, src[0],
> +nir_imm_floatN_t(nb, clamped_x,
> bit_size));
> +  nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, bit_size);
> +  nir_ssa_def *two = nir_imm_floatN_t(nb, 2.0, bit_size);
> +  nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, two));
> +  val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, one),
> +   nir_fadd(nb, exp2x, one));
>return;
> }
>
> @@ -705,16 +714,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum
> GLSLstd450 entrypoint,
>val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
>   build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
> nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0],
> src[0]),
> -  nir_imm_float(nb,
> 1.0f));
> +  nir_imm_floatN_t(nb,
> 1.0f, src[0]->bit_size));
>return;
> case GLSLstd450Acosh:
>val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
>   nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
> -nir_imm_float(nb, 1.0f);
> +nir_imm_floatN_t(nb, 1.0f,
> src[0]->bit_size);
>return;
> case GLSLstd450Atanh: {
> -  nir_ssa_def *one = nir_imm_float(nb, 1.0);
> -  val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
> +  nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);
> +  val->ssa->def = nir_fmul(nb, nir_imm_floatN_t(nb, 0.5f,
> src[0]->bit_size),
>   build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
>  nir_fsub(nb, one, src[0];
>return;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/59] intel/compiler: lower 16-bit extended math to 32-bit prior to gen9

2018-12-07 Thread Jason Ekstrand
I haven't checked the HW docs but

Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> Extended math doesn't support half-float on these generations.
> ---
>  src/intel/compiler/brw_nir.c | 13 -
>  1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index e0027f5179c..0b3094724c4 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -614,6 +614,8 @@ lower_bit_size_callback(const nir_alu_instr *alu,
> UNUSED void *data)
> if (alu->dest.dest.ssa.bit_size != 16)
>return 0;
>
> +   const struct brw_compiler *compiler = (const struct brw_compiler *)
> data;
> +
> switch (alu->op) {
> case nir_op_idiv:
> case nir_op_imod:
> @@ -626,6 +628,15 @@ lower_bit_size_callback(const nir_alu_instr *alu,
> UNUSED void *data)
> case nir_op_fround_even:
> case nir_op_ftrunc:
>return 32;
> +   case nir_op_frcp:
> +   case nir_op_frsq:
> +   case nir_op_fsqrt:
> +   case nir_op_fpow:
> +   case nir_op_fexp2:
> +   case nir_op_flog2:
> +   case nir_op_fsin:
> +   case nir_op_fcos:
> +  return compiler->devinfo->gen < 9 ? 32 : 0;
> default:
>return 0;
> }
> @@ -692,7 +703,7 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
>OPT(nir_opt_large_constants, NULL, 32);
> }
>
> -   OPT(nir_lower_bit_size, lower_bit_size_callback, NULL);
> +   OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
>
> if (is_scalar) {
>OPT(nir_lower_load_const_to_scalar);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 28/59] intel/compiler: set correct precision fields for 3-source float instructions

2018-12-07 Thread Jason Ekstrand
On Wed, Dec 5, 2018 at 7:14 AM Pohjolainen, Topi 
wrote:

> On Wed, Dec 05, 2018 at 02:04:16PM +0100, Iago Toral wrote:
> > On Wed, 2018-12-05 at 14:58 +0200, Pohjolainen, Topi wrote:
> > > On Tue, Dec 04, 2018 at 08:16:52AM +0100, Iago Toral Quiroga wrote:
> > > > Source0 and Destination extract the floating-point precision
> > > > automatically
> > > > from the SrcType and DstType instruction fields respectively when
> > > > they are
> > > > set to types :F or :HF. For Source1 and Source2 operands, we use
> > > > the new
> > > > 1-bit fields Src1Type and Src2Type, where 0 means normal precision
> > > > and 1
> > > > means half-precision. Since we always use the type of the
> > > > destination for
> > > > all operands when we emit 3-source instructions, we only need set
> > > > Src1Type
> > > > and Src2Type to 1 when we are emitting a half-precision
> > > > instruction.
> > > > ---
> > > >  src/intel/compiler/brw_eu_emit.c | 5 +
> > > >  1 file changed, 5 insertions(+)
> > > >
> > > > diff --git a/src/intel/compiler/brw_eu_emit.c
> > > > b/src/intel/compiler/brw_eu_emit.c
> > > > index 2c9fc9a5c7c..66edfb43baf 100644
> > > > --- a/src/intel/compiler/brw_eu_emit.c
> > > > +++ b/src/intel/compiler/brw_eu_emit.c
> > > > @@ -801,6 +801,11 @@ brw_alu3(struct brw_codegen *p, unsigned
> > > > opcode, struct brw_reg dest,
> > > >*/
> > > >   brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
> > > >   brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
> > > > +
> > > > + if (devinfo->gen >= 8 && dest.type ==
> > > > BRW_REGISTER_TYPE_HF) {
> > > > +brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
> > > > +brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
> > > > + }
> > >
> > > I had similar patch which prepares for mixed mode (useful for linterp
> > > with
> > > 32-bit input varyings):
> > >
> > >  /* From the Bspec: Instruction types
> > >   *
> > >   * Three source instructions can use operands with mixed-
> > > mode
> > >   * precision. When SrcType field is set to :f or :hf it
> > > defines
> > >   * precision for source 0 only, and fields Src1Type and
> > > Src2Type
> > >   * define precision for other source operands:
> > >   *
> > >   *   0b = :f. Single precision Float (32-bit).
> > >   *   1b = :hf. Half precision Float (16-bit).
> > >   */
> > >  if (src1.type == BRW_REGISTER_TYPE_HF)
> > > brw_inst_set_3src_src1_type(devinfo, inst, 1);
> > >
> > >  if (src2.type == BRW_REGISTER_TYPE_HF)
> > > brw_inst_set_3src_src2_type(devinfo, inst, 1);
> > >
> > > How would you feel about that? (Direct cut-paste and the helpers have
> > > > different name).
>

Yeah, let's not base source precisions on destination types.  I like Topi's
version better.


> >
> > Sure, if we are planning to use mixed mode in the future this makes
> > more sense. Thanks!
>
> Nice!
>
> Reviewed-by: Topi Pohjolainen 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 36/59] compiler/spirv: add implementation to check for SpvCapabilityFloat16 support

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/compiler/shader_info.h| 1 +
>  src/compiler/spirv/spirv_to_nir.c | 4 +++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
> index 65bc0588d67..0a3cb37069c 100644
> --- a/src/compiler/shader_info.h
> +++ b/src/compiler/shader_info.h
> @@ -45,6 +45,7 @@ struct spirv_supported_capabilities {
> bool variable_pointers;
> bool storage_16bit;
> bool int16;
> +   bool float16;
> bool shader_viewport_index_layer;
> bool subgroup_arithmetic;
> bool subgroup_ballot;
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index a05c4d236ca..6f6673c8fb1 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -3415,7 +3415,6 @@ vtn_handle_preamble_instruction(struct vtn_builder
> *b, SpvOp opcode,
>case SpvCapabilityLinkage:
>case SpvCapabilityVector16:
>case SpvCapabilityFloat16Buffer:
> -  case SpvCapabilityFloat16:
>case SpvCapabilityInt64Atomics:
>case SpvCapabilityStorageImageMultisample:
>case SpvCapabilityInt8:
> @@ -3432,6 +3431,9 @@ vtn_handle_preamble_instruction(struct vtn_builder
> *b, SpvOp opcode,
>case SpvCapabilityFloat64:
>   spv_check_supported(float64, cap);
>   break;
> +  case SpvCapabilityFloat16:
> + spv_check_supported(float16, cap);
> + break;
>case SpvCapabilityInt64:
>   spv_check_supported(int64, cap);
>   break;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 42/59] intel/compiler: activate 16-bit bit-size lowerings also for 8-bit

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:19 AM Iago Toral Quiroga  wrote:

> Particularly, we need the same lowerings we use for 16-bit
> integers.
> ---
>  src/intel/compiler/brw_nir.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 0b3094724c4..0a5aa35c700 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -611,7 +611,7 @@ static unsigned
>  lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
>  {
> assert(alu->dest.dest.is_ssa);
> -   if (alu->dest.dest.ssa.bit_size != 16)
> +   if (alu->dest.dest.ssa.bit_size >= 32)
>return 0;
>
> const struct brw_compiler *compiler = (const struct brw_compiler *)
> data;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] docs: Document GitLab merge request process (email alternative)

2018-12-07 Thread Daniel Stone
Hi,

On Sat, 8 Dec 2018 at 05:15, Eric Engestrom  wrote:
> On Friday, 2018-12-07 10:19:23 +0100, Erik Faye-Lund wrote:
> > Automated emails (and perhaps IRC bot) would be really nice.
>
> Agreed. Email would be great to help with the transition.
> There's work currently being done on GitLab to allow for mailing lists
> to be notified; this should cover 'new MR' as well.
> If we need this feature before GitLab is done, it should be possible to
> write a bot using the webhooks, just needs someone to take the time to
> do it :)
>
> For IRC, there's already some integration, but it's limited to notifying
> about git pushes for now:
> https://docs.gitlab.com/ee/user/project/integrations/irker.html
>
> There's an open issue about adding more events, but it hasn't seen much
> activity:
> https://gitlab.com/gitlab-org/gitlab-ce/issues/7965

Wayland uses a couple of eventd plugins chained together:
https://github.com/sardemff7/git-eventc

That notifies the channel when issues and MRs are opened or closed and
on push as well, including things like the labels. It's been pretty
useful so far.

> > Even better if it could be hooked up to scripts/get_reviewer.pl, and
> > automatically CC "the right people".
>
> Side note, I've been rewriting that script, although I need to send v2
> out at some point:
> https://patchwork.freedesktop.org/patch/226256/
>
> It would be trivial to hook that into a bot we'd write, but I don't think
> GitLab has support for something like this. I just opened an issue about
> adding support directly in GitLab:
> https://gitlab.com/gitlab-org/gitlab-ce/issues/55035

This already exists, as an EE-only feature called 'code owners':
https://docs.gitlab.com/ee/user/project/code_owners.html
https://gitlab.com/gitlab-org/gitlab-ee/issues/1012

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 29/59] intel/compiler: don't propagate HF immediates to 3-src instructions

2018-12-07 Thread Jason Ekstrand
Seems reasonable though I thought you had patches to the constant combining
to fix this.  Maybe they'll be ready in time that we won't need this?

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> 3-src instructions don't support immediates, but since 36bc5f06dd22,
> we allow them on MAD and LRP relying on the combine constants pass to
> fix it up later. However, that pass is specialized for 32-bit float
> immediates and can't handle HF constants at present, so this patch
> ensures that copy-propagation only does this for 32-bit constants.
> ---
>  src/intel/compiler/brw_fs_copy_propagation.cpp | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
> b/src/intel/compiler/brw_fs_copy_propagation.cpp
> index ab34b63748e..58d5080b4e9 100644
> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
> @@ -741,8 +741,16 @@ fs_visitor::try_constant_propagate(fs_inst *inst,
> acp_entry *entry)
>
>case BRW_OPCODE_MAD:
>case BRW_OPCODE_LRP:
> - inst->src[i] = val;
> - progress = true;
> + /* 3-src instructions can't take IMM registers, however, for
> 32-bit
> +  * floating instructions we rely on the combine constants pass
> to fix
> +  * it up. For anything else, we shouldn't be promoting immediates
> +  * until we can make the pass capable of combining constants of
> +  * different sizes.
> +  */
> + if (val.type == BRW_REGISTER_TYPE_F) {
> +inst->src[i] = val;
> +progress = true;
> + }
>   break;
>
>default:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 33/59] intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> The implementation of these opcodes in the generator assumes that their
> arguments are packed, and it generates register regions based on that
> assumption. While this expectation is reasonable for 32-bit,


Expectation, sure, but if someone does ddx(f2f32(d)) where d is a double,
it's broken.  Maybe we should back-port?  Either way

Reviewed-by: Jason Ekstrand 


> when we
> load 16-bit elements from UBOs we get them with a stride of 2 that we
> then need to pack with a stride of 1. Copy propagation can see through this
> and rewrite ddx/ddy operands to use the original, strided register,
> breaking
> the implementation in the generator.
> ---
>  .../compiler/brw_fs_copy_propagation.cpp  | 21 +++
>  1 file changed, 21 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
> b/src/intel/compiler/brw_fs_copy_propagation.cpp
> index 58d5080b4e9..c01d4ec4a4f 100644
> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
> @@ -361,6 +361,20 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned
> stride,
> return true;
>  }
>
> +static bool
> +instruction_requires_packed_data(fs_inst *inst)
> +{
> +   switch (inst->opcode) {
> +   case FS_OPCODE_DDX_FINE:
> +   case FS_OPCODE_DDX_COARSE:
> +   case FS_OPCODE_DDY_FINE:
> +   case FS_OPCODE_DDY_COARSE:
> +  return true;
> +   default:
> +  return false;
> +   }
> +}
> +
>  bool
>  fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
>  {
> @@ -407,6 +421,13 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int
> arg, acp_entry *entry)
> inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
>return false;
>
> +   /* Some instructions implemented in the generator backend, such as
> +* derivatives, assume that their operands are packed so we can't
> +* generally propagate strided regions to them.
> +*/
> +   if (instruction_requires_packed_data(inst) && entry->src.stride > 1)
> +  return false;
> +
> /* Bail if the result of composing both strides would exceed the
>  * hardware limit.
>  */
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 46/59] intel/compiler: fix integer to/from half-float in atom platforms

2018-12-07 Thread Jason Ekstrand
We are starting to get a *lot* of special cases in the conversion code.
I'm not sure what the best thing to do is.  Maybe some master conversion
function that just does it all?  Maybe some NIR lowering?  In any case, I
think we can do better than the pile of special cases we are starting to
accumulate.

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> Section Register Region Restriction of the 3D Media GPGPU chapter states:
>
>"Conversion between Integer and HF (Half Float) must be DWord
> aligned and strided by a DWord on the destination."
>
> The same restriction shows up in all hardware platforms that support
> half-float, however, empirical testing suggests that only atom
> platforms are affected.
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 41 +--
>  1 file changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 3f98c6a4474..db3a8812ae3 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -917,6 +917,25 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
>   inst->saturate = instr->dest.saturate;
>   break;
>}
> +
> +  /* CHV PRM, 3D Media GPGPU Engine, Register Region Restrictions,
> +   * Special Restrictions:
> +   *
> +   *"Conversion between Integer and HF (Half Float) must be DWord
> +   * aligned and strided by a DWord on the destination."
> +   *
> +   * The same restriction is listed for other hardware platforms,
> however,
> +   * empirical testing suggests that only atom platforms are affected.
> +   */
> +  if ((devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) &&
> +  nir_dest_bit_size(instr->dest.dest) == 16) {
> + assert(result.type == BRW_REGISTER_TYPE_HF);
> + fs_reg tmp =
> +horiz_stride(retype(bld.vgrf(BRW_REGISTER_TYPE_F, 1),
> result.type), 2);
> + bld.MOV(tmp, op[0]);
> + op[0] = tmp;
> +  }
> +
>inst = bld.MOV(result, op[0]);
>inst->saturate = instr->dest.saturate;
>break;
> @@ -939,11 +958,29 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
>}
>/* Fallthrough */
>
> +   case nir_op_f2i16:
> +   case nir_op_f2u16:
> +  /* CHV PRM, 3D Media GPGPU Engine, Register Region Restrictions,
> +   * Special Restrictions:
> +   *
> +   *"Conversion between Integer and HF (Half Float) must be DWord
> +   * aligned and strided by a DWord on the destination."
> +   *
> +   * The same restriction is listed for other hardware platforms,
> however,
> +   * empirical testing suggests that only atom platforms are affected.
> +   */
> +  if ((devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) &&
> +  nir_src_bit_size(instr->src[0].src) == 16) {
> + fs_reg tmp =
> +horiz_stride(retype(bld.vgrf(BRW_REGISTER_TYPE_D, 1),
> result.type), 2);
> + bld.MOV(tmp, op[0]);
> + op[0] = tmp;
> +  }
> +  /* Fallthrough */
> +
> case nir_op_f2f32:
> case nir_op_f2i32:
> case nir_op_f2u32:
> -   case nir_op_f2i16:
> -   case nir_op_f2u16:
> case nir_op_i2i32:
> case nir_op_u2u32:
> case nir_op_i2i16:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 48/59] intel/compiler: implement isign for int8

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:19 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/compiler/brw_fs_nir.cpp | 25 +
>  1 file changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index db3a8812ae3..7a4594a24ac 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -1063,11 +1063,28 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
> *  Predicated OR sets 1 if val is positive.
> */
>uint32_t bit_size = nir_dest_bit_size(instr->dest.dest);
> -  assert(bit_size == 32 || bit_size == 16);
>
> -  fs_reg zero = bit_size == 32 ? brw_imm_d(0) : brw_imm_w(0);
> -  fs_reg one = bit_size == 32 ? brw_imm_d(1) : brw_imm_w(1);
> -  fs_reg shift = bit_size == 32 ? brw_imm_d(31) : brw_imm_w(15);
> +  fs_reg zero, one, shift;
> +  switch (bit_size) {
> +  case 32:
> + zero = brw_imm_d(0);
> + one = brw_imm_d(1);
> + shift = brw_imm_d(31);
> + break;
> +  case 16:
> + zero = brw_imm_w(0);
> + one = brw_imm_w(1);
> + shift = brw_imm_w(15);
> + break;
> +  case 8: {
> + zero = setup_imm_b(bld, 0);
> + one = setup_imm_b(bld, 1);
> + shift = setup_imm_b(bld, 7);
> + break;
> +  }
> +  default:
> + unreachable("unsupported bit-size");
> +  };
>
>bld.CMP(bld.null_reg_d(), op[0], zero, BRW_CONDITIONAL_G);
>bld.ASR(result, op[0], shift);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 53/59] intel/compiler: implement is_zero, is_one, is_negative_one for 8-bit/16-bit

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> There are no 8-bit immediates, so assert in that case.
> 16-bit immediates are replicated in each word of a 32-bit immediate, so
> we only need to check the lower 16-bits.
> ---
>  src/intel/compiler/brw_shader.cpp | 20 
>  1 file changed, 20 insertions(+)
>
> diff --git a/src/intel/compiler/brw_shader.cpp
> b/src/intel/compiler/brw_shader.cpp
> index b77bd798d17..adbb52f 100644
> --- a/src/intel/compiler/brw_shader.cpp
> +++ b/src/intel/compiler/brw_shader.cpp
> @@ -708,11 +708,18 @@ backend_reg::is_zero() const
> if (file != IMM)
>return false;
>
> +   assert(type_sz(type) > 1);
> +
>

We should probably also assert that things are properly replicated.


> switch (type) {
> +   case BRW_REGISTER_TYPE_HF:
> +  return (d & 0xffff) == 0;
>

Do we want to check for -0 as well?  I think that'd be 0x8000.


> case BRW_REGISTER_TYPE_F:
>return f == 0;
> case BRW_REGISTER_TYPE_DF:
>return df == 0;
> +   case BRW_REGISTER_TYPE_W:
> +   case BRW_REGISTER_TYPE_UW:
> +  return (d & 0xffff) == 0;
> case BRW_REGISTER_TYPE_D:
> case BRW_REGISTER_TYPE_UD:
>return d == 0;
> @@ -730,11 +737,18 @@ backend_reg::is_one() const
> if (file != IMM)
>return false;
>
> +   assert(type_sz(type) > 1);
>

Again, assert proper replication?


> +
> switch (type) {
> +   case BRW_REGISTER_TYPE_HF:
> +  return (d & 0xffff) == 0x3c00;
> case BRW_REGISTER_TYPE_F:
>return f == 1.0f;
> case BRW_REGISTER_TYPE_DF:
>return df == 1.0;
> +   case BRW_REGISTER_TYPE_W:
> +   case BRW_REGISTER_TYPE_UW:
> +  return (d & 0xffff) == 1;
> case BRW_REGISTER_TYPE_D:
> case BRW_REGISTER_TYPE_UD:
>return d == 1;
> @@ -752,11 +766,17 @@ backend_reg::is_negative_one() const
> if (file != IMM)
>return false;
>
> +   assert(type_sz(type) > 1);
> +
> switch (type) {
> +   case BRW_REGISTER_TYPE_HF:
> +  return (d & 0xffff) == 0xbc00;
> case BRW_REGISTER_TYPE_F:
>return f == -1.0;
> case BRW_REGISTER_TYPE_DF:
>return df == -1.0;
> +   case BRW_REGISTER_TYPE_W:
> +  return (d & 0x) == -1;
> case BRW_REGISTER_TYPE_D:
>return d == -1;
> case BRW_REGISTER_TYPE_Q:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 54/59] intel/compiler: add a brw_reg_type_is_integer helper

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:19 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/compiler/brw_reg_type.h | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/intel/compiler/brw_reg_type.h
> b/src/intel/compiler/brw_reg_type.h
> index ffbec90d3fe..a3365b7e34c 100644
> --- a/src/intel/compiler/brw_reg_type.h
> +++ b/src/intel/compiler/brw_reg_type.h
> @@ -82,6 +82,24 @@ brw_reg_type_is_floating_point(enum brw_reg_type type)
> }
>  }
>
> +static inline bool
> +brw_reg_type_is_integer(enum brw_reg_type type)
> +{
> +   switch (type) {
> +   case BRW_REGISTER_TYPE_Q:
> +   case BRW_REGISTER_TYPE_UQ:
> +   case BRW_REGISTER_TYPE_D:
> +   case BRW_REGISTER_TYPE_UD:
> +   case BRW_REGISTER_TYPE_W:
> +   case BRW_REGISTER_TYPE_UW:
> +   case BRW_REGISTER_TYPE_B:
> +   case BRW_REGISTER_TYPE_UV:
> +  return true;
> +   default:
> +  return false;
> +   }
> +}
> +
>  unsigned
>  brw_reg_type_to_hw_type(const struct gen_device_info *devinfo,
>  enum brw_reg_file file, enum brw_reg_type type);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] pci_ids: add new vega10 pci ids

2018-12-07 Thread Alex Deucher
Signed-off-by: Alex Deucher 
Cc: mesa-sta...@lists.freedesktop.org
---
 include/pci_ids/radeonsi_pci_ids.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/pci_ids/radeonsi_pci_ids.h 
b/include/pci_ids/radeonsi_pci_ids.h
index 35ea3559b02..f7defc4197a 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -227,8 +227,14 @@ CHIPSET(0x6863, VEGA10)
 CHIPSET(0x6864, VEGA10)
 CHIPSET(0x6867, VEGA10)
 CHIPSET(0x6868, VEGA10)
-CHIPSET(0x687F, VEGA10)
+CHIPSET(0x6869, VEGA10)
+CHIPSET(0x686A, VEGA10)
+CHIPSET(0x686B, VEGA10)
 CHIPSET(0x686C, VEGA10)
+CHIPSET(0x686D, VEGA10)
+CHIPSET(0x686E, VEGA10)
+CHIPSET(0x686F, VEGA10)
+CHIPSET(0x687F, VEGA10)
 
 CHIPSET(0x69A0, VEGA12)
 CHIPSET(0x69A1, VEGA12)
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] pci_ids: add new vega20 pci id

2018-12-07 Thread Alex Deucher
Signed-off-by: Alex Deucher 
Cc: mesa-sta...@lists.freedesktop.org
---
 include/pci_ids/radeonsi_pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/pci_ids/radeonsi_pci_ids.h 
b/include/pci_ids/radeonsi_pci_ids.h
index f7defc4197a..a2bc9213207 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -246,6 +246,7 @@ CHIPSET(0x66A0, VEGA20)
 CHIPSET(0x66A1, VEGA20)
 CHIPSET(0x66A2, VEGA20)
 CHIPSET(0x66A3, VEGA20)
+CHIPSET(0x66A4, VEGA20)
 CHIPSET(0x66A7, VEGA20)
 CHIPSET(0x66AF, VEGA20)
 
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 31/59] intel/compiler: fix ddx and ddy for 16-bit float

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> We were assuming 32-bit elements.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 34 +
>  1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index 08dd83dded7..bffd9bc4787 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1259,7 +1259,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
> struct brw_reg src0 = src;
> struct brw_reg src1 = src;
>
> -   src0.subnr   = sizeof(float);
> +   src0.subnr   = type_sz(src.type);
>

Should this be "+="?  I think this is broken if we're SIMD8 and in the
second half of a register.


> src0.vstride = vstride;
> src0.width   = width;
> src0.hstride = BRW_HORIZONTAL_STRIDE_0;
> @@ -1278,23 +1278,25 @@ void
>  fs_generator::generate_ddy(const fs_inst *inst,
> struct brw_reg dst, struct brw_reg src)
>  {
> +   const uint32_t type_size = type_sz(src.type);
> +
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>/* produce accurate derivatives */
>if (devinfo->gen >= 11) {
>   src = stride(src, 0, 2, 1);
> - struct brw_reg src_0  = byte_offset(src,  0 * sizeof(float));
> - struct brw_reg src_2  = byte_offset(src,  2 * sizeof(float));
> - struct brw_reg src_4  = byte_offset(src,  4 * sizeof(float));
> - struct brw_reg src_6  = byte_offset(src,  6 * sizeof(float));
> - struct brw_reg src_8  = byte_offset(src,  8 * sizeof(float));
> - struct brw_reg src_10 = byte_offset(src, 10 * sizeof(float));
> - struct brw_reg src_12 = byte_offset(src, 12 * sizeof(float));
> - struct brw_reg src_14 = byte_offset(src, 14 * sizeof(float));
> -
> - struct brw_reg dst_0  = byte_offset(dst,  0 * sizeof(float));
> - struct brw_reg dst_4  = byte_offset(dst,  4 * sizeof(float));
> - struct brw_reg dst_8  = byte_offset(dst,  8 * sizeof(float));
> - struct brw_reg dst_12 = byte_offset(dst, 12 * sizeof(float));
> + struct brw_reg src_0  = byte_offset(src,  0 * type_size);
> + struct brw_reg src_2  = byte_offset(src,  2 * type_size);
> + struct brw_reg src_4  = byte_offset(src,  4 * type_size);
> + struct brw_reg src_6  = byte_offset(src,  6 * type_size);
> + struct brw_reg src_8  = byte_offset(src,  8 * type_size);
> + struct brw_reg src_10 = byte_offset(src, 10 * type_size);
> + struct brw_reg src_12 = byte_offset(src, 12 * type_size);
> + struct brw_reg src_14 = byte_offset(src, 14 * type_size);
> +
> + struct brw_reg dst_0  = byte_offset(dst,  0 * type_size);
> + struct brw_reg dst_4  = byte_offset(dst,  4 * type_size);
> + struct brw_reg dst_8  = byte_offset(dst,  8 * type_size);
> + struct brw_reg dst_12 = byte_offset(dst, 12 * type_size);
>
>   brw_push_insn_state(p);
>   brw_set_default_exec_size(p, BRW_EXECUTE_4);
> @@ -1323,8 +1325,8 @@ fs_generator::generate_ddy(const fs_inst *inst,
>/* replicate the derivative at the top-left pixel to other pixels */
>struct brw_reg src0 = stride(src, 4, 4, 0);
>struct brw_reg src1 = stride(src, 4, 4, 0);
> -  src0.subnr = 0 * sizeof(float);
> -  src1.subnr = 2 * sizeof(float);
> +  src0.subnr = 0 * type_size;
> +  src1.subnr = 2 * type_size;
>

Again, +=?  Or, better yet, maybe byte_offset().


>
>brw_ADD(p, dst, negate(src0), src1);
> }
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 32/59] intel/compiler: fix 16-bit float ddx and ddy for SIMD8

2018-12-07 Thread Jason Ekstrand
And here we are.  I think I'd still like byte_offset better but, either
way patches 31 and 32 are

Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> In SIMD8 we pack 2 vector components in a single SIMD register, so
> for example, component Y of a 16-bit vec2 starts at byte offset
> 16B. This means that when we compute the offset of the elements to
> be differentiated we should not stomp whatever base offset we have,
> but instead add to it.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index bffd9bc4787..d8e4bae17e0 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1259,7 +1259,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
> struct brw_reg src0 = src;
> struct brw_reg src1 = src;
>
> -   src0.subnr   = type_sz(src.type);
> +   src0.subnr  += type_sz(src.type);
> src0.vstride = vstride;
> src0.width   = width;
> src0.hstride = BRW_HORIZONTAL_STRIDE_0;
> @@ -1325,8 +1325,8 @@ fs_generator::generate_ddy(const fs_inst *inst,
>/* replicate the derivative at the top-left pixel to other pixels */
>struct brw_reg src0 = stride(src, 4, 4, 0);
>struct brw_reg src1 = stride(src, 4, 4, 0);
> -  src0.subnr = 0 * type_size;
> -  src1.subnr = 2 * type_size;
> +  src0.subnr += 0 * type_size;
> +  src1.subnr += 2 * type_size;
>
>brw_ADD(p, dst, negate(src0), src1);
> }
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 39/59] anv/device: expose support for shaderFloat16 in gen8+

2018-12-07 Thread Jason Ekstrand
Pending review on previous patches.  37 and 39 are

Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/vulkan/anv_device.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 6b5ba25c6bc..caf25ad8a03 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -966,6 +966,15 @@ void anv_GetPhysicalDeviceFeatures2(
>   break;
>}
>
> +  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
> + VkPhysicalDeviceFloat16Int8FeaturesKHR *features = (void *)ext;
> + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
> +
> + features->shaderFloat16 = pdevice->info.gen >= 8;
> + features->shaderInt8 = false;
> + break;
> +  }
> +
>default:
>   anv_debug_ignored_stype(ext->sType);
>   break;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 40/59] anv/extensions: expose VK_KHR_shader_float16_int8 on gen8+

2018-12-07 Thread Jason Ekstrand
rb

On Tue, Dec 4, 2018 at 1:19 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/vulkan/anv_extensions.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/intel/vulkan/anv_extensions.py
> b/src/intel/vulkan/anv_extensions.py
> index 7c81228f705..9ca42d998ef 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -104,6 +104,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
>  Extension('VK_KHR_sampler_ycbcr_conversion',  1, True),
>  Extension('VK_KHR_shader_draw_parameters',1, True),
> +Extension('VK_KHR_shader_float16_int8',   1,
> 'device->info.gen >= 8'),
>  Extension('VK_KHR_storage_buffer_storage_class',  1, True),
>  Extension('VK_KHR_surface',  25,
> 'ANV_HAS_SURFACE'),
>  Extension('VK_KHR_swapchain',68,
> 'ANV_HAS_SURFACE'),
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/11] i965 shader debug through KHR_debug

2018-12-07 Thread Mark Janes
Ilia Mirkin  writes:

> On Thu, Dec 6, 2018 at 7:36 PM Mark Janes  wrote:
>>
>> This series provides Intel shader compilation debug information via
>> KHR_debug.  Previously, shader assembly and related compilation
>> artifacts were dumped to stderr.  Tools associating compilation
>> artifacts with programs (e.g. FrameRetrace*) parsed stderr, which was
>> error prone.  Changes to the shader debug formats and the addition of
>> shader cache assembly dumps further complicate the task of parsing
>> stderr.
>>
>> KHR_debug provides synchronous callbacks with stable identifiers,
>> simplifying the task of matching debug artifacts with the originating
>> GLSL.
>
> One observation is that while the identifiers may be stable within a
> single execution, they will not be stable across different
> applications / traces. id's are set on a first-come first-serve basis,
> so depending on the exact order, the id's will end up different.
>
> Is that OK for frameretrace? Another approach would be to create
> globally-hardcoded id's for these, and start the auto-allocation in a
> higher range.

I did take a few steps down the path of globally hard-coded id's.  I
agree with Eric's assessment in debug_output.c that a giant enum list of
all debug message id's is unworkable.

We could divide id's into regions based on the high bits, and allocate
the lower bits to components to declare and manage.  This would break up
the monolithic declaration of id's, but you still have the issue of id
stability as the driver changes over time.  Refactoring functionality to
a location where it can be shared would involve changing the id's of
emitted debug messages.  Also, any taxonomy used to split up the id's
will probably look shortsighted in 5 years time.

I personally haven't seen a great solution for sharing a 32-bit debug id
within a large project.  The idea of exporting the id's to external
tools is even more problematic.

Rather than try to solve that problem, I decided to associate unknown
id's with the semantic meaning by parsing the first few words in
frameretrace.  A stable id means subsequent messages can be handled
without parsing.  Unknown messages are turned off via KHR_debug.  This is
much better than parsing stderr, where it is difficult to determine message
boundaries.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] docs: Document GitLab merge request process (email alternative)

2018-12-07 Thread Eric Engestrom
On Friday, 2018-12-07 10:19:23 +0100, Erik Faye-Lund wrote:
> On Wed, 2018-12-05 at 21:46 -0600, Jason Ekstrand wrote:
> > On Wed, Dec 5, 2018 at 7:05 PM Jordan Justen <
> > jordan.l.jus...@intel.com> wrote:
> > > On 2018-12-05 15:44:18, Jason Ekstrand wrote:
> > > > On Wed, Dec 5, 2018 at 5:32 PM Jordan Justen <
> > > jordan.l.jus...@intel.com>
> > > > wrote:
> > > > > -Mailing Patches
> > > > > +Submitting Patches
> > > > >
> > > > >  
> > > > > -Patches should be sent to the mesa-dev mailing list for
> > > review:
> > > > > +Patches may be submitted to the Mesa project by
> > > > > +email or with a
> > > > > +GitLab merge request. To prevent
> > > > > +duplicate code review, only use one method to submit your
> > > changes.
> > > > > +
> > > > >
> > > > 
> > > > Do we want to require a cover-letter to be sent to the ML? 
> > > Ideally, we'd
> > > > just have a bot that makes one every time someone submits a MR
> > > and sends it
> > > > to the list.  Maybe someone just needs to write that bot.
> > > > 
> > > > > +
> > > > > +Mailing Patches
> > > > > +
> > > > > +
> > > > > +Patches may be sent to the mesa-dev mailing list for review:
> > > > >  https://lists.freedesktop.org/mailman/listinfo/mesa-dev;>
> > > > >  mesa-dev@lists.freedesktop.org.
> > > > >  When submitting a patch make sure to use
> > > > > @@ -217,8 +228,63 @@ disabled before sending your patches.
> > > (Note that you
> > > > > may need to contact
> > > > >  your email administrator for this.)
> > > > >  
> > > > >
> > > > > +GitLab Merge Requests
> > > > > +
> > > > > +
> > > > > +  https://gitlab.freedesktop.org/mesa/mesa;>GitLab > > > Merge
> > > > > +  Requests (MR) can also be used to submit patches for Mesa.
> > > > > +
> > > > > +
> > > > > +
> > > > > +  If the MR may have interest for most of the Mesa community,
> > > you can
> > > > > +  send an email to the mesa-dev email list including a link to
> > > the MR.
> > > > > +  Don't send the patch to mesa-dev, just the MR link.
> > > 
> > > Regarding the cover-letter, I put in this weasel worded sentence.
> > > Should it instead say this is required and that it should be a git
> > > format-patch generated cover letter?
> > 
> > I didn't read far enough.  No, I don't think we need to require
> > git-send-email formatted.
> >  
> > > Or, should we drop it entirely and assume we'll get an automated
> > > way
> > > to send an email to the list whenever a new MR is opened?
> > > 
> > > Relatedly, I think it might be possible to enable an irc channel to
> > > be
> > > notified about pushes and MR's. Not sure if it'd be a good idea, or
> > > maybe too noisy.
> > 
> > We should totally have an IRC bot.  We had one for wayland and weston
> > when I was working on those and it was great.  If it notifies us of
> > every change, it may be too much but if it dumps something in the
> > channel for every new MR, that shouldn't be bad at all.
> 
> Automated emails (and perhaps IRC bot) would be really nice.

Agreed. Email would be great to help with the transition.
There's work currently being done on GitLab to allow for mailing lists
to be notified; this should cover 'new MR' as well.
If we need this feature before GitLab is done, it should be possible to
write a bot using the webhooks, just needs someone to take the time to
do it :)

For IRC, there's already some integration, but it's limited to notifying
about git pushes for now:
https://docs.gitlab.com/ee/user/project/integrations/irker.html

There's an open issue about adding more events, but it hasn't seen much
activity:
https://gitlab.com/gitlab-org/gitlab-ce/issues/7965

> Even better if it could be hooked up to scripts/get_reviewer.pl, and
> automatically CC "the right people".

Side note, I've been rewriting that script, although I need to send v2
out at some point:
https://patchwork.freedesktop.org/patch/226256/

It would be trivial to hook that into a bot we'd write, but I don't think
GitLab has support for something like this. I just opened an issue about
adding support directly in GitLab:
https://gitlab.com/gitlab-org/gitlab-ce/issues/55035

> Perhaps we can also somehow map emails to irc nicks?

We maintain such a list on the wiki already:
https://dri.freedesktop.org/wiki/WhosWho/

We could add this mapping to a file, but how would you use it? Are you
suggesting a bot would directly query people on irc for each MR matching
their REVIEWERS group?

I think direct email + mailing list + irc channel would be enough there,
direct irc query feels too spammy.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 49/59] intel/eu: force stride of 2 on NULL register for Byte instructions

2018-12-07 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> The hardware only allows a stride of 1 on a Byte destination for raw
> byte MOV instructions. This is required even when the destination
> is the NULL register.
>
> Rather than making sure that we emit a proper NULL:B destination
> every time we need one, just fix it at emission time.
> ---
>  src/intel/compiler/brw_eu_emit.c | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu_emit.c
> b/src/intel/compiler/brw_eu_emit.c
> index 66edfb43baf..eef36705c7b 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -94,6 +94,17 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst,
> struct brw_reg dest)
> else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
>assert(dest.nr < 128);
>
> +   /* The hardware has a restriction where if the destination is Byte,
> +* the instruction needs to have a stride of 2 (except for packed byte
> +* MOV). This seems to be required even if the destination is the NULL
> +* register.
> +*/
> +   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
> +   dest.nr == BRW_ARF_NULL &&
> +   type_sz(dest.type) == 1) {
> +  dest.hstride = BRW_HORIZONTAL_STRIDE_2;
> +   }
> +
> gen7_convert_mrf_to_grf(p, );
>
> brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 34/59] intel/compiler: fix ddy for half-float in gen8

2018-12-07 Thread Jason Ekstrand
On Tue, Dec 4, 2018 at 1:19 AM Iago Toral Quiroga  wrote:

> We use ALign16 mode for this, since it is more convenient, but the PRM
> for Broadwell states in Volume 3D Media GPGPU, Chapter 'Register region
> restrictions', Section '1. Special Restrictions':
>
>"In Align16 mode, the channel selects and channel enables apply to a
> pair of half-floats, because these parameters are defined for DWord
> elements ONLY. This is applicable when both source and destination
> are half-floats."
>
> This means that we cannot select individual HF elements using swizzles
> like we do with 32-bit floats so we can't implement the required
> regioning for this.
>
> Use the gen11 path for this instead, which uses Align1 mode.
>
> The restriction is not present in gen9 of gen10, where the Align16
>

"or gen10"?

Reviewed-by: Jason Ekstrand 


> implementation seems to work just fine.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index d8e4bae17e0..ba7ed07e692 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1281,8 +1281,14 @@ fs_generator::generate_ddy(const fs_inst *inst,
> const uint32_t type_size = type_sz(src.type);
>
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
> -  /* produce accurate derivatives */
> -  if (devinfo->gen >= 11) {
> +  /* produce accurate derivatives. We can do this easily in Align16
> +   * but this is not supported in gen11+ and gen8 Align16 swizzles
> +   * for Half-Float operands work in units of 32-bit and always
> +   * select pairs of consecutive half-float elements, so we can't use
> +   * use it for this.
> +   */
> +  if (devinfo->gen >= 11 ||
> +  (devinfo->gen == 8 && src.type == BRW_REGISTER_TYPE_HF)) {
>   src = stride(src, 0, 2, 1);
>   struct brw_reg src_0  = byte_offset(src,  0 * type_size);
>   struct brw_reg src_2  = byte_offset(src,  2 * type_size);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 52/59] anv/device: expose shaderInt8 feature

2018-12-07 Thread Jason Ekstrand
51 and 52 should probably be rolled together.  For that matter, I don't
think we need 6 patches just to add two SPIR-V capabilities and advertise
one extension.  Maybe roll the two SPIR-V patches together, add one or two
for the extension and the enables?

On Tue, Dec 4, 2018 at 1:18 AM Iago Toral Quiroga  wrote:

> ---
>  src/intel/vulkan/anv_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index caf25ad8a03..17b73c115cd 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -971,7 +971,7 @@ void anv_GetPhysicalDeviceFeatures2(
>   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
>
>   features->shaderFloat16 = pdevice->info.gen >= 8;
> - features->shaderInt8 = false;
> + features->shaderInt8 = pdevice->info.gen >= 8;
>   break;
>}
>
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] intel/blorp: Make KSP a blorp_address instead of an offset.

2018-12-07 Thread Jason Ekstrand
I somewhat recant my statements below.  Even in a driver that puts the full
address in the offset field, having the BO pointer may still be useful for
the purpose of adding it to a residency list somewhere.

On Fri, Dec 7, 2018 at 4:13 PM Jason Ekstrand  wrote:

> I kind-of wonder if we want to allow for relocations and not surface state
> base address in which case you'd want to do something "real" with offset.
> Making this an address and then ignoring the buffer part entirely seems
> like it promises a bit too much.  Then again, I don't think it's really
> hurting anything.  Meh; this is probably fine.
>
> On Thu, Nov 29, 2018 at 2:24 AM Kenneth Graunke 
> wrote:
>
>> In i965, shader programs live in a single buffer, and every batch emits
>> a STATE_BASE_ADDRESS packet pointing to that buffer.  This takes care of
>> pinning the buffer for the batch; from then on, we can use an offset.
>>
>> In the upcoming Iris driver, shader programs can live in multiple
>> buffers, and those buffers need to be pinned when shader assembly is
>> referenced.  To aid in this, we turn KSP into a blorp_address rather
>> than a 32-bit offset.  This lets us also pass a buffer to pin.
>>
>> For now, we simply assert the BO is NULL and return the offset, as
>> we did before - nothing should behave differently.
>> ---
>>  src/intel/blorp/blorp.h |  7 --
>>  src/intel/blorp/blorp_genX_exec.h   | 27 +
>>  src/intel/blorp/blorp_priv.h|  6 ++---
>>  src/intel/vulkan/anv_blorp.c| 10 +---
>>  src/mesa/drivers/dri/i965/brw_blorp.c   | 18 ++
>>  src/mesa/drivers/dri/i965/gen4_blorp_exec.h | 12 -
>>  6 files changed, 50 insertions(+), 30 deletions(-)
>>
>> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
>> index 1e22712602d..da0c9ac205c 100644
>> --- a/src/intel/blorp/blorp.h
>> +++ b/src/intel/blorp/blorp.h
>> @@ -35,6 +35,7 @@ struct brw_stage_prog_data;
>>  extern "C" {
>>  #endif
>>
>> +struct blorp_address;
>>  struct blorp_batch;
>>  struct blorp_params;
>>
>> @@ -47,13 +48,15 @@ struct blorp_context {
>>
>> bool (*lookup_shader)(struct blorp_context *blorp,
>>   const void *key, uint32_t key_size,
>> - uint32_t *kernel_out, void *prog_data_out);
>> + struct blorp_address *kernel_out,
>> + void *prog_data_out);
>> bool (*upload_shader)(struct blorp_context *blorp,
>>   const void *key, uint32_t key_size,
>>   const void *kernel, uint32_t kernel_size,
>>   const struct brw_stage_prog_data *prog_data,
>>   uint32_t prog_data_size,
>> - uint32_t *kernel_out, void *prog_data_out);
>> + struct blorp_address *kernel_out,
>> + void *prog_data_out);
>> void (*exec)(struct blorp_batch *batch, const struct blorp_params
>> *params);
>>  };
>>
>> diff --git a/src/intel/blorp/blorp_genX_exec.h
>> b/src/intel/blorp/blorp_genX_exec.h
>> index 065980616ec..20f30c7116d 100644
>> --- a/src/intel/blorp/blorp_genX_exec.h
>> +++ b/src/intel/blorp/blorp_genX_exec.h
>> @@ -108,6 +108,13 @@ _blorp_combine_address(struct blorp_batch *batch,
>> void *location,
>> }
>>  }
>>
>> +static uint64_t
>> +KSP(struct blorp_batch *batch, struct blorp_address address)
>> +{
>> +   assert(address.buffer == NULL);
>> +   return address.offset;
>> +}
>> +
>>  #define __gen_address_type struct blorp_address
>>  #define __gen_user_data struct blorp_batch
>>  #define __gen_combine_address _blorp_combine_address
>> @@ -615,7 +622,7 @@ blorp_emit_vs_config(struct blorp_batch *batch,
>>if (vs_prog_data) {
>>   vs.Enable = true;
>>
>> - vs.KernelStartPointer = params->vs_prog_kernel;
>> + vs.KernelStartPointer = KSP(batch, params->vs_prog_kernel);
>>
>>   vs.DispatchGRFStartRegisterForURBData =
>>  vs_prog_data->base.base.dispatch_grf_start_reg;
>> @@ -795,11 +802,11 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>>   ps.DispatchGRFStartRegisterForConstantSetupData2 =
>>  brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
>>
>> - ps.KernelStartPointer0 = params->wm_prog_kernel +
>> + ps.KernelStartPointer0 = KSP(batch, params->wm_prog_kernel) +
>>
>>  brw_wm_prog_data_prog_offset(prog_data, ps, 0);
>> - ps.KernelStartPointer1 = params->wm_prog_kernel +
>> + ps.KernelStartPointer1 = KSP(batch, params->wm_prog_kernel) +
>>
>>  brw_wm_prog_data_prog_offset(prog_data, ps, 1);
>> - ps.KernelStartPointer2 = params->wm_prog_kernel +
>> + ps.KernelStartPointer2 = KSP(batch, params->wm_prog_kernel) +
>>
>>  brw_wm_prog_data_prog_offset(prog_data, ps, 2);
>>}
>>
>> @@ -905,11 +912,11 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>>   

[Mesa-dev] [RFC PATCH 06/14] anv/allocator: Add getters for anv_block_pool.

2018-12-07 Thread Rafael Antognolli
We will especially need anv_block_pool_map, to find the
map relative to some BO that is not at the start of the block pool.
---
 src/intel/vulkan/anv_allocator.c   | 23 ---
 src/intel/vulkan/anv_batch_chain.c |  5 +++--
 src/intel/vulkan/anv_private.h |  7 +++
 src/intel/vulkan/genX_blorp_exec.c |  5 +++--
 4 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index cda6a1a9d25..acf3c80fbac 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -601,6 +601,15 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
return VK_SUCCESS;
 }
 
+struct anv_pool_map
+anv_block_pool_map(struct anv_block_pool *pool, int32_t offset)
+{
+   return (struct anv_pool_map) {
+  .map = pool->map,
+  .offset = offset,
+   };
+}
+
 /** Grows and re-centers the block pool.
  *
  * We grow the block pool in one or both directions in such a way that the
@@ -967,7 +976,9 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
st_idx + i);
state_i->alloc_size = pool->block_size;
state_i->offset = chunk_offset + pool->block_size * (i + 1);
-   state_i->map = pool->block_pool.map + state_i->offset;
+   struct anv_pool_map pool_map = 
anv_block_pool_map(>block_pool,
+ 
state_i->offset);
+   state_i->map = pool_map.map + pool_map.offset;
 }
 anv_state_table_push(>buckets[block_bucket].free_list,
  >table, st_idx, push_back);
@@ -983,7 +994,9 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
 st_idx + i);
 state_i->alloc_size = alloc_size;
 state_i->offset = chunk_offset + alloc_size * (i + 1);
-state_i->map = pool->block_pool.map + state_i->offset;
+struct anv_pool_map pool_map = 
anv_block_pool_map(>block_pool,
+  state_i->offset);
+state_i->map = pool_map.map + pool_map.offset;
  }
  anv_state_table_push(>buckets[bucket].free_list,
   >table, st_idx, push_back);
@@ -1002,7 +1015,11 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
state = anv_state_table_get(>table, idx);
state->offset = offset;
state->alloc_size = alloc_size;
-   state->map = pool->block_pool.map + offset;
+
+   struct anv_pool_map pool_map = anv_block_pool_map(>block_pool,
+ state->offset);
+   state->map = pool_map.map + pool_map.offset;
+
 
 done:
return *state;
diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index a9f8c5b79b1..6c06858efe1 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -679,8 +679,9 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer 
*cmd_buffer,
   return (struct anv_state) { 0 };
 
state.offset = cmd_buffer->bt_next;
-   state.map = anv_binding_table_pool(device)->block_pool.map +
-  bt_block->offset + state.offset;
+   struct anv_pool_map pool_map =
+  anv_block_pool_map(_binding_table_pool(device)->block_pool, 
bt_block->offset + state.offset);
+   state.map = pool_map.map + pool_map.offset;
 
cmd_buffer->bt_next += state.alloc_size;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 539523450ef..a364be8dad5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -749,6 +749,11 @@ struct anv_state_stream {
struct anv_state_stream_block *block_list;
 };
 
+struct anv_pool_map {
+   void *map;
+   int32_t offset;
+};
+
 /* The block_pool functions exported for testing only.  The block pool should
  * only be used via a state pool (see below).
  */
@@ -762,6 +767,8 @@ int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
  uint32_t block_size);
 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
   uint32_t block_size);
+struct anv_pool_map anv_block_pool_map(struct anv_block_pool *pool,
+   int32_t offset);
 
 VkResult anv_state_pool_init(struct anv_state_pool *pool,
  struct anv_device *device,
diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index c573e890946..5af6abb0894 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -63,8 +63,9 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t 
ss_offset,
if (result != VK_SUCCESS)
   anv_batch_set_error(_buffer->batch, result);
 
-   void *dest = 

[Mesa-dev] [RFC PATCH 13/14] anv/allocator: Add padding information.

2018-12-07 Thread Rafael Antognolli
It's possible that we still have some space left in the block pool, but
we try to allocate a state larger than that space. This means such state
would start somewhere within the range of the old block_pool, and end
after that range, within the range of the new size.

That's fine when we use userptr, since the memory in the block pool is
CPU mapped contiguously. However, by the end of this series, we will
have the block_pool split into different BOs, with different CPU
mapping ranges that are not necessarily contiguous. So we must avoid
such case of a given state being part of two different BOs in the block
pool.

This commit solves the issue by detecting that we are growing the
block_pool even though we are not at the end of the range. If that
happens, we don't use the space left at the end of the old size, and
consider it as "padding" that can't be used in the allocation. We update
the size requested from the block pool to take the padding into account,
and return the offset after the padding, which happens to be at the
start of the new address range.

Additionally, we return the amount of padding we used, so the caller
knows that this happens and can return that padding back into a list of
free states, that can be reused later. This way we hopefully don't waste
any space, but also avoid having a state split between two different
BOs.
---
 src/intel/vulkan/anv_allocator.c| 57 ++---
 src/intel/vulkan/anv_private.h  |  2 +-
 src/intel/vulkan/tests/block_pool_no_free.c |  2 +-
 3 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index bddeb4a0fbd..0d426edfb57 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -839,16 +839,35 @@ done:
 static uint32_t
 anv_block_pool_alloc_new(struct anv_block_pool *pool,
  struct anv_block_state *pool_state,
- uint32_t block_size)
+ uint32_t block_size, uint32_t *padding)
 {
struct anv_block_state state, old, new;
 
+   /* Most allocations won't generate any padding */
+   if (padding)
+  *padding = 0;
+
while (1) {
   state.u64 = __sync_fetch_and_add(_state->u64, block_size);
   if (state.next + block_size <= state.end) {
  assert(pool->map);
  return state.next;
   } else if (state.next <= state.end) {
+ if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) {
+/* We need to grow the block pool, but still have some leftover
+ * space that can't be used by that particular allocation. So we
+ * add that as a "padding", and return it.
+ */
+uint32_t leftover = state.end - state.next;
+block_size += leftover;
+
+/* If there is some leftover space in the pool, the caller must
+ * deal with it.
+ */
+assert(leftover == 0 || padding);
+*padding = leftover;
+ }
+
  /* We allocated the first block outside the pool so we have to grow
   * the pool.  pool_state->next acts a mutex: threads who try to
   * allocate now will get block indexes above the current limit and
@@ -872,9 +891,16 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
 
 int32_t
 anv_block_pool_alloc(struct anv_block_pool *pool,
- uint32_t block_size)
+ uint32_t block_size, uint32_t *padding)
 {
-   return anv_block_pool_alloc_new(pool, >state, block_size);
+   uint32_t offset;
+
+   offset = anv_block_pool_alloc_new(pool, >state, block_size, padding);
+
+   if (padding && *padding > 0)
+  offset += *padding;
+
+   return offset;
 }
 
 /* Allocates a block out of the back of the block pool.
@@ -891,7 +917,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool,
   uint32_t block_size)
 {
int32_t offset = anv_block_pool_alloc_new(pool, >back_state,
- block_size);
+ block_size, NULL);
 
/* The offset we get out of anv_block_pool_alloc_new() is actually the
 * number of bytes downwards from the middle to the end of the block.
@@ -947,16 +973,24 @@ static uint32_t
 anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
 struct anv_block_pool *block_pool,
 uint32_t state_size,
-uint32_t block_size)
+uint32_t block_size,
+uint32_t *padding)
 {
struct anv_block_state block, old, new;
uint32_t offset;
 
+   /* We don't always use anv_block_pool_alloc(), which would set *padding to
+* zero for us. So if we have a pointer to padding, we must zero it out
+* ourselves here, to make sure we always 

[Mesa-dev] [RFC PATCH 11/14] anv: Remove some asserts.

2018-12-07 Thread Rafael Antognolli
They won't be true anymore once we add support for multiple BOs with
non-userptr.
---
 src/intel/vulkan/genX_gpu_memcpy.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
b/src/intel/vulkan/genX_gpu_memcpy.c
index 1bee1c6dc17..e20179fa675 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -133,9 +133,6 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
if (size == 0)
   return;
 
-   assert(dst.offset + size <= dst.bo->size);
-   assert(src.offset + size <= src.bo->size);
-
/* The maximum copy block size is 4 32-bit components at a time. */
assert(size % 4 == 0);
unsigned bs = gcd_pow2_u64(16, size);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 05/14] anv/allocator: Remove usage of anv_free_list.

2018-12-07 Thread Rafael Antognolli
Maybe we should already rename anv_free_list2 -> anv_free_list since the
old one is gone.
---
 src/intel/vulkan/anv_allocator.c | 55 
 src/intel/vulkan/anv_private.h   | 11 ---
 2 files changed, 66 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 2171a97970b..cda6a1a9d25 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -373,61 +373,6 @@ anv_state_table_pop(union anv_free_list2 *list,
return NULL;
 }
 
-static bool
-anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
-{
-   union anv_free_list current, new, old;
-
-   current.u64 = list->u64;
-   while (current.offset != EMPTY) {
-  /* We have to add a memory barrier here so that the list head (and
-   * offset) gets read before we read the map pointer.  This way we
-   * know that the map pointer is valid for the given offset at the
-   * point where we read it.
-   */
-  __sync_synchronize();
-
-  int32_t *next_ptr = *map + current.offset;
-  new.offset = VG_NOACCESS_READ(next_ptr);
-  new.count = current.count + 1;
-  old.u64 = __sync_val_compare_and_swap(>u64, current.u64, new.u64);
-  if (old.u64 == current.u64) {
- *offset = current.offset;
- return true;
-  }
-  current = old;
-   }
-
-   return false;
-}
-
-static void
-anv_free_list_push(union anv_free_list *list, void *map, int32_t offset,
-   uint32_t size, uint32_t count)
-{
-   union anv_free_list current, old, new;
-   int32_t *next_ptr = map + offset;
-
-   /* If we're returning more than one chunk, we need to build a chain to add
-* to the list.  Fortunately, we can do this without any atomics since we
-* own everything in the chain right now.  `offset` is left pointing to the
-* head of our chain list while `next_ptr` points to the tail.
-*/
-   for (uint32_t i = 1; i < count; i++) {
-  VG_NOACCESS_WRITE(next_ptr, offset + i * size);
-  next_ptr = map + offset + i * size;
-   }
-
-   old = *list;
-   do {
-  current = old;
-  VG_NOACCESS_WRITE(next_ptr, current.offset);
-  new.offset = offset;
-  new.count = current.count + 1;
-  old.u64 = __sync_val_compare_and_swap(>u64, current.u64, new.u64);
-   } while (old.u64 != current.u64);
-}
-
 /* All pointers in the ptr_free_list are assumed to be page-aligned.  This
  * means that the bottom 12 bits should all be zero.
  */
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index d068a4be5d8..539523450ef 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -605,16 +605,6 @@ anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, 
uint64_t size)
  * both the block pool and the state pools.  Unfortunately, in order to
  * solve the ABA problem, we can't use a single uint32_t head.
  */
-union anv_free_list {
-   struct {
-  int32_t offset;
-
-  /* A simple count that is incremented every time the head changes. */
-  uint32_t count;
-   };
-   uint64_t u64;
-};
-
 union anv_free_list2 {
struct {
   uint32_t offset;
@@ -625,7 +615,6 @@ union anv_free_list2 {
uint64_t u64;
 };
 
-#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
 #define ANV_FREE_LIST2_EMPTY ((union anv_free_list2) { { UINT32_MAX, 0 } })
 
 struct anv_block_state {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >