[Mesa-dev] [PATCH] nv50: implement multisample textures

2013-10-21 Thread Bryan Cain
This is a port of 4da54c91d24da ("nvc0: implement multisample textures") to
nv50.

When coupled with the patch to only report 16 texture samplers (to fix
crashes), all of the Piglit tests in spec/arb_texture_multisample pass.
---
 .../nouveau/codegen/nv50_ir_lowering_nv50.cpp  |5 ++-
 src/gallium/drivers/nouveau/nv50/nv50_context.c|   46 
 src/gallium/drivers/nouveau/nv50/nv50_miptree.c|2 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |3 +-
 src/gallium/drivers/nouveau/nv50/nv50_tex.c|   20 +++--
 5 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index caaf09f..d5d1f1e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -569,6 +569,7 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
const int arg = i->tex.target.getArgCount();
const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
+   const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
 
// dref comes before bias/lod
if (i->tex.target.isShadow())
@@ -577,11 +578,11 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
 
// array index must be converted to u32
if (i->tex.target.isArray()) {
-  Value *layer = i->getSrc(arg - 1);
+  Value *layer = i->getSrc(lyr);
   LValue *src = new_LValue(func, FILE_GPR);
   bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
   bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
-  i->setSrc(arg - 1, src);
+  i->setSrc(lyr, src);
 
   if (i->tex.target.isCube()) {
  std::vector acube, a2d;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c 
b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index b6bdf79..45e3f5d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -194,6 +194,10 @@ nv50_invalidate_resource_storage(struct nouveau_context 
*ctx,
return ref;
 }
 
+static void
+nv50_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
+ float *);
+
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, void *priv)
 {
@@ -237,6 +241,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
 
pipe->flush = nv50_flush;
pipe->texture_barrier = nv50_texture_barrier;
+   pipe->get_sample_position = nv50_context_get_sample_position;
 
if (!screen->cur_ctx) {
   screen->cur_ctx = nv50;
@@ -315,3 +320,44 @@ nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean 
on_flush)
  nv50_resource_validate(res, (unsigned)ref->priv_data);
}
 }
+
+static void
+nv50_context_get_sample_position(struct pipe_context *pipe,
+ unsigned sample_count, unsigned sample_index,
+ float *xy)
+{
+   static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
+   static const uint8_t ms2[2][2] = {
+  { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */
+   static const uint8_t ms4[4][2] = {
+  { 0x6, 0x2 }, { 0xe, 0x6 },   /* (0,0), (1,0) */
+  { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */
+   static const uint8_t ms8[8][2] = {
+  { 0x1, 0x7 }, { 0x5, 0x3 },   /* (0,0), (1,0) */
+  { 0x3, 0xd }, { 0x7, 0xb },   /* (0,1), (1,1) */
+  { 0x9, 0x5 }, { 0xf, 0x1 },   /* (2,0), (3,0) */
+  { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
+#if 0
+   /* NOTE: NVA3+ has alternative modes for MS2 and MS8, currently not used */
+   static const uint8_t ms8_alt[8][2] = {
+  { 0x9, 0x5 }, { 0x7, 0xb },   /* (2,0), (1,1) */
+  { 0xd, 0x9 }, { 0x5, 0x3 },   /* (3,1), (1,0) */
+  { 0x3, 0xd }, { 0x1, 0x7 },   /* (0,1), (0,0) */
+  { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
+#endif
+
+   const uint8_t (*ptr)[2];
+
+   switch (sample_count) {
+   case 0:
+   case 1: ptr = ms1; break;
+   case 2: ptr = ms2; break;
+   case 4: ptr = ms4; break;
+   case 8: ptr = ms8; break;
+   default:
+  assert(0);
+  return; /* bad sample count -> undefined locations */
+   }
+   xy[0] = ptr[sample_index][0] * 0.0625f;
+   xy[1] = ptr[sample_index][1] * 0.0625f;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c 
b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 513d8f9..1963a4a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -277,6 +277,8 @@ nv50_miptree_init_layout_tiled(struct nv50_miptree *mt)
 */
d = mt->layout_3d ? pt->depth0 : 1;
 
+   assert(!mt->ms_mode || !pt->last_level);
+
for (l = 0; l <= pt->last_level; ++l) {
   struct nv50_miptree_level *lvl = &mt->level[l];
   unsigned tsx, tsy, tsz;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_s

Re: [Mesa-dev] [PATCH] nv50: implement multisample textures

2013-10-25 Thread Bryan Cain
On 10/25/2013 01:35 PM, Emil Velikov wrote:
> On 21/10/13 23:23, Bryan Cain wrote:
>> This is a port of 4da54c91d24da ("nvc0: implement multisample textures") to
>> nv50.
>>
>> When coupled with the patch to only report 16 texture samplers (to fix
>> crashes), all of the Piglit tests in spec/arb_texture_multisample pass.
>>
> Hello Bryan,
>
> Big thanks for your work. As promised here is a quick piglit summary on
> my nv96
>
> pass/fail/crash
> 69/32/27
>
> * dmesg does not spit anything nouveau related during the tests
> * any geometry shader related tests were skipped
> (piglit: info: Failed to create GL 3.2 core context)
> * all the crashes are due to the following assert
> codegen/nv50_ir_emit_nv50.cpp:1393:emitTEX: Assertion `argc <= 4' failed.
>
> PASSarb_texture_multisample-*
> PASSfb-completeness/*
> FAILsample-position/*
> FAILtexelFetch fs sampler2DMS 4*
> CRASH   texelFetch fs sampler2DMSArray 4*
> FAILtexelFetch/*-*s-isampler2DMS
> CRASH   texelFetch/*-*s-isampler2DMSArray
> PASStextureSize/*
>
>
> Hope you find this useful :)
> No real world apps that use multisample textures were tested, yet.
>
> Cheers
> Emil

Hi Emil,

Thanks for testing on nv96.  It seems, though, that I messed up my
piglit-run command and didn't include all of the relevant tests as a
result.  Now that I've fixed that, I'm seeing the same failures and
crashes on my nva5.

It seems that multisampling is broken with texelFetch (both the
texelFetch and sample-position tests use it) but works otherwise, unless
it turns out not to produce the right results in real world applications
for pre-nva3 cards.

I'm going to take some time this weekend to see what's going on with
multisampling and texelFetch.

Thanks again,
Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50: implement multisample textures

2013-10-25 Thread Bryan Cain
On 10/25/2013 04:11 PM, Christoph Bumiller wrote:
> On 25.10.2013 20:35, Emil Velikov wrote:
>> On 21/10/13 23:23, Bryan Cain wrote:
>>> This is a port of 4da54c91d24da ("nvc0: implement multisample textures") to
>>> nv50.
>>>
>>> When coupled with the patch to only report 16 texture samplers (to fix
>>> crashes), all of the Piglit tests in spec/arb_texture_multisample pass.
>>>
>> Hello Bryan,
>>
>> Big thanks for your work. As promised here is a quick piglit summary on
>> my nv96
>>
>> pass/fail/crash
>> 69/32/27
>>
>> * dmesg does not spit anything nouveau related during the tests
>> * any geometry shader related tests were skipped
>> (piglit: info: Failed to create GL 3.2 core context)
>> * all the crashes are due to the following assert
>> codegen/nv50_ir_emit_nv50.cpp:1393:emitTEX: Assertion `argc <= 4' failed.
> I'm not sure how you'd get > 4 arguments there (x y layer sample ?).
> There's no mip maps for multisample textures.
>
> But either way you're probably going to have to do things by hand:
> E.g. MS8 textures contain contiguous 4x2 rectangles of samples for each
> pixel, so you multiply x by 4 and y by 2 to arrive at the sub-rectangle
> and then add the correct offsets for the sample id as seen in
> get_sample_position (store the info in a constant buffer, that has to be
> updated when texture changes).
>
> You might want to use a lookup table like in nve4 compute (look for "MS
> sample coordinate offsets") to map sample id to coordinate offset, that
> one works for any sample count as long as you don't use the ALT modes
> (nve4 doesn't need to for textures, but for images/surfaces/UAVs/RATs
> where the whole VM address calculation is done by hand).

You're probably right.  I don't know why MSAA appears to work for me,
but there's probably something wrong with the output that I haven't
noticed.  I'll work on implementing it properly this weekend.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50: implement multisample textures

2013-10-25 Thread Bryan Cain
On 10/25/2013 05:05 PM, Christoph Bumiller wrote:
> On 25.10.2013 23:51, Bryan Cain wrote:
>> On 10/25/2013 04:11 PM, Christoph Bumiller wrote:
>>> On 25.10.2013 20:35, Emil Velikov wrote:
>>>> On 21/10/13 23:23, Bryan Cain wrote:
>>>>> This is a port of 4da54c91d24da ("nvc0: implement multisample textures") 
>>>>> to
>>>>> nv50.
>>>>>
>>>>> When coupled with the patch to only report 16 texture samplers (to fix
>>>>> crashes), all of the Piglit tests in spec/arb_texture_multisample pass.
>>>>>
>>>> Hello Bryan,
>>>>
>>>> Big thanks for your work. As promised here is a quick piglit summary on
>>>> my nv96
>>>>
>>>> pass/fail/crash
>>>> 69/32/27
>>>>
>>>> * dmesg does not spit anything nouveau related during the tests
>>>> * any geometry shader related tests were skipped
>>>> (piglit: info: Failed to create GL 3.2 core context)
>>>> * all the crashes are due to the following assert
>>>> codegen/nv50_ir_emit_nv50.cpp:1393:emitTEX: Assertion `argc <= 4' failed.
>>> I'm not sure how you'd get > 4 arguments there (x y layer sample ?).
>>> There's no mip maps for multisample textures.
>>>
>>> But either way you're probably going to have to do things by hand:
>>> E.g. MS8 textures contain contiguous 4x2 rectangles of samples for each
>>> pixel, so you multiply x by 4 and y by 2 to arrive at the sub-rectangle
>>> and then add the correct offsets for the sample id as seen in
>>> get_sample_position (store the info in a constant buffer, that has to be
>>> updated when texture changes).
>>>
>>> You might want to use a lookup table like in nve4 compute (look for "MS
>>> sample coordinate offsets") to map sample id to coordinate offset, that
>>> one works for any sample count as long as you don't use the ALT modes
>>> (nve4 doesn't need to for textures, but for images/surfaces/UAVs/RATs
>>> where the whole VM address calculation is done by hand).
>> You're probably right.  I don't know why MSAA appears to work for me,
>> but there's probably something wrong with the output that I haven't
>> noticed.  I'll work on implementing it properly this weekend.
> MSAA itself (rendering and resolving) has been working before, the only
> thing that ARB_texture_multisample adds is texelFetch from MS resources.

I really should read an extension's spec carefully before trying to
implement it so that I don't waste other people's time.  Sorry.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/34] draw/gs: fix allocation of buffer for GS output vertices

2013-07-30 Thread Bryan Cain
On Tue, Jul 30, 2013 at 8:46 PM, Paul Berry  wrote:
> On 29 July 2013 11:09, Zack Rusin  wrote:
>>
>> That looks wrong to me. We already account for the "other fields" in the
>> vertex_size.
>
>
> This patch came from Bryan Cain's original geometry shader patch series--I
> admit I'm not familiar enough with Gallium code to know how to fix it.
> Anyone want to step in and address Zack's comment?  Or the Gallium-related
> comments on patches 08 and 24?
>
> If no one has time to work on Gallium geometry shaders right now, that's ok.
> I can pull the Gallium stuff out of this series and archive it in a branch
> until someone has time to pick it up.

This patch is a year old, and I don't remember what it was supposed to
fix.  The Gallium geometry shader code has changed significantly in
the last year, and it should be safe to leave this patch unmerged.  If
any problems come up as a result, they can be addressed then.

>
>>
>>
>> - Original Message -
>> > From: Bryan Cain 
>> >
>> > Before, it accounted for the size of the vertices but not the other
>> > fields
>> > in the vertex_header struct, which caused memory corruption.
>> > ---
>> >  src/gallium/auxiliary/draw/draw_gs.c | 3 ++-
>> >  1 file changed, 2 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/src/gallium/auxiliary/draw/draw_gs.c
>> > b/src/gallium/auxiliary/draw/draw_gs.c
>> > index cd63e2b..78727c6 100644
>> > --- a/src/gallium/auxiliary/draw/draw_gs.c
>> > +++ b/src/gallium/auxiliary/draw/draw_gs.c
>> > @@ -560,7 +560,8 @@ int draw_geometry_shader_run(struct
>> > draw_geometry_shader
>> > *shader,
>> > /* we allocate exactly one extra vertex per primitive to allow the
>> > GS to
>> > emit
>> >  * overflown vertices into some area where they won't harm anyone */
>> > output_verts->verts =
>> > -  (struct vertex_header *)MALLOC(output_verts->vertex_size *
>> > +  (struct vertex_header *)MALLOC(sizeof(struct vertex_header) +
>> > + output_verts->vertex_size *
>> >   max_out_prims *
>> >   shader->primitive_boundary);
>> >
>> > --
>> > 1.8.3.4
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Geometry shader support for nv50

2013-04-17 Thread Bryan Cain
The following patch set makes the necessary changes to support geometry shaders
in the nv50 driver.  There are no piglit tests yet for geometry shader corner
cases yet, so these changes were tested with all of the GS demos in mesa/demos
and several corner case tests, using Paul Berry's "gs" branch with support for
GL_ARB_geometry_shader4.   I have also confirmed that the set does not regress
any of the piglit tests in tests/shaders.

Although this set updates the nvc0 driver to handle the indirect addressing
changes in nv50_ir_from_tgsi, it hasn't been tested yet since I don't have
the hardware.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] nv50/ir: fix PFETCH and add RDSV to get VSTRIDE for GPs

2013-04-17 Thread Bryan Cain
From: Christoph Bumiller 

v2 (Bryan Cain ): Fix emission of PFETCH instructions
using the SHL form.
---
 src/gallium/drivers/nv50/codegen/nv50_ir.h |1 +
 .../drivers/nv50/codegen/nv50_ir_emit_nv50.cpp |   62 ++--
 src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp |1 +
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h 
b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index ae365af..9d29b34 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -366,6 +366,7 @@ enum SVSemantic
SV_CLOCK,
SV_LBASE,
SV_SBASE,
+   SV_VERTEX_STRIDE,
SV_UNDEFINED,
SV_LAST
 };
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
index bc5a833..67b6298 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -87,6 +87,7 @@ private:
void emitLOAD(const Instruction *);
void emitSTORE(const Instruction *);
void emitMOV(const Instruction *);
+   void emitRDSV(const Instruction *);
void emitNOP();
void emitINTERP(const Instruction *);
void emitPFETCH(const Instruction *);
@@ -772,6 +773,29 @@ CodeEmitterNV50::emitMOV(const Instruction *i)
}
 }
 
+static inline uint8_t getSRegEncoding(const ValueRef &ref)
+{
+   switch (SDATA(ref).sv.sv) {
+   case SV_PHYSID:return 0;
+   case SV_CLOCK: return 1;
+   case SV_VERTEX_STRIDE: return 3;
+// case SV_PM_COUNTER:return 4 + SDATA(ref).sv.index;
+   case SV_SAMPLE_INDEX:  return 8;
+   default:
+  assert(!"no sreg for system value");
+  return 0;
+   }
+}
+
+void
+CodeEmitterNV50::emitRDSV(const Instruction *i)
+{
+   code[0] = 0x0001;
+   code[1] = 0x6000 | (getSRegEncoding(i->src(0)) << 14);
+   defId(i->def(0), 2);
+   emitFlagsRd(i);
+}
+
 void
 CodeEmitterNV50::emitNOP()
 {
@@ -794,15 +818,40 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t 
lane, uint8_t quOp)
   srcId(i->src(0), 32 + 14);
 }
 
+/* NOTE: This returns the base address of a vertex inside the primitive.
+ * src0 is an immediate, the index (not offset) of the vertex
+ * inside the primitive. XXX: signed or unsigned ?
+ * src1 (may be NULL) should use whatever units the hardware requires
+ * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
+ */
 void
 CodeEmitterNV50::emitPFETCH(const Instruction *i)
 {
-   code[0] = 0x1181;
-   code[1] = 0x0420 | (0xf << 14);
+   const uint32_t prim = i->src(0).get()->reg.data.u32;
+   assert(prim <= 127);
 
-   defId(i->def(0), 2);
-   srcAddr8(i->src(0), 9);
-   setAReg16(i, 0);
+   if (i->def(0).getFile() == FILE_ADDRESS) {
+  // shl $aX a[] 0
+  code[0] = 0x0001 | ((DDATA(i->def(0)).id + 1) << 2);
+  code[1] = 0xc020;
+  code[0] |= prim << 7;
+  assert(!i->srcExists(1));
+   } else
+   if (i->srcExists(1)) {
+  // ld b32 $rX a[$aX+base]
+  code[0] = 0x0001;
+  code[1] = 0x0420 | (0xf << 14);
+  defId(i->def(0), 2);
+  code[0] |= prim << 9;
+  setARegBits(SDATA(i->src(1)).id + 1);
+   } else {
+  // mov b32 $rX a[]
+  code[0] = 0x1001;
+  code[1] = 0x0420 | (0xf << 14);
+  defId(i->def(0), 2);
+  code[0] |= prim << 9;
+   }
+   emitFlagsRd(i);
 }
 
 void
@@ -1620,6 +1669,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
case OP_PFETCH:
   emitPFETCH(insn);
   break;
+   case OP_RDSV:
+  emitRDSV(insn);
+  break;
case OP_LINTERP:
case OP_PINTERP:
   emitINTERP(insn);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index c7121bf..743e566 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -265,6 +265,7 @@ static const char *SemanticStr[SV_LAST + 1] =
"CLOCK",
"LBASE",
"SBASE",
+   "VERTEX_STRIDE",
"?",
"(INVALID)"
 };
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nv50/ir: delay calculation of indirect addresses

2013-04-17 Thread Bryan Cain
Instead of emitting an SHL 4 io an address register on the TGSI ARL and UARL
instructions, emit the shift when the loaded address is actually used.  This
is necessary because input vertex and attribute indices in geometry shaders on
nv50 need to be shifted left by 2 instead of 4.
---
 .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |   34 +--
 .../drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp |   97 +++-
 .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp |   13 +++
 3 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index d8abccd..786328a 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -1114,6 +1114,7 @@ private:
};
 
Value *getVertexBase(int s);
+   Value *getIndirectAddress(Value *ptr);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
@@ -1331,7 +1332,8 @@ Converter::getVertexBase(int s)
   if (tgsi.getSrc(s).isIndirect(1))
  rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
   vtxBaseValid |= 1 << s;
-  vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
+  vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(2, FILE_ADDRESS),
+  mkImm(index), rel);
}
return vtxBase[s];
 }
@@ -1390,6 +1392,14 @@ Converter::getArrayForFile(unsigned file, int idx)
 }
 
 Value *
+Converter::getIndirectAddress(Value *ptr)
+{
+   if(!ptr)
+  return NULL;
+   return mkOp2v(OP_SHL, TYPE_U32, getSSA(2, FILE_ADDRESS), ptr, mkImm(4));
+}
+
+Value *
 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
 {
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
@@ -1401,7 +1411,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, 
int c, Value *ptr)
   assert(!ptr);
   return loadImm(NULL, info->immd.data[idx * 4 + swz]);
case TGSI_FILE_CONSTANT:
-  return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
+  return mkLoadv(TYPE_U32, srcToSym(src, c), getIndirectAddress(ptr));
case TGSI_FILE_INPUT:
   if (prog->getType() == Program::TYPE_FRAGMENT) {
  // don't load masked inputs, won't be assigned a slot
@@ -1409,9 +1419,13 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, 
int c, Value *ptr)
 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
 return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
- return interpolate(src, c, ptr);
+ return interpolate(src, c, getIndirectAddress(ptr));
   }
-  return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
+  else if (ptr && prog->getType() == Program::TYPE_GEOMETRY)
+ // nv50 and nvc0 need different things here, so let the lowering
+ // passes decide what to do with the address
+ return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
+  return mkLoadv(TYPE_U32, srcToSym(src, c), getIndirectAddress(ptr));
case TGSI_FILE_OUTPUT:
   assert(!"load from output file");
   return NULL;
@@ -1420,7 +1434,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, 
int c, Value *ptr)
   return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
default:
   return getArrayForFile(src.getFile(), idx2d)->load(
- sub.cur->values, idx, swz, ptr);
+ sub.cur->values, idx, swz, getIndirectAddress(ptr));
}
 }
 
@@ -1463,8 +1477,9 @@ Converter::storeDst(int d, int c, Value *val)
   break;
}
 
-   Value *ptr = dst.isIndirect(0) ?
-  fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
+   Value *ptr = NULL;
+   if (dst.isIndirect(0))
+  ptr = getIndirectAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
 
if (info->io.genUserClip > 0 &&
dst.getFile() == TGSI_FILE_OUTPUT &&
@@ -2166,12 +2181,11 @@ Converter::handleInstruction(const struct 
tgsi_full_instruction *insn)
   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
  src0 = fetchSrc(0, c);
  mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
- mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
   }
   break;
case TGSI_OPCODE_UARL:
   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
- mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
+ mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
   break;
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
@@ -2704,7 +2718,7 @@ Converter::Converter(Program *ir, const tgsi::Source 
*code) : BuildUtil(ir),
 
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
-   aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0);
+   aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FI

[Mesa-dev] [PATCH 3/3] nv50: add support for geometry shaders

2013-04-17 Thread Bryan Cain
Layer output probably doesn't work yet, but other than that everything seems
to be working.
---
 .../drivers/nv50/codegen/nv50_ir_emit_nv50.cpp |   25 +++-
 src/gallium/drivers/nv50/nv50_program.c|   17 +
 src/gallium/drivers/nv50/nv50_shader_state.c   |2 ++
 src/gallium/drivers/nv50/nv50_tex.c|2 ++
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
index 67b6298..d1d8b52 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -493,7 +493,12 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
setSrc(i, 1, 1);
setSrc(i, 2, 2);
 
-   setAReg16(i, 1);
+   if (i->getIndirect(0, 0)) {
+  assert(!i->getIndirect(1, 0));
+  setAReg16(i, 0);
+   }
+   else
+  setAReg16(i, 1);
 }
 
 // like default form, but 2nd source in slot 2, and no 3rd source
@@ -512,7 +517,12 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
setSrc(i, 0, 0);
setSrc(i, 1, 2);
 
-   setAReg16(i, 1);
+   if (i->getIndirect(0, 0)) {
+  assert(!i->getIndirect(1, 0));
+  setAReg16(i, 0);
+   }
+   else
+  setAReg16(i, 1);
 }
 
 // default short form (rr, ar, rc, gr)
@@ -602,8 +612,11 @@ CodeEmitterNV50::emitLOAD(const Instruction *i)
 
switch (sf) {
case FILE_SHADER_INPUT:
-  // use 'mov' where we can
-  code[0] = i->src(0).isIndirect(0) ? 0x0001 : 0x1001;
+  if(progType == Program::TYPE_GEOMETRY)
+ code[0] = 0x1181;
+  else
+ // use 'mov' where we can
+ code[0] = i->src(0).isIndirect(0) ? 0x0001 : 0x1001;
   code[1] = 0x0020 | (i->lanes << 14);
   if (typeSizeof(i->dType) == 4)
  code[1] |= 0x0400;
@@ -1399,8 +1412,8 @@ CodeEmitterNV50::emitShift(const Instruction *i)
 void
 CodeEmitterNV50::emitOUT(const Instruction *i)
 {
-   code[0] = (i->op == OP_EMIT) ? 0xf200 : 0xf400;
-   code[1] = 0xc001;
+   code[0] = (i->op == OP_EMIT) ? 0xf201 : 0xf401;
+   code[1] = 0xc000;
 
emitFlagsRd(i);
 }
diff --git a/src/gallium/drivers/nv50/nv50_program.c 
b/src/gallium/drivers/nv50/nv50_program.c
index c17ffdc..1229002 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -359,6 +359,23 @@ nv50_program_translate(struct nv50_program *prog, uint16_t 
chipset)
   if (info->prop.fp.usesDiscard)
  prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
}
+   else if (prog->type == PIPE_SHADER_GEOMETRY) {
+  switch(info->prop.gp.outputPrim) {
+  case PIPE_PRIM_POINTS:
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
+ break;
+  case PIPE_PRIM_LINE_STRIP:
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
+ break;
+  case PIPE_PRIM_TRIANGLE_STRIP:
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+ break;
+  default:
+ assert(0);
+ break;
+  }
+  prog->gp.vert_count = info->prop.gp.maxVertices;
+   }
 
if (prog->pipe.stream_output.num_outputs)
   prog->so = nv50_program_create_strmout_state(info,
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nv50/nv50_shader_state.c
index 7f05243..613d257 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -193,6 +193,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
struct nv50_program *gp = nv50->gmtyprog;
 
if (gp) {
+  if(!nv50_program_validate(nv50, gp))
+ return;
   BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, gp->max_gpr);
   BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c 
b/src/gallium/drivers/nv50/nv50_tex.c
index 40b264d..be70fda 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -293,6 +293,7 @@ void nv50_validate_textures(struct nv50_context *nv50)
boolean need_flush;
 
need_flush  = nv50_validate_tic(nv50, 0);
+   need_flush  = nv50_validate_tic(nv50, 1);
need_flush |= nv50_validate_tic(nv50, 2);
 
if (need_flush) {
@@ -343,6 +344,7 @@ void nv50_validate_samplers(struct nv50_context *nv50)
boolean need_flush;
 
need_flush  = nv50_validate_tsc(nv50, 0);
+   need_flush  = nv50_validate_tsc(nv50, 1);
need_flush |= nv50_validate_tsc(nv50, 2);
 
if (need_flush) {
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl_to_tgsi: remove unnecessary dead code elimination pass

2014-05-05 Thread Bryan Cain
With the more advanced dead code elimination pass already being run,
eliminate_dead_code was making no difference in instruction count, and had
an undesirable O(n^2) runtime. So remove it and rename
eliminate_dead_code_advanced to eliminate_dead_code.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   50 +++-
 1 file changed, 5 insertions(+), 45 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6eb6c8a..b0e0782 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -460,8 +460,7 @@ public:
int get_last_temp_write(int index);
 
void copy_propagate(void);
-   void eliminate_dead_code(void);
-   int eliminate_dead_code_advanced(void);
+   int eliminate_dead_code(void);
void merge_registers(void);
void renumber_registers(void);
 
@@ -3663,7 +3662,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
 }
 
 /*
- * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for 
dead
+ * code elimination.
  *
  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  * will occur.  As an example, a TXP production after copy propagation but 
@@ -3676,48 +3676,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
  * and after this pass:
  *
  * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- * 
- * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
- * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
- */
-void
-glsl_to_tgsi_visitor::eliminate_dead_code(void)
-{
-   int i;
-   
-   for (i=0; i < this->next_temp; i++) {
-  int last_read = get_last_temp_read(i);
-  int j = 0;
-  
-  foreach_list_safe(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
- if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
- j > last_read)
- {
-inst->remove();
-delete inst;
- }
- 
- j++;
-  }
-   }
-}
-
-/*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for 
dead
- * code elimination.  This is less primitive than eliminate_dead_code(), as it
- * is per-channel and can detect consecutive writes without a read between them
- * as dead code.  However, there is some dead code that can be eliminated by 
- * eliminate_dead_code() but not this function - for example, this function 
- * cannot eliminate an instruction writing to a register that is never read and
- * is the only instruction writing to that register.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.
  */
 int
-glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
 {
glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
  glsl_to_tgsi_instruction 
*,
@@ -5245,9 +5206,8 @@ get_mesa_program(struct gl_context *ctx,
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
v->copy_propagate();
-   while (v->eliminate_dead_code_advanced());
+   while (v->eliminate_dead_code());
 
-   v->eliminate_dead_code();
v->merge_registers();
v->renumber_registers();

-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/st: Don't copy propagate from swizzles.

2013-04-24 Thread Bryan Cain
On 04/20/2013 12:40 PM, Fabian Bieler wrote:
> Do not propagate a copy if source and destination are identical.
>
> Otherwise code like
>
> MOV TEMP[0].xyzw, TEMP[0].wzyx
> mov TEMP[1].xyzw, TEMP[0].xyzw
>
> is changed to
>
> MOV TEMP[0].xyzw, TEMP[0].wzyx
> mov TEMP[1].xyzw, TEMP[0].wzyx
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index f2eb3e7..b5d0534 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -3544,6 +3544,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>/* If this is a copy, add it to the ACP. */
>if (inst->op == TGSI_OPCODE_MOV &&
>inst->dst.file == PROGRAM_TEMPORARY &&
> +  !(inst->dst.file == inst->src[0].file &&
> + inst->dst.index == inst->src[0].index) &&
>!inst->dst.reladdr &&
>!inst->saturate &&
>!inst->src[0].reladdr &&

Nice catch.  FYI, it seems that the almost identical copy_progagate
function in ir_to_mesa also needs this fix.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nouveau: emit and flush fence in fence_signalled if needed

2013-05-07 Thread Bryan Cain
The Mesa state tracker expects us to emit the fence even if it doesn't call
fence_finish.  Notably, this occurs when glClientWaitSync is called with
timeout 0.

Fixes Portal and Left 4 Dead 2, which were both stalling on startup by
repeatedly calling glClientWaitSync with timeout 0 while waiting for commands
to complete.
---
 src/gallium/drivers/nouveau/nouveau_fence.c |   36 ++-
 src/gallium/drivers/nouveau/nouveau_fence.h |1 +
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c 
b/src/gallium/drivers/nouveau/nouveau_fence.c
index dea146c..722be01 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -167,6 +167,25 @@ nouveau_fence_update(struct nouveau_screen *screen, 
boolean flushed)
}
 }
 
+boolean
+nouveau_fence_ensure_flushed(struct nouveau_fence *fence)
+{
+   struct nouveau_screen *screen = fence->screen;
+
+   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+  nouveau_fence_emit(fence);
+
+  if (fence == screen->fence.current)
+ nouveau_fence_new(screen, &screen->fence.current, FALSE);
+   }
+   if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+  if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
+ return FALSE;
+   }
+
+   return TRUE;
+}
+
 #define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
 
 boolean
@@ -174,8 +193,9 @@ nouveau_fence_signalled(struct nouveau_fence *fence)
 {
struct nouveau_screen *screen = fence->screen;
 
-   if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED)
-  nouveau_fence_update(screen, FALSE);
+   if (!nouveau_fence_ensure_flushed(fence))
+  return FALSE;
+   nouveau_fence_update(screen, FALSE);
 
return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
 }
@@ -189,16 +209,8 @@ nouveau_fence_wait(struct nouveau_fence *fence)
/* wtf, someone is waiting on a fence in flush_notify handler? */
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
 
-   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
-  nouveau_fence_emit(fence);
-
-  if (fence == screen->fence.current)
- nouveau_fence_new(screen, &screen->fence.current, FALSE);
-   }
-   if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
-  if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
- return FALSE;
-   }
+   if (!nouveau_fence_ensure_flushed(fence))
+  return FALSE;
 
do {
   nouveau_fence_update(screen, FALSE);
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h 
b/src/gallium/drivers/nouveau/nouveau_fence.h
index 3984a9a..d497c7f 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -34,6 +34,7 @@ boolean nouveau_fence_new(struct nouveau_screen *, struct 
nouveau_fence **,
 boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
 voidnouveau_fence_update(struct nouveau_screen *, boolean flushed);
 voidnouveau_fence_next(struct nouveau_screen *);
+boolean nouveau_fence_ensure_flushed(struct nouveau_fence *);
 boolean nouveau_fence_wait(struct nouveau_fence *);
 boolean nouveau_fence_signalled(struct nouveau_fence *);
 
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/tgsi: clarify (possibly change) TGSI_OPCODE_UCMP definition

2013-05-08 Thread Bryan Cain
On 05/08/2013 03:26 PM, Roland Scheidegger wrote:
> There's some other
> somewhat bogus code with comments saying UCMP should be used instead of
> some poor emulation with i2f instructions.

Yeah, sorry about that.  I wrote that code (and comment) a few days
before the UCMP opcode was added to TGSI, and didn't notice until today
that it was never updated to actually use UCMP.  There are a few changes
and clean-ups I've been wanting to make to glsl_to_tgsi anyway - I'll
include an update to emit UCMP if someone hasn't already done so by the
time I finish that and send out a patch set.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] RFC: mesa/st dynamic sampler support in tgsi

2014-08-05 Thread Bryan Cain
On Mon, Aug 4, 2014 at 11:54 PM, Ilia Mirkin  wrote:

> Another apporach I've tried is to just use a TEMP register as the
> indirect offset here. Unfortunately this gets destroyed by
> st_glsl_to_tgsi's various optimizations which assume that temp
> registers can't be reladdr's and so messes up the values. I started
> adding support for that, but then quickly realized that was probably
> not the right thing to do.
>
> So... should I increase the number of address registers to 1? Or is
> there some other simple approach that I'm missing?
>
> Thanks,
>
>   -ilia
>

The problem you are having with the optimizations in glsl_to_tgsi is
because, as you say, it doesn't check the reladdr fields when calculating
live intervals.  I'm currently in the middle of reworking the live interval
calculation - if you don't mind waiting a day or two for me to finish the
patch set and send it to the list, I can add that change to the reworked
function in the patch set.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] RFC: mesa/st dynamic sampler support in tgsi

2014-08-05 Thread Bryan Cain
On Tue, Aug 5, 2014 at 3:23 PM, Ilia Mirkin  wrote:

> On Tue, Aug 5, 2014 at 4:14 PM, Bryan Cain  wrote:
> > On Mon, Aug 4, 2014 at 11:54 PM, Ilia Mirkin 
> wrote:
> >>
> >> Another apporach I've tried is to just use a TEMP register as the
> >> indirect offset here. Unfortunately this gets destroyed by
> >> st_glsl_to_tgsi's various optimizations which assume that temp
> >> registers can't be reladdr's and so messes up the values. I started
> >> adding support for that, but then quickly realized that was probably
> >> not the right thing to do.
> >>
> >> So... should I increase the number of address registers to 1? Or is
> >> there some other simple approach that I'm missing?
> >>
> >> Thanks,
> >>
> >>   -ilia
> >
> >
> > The problem you are having with the optimizations in glsl_to_tgsi is
> > because, as you say, it doesn't check the reladdr fields when calculating
> > live intervals.  I'm currently in the middle of reworking the live
> interval
> > calculation - if you don't mind waiting a day or two for me to finish the
> > patch set and send it to the list, I can add that change to the reworked
> > function in the patch set.
>
> I can certainly wait a little while. However from everything I can
> tell, all current reladdr's are in the ADDR register file, so perhaps
> I should just stick with that?
>

Either way is workable from glsl_to_tgsi's standpoint.  I assume the
gallium interface people have a better idea of what the preferred solution
is.

[Resending because I forgot to CC the list before.]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GS plans?

2013-01-30 Thread Bryan Cain
On 01/29/2013 12:44 PM, Paul Berry wrote:
> On 29 January 2013 09:33, Ian Romanick  > wrote:
>
> On 01/29/2013 09:02 AM, Brian Paul wrote:
>
>
> Hi Bryan,
>
> Back in July you announced your work on geometry shaders:
> http://lists.freedesktop.org/archives/mesa-dev/2012-July/024792.html
>
> But it looks like it hasn't been touched since October.  I was
> wondering
> what plans you might have for that.
>
> I believe the Intel guys also were planning on tackling GS
> support in
> Mesa this year.  Ian, Eric, do you have some idea of when
> you'll be
> working on that?  Were you going to use Bryan's work?
>
>
> I believe Paul is going to chip away at a bit of it.  I think he's
> planning to use some of Bryan's work.  I believe they've been in
> contact, but I'm not 100% sure.
>
>
> Yes, we have.  It looks like Bryan made a lot of headway before he set
> the branch aside in October, so I'm planning to use that as a starting
> point.  My impression from talking to Bryan is that he doesn't have
> too much time/interest to put into geometry shaders these days, so for
> the moment I'm planning to take over responsibility for the branch myself.
>
> My first order of business is to write some piglit tests for geometry
> shaders, so that I don't have to push anything to Mesa master that
> isn't well-tested.  I have an nVidia system that I can use as a
> reference platform to make sure my tests are correct.  I also want to
> spend a little bit of time prototyping some i965 back-end code just to
> increase my familiarity with the problem domain and to find out if
> i965 hardware has any sharp corners I need to watch out for.  Once
> I've done those two things, my plan is to start rebasing Bryan's patch
> series onto master and sending it out for review piece by piece.

It's true that I don't have too much time/interest to put into geometry
shaders anymore, but before you get to the rebasing part of your plan, I
do hope to have reorganized my the commits from my
geometry-shaders-rebase branch into a more modular form which would be
suitable for review on the mailing list as a patch set.  I've already
started the process of figuring out what this should look like, so if
all goes well it will be ready by the time it's needed.  I'll notify you
(and the mailing list) when it's ready.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
These patches add support for clip distances in the Gallium interface and the
Mesa state tracker, respectively.  This should take care of gl_ClipDistance,
one of the few GLSL 1.30 features not yet implemented in Gallium.  If this is
merged, driver developers will need to add support to their drivers for the new
TGSI_SEMANTIC_CLIPDIST.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add TGSI_SEMANTIC_CLIPDIST for clip distance

2011-12-13 Thread Bryan Cain
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c |3 ++-
 src/gallium/include/pipe/p_shader_tokens.h |3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e830aa5..bd299b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -129,7 +129,8 @@ static const char *semantic_names[] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *immediate_type_names[] =
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index 10cfaf6..330e0ba 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -146,7 +146,8 @@ struct tgsi_declaration_dimension
 #define TGSI_SEMANTIC_INSTANCEID 10
 #define TGSI_SEMANTIC_VERTEXID   11
 #define TGSI_SEMANTIC_STENCIL12
-#define TGSI_SEMANTIC_COUNT  13 /**< number of semantic values */
+#define TGSI_SEMANTIC_CLIPDIST   13
+#define TGSI_SEMANTIC_COUNT  14 /**< number of semantic values */
 
 struct tgsi_declaration_semantic
 {
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/mesa: add support for gl_ClipDistance

2011-12-13 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |1 +
 src/mesa/state_tracker/st_program.c|   18 ++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9ef65c8..d50176d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5036,6 +5036,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   || progress;
 
  progress = lower_quadop_vector(ir, false) || progress;
+ progress = lower_clip_distance(ir) || progress;
 
  if (options->MaxIfDepth == 0)
 progress = lower_discard(ir) || progress;
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 04d3ef6..73581dd 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -244,6 +244,14 @@ st_prepare_vertex_program(struct gl_context *ctx,
 stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
 stvp->output_semantic_index[slot] = 0;
 break;
+ case VERT_RESULT_CLIP_DIST0:
+stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+stvp->output_semantic_index[slot] = 0;
+break;
+ case VERT_RESULT_CLIP_DIST1:
+stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+stvp->output_semantic_index[slot] = 1;
+break;
  case VERT_RESULT_EDGE:
 assert(0);
 break;
@@ -541,6 +549,16 @@ st_translate_fragment_program(struct st_context *st,
input_semantic_index[slot] = 0;
interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
break;
+case FRAG_ATTRIB_CLIP_DIST0:
+   input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+   input_semantic_index[slot] = 0;
+   interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+   break;
+case FRAG_ATTRIB_CLIP_DIST1:
+   input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+   input_semantic_index[slot] = 1;
+   interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+   break;
/* In most cases, there is nothing special about these
 * inputs, so adopt a convention to use the generic
 * semantic name and the mesa FRAG_ATTRIB_ number as the
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
This is an updated version of the patch set I sent to the list a few hours
ago.  There is now a TGSI property called TGSI_PROPERTY_NUM_CLIP_DISTANCES
that drivers can use to determine how many of the 8 available clip distances
are actually used by a shader.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add support for clip distances

2011-12-13 Thread Bryan Cain
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c |6 --
 src/gallium/auxiliary/tgsi/tgsi_text.c |3 ++-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c |   14 ++
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |3 +++
 src/gallium/include/pipe/p_shader_tokens.h |6 --
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e830aa5..4534731 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -129,7 +129,8 @@ static const char *semantic_names[] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *immediate_type_names[] =
@@ -174,7 +175,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"FS_COORD_ORIGIN",
"FS_COORD_PIXEL_CENTER",
"FS_COLOR0_WRITES_ALL_CBUFS",
-   "FS_DEPTH_LAYOUT"
+   "FS_DEPTH_LAYOUT",
+   "NUM_CLIP_DISTANCES"
 };
 
 static const char *tgsi_type_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c 
b/src/gallium/auxiliary/tgsi/tgsi_text.c
index eb9190c..f46ba19 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1024,7 +1024,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index ee013a5..5966c8c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -162,6 +162,7 @@ struct ureg_program
unsigned char property_fs_coord_pixel_center; /* = 
TGSI_FS_COORD_PIXEL_CENTER_* */
unsigned char property_fs_color0_writes_all_cbufs; /* = 
TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */
unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */
+   unsigned char property_num_clip_distances;
 
unsigned nr_addrs;
unsigned nr_preds;
@@ -312,6 +313,13 @@ ureg_property_fs_depth_layout(struct ureg_program *ureg,
ureg->property_fs_depth_layout = fs_depth_layout;
 }
 
+void
+ureg_property_num_clip_distances(struct ureg_program *ureg,
+ unsigned num_clip_distances)
+{
+   ureg->property_num_clip_distances = num_clip_distances;
+}
+
 struct ureg_src
 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
unsigned semantic_name,
@@ -1404,6 +1412,12 @@ static void emit_decls( struct ureg_program *ureg )
 ureg->property_fs_depth_layout);
}
 
+   if (ureg->property_num_clip_distances) {
+  emit_property(ureg,
+TGSI_PROPERTY_NUM_CLIP_DISTANCES,
+ureg->property_num_clip_distances);
+   }
+
if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
   for (i = 0; i < UREG_MAX_INPUT; i++) {
  if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index a70d30f..f313a6f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -161,6 +161,9 @@ void
 ureg_property_fs_depth_layout(struct ureg_program *ureg,
   unsigned fs_depth_layout);
 
+void
+ureg_property_num_clip_distances(struct ureg_program *ureg,
+ unsigned num_clip_distances);
 
 /***
  * Build shader declarations:
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index 10cfaf6..e5e6d46 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -146,7 +146,8 @@ struct tgsi_declaration_dimension
 #define TGSI_SEMANTIC_INSTANCEID 10
 #define TGSI_SEMANTIC_VERTEXID   11
 #define TGSI_SEMANTIC_STENCIL12
-#define TGSI_SEMANTIC_COUNT  13 /**< number of semantic values */
+#define TGSI_SEMANTIC_CLIPDIST   13
+#define TGSI_SEMANTIC_COUNT  14 /**< number of semantic values */
 
 struct tgsi_declaration_semantic
 {
@@ -189,7 +190,8 @@ union tgsi_immediate_data
 #define TGSI_PROPERTY_FS_COORD_PIXEL_CENTER  4
 #define TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS 5
 #define TGSI_PROPERTY_FS_DEPTH_LAYOUT6
-#define TGSI_PROPERTY_COUNT  7
+#define TGSI_PROPERTY_NUM_CLIP_DISTANCES 7
+#define TGSI_PROPERTY_COUNT  8
 
 struct tgsi_property {
unsigned Type : 4;  /**< TGSI_TOKEN_TYPE_PROPERTY */
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/mesa: add support for gl_ClipDistance

2011-12-13 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   39 ++-
 src/mesa/state_tracker/st_program.c|   18 +
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9ef65c8..5e54465 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -304,6 +304,7 @@ public:
int samplers_used;
bool indirect_addr_temps;
bool indirect_addr_consts;
+   int num_clip_distances;

int glsl_version;
bool native_integers;
@@ -4738,6 +4739,9 @@ st_translate_program(
  t->samplers[i] = ureg_DECL_sampler(ureg, i);
   }
}
+   
+   /* Set the number of clip distances used in the shader. */
+   ureg_property_num_clip_distances(ureg, program->num_clip_distances);
 
/* Emit each instruction in turn:
 */
@@ -4797,7 +4801,8 @@ out:
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
  struct gl_shader_program *shader_program,
-struct gl_shader *shader)
+ struct gl_shader *shader,
+ int num_clip_distances)
 {
glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
struct gl_program *prog;
@@ -4842,6 +4847,7 @@ get_mesa_program(struct gl_context *ctx,
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+   v->num_clip_distances = num_clip_distances;
 
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
   prog->Parameters);
@@ -4971,6 +4977,27 @@ get_mesa_program(struct gl_context *ctx,
return prog;
 }
 
+/**
+ * Searches through the IR for a declaration of gl_ClipDistance and returns the
+ * declared size of the gl_ClipDistance array.  Returns 0 if gl_ClipDistance is
+ * not declared in the IR.
+ */
+int get_clip_distance_size(exec_list *ir)
+{
+   foreach_iter (exec_list_iterator, iter, *ir) {
+  ir_instruction *inst = (ir_instruction *)iter.get();
+  ir_variable *var = inst->as_variable();
+  if (var == NULL) continue;
+  if (!strcmp(var->name, "gl_ClipDistance"))
+  {
+ fprintf(stderr, "gl_ClipDistance found with size %i\n", 
var->type->length);
+ return var->type->length;
+  }
+   }
+   
+   return 0;
+}
+
 extern "C" {
 
 struct gl_shader *
@@ -5009,6 +5036,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   int num_clip_distances[MESA_SHADER_TYPES];
assert(prog->LinkStatus);
 
for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
@@ -5020,6 +5048,11 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   const struct gl_shader_compiler_options *options =
 
&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
 
+  /* We have to determine the length of the gl_ClipDistance array before 
the
+   * array is lowered to two vec4s by lower_clip_distance().
+   */
+  num_clip_distances[i] = get_clip_distance_size(ir);
+
   do {
  progress = false;
 
@@ -5036,6 +5069,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   || progress;
 
  progress = lower_quadop_vector(ir, false) || progress;
+ progress = lower_clip_distance(ir) || progress;
 
  if (options->MaxIfDepth == 0)
 progress = lower_discard(ir) || progress;
@@ -5070,7 +5104,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   if (prog->_LinkedShaders[i] == NULL)
  continue;
 
-  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
+ num_clip_distances[i]);
 
   if (linked_prog) {
 static const GLenum targets[] = {
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 04d3ef6..73581dd 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -244,6 +244,14 @@ st_prepare_vertex_program(struct gl_context *ctx,
 stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
 stvp->output_semantic_index[slot] = 0;
 break;
+ case VERT_RESULT_CLIP_DIST0:
+stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+stvp->output_semantic_index[slot] = 0;
+break;
+ case VERT_RESULT_CLIP_DIST1:
+stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+stvp->output_semantic_index[slot] = 1;
+break;
  case VERT_RESULT_EDGE:
 assert(0);
 break;
@@ -541,6 +549,16 @@ st_translate_fragment_program(struct st_context *st,
   

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 02:11 PM, Jose Fonseca wrote:
> - Original Message -
>> This is an updated version of the patch set I sent to the list a few
>> hours
>> ago.  
>> There is now a TGSI property called
>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>> that drivers can use to determine how many of the 8 available clip
>> distances
>> are actually used by a shader.
> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily derived from the 
> shader, and queried through src/gallium/auxiliary/tgsi/tgsi_scan.h ?

No.  The clip distances can be indirectly addressed (there are up to 2
of them in vec4 form for a total of 8 floats), which makes it impossible
to determine which ones are used by analyzing the shader.

> Could you also elaborate on why TGSI_SEMANTIC_CLIPDIST is useful for the 
> drivers? I personally don't have nothing against it, but just like to 
> understand why it makes a difference.
>
> Jose

Unless my understanding of clip distances is wrong, the GPU uses clip
distances to decide whether a vertex should be clipped, and thus needs
to know which of the vertex shader outputs are clip distances.

Bryan


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 03:09 PM, Jose Fonseca wrote:
>
> - Original Message -
>> On 12/13/2011 12:26 PM, Bryan Cain wrote:
>>> On 12/13/2011 02:11 PM, Jose Fonseca wrote:
>>>> - Original Message -
>>>>> This is an updated version of the patch set I sent to the list a
>>>>> few
>>>>> hours
>>>>> ago.
>>>>> There is now a TGSI property called
>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>>>>> that drivers can use to determine how many of the 8 available
>>>>> clip
>>>>> distances
>>>>> are actually used by a shader.
>>>> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
>>>> derived from the shader, and queried through
>>>> src/gallium/auxiliary/tgsi/tgsi_scan.h ?
>>> No.  The clip distances can be indirectly addressed (there are up
>>> to 2
>>> of them in vec4 form for a total of 8 floats), which makes it
>>> impossible
>>> to determine which ones are used by analyzing the shader.
>> The description is almost complete. :)  The issue is that the shader
>> may
>> declare
>>
>> out float gl_ClipDistance[4];
>>
>> the use non-constant addressing of the array.  The compiler knows
>> that
>> gl_ClipDistance has at most 4 elements, but post-hoc analysis would
>> not
>> be able to determine that.  Often the fixed-function hardware (see
>> below) needs to know which clip distance values are actually written.
> But don't all the clip distances written by the shader need to be declared?
>
> E.g.:
>
> DCL OUT[0], CLIPDIST[0]
> DCL OUT[1], CLIPDIST[1]
> DCL OUT[2], CLIPDIST[2]
> DCL OUT[3], CLIPDIST[3]
>
> therefore a trivial analysis of the declarations convey that?

No.  Clip distance is an array of up to 8 floats in GLSL, but it's
represented in the hardware as 2 vec4s.  You can tell by analyzing the
declarations whether there are more than 4 clip distances in use, but
not which components the shader writes to. 
TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in use, not
the number of full vectors.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium: add TGSI_SEMANTIC_CLIPDIST for clip distance

2011-12-13 Thread Bryan Cain
On 12/13/2011 03:07 PM, Brian Paul wrote:
> On Tue, Dec 13, 2011 at 9:59 AM, Bryan Cain  wrote:
>> ---
>>  src/gallium/auxiliary/tgsi/tgsi_dump.c |3 ++-
>>  src/gallium/include/pipe/p_shader_tokens.h |3 ++-
>>  2 files changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
>> b/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> index e830aa5..bd299b0 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> @@ -129,7 +129,8 @@ static const char *semantic_names[] =
>>"PRIM_ID",
>>"INSTANCEID",
>>"VERTEXID",
>> -   "STENCIL"
>> +   "STENCIL",
>> +   "CLIPDIST"
>>  };
>>
>>  static const char *immediate_type_names[] =
>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
>> b/src/gallium/include/pipe/p_shader_tokens.h
>> index 10cfaf6..330e0ba 100644
>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>> @@ -146,7 +146,8 @@ struct tgsi_declaration_dimension
>>  #define TGSI_SEMANTIC_INSTANCEID 10
>>  #define TGSI_SEMANTIC_VERTEXID   11
>>  #define TGSI_SEMANTIC_STENCIL12
>> -#define TGSI_SEMANTIC_COUNT  13 /**< number of semantic values */
>> +#define TGSI_SEMANTIC_CLIPDIST   13
>> +#define TGSI_SEMANTIC_COUNT  14 /**< number of semantic values */
>>
>>  struct tgsi_declaration_semantic
>>  {
> Reviewed-by: Brian Paul http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 03:25 PM, Jose Fonseca wrote:
>
> - Original Message -
>> On 12/13/2011 03:09 PM, Jose Fonseca wrote:
>>> - Original Message -
>>>> On 12/13/2011 12:26 PM, Bryan Cain wrote:
>>>>> On 12/13/2011 02:11 PM, Jose Fonseca wrote:
>>>>>> - Original Message -
>>>>>>> This is an updated version of the patch set I sent to the list
>>>>>>> a
>>>>>>> few
>>>>>>> hours
>>>>>>> ago.
>>>>>>> There is now a TGSI property called
>>>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>>>>>>> that drivers can use to determine how many of the 8 available
>>>>>>> clip
>>>>>>> distances
>>>>>>> are actually used by a shader.
>>>>>> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
>>>>>> derived from the shader, and queried through
>>>>>> src/gallium/auxiliary/tgsi/tgsi_scan.h ?
>>>>> No.  The clip distances can be indirectly addressed (there are up
>>>>> to 2
>>>>> of them in vec4 form for a total of 8 floats), which makes it
>>>>> impossible
>>>>> to determine which ones are used by analyzing the shader.
>>>> The description is almost complete. :)  The issue is that the
>>>> shader
>>>> may
>>>> declare
>>>>
>>>> out float gl_ClipDistance[4];
>>>>
>>>> the use non-constant addressing of the array.  The compiler knows
>>>> that
>>>> gl_ClipDistance has at most 4 elements, but post-hoc analysis
>>>> would
>>>> not
>>>> be able to determine that.  Often the fixed-function hardware (see
>>>> below) needs to know which clip distance values are actually
>>>> written.
>>> But don't all the clip distances written by the shader need to be
>>> declared?
>>>
>>> E.g.:
>>>
>>> DCL OUT[0], CLIPDIST[0]
>>> DCL OUT[1], CLIPDIST[1]
>>> DCL OUT[2], CLIPDIST[2]
>>> DCL OUT[3], CLIPDIST[3]
>>>
>>> therefore a trivial analysis of the declarations convey that?
>> No.  Clip distance is an array of up to 8 floats in GLSL, but it's
>> represented in the hardware as 2 vec4s.  You can tell by analyzing
>> the
>> declarations whether there are more than 4 clip distances in use, but
>> not which components the shader writes to.
>> TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in use,
>> not
>> the number of full vectors.
> Lets imagine 
>
>   out float gl_ClipDistance[6];
>
> Each a clip distance is a scalar float.
>
> Either all hardware represents the 8 clip distances as two 4 vectors, and we 
> do:
>
>   DCL OUT[0].xywz, CLIPDIST[0]
>   DCL OUT[1].xy, CLIPDIST[1]
>
> using the full range of struct tgsi_declaration::UsageMask [1] or we 
> represent them as as scalars:
>
>   DCL OUT[0].x, CLIPDIST[0]
>   DCL OUT[1].x, CLIPDIST[1]
>   DCL OUT[2].x, CLIPDIST[2]
>   DCL OUT[3].x, CLIPDIST[3]
>   DCL OUT[4].x, CLIPDIST[4]
>   DCL OUT[5].x, CLIPDIST[5]
>
> If indirect addressing is allowed as I read bore, then maybe the later is 
> better.
>
> I confess my ignorance about clipping and maybe I'm being dense, but I still 
> don't see the need for this TGSI_PROPERTY_NUM_CLIP_DISTANCES.  Could you 
> please draft an example TGSI shader showing this property (or just paste one 
> generated with your change)?  I think that would help a lot.
>
>
> Jose
>
>
> [1] I don't know if tgsi_dump pays much attention to  
> tgsi_declaration::UsageMask, but it does exist.

UsageMask might work, but before that can be considered a viable
solution, someone will need to make it possible to actually declare it
from ureg.  As it is, ureg is hardcoded to set UsageMask to xyzw no
matter what on all declared inputs and outputs.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 03:48 PM, Jose Fonseca wrote:
> - Original Message -
>> On 12/13/2011 03:25 PM, Jose Fonseca wrote:
>>> - Original Message -
>>>> On 12/13/2011 03:09 PM, Jose Fonseca wrote:
>>>>> - Original Message -----
>>>>>> On 12/13/2011 12:26 PM, Bryan Cain wrote:
>>>>>>> On 12/13/2011 02:11 PM, Jose Fonseca wrote:
>>>>>>>> - Original Message -
>>>>>>>>> This is an updated version of the patch set I sent to the
>>>>>>>>> list
>>>>>>>>> a
>>>>>>>>> few
>>>>>>>>> hours
>>>>>>>>> ago.
>>>>>>>>> There is now a TGSI property called
>>>>>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>>>>>>>>> that drivers can use to determine how many of the 8 available
>>>>>>>>> clip
>>>>>>>>> distances
>>>>>>>>> are actually used by a shader.
>>>>>>>> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
>>>>>>>> derived from the shader, and queried through
>>>>>>>> src/gallium/auxiliary/tgsi/tgsi_scan.h ?
>>>>>>> No.  The clip distances can be indirectly addressed (there are
>>>>>>> up
>>>>>>> to 2
>>>>>>> of them in vec4 form for a total of 8 floats), which makes it
>>>>>>> impossible
>>>>>>> to determine which ones are used by analyzing the shader.
>>>>>> The description is almost complete. :)  The issue is that the
>>>>>> shader
>>>>>> may
>>>>>> declare
>>>>>>
>>>>>> out float gl_ClipDistance[4];
>>>>>>
>>>>>> the use non-constant addressing of the array.  The compiler
>>>>>> knows
>>>>>> that
>>>>>> gl_ClipDistance has at most 4 elements, but post-hoc analysis
>>>>>> would
>>>>>> not
>>>>>> be able to determine that.  Often the fixed-function hardware
>>>>>> (see
>>>>>> below) needs to know which clip distance values are actually
>>>>>> written.
>>>>> But don't all the clip distances written by the shader need to be
>>>>> declared?
>>>>>
>>>>> E.g.:
>>>>>
>>>>> DCL OUT[0], CLIPDIST[0]
>>>>> DCL OUT[1], CLIPDIST[1]
>>>>> DCL OUT[2], CLIPDIST[2]
>>>>> DCL OUT[3], CLIPDIST[3]
>>>>>
>>>>> therefore a trivial analysis of the declarations convey that?
>>>> No.  Clip distance is an array of up to 8 floats in GLSL, but it's
>>>> represented in the hardware as 2 vec4s.  You can tell by analyzing
>>>> the
>>>> declarations whether there are more than 4 clip distances in use,
>>>> but
>>>> not which components the shader writes to.
>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in
>>>> use,
>>>> not
>>>> the number of full vectors.
>>> Lets imagine
>>>
>>>   out float gl_ClipDistance[6];
>>>
>>> Each a clip distance is a scalar float.
>>>
>>> Either all hardware represents the 8 clip distances as two 4
>>> vectors, and we do:
>>>
>>>   DCL OUT[0].xywz, CLIPDIST[0]
>>>   DCL OUT[1].xy, CLIPDIST[1]
>>>
>>> using the full range of struct tgsi_declaration::UsageMask [1] or
>>> we represent them as as scalars:
>>>
>>>   DCL OUT[0].x, CLIPDIST[0]
>>>   DCL OUT[1].x, CLIPDIST[1]
>>>   DCL OUT[2].x, CLIPDIST[2]
>>>   DCL OUT[3].x, CLIPDIST[3]
>>>   DCL OUT[4].x, CLIPDIST[4]
>>>   DCL OUT[5].x, CLIPDIST[5]
>>>
>>> If indirect addressing is allowed as I read bore, then maybe the
>>> later is better.
>>>
>>> I confess my ignorance about clipping and maybe I'm being dense,
>>> but I still don't see the need for this
>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES.  Could you please draft an
>>> example TGSI shader showing this property (or just paste one
>>> generated with your change)?  I think that would help a lot.
>>>
>>>
>>> Jose
>>>
>>>
>>> [1] I don't

Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 03:48 PM, Jose Fonseca wrote:
> - Original Message -
>> On 12/13/2011 03:25 PM, Jose Fonseca wrote:
>>> - Original Message -
>>>> On 12/13/2011 03:09 PM, Jose Fonseca wrote:
>>>>> - Original Message -----
>>>>>> On 12/13/2011 12:26 PM, Bryan Cain wrote:
>>>>>>> On 12/13/2011 02:11 PM, Jose Fonseca wrote:
>>>>>>>> - Original Message -
>>>>>>>>> This is an updated version of the patch set I sent to the
>>>>>>>>> list
>>>>>>>>> a
>>>>>>>>> few
>>>>>>>>> hours
>>>>>>>>> ago.
>>>>>>>>> There is now a TGSI property called
>>>>>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>>>>>>>>> that drivers can use to determine how many of the 8 available
>>>>>>>>> clip
>>>>>>>>> distances
>>>>>>>>> are actually used by a shader.
>>>>>>>> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be easily
>>>>>>>> derived from the shader, and queried through
>>>>>>>> src/gallium/auxiliary/tgsi/tgsi_scan.h ?
>>>>>>> No.  The clip distances can be indirectly addressed (there are
>>>>>>> up
>>>>>>> to 2
>>>>>>> of them in vec4 form for a total of 8 floats), which makes it
>>>>>>> impossible
>>>>>>> to determine which ones are used by analyzing the shader.
>>>>>> The description is almost complete. :)  The issue is that the
>>>>>> shader
>>>>>> may
>>>>>> declare
>>>>>>
>>>>>> out float gl_ClipDistance[4];
>>>>>>
>>>>>> the use non-constant addressing of the array.  The compiler
>>>>>> knows
>>>>>> that
>>>>>> gl_ClipDistance has at most 4 elements, but post-hoc analysis
>>>>>> would
>>>>>> not
>>>>>> be able to determine that.  Often the fixed-function hardware
>>>>>> (see
>>>>>> below) needs to know which clip distance values are actually
>>>>>> written.
>>>>> But don't all the clip distances written by the shader need to be
>>>>> declared?
>>>>>
>>>>> E.g.:
>>>>>
>>>>> DCL OUT[0], CLIPDIST[0]
>>>>> DCL OUT[1], CLIPDIST[1]
>>>>> DCL OUT[2], CLIPDIST[2]
>>>>> DCL OUT[3], CLIPDIST[3]
>>>>>
>>>>> therefore a trivial analysis of the declarations convey that?
>>>> No.  Clip distance is an array of up to 8 floats in GLSL, but it's
>>>> represented in the hardware as 2 vec4s.  You can tell by analyzing
>>>> the
>>>> declarations whether there are more than 4 clip distances in use,
>>>> but
>>>> not which components the shader writes to.
>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components in
>>>> use,
>>>> not
>>>> the number of full vectors.
>>> Lets imagine
>>>
>>>   out float gl_ClipDistance[6];
>>>
>>> Each a clip distance is a scalar float.
>>>
>>> Either all hardware represents the 8 clip distances as two 4
>>> vectors, and we do:
>>>
>>>   DCL OUT[0].xywz, CLIPDIST[0]
>>>   DCL OUT[1].xy, CLIPDIST[1]
>>>
>>> using the full range of struct tgsi_declaration::UsageMask [1] or
>>> we represent them as as scalars:
>>>
>>>   DCL OUT[0].x, CLIPDIST[0]
>>>   DCL OUT[1].x, CLIPDIST[1]
>>>   DCL OUT[2].x, CLIPDIST[2]
>>>   DCL OUT[3].x, CLIPDIST[3]
>>>   DCL OUT[4].x, CLIPDIST[4]
>>>   DCL OUT[5].x, CLIPDIST[5]
>>>
>>> If indirect addressing is allowed as I read bore, then maybe the
>>> later is better.
>>>
>>> I confess my ignorance about clipping and maybe I'm being dense,
>>> but I still don't see the need for this
>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES.  Could you please draft an
>>> example TGSI shader showing this property (or just paste one
>>> generated with your change)?  I think that would help a lot.
>>>
>>>
>>> Jose
>>>
>>>
>>> [1] I don't know if tgsi_dump pays much attention to
>>>  tgsi_declaration::UsageMask, but it does exist.
>> UsageMask might work, but before that can be considered a viable
>> solution, someone will need to make it possible to actually declare
>> it
>> from ureg.  As it is, ureg is hardcoded to set UsageMask to xyzw no
>> matter what on all declared inputs and outputs.
> ureg automatically fills the UsageMask from the destionation register masks, 
> since it easy to determine from the opcodes.

Wait, where does it do that?  When I search through tgsi_ureg.c for
"UsageMask", all it shows are assignments of TGSI_WRITEMASK_XYZW to the
UsageMask property.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2 v2] Add support for clip distances in Gallium

2011-12-13 Thread Bryan Cain
On 12/13/2011 04:22 PM, Jose Fonseca wrote:
> - Original Message -
>>
>> - Original Message -
>>> On 12/13/2011 03:48 PM, Jose Fonseca wrote:
>>>> - Original Message -
>>>>> On 12/13/2011 03:25 PM, Jose Fonseca wrote:
>>>>>> - Original Message -
>>>>>>> On 12/13/2011 03:09 PM, Jose Fonseca wrote:
>>>>>>>> - Original Message -
>>>>>>>>> On 12/13/2011 12:26 PM, Bryan Cain wrote:
>>>>>>>>>> On 12/13/2011 02:11 PM, Jose Fonseca wrote:
>>>>>>>>>>> - Original Message -
>>>>>>>>>>>> This is an updated version of the patch set I sent to the
>>>>>>>>>>>> list
>>>>>>>>>>>> a
>>>>>>>>>>>> few
>>>>>>>>>>>> hours
>>>>>>>>>>>> ago.
>>>>>>>>>>>> There is now a TGSI property called
>>>>>>>>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES
>>>>>>>>>>>> that drivers can use to determine how many of the 8
>>>>>>>>>>>> available
>>>>>>>>>>>> clip
>>>>>>>>>>>> distances
>>>>>>>>>>>> are actually used by a shader.
>>>>>>>>>>> Can't the info in TGSI_PROPERTY_NUM_CLIP_DISTANCES be
>>>>>>>>>>> easily
>>>>>>>>>>> derived from the shader, and queried through
>>>>>>>>>>> src/gallium/auxiliary/tgsi/tgsi_scan.h ?
>>>>>>>>>> No.  The clip distances can be indirectly addressed (there
>>>>>>>>>> are
>>>>>>>>>> up
>>>>>>>>>> to 2
>>>>>>>>>> of them in vec4 form for a total of 8 floats), which makes
>>>>>>>>>> it
>>>>>>>>>> impossible
>>>>>>>>>> to determine which ones are used by analyzing the shader.
>>>>>>>>> The description is almost complete. :)  The issue is that
>>>>>>>>> the
>>>>>>>>> shader
>>>>>>>>> may
>>>>>>>>> declare
>>>>>>>>>
>>>>>>>>> out float gl_ClipDistance[4];
>>>>>>>>>
>>>>>>>>> the use non-constant addressing of the array.  The compiler
>>>>>>>>> knows
>>>>>>>>> that
>>>>>>>>> gl_ClipDistance has at most 4 elements, but post-hoc
>>>>>>>>> analysis
>>>>>>>>> would
>>>>>>>>> not
>>>>>>>>> be able to determine that.  Often the fixed-function
>>>>>>>>> hardware
>>>>>>>>> (see
>>>>>>>>> below) needs to know which clip distance values are actually
>>>>>>>>> written.
>>>>>>>> But don't all the clip distances written by the shader need
>>>>>>>> to
>>>>>>>> be
>>>>>>>> declared?
>>>>>>>>
>>>>>>>> E.g.:
>>>>>>>>
>>>>>>>> DCL OUT[0], CLIPDIST[0]
>>>>>>>> DCL OUT[1], CLIPDIST[1]
>>>>>>>> DCL OUT[2], CLIPDIST[2]
>>>>>>>> DCL OUT[3], CLIPDIST[3]
>>>>>>>>
>>>>>>>> therefore a trivial analysis of the declarations convey that?
>>>>>>> No.  Clip distance is an array of up to 8 floats in GLSL, but
>>>>>>> it's
>>>>>>> represented in the hardware as 2 vec4s.  You can tell by
>>>>>>> analyzing
>>>>>>> the
>>>>>>> declarations whether there are more than 4 clip distances in
>>>>>>> use,
>>>>>>> but
>>>>>>> not which components the shader writes to.
>>>>>>> TGSI_PROPERTY_NUM_CLIP_DISTANCES is the number of components
>>>>>>> in
>>>>>>> use,
>>>>>>> not
>>&

[Mesa-dev] [PATCH 0/2 v3] Add support for clip distances in Gallium

2011-12-17 Thread Bryan Cain
This is the third revision of my changes to add support for gl_ClipDistance
with Gallium.  The difference between this set and v2 is that this set does
not add a new TGSI_PROPERTY indicating the number of clip distances used.
Instead, the UsageMask of the CLIPDIST output registers is set to indicate
which clip distances are used.  Some changes to ureg were necessary to set
the UsageMask to a value other than XYZW.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add support for clip distances

2011-12-17 Thread Bryan Cain
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c |3 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c |3 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c |   36 +---
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |6 
 src/gallium/include/pipe/p_shader_tokens.h |3 +-
 5 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e830aa5..bd299b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -129,7 +129,8 @@ static const char *semantic_names[] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *immediate_type_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c 
b/src/gallium/auxiliary/tgsi/tgsi_text.c
index eb9190c..f46ba19 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1024,7 +1024,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 17f9ce2..56c4492 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -122,6 +122,7 @@ struct ureg_program
struct {
   unsigned semantic_name;
   unsigned semantic_index;
+  unsigned usage_mask;
} output[UREG_MAX_OUTPUT];
unsigned nr_outputs;
 
@@ -396,21 +397,25 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 
 struct ureg_dst 
-ureg_DECL_output( struct ureg_program *ureg,
-  unsigned name,
-  unsigned index )
+ureg_DECL_output_masked( struct ureg_program *ureg,
+ unsigned name,
+ unsigned index,
+ unsigned usage_mask )
 {
unsigned i;
 
for (i = 0; i < ureg->nr_outputs; i++) {
   if (ureg->output[i].semantic_name == name &&
-  ureg->output[i].semantic_index == index) 
+  ureg->output[i].semantic_index == index) { 
+ ureg->output[i].usage_mask |= usage_mask;
  goto out;
+  }
}
 
if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
   ureg->output[i].semantic_name = name;
   ureg->output[i].semantic_index = index;
+  ureg->output[i].usage_mask = usage_mask;
   ureg->nr_outputs++;
}
else {
@@ -422,6 +427,15 @@ out:
 }
 
 
+struct ureg_dst 
+ureg_DECL_output( struct ureg_program *ureg,
+  unsigned name,
+  unsigned index )
+{
+   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW);
+}
+
+
 /* Returns a new constant register.  Keep track of which have been
  * referred to so that we can emit decls later.
  *
@@ -1181,7 +1195,8 @@ emit_decl_semantic(struct ureg_program *ureg,
unsigned file,
unsigned index,
unsigned semantic_name,
-   unsigned semantic_index)
+   unsigned semantic_index,
+   unsigned usage_mask)
 {
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
 
@@ -1189,7 +1204,7 @@ emit_decl_semantic(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = file;
-   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+   out[0].decl.UsageMask = usage_mask;
out[0].decl.Semantic = 1;
 
out[1].value = 0;
@@ -1427,7 +1442,8 @@ static void emit_decls( struct ureg_program *ureg )
 TGSI_FILE_INPUT,
 ureg->gs_input[i].index,
 ureg->gs_input[i].semantic_name,
-ureg->gs_input[i].semantic_index);
+ureg->gs_input[i].semantic_index,
+TGSI_WRITEMASK_XYZW);
   }
}
 
@@ -1436,7 +1452,8 @@ static void emit_decls( struct ureg_program *ureg )
  TGSI_FILE_SYSTEM_VALUE,
  ureg->system_value[i].index,
  ureg->system_value[i].semantic_name,
- ureg->system_value[i].semantic_index);
+ ureg->system_value[i].semantic_index,
+ TGSI_WRITEMASK_XYZW);
}
 
for (i = 0; i < ureg->nr_outputs; i++) {
@@ -1444,7 +1461,8 @@ static void emit_decls( struct ureg_program *ureg )
  TGSI_FILE_OUTPUT,
  i,
  ureg->output[i].semantic_name,
- ureg->output[i].semantic_index);
+ ureg->output[i].semantic_index,
+ ureg->output[i].usage_mask);
}
 
for (i = 0; i < ureg->nr_sampler

[Mesa-dev] [PATCH 2/2] st/mesa: add support for gl_ClipDistance

2011-12-17 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   49 +---
 src/mesa/state_tracker/st_program.c|   18 ++
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b929806..3e8df78 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -304,6 +304,7 @@ public:
int samplers_used;
bool indirect_addr_temps;
bool indirect_addr_consts;
+   int num_clip_distances;

int glsl_version;
bool native_integers;
@@ -4618,9 +4619,16 @@ st_translate_program(
   }
 
   for (i = 0; i < numOutputs; i++) {
- t->outputs[i] = ureg_DECL_output(ureg,
-  outputSemanticName[i],
-  outputSemanticIndex[i]);
+ if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
+int mask = ((1 << (program->num_clip_distances - 
4*outputSemanticIndex[i])) - 1) & 0xf;
+t->outputs[i] = ureg_DECL_output_masked(ureg,
+outputSemanticName[i],
+outputSemanticIndex[i],
+mask);
+ } else
+t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
  if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
 /* Writing to the point size result register requires special
  * handling to implement clamping.
@@ -4797,7 +4805,8 @@ out:
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
  struct gl_shader_program *shader_program,
-struct gl_shader *shader)
+ struct gl_shader *shader,
+ int num_clip_distances)
 {
glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
struct gl_program *prog;
@@ -4842,6 +4851,7 @@ get_mesa_program(struct gl_context *ctx,
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+   v->num_clip_distances = num_clip_distances;
 
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
   prog->Parameters);
@@ -4971,6 +4981,27 @@ get_mesa_program(struct gl_context *ctx,
return prog;
 }
 
+/**
+ * Searches through the IR for a declaration of gl_ClipDistance and returns the
+ * declared size of the gl_ClipDistance array.  Returns 0 if gl_ClipDistance is
+ * not declared in the IR.
+ */
+int get_clip_distance_size(exec_list *ir)
+{
+   foreach_iter (exec_list_iterator, iter, *ir) {
+  ir_instruction *inst = (ir_instruction *)iter.get();
+  ir_variable *var = inst->as_variable();
+  if (var == NULL) continue;
+  if (!strcmp(var->name, "gl_ClipDistance"))
+  {
+ fprintf(stderr, "gl_ClipDistance found with size %i\n", 
var->type->length);
+ return var->type->length;
+  }
+   }
+   
+   return 0;
+}
+
 extern "C" {
 
 struct gl_shader *
@@ -5009,6 +5040,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   int num_clip_distances[MESA_SHADER_TYPES];
assert(prog->LinkStatus);
 
for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
@@ -5020,6 +5052,11 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   const struct gl_shader_compiler_options *options =
 
&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
 
+  /* We have to determine the length of the gl_ClipDistance array before
+   * the array is lowered to two vec4s by lower_clip_distance().
+   */
+  num_clip_distances[i] = get_clip_distance_size(ir);
+
   do {
  progress = false;
 
@@ -5036,6 +5073,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   || progress;
 
  progress = lower_quadop_vector(ir, false) || progress;
+ progress = lower_clip_distance(ir) || progress;
 
  if (options->MaxIfDepth == 0)
 progress = lower_discard(ir) || progress;
@@ -5070,7 +5108,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   if (prog->_LinkedShaders[i] == NULL)
  continue;
 
-  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
+ num_clip_distances[i]);
 
   if (linked_prog) {
 static const GLenum targets[] = {
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index b83c561..b404503 1

[Mesa-dev] [PATCH 0/2 v4] Add support for clip distances in Gallium

2012-01-02 Thread Bryan Cain
This is the fourth revision of my changes to add support for gl_ClipDistance
with Gallium.  The first three revisions were sent in closer succession about
two weeks ago.  This revision is identical to v3 except for the inclusion of
the changes suggested by Brian Paul in reply to the v3 patches.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add support for clip distances

2012-01-02 Thread Bryan Cain
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c |3 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c |3 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c |   38 +--
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |6 
 src/gallium/include/pipe/p_shader_tokens.h |3 +-
 5 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e830aa5..bd299b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -129,7 +129,8 @@ static const char *semantic_names[] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *immediate_type_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c 
b/src/gallium/auxiliary/tgsi/tgsi_text.c
index eb9190c..f46ba19 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1024,7 +1024,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"PRIM_ID",
"INSTANCEID",
"VERTEXID",
-   "STENCIL"
+   "STENCIL",
+   "CLIPDIST"
 };
 
 static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 17f9ce2..0f9aa3a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -122,6 +122,7 @@ struct ureg_program
struct {
   unsigned semantic_name;
   unsigned semantic_index;
+  unsigned usage_mask; /* = TGSI_WRITEMASK_* */
} output[UREG_MAX_OUTPUT];
unsigned nr_outputs;
 
@@ -396,21 +397,27 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 
 struct ureg_dst 
-ureg_DECL_output( struct ureg_program *ureg,
-  unsigned name,
-  unsigned index )
+ureg_DECL_output_masked( struct ureg_program *ureg,
+ unsigned name,
+ unsigned index,
+ unsigned usage_mask )
 {
unsigned i;
 
+   assert(usage_mask != 0);
+
for (i = 0; i < ureg->nr_outputs; i++) {
   if (ureg->output[i].semantic_name == name &&
-  ureg->output[i].semantic_index == index) 
+  ureg->output[i].semantic_index == index) { 
+ ureg->output[i].usage_mask |= usage_mask;
  goto out;
+  }
}
 
if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
   ureg->output[i].semantic_name = name;
   ureg->output[i].semantic_index = index;
+  ureg->output[i].usage_mask = usage_mask;
   ureg->nr_outputs++;
}
else {
@@ -422,6 +429,15 @@ out:
 }
 
 
+struct ureg_dst 
+ureg_DECL_output( struct ureg_program *ureg,
+  unsigned name,
+  unsigned index )
+{
+   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW);
+}
+
+
 /* Returns a new constant register.  Keep track of which have been
  * referred to so that we can emit decls later.
  *
@@ -1181,7 +1197,8 @@ emit_decl_semantic(struct ureg_program *ureg,
unsigned file,
unsigned index,
unsigned semantic_name,
-   unsigned semantic_index)
+   unsigned semantic_index,
+   unsigned usage_mask)
 {
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
 
@@ -1189,7 +1206,7 @@ emit_decl_semantic(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = file;
-   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+   out[0].decl.UsageMask = usage_mask;
out[0].decl.Semantic = 1;
 
out[1].value = 0;
@@ -1427,7 +1444,8 @@ static void emit_decls( struct ureg_program *ureg )
 TGSI_FILE_INPUT,
 ureg->gs_input[i].index,
 ureg->gs_input[i].semantic_name,
-ureg->gs_input[i].semantic_index);
+ureg->gs_input[i].semantic_index,
+TGSI_WRITEMASK_XYZW);
   }
}
 
@@ -1436,7 +1454,8 @@ static void emit_decls( struct ureg_program *ureg )
  TGSI_FILE_SYSTEM_VALUE,
  ureg->system_value[i].index,
  ureg->system_value[i].semantic_name,
- ureg->system_value[i].semantic_index);
+ ureg->system_value[i].semantic_index,
+ TGSI_WRITEMASK_XYZW);
}
 
for (i = 0; i < ureg->nr_outputs; i++) {
@@ -1444,7 +1463,8 @@ static void emit_decls( struct ureg_program *ureg )
  TGSI_FILE_OUTPUT,
  i,
  ureg->output[i].semantic_name,
- ureg->output[i].semantic_index);
+ ureg->output[i].semantic_index,
+ ureg->output[i].us

[Mesa-dev] [PATCH 2/2] st/mesa: add support for gl_ClipDistance

2012-01-02 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   48 +---
 src/mesa/state_tracker/st_program.c|   18 ++
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1515fc1..bc3005e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -304,6 +304,7 @@ public:
int samplers_used;
bool indirect_addr_temps;
bool indirect_addr_consts;
+   int num_clip_distances;

int glsl_version;
bool native_integers;
@@ -4627,9 +4628,17 @@ st_translate_program(
   }
 
   for (i = 0; i < numOutputs; i++) {
- t->outputs[i] = ureg_DECL_output(ureg,
-  outputSemanticName[i],
-  outputSemanticIndex[i]);
+ if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
+int mask = ((1 << (program->num_clip_distances - 
4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW;
+t->outputs[i] = ureg_DECL_output_masked(ureg,
+outputSemanticName[i],
+outputSemanticIndex[i],
+mask);
+ } else {
+t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
+ }
  if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
 /* Writing to the point size result register requires special
  * handling to implement clamping.
@@ -4806,7 +4815,8 @@ out:
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
  struct gl_shader_program *shader_program,
-struct gl_shader *shader)
+ struct gl_shader *shader,
+ int num_clip_distances)
 {
glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
struct gl_program *prog;
@@ -4851,6 +4861,7 @@ get_mesa_program(struct gl_context *ctx,
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+   v->num_clip_distances = num_clip_distances;
 
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
   prog->Parameters);
@@ -4980,6 +4991,25 @@ get_mesa_program(struct gl_context *ctx,
return prog;
 }
 
+/**
+ * Searches through the IR for a declaration of gl_ClipDistance and returns the
+ * declared size of the gl_ClipDistance array.  Returns 0 if gl_ClipDistance is
+ * not declared in the IR.
+ */
+int get_clip_distance_size(exec_list *ir)
+{
+   foreach_iter (exec_list_iterator, iter, *ir) {
+  ir_instruction *inst = (ir_instruction *)iter.get();
+  ir_variable *var = inst->as_variable();
+  if (var == NULL) continue;
+  if (!strcmp(var->name, "gl_ClipDistance")) {
+ return var->type->length;
+  }
+   }
+   
+   return 0;
+}
+
 extern "C" {
 
 struct gl_shader *
@@ -5018,6 +5048,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   int num_clip_distances[MESA_SHADER_TYPES];
assert(prog->LinkStatus);
 
for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
@@ -5029,6 +5060,11 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   const struct gl_shader_compiler_options *options =
 
&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
 
+  /* We have to determine the length of the gl_ClipDistance array before
+   * the array is lowered to two vec4s by lower_clip_distance().
+   */
+  num_clip_distances[i] = get_clip_distance_size(ir);
+
   do {
  progress = false;
 
@@ -5045,6 +5081,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   || progress;
 
  progress = lower_quadop_vector(ir, false) || progress;
+ progress = lower_clip_distance(ir) || progress;
 
  if (options->MaxIfDepth == 0)
 progress = lower_discard(ir) || progress;
@@ -5079,7 +5116,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   if (prog->_LinkedShaders[i] == NULL)
  continue;
 
-  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
+ num_clip_distances[i]);
 
   if (linked_prog) {
 static const GLenum targets[] = {
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index d62bfcd..aceaaf8 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/s

Re: [Mesa-dev] [PATCH 0/2 v4] Add support for clip distances in Gallium

2012-01-04 Thread Bryan Cain
On 01/02/2012 02:58 PM, Bryan Cain wrote:
> This is the fourth revision of my changes to add support for gl_ClipDistance
> with Gallium.  The first three revisions were sent in closer succession about
> two weeks ago.  This revision is identical to v3 except for the inclusion of
> the changes suggested by Brian Paul in reply to the v3 patches.

Does anyone mind if I push this?  It's been on the list for two full
days with no objections.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] softpipe GL3 status

2012-01-06 Thread Bryan Cain
On 01/06/2012 01:26 PM, Ian Romanick wrote:
> On 01/06/2012 09:04 AM, Dave Airlie wrote:
>> Hi guys,
>>
>> Just a quick note, I've just spent a week or so trying to see where
>> gallium and softpipe were w.r.t GL3.0 support.
>>
>> I've pushed a branch to my repo called softpipe-gl3. It contains
>> patches in various state of usefulness but it brings the piglit
>> results to 220 failures in 7623 tests, which isn't bad.
>>
>> Outstanding known problems (stuff I've dug into).
>>
>> smooth interpolation is broken in softpipe, worth about 70-100 fixes
>> at a quick guess.
>>
>> integer abs - we have no TGSI representation for this, should we lower
>> it to something?
>
> Or just generate some TGSI instructions to implement it.  You should
> be able to fake it with a CMP-like instruction.  I think that's how
> i915 does it in hardware.

Depends on whether there's any hardware with a native integer abs
instruciton.  If there is, we should just add a new IABS instruction to
TGSI and let drivers implement it how they want.  Otherwise, your
suggestion should work.

>
>> integer SSG (set sign) - no TGSI for this, lower it?
>
> Where is SSG being generated?  I thought ir_to_mesa was the only thing
> that generated it, and Gallium shouldn't hit that path.

glsl_to_tgsi still generates the TGSI equivalent; that part hasn't been
changed from ir_to_mesa.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium: add an IABS opcode to TGSI

2012-01-07 Thread Bryan Cain
This is a necessary operation that is missing from TGSI.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |4 
 src/gallium/auxiliary/tgsi/tgsi_info.c |1 +
 src/gallium/docs/source/tgsi.rst   |   13 +
 src/gallium/include/pipe/p_shader_tokens.h |3 ++-
 4 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 7ea8511..3e2b899 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -4193,6 +4193,10 @@ exec_instruction(
   exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
   break;
 
+   case TGSI_OPCODE_IABS:
+  exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_INT);
+  break;
+
default:
   assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 6cd580a..c9acdb9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -192,6 +192,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =

{ 1, 1, 0, 0, 0, 0, "UARL", TGSI_OPCODE_UARL },
{ 1, 3, 0, 0, 0, 0, "UCMP", TGSI_OPCODE_UCMP },
+   { 1, 1, 0, 0, 0, 0, "IABS", TGSI_OPCODE_IABS },
 };
 
 const struct tgsi_opcode_info *
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 45af528..7e7010f 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1043,6 +1043,19 @@ XXX so let's discuss it, yeah?
   destination register, which is assumed to be an address (ADDR) register.
 
 
+.. opcode:: IABS - Integer Absolute Value
+
+.. math::
+
+  dst.x = |src.x|
+
+  dst.y = |src.y|
+
+  dst.z = |src.z|
+
+  dst.w = |src.w|
+
+
 .. opcode:: SAD - Sum Of Absolute Differences
 
 .. math::
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index b24b64c..75e17a1 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -376,8 +376,9 @@ struct tgsi_property_data {
 
 #define TGSI_OPCODE_UARL157
 #define TGSI_OPCODE_UCMP158
+#define TGSI_OPCODE_IABS159
 
-#define TGSI_OPCODE_LAST159
+#define TGSI_OPCODE_LAST160
 
 #define TGSI_SAT_NONE0  /* do not saturate */
 #define TGSI_SAT_ZERO_ONE1  /* clamp to [0,1] */
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] softpipe GL3 status

2012-01-07 Thread Bryan Cain
On 01/07/2012 02:44 AM, Dave Airlie wrote:
>> Let's add new opcodes for things like this.
> I'll add IABS.

I sent a patch to the mailing list that adds IABS about an hour ago,
before reading this message.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] glsl_to_tgsi: Create a new variable_store class replacing variables field in glsl_to_tgsi_visitor

2012-01-07 Thread Bryan Cain
This is good work.  I just have a few suggested changes.

On 01/07/2012 12:26 PM, Vincent Lejeune wrote:
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  414 
> +---
>  1 files changed, 309 insertions(+), 105 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index cecceca..17ae525 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -230,14 +230,16 @@ public:
>  class variable_storage : public exec_node {
>  public:
> variable_storage(ir_variable *var, gl_register_file file, int index)
> -  : file(file), index(index), var(var)
> +  : file(file), index(index), type(var->type), 
> is_array(var->type->is_array() || var->type->is_record() || 
> var->type->is_matrix()), is_reladdressed(false)
> {
>/* empty */
> }
>  
> gl_register_file file;
> int index;
> -   ir_variable *var; /* variable that maps to this, if any */
> +   const glsl_type *type; /* variable that maps to this, if any */

The comment there doesn't make sense anymore.

> +   bool is_array;
> +   bool is_reladdressed;
>  };
>  
>  class immediate_storage : public exec_node {
> @@ -286,6 +288,242 @@ public:
> st_src_reg return_reg;
>  };
>  
> +static int type_size(const glsl_type *type);
> +static int swizzle_for_size(int size);
> +
> +
> +/**
> + * Single place to store all temporary values (either explicit - 
> ir_variable* -
> + * or implicit - returned by get_temp -).

I don't think the last " -" is supposed to be there.

> + *
> + * Explicit temps are stored in variables hash_table.
> + * Implicit temps are stored in rvalue_regs array.
> + */
> +class variable_store {
> +   friend class glsl_to_tgsi_variable_allocator;
> +protected:
> +   void *mem_ctx;
> +   hash_table* variables;
> +   unsigned next_temp;
> +   unsigned next_temp_array;
> +   static void reindex_reladdress(const void *, void *, void *);
> +   static void reindex_non_reladdress(const void *, void *, void *);
> +   void reindex_rvalue();
> +   void reindex_rvalue_reladdressed();
> +   variable_storage* rvalue_regs;
> +   unsigned rvalue_regs_count;
> +
> +public:
> +   bool native_integers;
> +   unsigned temp_amount() const;
> +   unsigned temp_array_amount() const;
> +   variable_store();
> +   ~variable_store();
> +   variable_storage *find_variable_storage(class ir_variable *var) const;
> +   variable_storage *push(class ir_variable *, gl_register_file, int);
> +   variable_storage *push(class ir_variable *);
> +   variable_storage *retrieve_anonymous_temp(unsigned);
> +   st_src_reg get_temp(const glsl_type *type);
> +   void optimise_access(void);
> +   unsigned *reindex_table;
> +};
> +
> +unsigned
> +variable_store::temp_amount() const
> +{
> +   return next_temp;
> +}
> +
> +unsigned
> +variable_store::temp_array_amount() const
> +{
> +   return next_temp_array;
> +}
> +
> +variable_store::variable_store():mem_ctx(ralloc_context(NULL)),next_temp(1),next_temp_array(1),rvalue_regs(NULL),rvalue_regs_count(0)
> +{
> +   variables = 
> hash_table_ctor(0,hash_table_pointer_hash,hash_table_pointer_compare);
> +}
> +
> +variable_store::~variable_store()
> +{
> +   hash_table_dtor(variables);
> +   ralloc_free(mem_ctx);
> +}
> +
> +variable_storage *
> +variable_store::find_variable_storage(ir_variable *var) const
> +{
> +   return (class variable_storage *) hash_table_find(variables,var);

The convention in the rest of glsl_to_tgsi (and Mesa) is to put a space
after the comma separating arguments.  I'd prefer it if this patch
followed the same convention.

> +}
> +
> +variable_storage*
> +variable_store::push(class ir_variable *var, gl_register_file file, int 
> index)
> +{
> +   variable_storage *storage = new (mem_ctx) 
> variable_storage(var,file,index);
> +   hash_table_insert(variables,storage,var);
> +   return storage;
> +}
> +
> +variable_storage*
> +variable_store::push(ir_variable *ir)
> +{
> +   variable_storage* retval = push(ir, PROGRAM_TEMPORARY, next_temp);
> +   next_temp += type_size(ir->type);
> +   if (ir->type->is_array() || ir->type->is_record() || 
> ir->type->is_matrix()) {
> +  retval->is_array = true;
> +   }
> +   return retval;
> +}
> +
> +variable_storage*
> +variable_store::retrieve_anonymous_temp(unsigned reg)
> +{
> +   for (unsigned i = 0; i < rvalue_regs_count; i++) {
> +  unsigned range_start = rvalue_regs[i].index;
> +  unsigned range_end = range_start + type_size(rvalue_regs[i].type) - 1;
> +  if (reg >= range_start && reg <= range_end) {
> + return rvalue_regs + i;
> + }
> +   }
> +   printf ("Failed to get storage");
> +   exit(1);

I don't think writing to stdout and exiting cleanly like this is the
right way to handle a fatal error.  It should probably write the error
message to stderr (making clear that it's a fatal error in Mesa, not an
error in the application) and assert (so that a debugger can catch

[Mesa-dev] [PATCH] mesa, glsl_to_tgsi: fixes for native integers

2011-08-26 Thread Bryan Cain
This fixes all of the piglit regressions in softpipe when native integers are
enabled.
---
 src/mesa/main/uniforms.c   |8 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   45 ++--
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index cda840f..0801476 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -776,13 +776,7 @@ set_program_uniform(struct gl_context *ctx, struct 
gl_program *program,
  /* if the uniform is bool-valued, convert to 1 or 0 */
  if (isUniformBool) {
 for (i = 0; i < elems; i++) {
-   if (basicType == GL_FLOAT)
-  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
-   else
-  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
-   
-   if (!ctx->Const.NativeIntegers)
-  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
+   uniformVal[i].f = uniformVal[i].f != 0.0f ? 1.0f : 0.0f;
 }
  }
   }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9cac309..d1674eb 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -385,6 +385,8 @@ public:
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
+   void try_emit_integer_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
 
void emit_scs(ir_instruction *ir, unsigned op,
@@ -563,6 +565,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
this->instructions.push_tail(inst);

+   try_emit_integer_set(ir, op, dst);
+   
return inst;
 }
 
@@ -589,6 +593,32 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
 }
 
 /**
+ * Emits the code to convert the result of integer SET instructions to floats.
+ */
+void
+glsl_to_tgsi_visitor::try_emit_integer_set(ir_instruction *ir, unsigned op,
+st_dst_reg dst)
+{
+   st_src_reg src;
+
+   if (!(op == TGSI_OPCODE_USEQ ||
+ op == TGSI_OPCODE_USNE ||
+ op == TGSI_OPCODE_ISGE ||
+ op == TGSI_OPCODE_USGE ||
+ op == TGSI_OPCODE_ISLT ||
+ op == TGSI_OPCODE_USLT))
+  return;
+
+   src = st_src_reg(dst);
+   src.type = GLSL_TYPE_INT;
+
+   dst.type = GLSL_TYPE_FLOAT;
+   emit(ir, TGSI_OPCODE_I2F, dst, src);
+   src.type = GLSL_TYPE_FLOAT;
+   emit(ir, TGSI_OPCODE_SNE, dst, src, st_src_reg_for_float(0.0));
+}
+
+/**
  * Determines whether to use an integer, unsigned integer, or float opcode 
  * based on the operands and input opcode, then emits the result.
  * 
@@ -1662,7 +1692,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
   emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
   break;
case ir_unop_i2f:
-   case ir_unop_b2f:
   if (native_integers) {
  emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
  break;
@@ -1670,10 +1699,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_i2u:
case ir_unop_u2i:
   /* Converting between signed and unsigned integers is a no-op. */
-   case ir_unop_b2i:
-  /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+   case ir_unop_b2f:
+  /* Booleans are stored as floats. */
   result_src = op[0];
   break;
+   case ir_unop_b2i:
+  if (native_integers)
+ emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+  break;
case ir_unop_f2i:
   if (native_integers)
  emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
@@ -1681,6 +1714,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
   break;
case ir_unop_f2b:
+  if (native_integers) {
+ emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
st_src_reg_for_float(0.0));
+ emit(ir, TGSI_OPCODE_F2I, result_dst, result_src);
+  }
+  break;
case ir_unop_i2b:
   emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
 st_src_reg_for_type(result_dst.type, 0));
@@ -2154,6 +2192,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
   inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
   new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], 
inst->src[2]);
   new_inst->saturate = inst->saturate;
+  inst->dead_mask = inst->dst.writemask;
} else {
   for (i = 0; i < type_size(ir->lhs->type); i++) {
  emit(ir, TGSI_OPCODE_MOV, l, r);
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa, glsl_to_tgsi: fixes for native integers

2011-08-27 Thread Bryan Cain
On 08/27/2011 05:39 AM, Christoph Bumiller wrote:
> On 27.08.2011 04:58, Bryan Cain wrote:
>> This fixes all of the piglit regressions in softpipe when native integers are
>> enabled.
>> ---
>>  src/mesa/main/uniforms.c   |8 +
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   45 
>> ++--
>>  2 files changed, 43 insertions(+), 10 deletions(-)
>>
>> diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
>> index cda840f..0801476 100644
>> --- a/src/mesa/main/uniforms.c
>> +++ b/src/mesa/main/uniforms.c
>> @@ -776,13 +776,7 @@ set_program_uniform(struct gl_context *ctx, struct 
>> gl_program *program,
>>   /* if the uniform is bool-valued, convert to 1 or 0 */
>>   if (isUniformBool) {
>>  for (i = 0; i < elems; i++) {
>> -   if (basicType == GL_FLOAT)
>> -  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
>> -   else
>> -  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
>> -   
>> -   if (!ctx->Const.NativeIntegers)
>> -  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
>> +   uniformVal[i].f = uniformVal[i].f != 0.0f ? 1.0f : 0.0f;
>>  }
>>   }
>>}
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 9cac309..d1674eb 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -385,6 +385,8 @@ public:
>> void emit_scalar(ir_instruction *ir, unsigned op,
>>  st_dst_reg dst, st_src_reg src0, st_src_reg src1);
>>  
>> +   void try_emit_integer_set(ir_instruction *ir, unsigned op, st_dst_reg 
>> dst);
>> +
>> void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
>>  
>> void emit_scs(ir_instruction *ir, unsigned op,
>> @@ -563,6 +565,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
>> op,
>>  
>> this->instructions.push_tail(inst);
>> 
>> +   try_emit_integer_set(ir, op, dst);
>> +   
>> return inst;
>>  }
>>  
>> @@ -589,6 +593,32 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
>> op)
>>  }
>>  
>>  /**
>> + * Emits the code to convert the result of integer SET instructions to 
>> floats.
>> + */
>> +void
>> +glsl_to_tgsi_visitor::try_emit_integer_set(ir_instruction *ir, unsigned op,
>> + st_dst_reg dst)
>> +{
>> +   st_src_reg src;
>> +
>> +   if (!(op == TGSI_OPCODE_USEQ ||
>> + op == TGSI_OPCODE_USNE ||
>> + op == TGSI_OPCODE_ISGE ||
>> + op == TGSI_OPCODE_USGE ||
>> + op == TGSI_OPCODE_ISLT ||
>> + op == TGSI_OPCODE_USLT))
>> +   return;
>> +
>> +   src = st_src_reg(dst);
>> +   src.type = GLSL_TYPE_INT;
>> +
>> +   dst.type = GLSL_TYPE_FLOAT;
>> +   emit(ir, TGSI_OPCODE_I2F, dst, src);
>> +   src.type = GLSL_TYPE_FLOAT;
>> +   emit(ir, TGSI_OPCODE_SNE, dst, src, st_src_reg_for_float(0.0));
>> +}
>> +
> emit(ir, TGSI_OPCODE_AND, dst, src, st_src_reg_for_float(1.0f));
>
> I still don't quite like booleans as floats, but I guess I can just
> easily track back to the source SET to emit my set-predicate-register op
> without having all the fuss in between.

For now, I'm trying to get things working.  Once integers are working on
softpipe and r600g with no piglit regressions, I'll look into
optimizations like using AND.

>> +/**
>>   * Determines whether to use an integer, unsigned integer, or float opcode 
>>   * based on the operands and input opcode, then emits the result.
>>   * 
>> @@ -1662,7 +1692,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>>emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
>>break;
>> case ir_unop_i2f:
>> -   case ir_unop_b2f:
>>if (native_integers) {
>>   emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
>>   break;
>> @@ -1670,10 +1699,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>> case ir_unop_i2u:
>> case ir_unop_u2i:
>>/* Converting between signed and unsigned integers is a no-op. */
>> -   case ir_unop_b2i:
>> -  /* Booleans are stored as integers (or floats in GLSL 1.20 and 
>> lower). */
>> +   case ir_unop_b2f:
>> +  /* Booleans are stored as floats. */
>>

[Mesa-dev] [PATCH] glsl: use a separate div_to_mul_rcp lowering flag for integers

2011-08-27 Thread Bryan Cain
TGSI, at the very least, has UDIV/IDIV instructions for integer division.
---
 src/glsl/ir_optimization.h |   13 +++--
 src/glsl/lower_instructions.cpp|4 +++-
 src/mesa/drivers/dri/i965/brw_shader.cpp   |1 +
 src/mesa/program/ir_to_mesa.cpp|2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |2 +-
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index f7808bd..48448d4 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -29,12 +29,13 @@
  */
 
 /* Operations for lower_instructions() */
-#define SUB_TO_ADD_NEG 0x01
-#define DIV_TO_MUL_RCP 0x02
-#define EXP_TO_EXP20x04
-#define POW_TO_EXP20x08
-#define LOG_TO_LOG20x10
-#define MOD_TO_FRACT   0x20
+#define SUB_TO_ADD_NEG 0x01
+#define DIV_TO_MUL_RCP 0x02
+#define EXP_TO_EXP20x04
+#define POW_TO_EXP20x08
+#define LOG_TO_LOG20x10
+#define MOD_TO_FRACT   0x20
+#define INT_DIV_TO_MUL_RCP 0x40
 
 bool do_common_optimization(exec_list *ir, bool linked, unsigned 
max_unroll_iterations);
 
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 23aa19b..fd0c760 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -265,7 +265,9 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
   break;
 
case ir_binop_div:
-  if (lowering(DIV_TO_MUL_RCP))
+  if (lowering(INT_DIV_TO_MUL_RCP) && ir->operands[1]->type->is_integer())
+div_to_mul_rcp(ir);
+  else if (lowering(DIV_TO_MUL_RCP))
 div_to_mul_rcp(ir);
   break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 3ff6bba..7e53097 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -100,6 +100,7 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   lower_instructions(shader->ir,
 MOD_TO_FRACT |
 DIV_TO_MUL_RCP |
+INT_DIV_TO_MUL_RCP |
 SUB_TO_ADD_NEG |
 EXP_TO_EXP2 |
 LOG_TO_LOG2);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 6820e4c..dd154db 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3232,7 +3232,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
 /* Lowering */
 do_mat_op_to_vec(ir);
 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-| LOG_TO_LOG2
+| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, 
options->EmitNoCont, options->EmitNoLoops) || progress;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9cac309..7aef4aa 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5012,7 +5012,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
  /* Lowering */
  do_mat_op_to_vec(ir);
  lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-| LOG_TO_LOG2
+| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
  progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, 
options->EmitNoCont, options->EmitNoLoops) || progress;
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Use a separate div_to_mul_rcp lowering flag for integers.

2011-08-28 Thread Bryan Cain
On 08/27/2011 10:18 PM, Kenneth Graunke wrote:
> From: Bryan Cain 
>
> Using multiply and reciprocal for integer division involves potentially
> lossy floating point conversions.  This is okay for older GPUs that
> represent integers as floating point, but undesirable for GPUs with
> native integer division instructions.
>
> TGSI, for example, has UDIV/IDIV instructions for integer division,
> so it makes sense to handle this directly.  Likewise for i965.
>
> Signed-off-by: Bryan Cain 
> Signed-off-by: Kenneth Graunke 
> ---
>
> Bryan,
>
> I like your patch---it looks good!  It seems a bit cleaner to me to split
> div_to_mul_rcp into two separate functions, seeing as the two paths don't
> share any code and are now controlled by different flags.
>
> This version of the patch splits them out, and adds comments to the top
> of the file explaining the new flag.

Looks good to me.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa, glsl_to_tgsi: fixes for native integers

2011-08-28 Thread Bryan Cain
On 08/26/2011 09:58 PM, Bryan Cain wrote:
> This fixes all of the piglit regressions in softpipe when native integers are
> enabled.
> ---
>  src/mesa/main/uniforms.c   |8 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   45 
> ++--
>  2 files changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
> index cda840f..0801476 100644
> --- a/src/mesa/main/uniforms.c
> +++ b/src/mesa/main/uniforms.c
> @@ -776,13 +776,7 @@ set_program_uniform(struct gl_context *ctx, struct 
> gl_program *program,
>   /* if the uniform is bool-valued, convert to 1 or 0 */
>   if (isUniformBool) {
>  for (i = 0; i < elems; i++) {
> -   if (basicType == GL_FLOAT)
> -  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
> -   else
> -  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
> -   
> -   if (!ctx->Const.NativeIntegers)
> -  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
> +   uniformVal[i].f = uniformVal[i].f != 0.0f ? 1.0f : 0.0f;
>  }
>   }
>}

I'd like to push this to master (with one typo fix in the glsl_to_tgsi
part of the patch) since no more objections have been raised, but could
someone verify that this core Mesa change is okay?

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Use a separate div_to_mul_rcp lowering flag for integers.

2011-08-28 Thread Bryan Cain
On 08/28/2011 07:38 PM, Eric Anholt wrote:
> On Sat, 27 Aug 2011 20:18:55 -0700, Kenneth Graunke  
> wrote:
>> From: Bryan Cain 
>>
>> Using multiply and reciprocal for integer division involves potentially
>> lossy floating point conversions.  This is okay for older GPUs that
>> represent integers as floating point, but undesirable for GPUs with
>> native integer division instructions.
>>
>> TGSI, for example, has UDIV/IDIV instructions for integer division,
>> so it makes sense to handle this directly.  Likewise for i965.
>>
>> Signed-off-by: Bryan Cain 
>> Signed-off-by: Kenneth Graunke 
>
>> ---
>> case ir_binop_div:
>> -  if (lowering(DIV_TO_MUL_RCP))
>> +  if (lowering(INT_DIV_TO_MUL_RCP) && 
>> ir->operands[1]->type->is_integer())
>> + int_div_to_mul_rcp(ir);
>> +  else if (lowering(DIV_TO_MUL_RCP))
>>   div_to_mul_rcp(ir);
>>break;
>>  
> Sure looks odd to me for one of these to be checking the type and ther
> other not.

It works, though.  If it's not an integer type, it's going to be a float
type.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Missing integer "set" opcodes in Gallium

2011-08-29 Thread Bryan Cain
I somehow didn't notice this until now, but why are there no Gallium
"set" opcodes for integers that are equivalent to the SGT or SLE opcodes
for floats?  Does it have to do with available hardware features?

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Missing integer "set" opcodes in Gallium

2011-08-29 Thread Bryan Cain
On 08/29/2011 03:41 PM, Zack Rusin wrote:
> On Monday, August 29, 2011 04:02:08 PM Zack Rusin wrote:
>> Either way though if GL needs those ops then, like Brian mentioned, it'd be
>> a  good idea to add them.
> Actually I think it seems easier to just flip the ops rather than add new 
> instructions, i.e. change stuff like a > b, to b < a, and a <= b to b >= a. 
> Just an opinion though, I don't have strong preferences here.

You're right; that's an easier solution.  That way, the only opcode I'll
need to add is UARL.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] glsl_to_tgsi, mesa: fixes for native integers and integer booleans

2011-08-30 Thread Bryan Cain
With this patch, there are no piglit regressions on softpipe with native
integers enabled.  Unlike my previous patch, this uses integer values of
~0 and 0 for true and false, respectively, instead of the float values 1.0
and 0.0.
---
 src/mesa/main/uniforms.c   |6 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  160 
 2 files changed, 116 insertions(+), 50 deletions(-)

diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index cda840f..fa96fd3 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -777,12 +777,12 @@ set_program_uniform(struct gl_context *ctx, struct 
gl_program *program,
  if (isUniformBool) {
 for (i = 0; i < elems; i++) {
if (basicType == GL_FLOAT)
-  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+  uniformVal[i].u = uniformVal[i].f != 0.0f ? ~0 : 0;
else
-  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+  uniformVal[i].u = uniformVal[i].u ? ~0 : 0;

if (!ctx->Const.NativeIntegers)
-  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
+  uniformVal[i].f = uniformVal[i].u ? 1.0f : 0.0f;
 }
  }
   }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2266083..c8f790a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -385,6 +385,8 @@ public:
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
+   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
 
void emit_scs(ir_instruction *ir, unsigned op,
@@ -562,7 +564,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
}
 
this->instructions.push_tail(inst);
-   
+
+   if (native_integers)
+  try_emit_float_set(ir, op, dst);
+
return inst;
 }
 
@@ -588,6 +593,25 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
+ /**
+ * Emits the code to convert the result of float SET instructions to integers.
+ */
+void
+glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
+st_dst_reg dst)
+{
+   if ((op == TGSI_OPCODE_SEQ ||
+op == TGSI_OPCODE_SNE ||
+op == TGSI_OPCODE_SGE ||
+op == TGSI_OPCODE_SLT))
+   {
+  st_src_reg src = st_src_reg(dst);
+  src.negate = ~src.negate;
+  dst.type = GLSL_TYPE_FLOAT;
+  emit(ir, TGSI_OPCODE_F2I, dst, src);
+   }
+}
+
 /**
  * Determines whether to use an integer, unsigned integer, or float opcode 
  * based on the operands and input opcode, then emits the result.
@@ -604,7 +628,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
   type = GLSL_TYPE_FLOAT;
else if (native_integers)
-  type = src0.type;
+  type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 
 #define case4(c, f, i, u) \
case TGSI_OPCODE_##c: \
@@ -630,12 +654,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
   case3(SGE, ISGE, USGE);
   case3(SLT, ISLT, USLT);
   
-  case2iu(SHL, SHL);
   case2iu(ISHR, USHR);
-  case2iu(NOT, NOT);
-  case2iu(AND, AND);
-  case2iu(OR, OR);
-  case2iu(XOR, XOR);
   
   default: break;
}
@@ -1389,7 +1408,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
   if (result_dst.type != GLSL_TYPE_FLOAT)
- emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], 
st_src_reg_for_type(result_dst.type, 0));
+ emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
   else {
  /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
   * older GPUs implement SEQ using multiple instructions (i915 uses two
@@ -1489,10 +1508,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
   emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
   break;
case ir_binop_greater:
-  emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+  emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
   break;
case ir_binop_lequal:
-  emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+  emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
   break;
case ir_binop_gequal:
   emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
@@ -1605,41 +1624,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
}
 
case ir_binop_logic_xor:
-  emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+  if (native_integers)
+ emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+  el

[Mesa-dev] [PATCH] mesa: Replace the EmitNoIfs compiler flag with a MaxIfLevel flag.

2011-08-31 Thread Bryan Cain
This is a better, more fine-grained way of lowering if statements.  Fixes the
game And Yet It Moves on nv50.
---
 src/mesa/drivers/dri/i915/i915_context.c   |2 +-
 src/mesa/main/mtypes.h |6 +-
 src/mesa/program/ir_to_mesa.cpp|8 
 src/mesa/state_tracker/st_extensions.c |2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |6 +++---
 5 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_context.c 
b/src/mesa/drivers/dri/i915/i915_context.c
index 11bee14..7a40ba1 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -189,7 +189,7 @@ i915CreateContext(int api,
 
struct gl_shader_compiler_options *const fs_options =
   & ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
-   fs_options->EmitNoIfs = GL_TRUE;
+   fs_options->MaxIfLevel = 0;
fs_options->EmitNoNoise = GL_TRUE;
fs_options->EmitNoPow = GL_TRUE;
fs_options->EmitNoMainReturn = GL_TRUE;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index f2eb889..9f95bcd 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2266,11 +2266,6 @@ struct gl_shader_compiler_options
/** Driver-selectable options: */
GLboolean EmitCondCodes; /**< Use condition codes? */
GLboolean EmitNVTempInitialization;  /**< 0-fill NV temp registers */
-   /**
-* Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't
-* support control flow.
-*/
-   GLboolean EmitNoIfs;
GLboolean EmitNoLoops;
GLboolean EmitNoFunctions;
GLboolean EmitNoCont;  /**< Emit CONT opcode? */
@@ -2288,6 +2283,7 @@ struct gl_shader_compiler_options
GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
/*@}*/
 
+   GLuint MaxIfLevel;   /**< Maximum nested IF blocks */
GLuint MaxUnrollIterations;
 
struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index dd154db..7fb286e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3119,7 +3119,7 @@ get_mesa_program(struct gl_context *ctx,
 
   switch (mesa_inst->Opcode) {
   case OPCODE_IF:
-if (options->EmitNoIfs) {
+if (options->MaxIfLevel == 0) {
linker_warning(shader_program,
   "Couldn't flatten if-statement.  "
   "This will likely result in software "
@@ -3241,10 +3241,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
 progress = lower_quadop_vector(ir, true) || progress;
 
-if (options->EmitNoIfs) {
+if (options->MaxIfLevel == 0)
progress = lower_discard(ir) || progress;
-   progress = lower_if_to_cond_assign(ir) || progress;
-}
+
+progress = lower_if_to_cond_assign(ir, options->MaxIfLevel) || 
progress;
 
 if (options->EmitNoNoise)
progress = lower_noise(ir) || progress;
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 8e90093..9f429d9 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -173,7 +173,7 @@ void st_init_limits(struct st_context *st)
   options->EmitNoNoise = TRUE;
 
   /* TODO: make these more fine-grained if anyone needs it */
-  options->EmitNoIfs = !screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
+  options->MaxIfLevel = screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
   options->EmitNoLoops = !screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
   options->EmitNoFunctions = !screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_SUBROUTINES);
   options->EmitNoMainReturn = !screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_SUBROUTINES);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 98bea69..f5232af 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4991,10 +4991,10 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
  progress = lower_quadop_vector(ir, false) || progress;
 
- if (options->EmitNoIfs) {
+ if (options->MaxIfLevel == 0)
 progress = lower_discard(ir) || progress;
-progress = lower_if_to_cond_assign(ir) || progress;
- }
+
+ progress = lower_if_to_cond_assign(ir, options->MaxIfLevel) || 
progress;
 
  if (options->EmitNoNoise)
 progress = lower_noise(ir) || progress;
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add TGSI opcodes UARL and UCMP

2011-09-02 Thread Bryan Cain
They are needed by glsl_to_tgsi for an efficient implementation using native
integers.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   30 
 src/gallium/auxiliary/tgsi/tgsi_info.c |3 ++
 src/gallium/include/pipe/p_shader_tokens.h |5 +++-
 3 files changed, 37 insertions(+), 1 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 38dc1ef..896d871 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3312,6 +3312,28 @@ micro_usne(union tgsi_exec_channel *dst,
 }
 
 static void
+micro_uarl(union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src)
+{
+   dst->i[0] = src->u[0];
+   dst->i[1] = src->u[1];
+   dst->i[2] = src->u[2];
+   dst->i[3] = src->u[3];
+}
+
+static void
+micro_ucmp(union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
+   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
+   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
+   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
+}
+
+static void
 exec_instruction(
struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
@@ -4071,6 +4093,14 @@ exec_instruction(
   assert(0);
   break;
 
+   case TGSI_OPCODE_UARL:
+  exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_UCMP:
+  exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_UINT);
+  break;
+
default:
   assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 14ed56a..6cd580a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -189,6 +189,9 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO },
{ 1, 2, 0, 0, 0, 0, "SAMPLE_POS",  TGSI_OPCODE_SAMPLE_POS },
{ 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
+   
+   { 1, 1, 0, 0, 0, 0, "UARL", TGSI_OPCODE_UARL },
+   { 1, 3, 0, 0, 0, 0, "UCMP", TGSI_OPCODE_UCMP },
 };
 
 const struct tgsi_opcode_info *
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index d3a3632..0a26e39 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -363,7 +363,10 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_SAMPLE_POS  155
 #define TGSI_OPCODE_SAMPLE_INFO 156
 
-#define TGSI_OPCODE_LAST157
+#define TGSI_OPCODE_UARL157
+#define TGSI_OPCODE_UCMP158
+
+#define TGSI_OPCODE_LAST159
 
 #define TGSI_SAT_NONE0  /* do not saturate */
 #define TGSI_SAT_ZERO_ONE1  /* clamp to [0,1] */
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-02 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
 1 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e2857ed..05d4d33 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
inst->function = NULL;

-   if (op == TGSI_OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
   this->num_address_regs = 1;

/* Update indirect addressing status used by TGSI */
@@ -724,16 +724,12 @@ void
 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0)
 {
-   st_src_reg tmp = get_temp(glsl_type::float_type);
+   int op = TGSI_OPCODE_ARL;
 
-   if (src0.type == GLSL_TYPE_INT)
-  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
-   else if (src0.type == GLSL_TYPE_UINT)
-  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
-   else
-  tmp = src0;
-   
-   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
+   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+  op = TGSI_OPCODE_UARL;
+
+   emit(NULL, op, dst, src0);
 }
 
 /**
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl_to_tgsi, mesa: fixes for native integers and integer booleans

2011-09-02 Thread Bryan Cain
Are there any objections to pushing this?

Bryan

On 08/31/2011 01:33 AM, Bryan Cain wrote:
> With this patch, there are no piglit regressions on softpipe with native
> integers enabled.  Unlike my previous patch, this uses integer values of
> ~0 and 0 for true and false, respectively, instead of the float values 1.0
> and 0.0.
> ---
>  src/mesa/main/uniforms.c   |6 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  160 
> 
>  2 files changed, 116 insertions(+), 50 deletions(-)
>
> diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
> index cda840f..fa96fd3 100644
> --- a/src/mesa/main/uniforms.c
> +++ b/src/mesa/main/uniforms.c
> @@ -777,12 +777,12 @@ set_program_uniform(struct gl_context *ctx, struct 
> gl_program *program,
>   if (isUniformBool) {
>  for (i = 0; i < elems; i++) {
> if (basicType == GL_FLOAT)
> -  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
> +  uniformVal[i].u = uniformVal[i].f != 0.0f ? ~0 : 0;
> else
> -  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
> +  uniformVal[i].u = uniformVal[i].u ? ~0 : 0;
> 
> if (!ctx->Const.NativeIntegers)
> -  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
> +  uniformVal[i].f = uniformVal[i].u ? 1.0f : 0.0f;
>  }
>   }
>}
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 2266083..c8f790a 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -385,6 +385,8 @@ public:
> void emit_scalar(ir_instruction *ir, unsigned op,
>   st_dst_reg dst, st_src_reg src0, st_src_reg src1);
>  
> +   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
> +
> void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
>  
> void emit_scs(ir_instruction *ir, unsigned op,
> @@ -562,7 +564,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
> op,
> }
>  
> this->instructions.push_tail(inst);
> -   
> +
> +   if (native_integers)
> +  try_emit_float_set(ir, op, dst);
> +
> return inst;
>  }
>  
> @@ -588,6 +593,25 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
> op)
> return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
>  }
>  
> + /**
> + * Emits the code to convert the result of float SET instructions to 
> integers.
> + */
> +void
> +glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
> +  st_dst_reg dst)
> +{
> +   if ((op == TGSI_OPCODE_SEQ ||
> +op == TGSI_OPCODE_SNE ||
> +op == TGSI_OPCODE_SGE ||
> +op == TGSI_OPCODE_SLT))
> +   {
> +  st_src_reg src = st_src_reg(dst);
> +  src.negate = ~src.negate;
> +  dst.type = GLSL_TYPE_FLOAT;
> +  emit(ir, TGSI_OPCODE_F2I, dst, src);
> +   }
> +}
> +
>  /**
>   * Determines whether to use an integer, unsigned integer, or float opcode 
>   * based on the operands and input opcode, then emits the result.
> @@ -604,7 +628,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
> unsigned op,
> if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
>type = GLSL_TYPE_FLOAT;
> else if (native_integers)
> -  type = src0.type;
> +  type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
>  
>  #define case4(c, f, i, u) \
> case TGSI_OPCODE_##c: \
> @@ -630,12 +654,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
> unsigned op,
>case3(SGE, ISGE, USGE);
>case3(SLT, ISLT, USLT);
>
> -  case2iu(SHL, SHL);
>case2iu(ISHR, USHR);
> -  case2iu(NOT, NOT);
> -  case2iu(AND, AND);
> -  case2iu(OR, OR);
> -  case2iu(XOR, XOR);
>
>default: break;
> }
> @@ -1389,7 +1408,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
> switch (ir->operation) {
> case ir_unop_logic_not:
>if (result_dst.type != GLSL_TYPE_FLOAT)
> - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], 
> st_src_reg_for_type(result_dst.type, 0));
> + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
>else {
>   /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
>* older GPUs implement SEQ using multiple instructions (i915 uses 
> two
> @@ -1489,10 +1508,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>   

Re: [Mesa-dev] [PATCH v2] glsl_to_tgsi, mesa: fixes for native integers and integer booleans

2011-09-02 Thread Bryan Cain
On 09/02/2011 06:13 PM, Eric Anholt wrote:
> On Wed, 31 Aug 2011 01:33:59 -0500, Bryan Cain  wrote:
>> With this patch, there are no piglit regressions on softpipe with native
>> integers enabled.  Unlike my previous patch, this uses integer values of
>> ~0 and 0 for true and false, respectively, instead of the float values 1.0
>> and 0.0.
> This will break b2* conversions on our driver, since we expect true to
> be 1, not ~0.  The minimal change would require emit(AND(temp, uniform,
> 1)) when deferencing boolean components of uniform variables.

Can you please change your driver if the hardware is not able to support
it, then?  If that's not possible, we need to come up with a way to
upload booleans as ~0 for Gallium drivers and 1 for i965.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl_to_tgsi, mesa: fixes for native integers and integer booleans

2011-09-03 Thread Bryan Cain
On 09/02/2011 06:13 PM, Eric Anholt wrote:
> On Wed, 31 Aug 2011 01:33:59 -0500, Bryan Cain  wrote:
>> With this patch, there are no piglit regressions on softpipe with native
>> integers enabled.  Unlike my previous patch, this uses integer values of
>> ~0 and 0 for true and false, respectively, instead of the float values 1.0
>> and 0.0.
> This will break b2* conversions on our driver, since we expect true to
> be 1, not ~0.  The minimal change would require emit(AND(temp, uniform,
> 1)) when deferencing boolean components of uniform variables.

Your driver already emits an AND instruction on every compare, according
to this code from brw_fs_visitor.cpp:

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
  temp = this->result;
  /* original gen4 does implicit conversion before comparison. */
  if (intel->gen < 5)
 temp.type = op[0].type;

  inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
  inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
  emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
  break;

If the hardware sets ~0 on compares anyway, why not use that in your driver?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: add a UniformBooleanTrue option

2011-09-05 Thread Bryan Cain
Drivers supporting native integers set UniformBooleanTrue to the integer value
that should be used for true when uploading uniform booleans.  This is ~0 for
Gallium and 1 for i965.
---
 src/mesa/drivers/dri/i965/brw_context.c |4 +++-
 src/mesa/main/mtypes.h  |6 ++
 src/mesa/main/uniforms.c|5 -
 src/mesa/state_tracker/st_extensions.c  |1 +
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 6ef0fcb..5ea7385 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -253,8 +253,10 @@ GLboolean brwCreateContext( int api,
/* If we're using the new shader backend, we require integer uniforms
 * stored as actual integers.
 */
-   if (brw->new_vs_backend)
+   if (brw->new_vs_backend) {
   ctx->Const.NativeIntegers = true;
+  ctx->Const.UniformBooleanTrue = 1;
+   }
 
return GL_TRUE;
 }
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 44ebf0a..ad59797 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2722,6 +2722,12 @@ struct gl_constants
 */
GLboolean NativeIntegers;
 
+   /**
+* If the driver supports real 32-bit integers, what integer value should be
+* used for boolean true in uniform uploads?  (Usually 1 or ~0.)
+*/
+   GLuint UniformBooleanTrue;
+
/** Which texture units support GL_ATI_envmap_bumpmap as targets */
GLbitfield SupportedBumpUnits;
 
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index fe1ce6d..b0f9c33 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -802,7 +802,10 @@ set_program_uniform(struct gl_context *ctx, struct 
gl_program *program,
else
   uniformVal[i].b = uniformVal[i].u ? 1 : 0;

-   if (!ctx->Const.NativeIntegers)
+   if (ctx->Const.NativeIntegers)
+  uniformVal[i].u =
+uniformVal[i].b ? ctx->Const.UniformBooleanTrue : 0;
+   else
   uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
 }
  }
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index aa7f3b5..76e84eb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -207,6 +207,7 @@ void st_init_limits(struct st_context *st)
   c->MaxProgramTexelOffset = screen->get_param(screen, 
PIPE_CAP_MAX_TEXEL_OFFSET);
 
   c->GLSLVersion = 120;
+  c->UniformBooleanTrue = ~0;
}
 }
 
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-10 Thread Bryan Cain
Can one of the Gallium interface maintainers please review this patch so
I can push it?

Bryan

On 09/02/2011 11:09 AM, Bryan Cain wrote:
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
>  1 files changed, 7 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index e2857ed..05d4d33 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
> op,
>  
> inst->function = NULL;
> 
> -   if (op == TGSI_OPCODE_ARL)
> +   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
>this->num_address_regs = 1;
> 
> /* Update indirect addressing status used by TGSI */
> @@ -724,16 +724,12 @@ void
>  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
>   st_dst_reg dst, st_src_reg src0)
>  {
> -   st_src_reg tmp = get_temp(glsl_type::float_type);
> +   int op = TGSI_OPCODE_ARL;
>  
> -   if (src0.type == GLSL_TYPE_INT)
> -  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
> -   else if (src0.type == GLSL_TYPE_UINT)
> -  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
> -   else
> -  tmp = src0;
> -   
> -   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
> +   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
> +  op = TGSI_OPCODE_UARL;
> +
> +   emit(NULL, op, dst, src0);
>  }
>  
>  /**
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium: add TGSI opcodes UARL and UCMP

2011-09-10 Thread Bryan Cain
Can one of the Gallium interface maintainers please review this patch so
I can push it?

Bryan

On 09/02/2011 11:09 AM, Bryan Cain wrote:
> They are needed by glsl_to_tgsi for an efficient implementation using native
> integers.
> ---
>  src/gallium/auxiliary/tgsi/tgsi_exec.c |   30 
> 
>  src/gallium/auxiliary/tgsi/tgsi_info.c |3 ++
>  src/gallium/include/pipe/p_shader_tokens.h |5 +++-
>  3 files changed, 37 insertions(+), 1 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 38dc1ef..896d871 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -3312,6 +3312,28 @@ micro_usne(union tgsi_exec_channel *dst,
>  }
>  
>  static void
> +micro_uarl(union tgsi_exec_channel *dst,
> +   const union tgsi_exec_channel *src)
> +{
> +   dst->i[0] = src->u[0];
> +   dst->i[1] = src->u[1];
> +   dst->i[2] = src->u[2];
> +   dst->i[3] = src->u[3];
> +}
> +
> +static void
> +micro_ucmp(union tgsi_exec_channel *dst,
> +   const union tgsi_exec_channel *src0,
> +   const union tgsi_exec_channel *src1,
> +   const union tgsi_exec_channel *src2)
> +{
> +   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
> +   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
> +   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
> +   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
> +}
> +
> +static void
>  exec_instruction(
> struct tgsi_exec_machine *mach,
> const struct tgsi_full_instruction *inst,
> @@ -4071,6 +4093,14 @@ exec_instruction(
>assert(0);
>break;
>  
> +   case TGSI_OPCODE_UARL:
> +  exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, 
> TGSI_EXEC_DATA_UINT);
> +  break;
> +
> +   case TGSI_OPCODE_UCMP:
> +  exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_FLOAT, 
> TGSI_EXEC_DATA_UINT);
> +  break;
> +
> default:
>assert( 0 );
> }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
> b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 14ed56a..6cd580a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -189,6 +189,9 @@ static const struct tgsi_opcode_info 
> opcode_info[TGSI_OPCODE_LAST] =
> { 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO },
> { 1, 2, 0, 0, 0, 0, "SAMPLE_POS",  TGSI_OPCODE_SAMPLE_POS },
> { 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
> +   
> +   { 1, 1, 0, 0, 0, 0, "UARL", TGSI_OPCODE_UARL },
> +   { 1, 3, 0, 0, 0, 0, "UCMP", TGSI_OPCODE_UCMP },
>  };
>  
>  const struct tgsi_opcode_info *
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
> b/src/gallium/include/pipe/p_shader_tokens.h
> index d3a3632..0a26e39 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -363,7 +363,10 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_SAMPLE_POS  155
>  #define TGSI_OPCODE_SAMPLE_INFO 156
>  
> -#define TGSI_OPCODE_LAST157
> +#define TGSI_OPCODE_UARL157
> +#define TGSI_OPCODE_UCMP158
> +
> +#define TGSI_OPCODE_LAST159
>  
>  #define TGSI_SAT_NONE0  /* do not saturate */
>  #define TGSI_SAT_ZERO_ONE1  /* clamp to [0,1] */

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-10 Thread Bryan Cain
Disregard this, I meant to send this for patch 1/2 and not 2/2.  This
patch doesn't contain any Gallium interface changes.

Bryan

On 09/10/2011 11:43 AM, Bryan Cain wrote:
> Can one of the Gallium interface maintainers please review this patch so
> I can push it?
>
> Bryan
>
> On 09/02/2011 11:09 AM, Bryan Cain wrote:
>> ---
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
>>  1 files changed, 7 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index e2857ed..05d4d33 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
>> op,
>>  
>> inst->function = NULL;
>> 
>> -   if (op == TGSI_OPCODE_ARL)
>> +   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
>>this->num_address_regs = 1;
>> 
>> /* Update indirect addressing status used by TGSI */
>> @@ -724,16 +724,12 @@ void
>>  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
>>  st_dst_reg dst, st_src_reg src0)
>>  {
>> -   st_src_reg tmp = get_temp(glsl_type::float_type);
>> +   int op = TGSI_OPCODE_ARL;
>>  
>> -   if (src0.type == GLSL_TYPE_INT)
>> -  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
>> -   else if (src0.type == GLSL_TYPE_UINT)
>> -  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
>> -   else
>> -  tmp = src0;
>> -   
>> -   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
>> +   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
>> +  op = TGSI_OPCODE_UARL;
>> +
>> +   emit(NULL, op, dst, src0);
>>  }
>>  
>>  /**
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium: add TGSI opcodes UARL and UCMP

2011-09-10 Thread Bryan Cain
On 09/10/2011 12:05 PM, Brian Paul wrote:
> On 09/10/2011 10:47 AM, Bryan Cain wrote:
>> Can one of the Gallium interface maintainers please review this patch so
>> I can push it?
>
>
> We need documentation for these new instructions in
> src/gallium/docs/source/tgsi.rst
>
> More comments below...
>
>> Bryan
>>
>> On 09/02/2011 11:09 AM, Bryan Cain wrote:
>>> They are needed by glsl_to_tgsi for an efficient implementation
>>> using native
>>> integers.
>>> ---
>>>   src/gallium/auxiliary/tgsi/tgsi_exec.c |   30
>>> 
>>>   src/gallium/auxiliary/tgsi/tgsi_info.c |3 ++
>>>   src/gallium/include/pipe/p_shader_tokens.h |5 +++-
>>>   3 files changed, 37 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>>> index 38dc1ef..896d871 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>>> @@ -3312,6 +3312,28 @@ micro_usne(union tgsi_exec_channel *dst,
>>>   }
>>>
>>>   static void
>>> +micro_uarl(union tgsi_exec_channel *dst,
>>> +   const union tgsi_exec_channel *src)
>>> +{
>>> +   dst->i[0] = src->u[0];
>>> +   dst->i[1] = src->u[1];
>>> +   dst->i[2] = src->u[2];
>>> +   dst->i[3] = src->u[3];
>>> +}
>>> +
>>> +static void
>>> +micro_ucmp(union tgsi_exec_channel *dst,
>>> +   const union tgsi_exec_channel *src0,
>>> +   const union tgsi_exec_channel *src1,
>>> +   const union tgsi_exec_channel *src2)
>>> +{
>>> +   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
>>> +   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
>>> +   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
>>> +   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
>>> +}
>
> Just curious: does UCMP directly correspond to an existing HW GPU
> instruction?  It seems a little unusual to have an instruction that
> takes a mix of float and uint arguments like that.
>

Yes, there is a UCMP instruction on nvc0 and r600.  The float arguments
is just the src and dst registers of the MOV part of the operation.  It
could be changed to uint without making a difference.  The idea behind
keeping the MOV arguments as floats was to stay consistent with CMP, but
I can change it if it's not important.

>
>>> +
>>> +static void
>>>   exec_instruction(
>>>  struct tgsi_exec_machine *mach,
>>>  const struct tgsi_full_instruction *inst,
>>> @@ -4071,6 +4093,14 @@ exec_instruction(
>>> assert(0);
>>> break;
>>>
>>> +   case TGSI_OPCODE_UARL:
>>> +  exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT,
>>> TGSI_EXEC_DATA_UINT);
>>> +  break;
>>> +
>>> +   case TGSI_OPCODE_UCMP:
>>> +  exec_vector_trinary(mach, inst, micro_ucmp,
>>> TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
>
> The parameter src_datatype=TGSI_EXEC_DATA_UINT indicates that all the
> source regs are uint, but that's not what micro_ucmp() takes. Granted,
> since the float/uint values are a union and we do no arithmetic, UCMP
> would seem to work with any combination of float/uint arguments.  If
> that's the intention, please document that.

I'll just change the arguments to all be uint.

>
>>> +  break;
>>> +
>>>  default:
>>> assert( 0 );
>>>  }
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> index 14ed56a..6cd580a 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> @@ -189,6 +189,9 @@ static const struct tgsi_opcode_info
>>> opcode_info[TGSI_OPCODE_LAST] =
>>>  { 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO },
>>>  { 1, 2, 0, 0, 0, 0, "SAMPLE_POS",  TGSI_OPCODE_SAMPLE_POS },
>>>  { 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
>>> +
>>> +   { 1, 1, 0, 0, 0, 0, "UARL", TGSI_OPCODE_UARL },
>>> +   { 1, 3, 0, 0, 0, 0, "UCMP", TGSI_OPCODE_UCMP },
>>>   };
>>>
>>>   const struct tgsi_opcode_info *
>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h
>>

Re: [Mesa-dev] [PATCH 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-10 Thread Bryan Cain
It will only be emitted when the driver supports integer operations. 
The I2F+ARL combination is currently what is emitted when integer
support is enabled (float targets only need ARL) but I can make that
more clear in the commit message.

On 09/10/2011 12:07 PM, Brian Paul wrote:
> I guess my question is: do the drivers need to be updated for
> TGSI_OPCODE_UARL?  Or will this only be emitted when the driver
> supports integer operations?  If that's the case, please say so in the
> commit message or code.
>
> Otherwise: Reviewed-by: Brian Paul 
>
>
> On 09/10/2011 10:48 AM, Bryan Cain wrote:
>> Disregard this, I meant to send this for patch 1/2 and not 2/2.  This
>> patch doesn't contain any Gallium interface changes.
>>
>> Bryan
>>
>> On 09/10/2011 11:43 AM, Bryan Cain wrote:
>>> Can one of the Gallium interface maintainers please review this
>>> patch so
>>> I can push it?
>>>
>>> Bryan
>>>
>>> On 09/02/2011 11:09 AM, Bryan Cain wrote:
>>>> ---
>>>>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
>>>>   1 files changed, 7 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> index e2857ed..05d4d33 100644
>>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> @@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir,
>>>> unsigned op,
>>>>
>>>>  inst->function = NULL;
>>>>
>>>> -   if (op == TGSI_OPCODE_ARL)
>>>> +   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
>>>> this->num_address_regs = 1;
>>>>
>>>>  /* Update indirect addressing status used by TGSI */
>>>> @@ -724,16 +724,12 @@ void
>>>>   glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
>>>>   st_dst_reg dst, st_src_reg src0)
>>>>   {
>>>> -   st_src_reg tmp = get_temp(glsl_type::float_type);
>>>> +   int op = TGSI_OPCODE_ARL;
>>>>
>>>> -   if (src0.type == GLSL_TYPE_INT)
>>>> -  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
>>>> -   else if (src0.type == GLSL_TYPE_UINT)
>>>> -  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
>>>> -   else
>>>> -  tmp = src0;
>>>> -
>>>> -   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
>>>> +   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
>>>> +  op = TGSI_OPCODE_UARL;
>>>> +
>>>> +   emit(NULL, op, dst, src0);
>>>>   }
>>>>
>>>>   /**
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/2] gallium: add TGSI opcodes UARL and UCMP

2011-09-10 Thread Bryan Cain
They are needed by glsl_to_tgsi for an efficient implementation using native
integers.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   30 
 src/gallium/auxiliary/tgsi/tgsi_info.c |3 ++
 src/gallium/docs/source/tgsi.rst   |   19 +
 src/gallium/include/pipe/p_shader_tokens.h |5 +++-
 4 files changed, 56 insertions(+), 1 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d9de41b..ce6399c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3367,6 +3367,28 @@ micro_usne(union tgsi_exec_channel *dst,
 }
 
 static void
+micro_uarl(union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src)
+{
+   dst->i[0] = src->u[0];
+   dst->i[1] = src->u[1];
+   dst->i[2] = src->u[2];
+   dst->i[3] = src->u[3];
+}
+
+static void
+micro_ucmp(union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2)
+{
+   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
+   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
+   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
+   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
+}
+
+static void
 exec_instruction(
struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
@@ -4126,6 +4148,14 @@ exec_instruction(
   assert(0);
   break;
 
+   case TGSI_OPCODE_UARL:
+  exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_UCMP:
+  exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_UINT);
+  break;
+
default:
   assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 14ed56a..6cd580a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -189,6 +189,9 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO },
{ 1, 2, 0, 0, 0, 0, "SAMPLE_POS",  TGSI_OPCODE_SAMPLE_POS },
{ 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
+   
+   { 1, 1, 0, 0, 0, 0, "UARL", TGSI_OPCODE_UARL },
+   { 1, 3, 0, 0, 0, 0, "UCMP", TGSI_OPCODE_UCMP },
 };
 
 const struct tgsi_opcode_info *
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 5cf0875..d7f50b1 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1013,6 +1013,25 @@ XXX so let's discuss it, yeah?
   dst.w = src0.w \oplus src1.w
 
 
+.. opcode:: UCMP - Integer Conditional Move
+
+.. math::
+
+  dst.x = src0.x ? src1.x : src2.x
+
+  dst.y = src0.y ? src1.y : src2.y
+
+  dst.z = src0.z ? src1.z : src2.z
+
+  dst.w = src0.w ? src1.w : src2.w
+
+
+.. opcode:: UARL - Integer Address Register Load
+
+  Moves the contents of the source register, assumed to be an integer, into the
+  destination register, which is assumed to be an address (ADDR) register.
+
+
 .. opcode:: SAD - Sum Of Absolute Differences
 
 .. math::
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index b9e3dcf..7236c92 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -363,7 +363,10 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_SAMPLE_POS  155
 #define TGSI_OPCODE_SAMPLE_INFO 156
 
-#define TGSI_OPCODE_LAST157
+#define TGSI_OPCODE_UARL157
+#define TGSI_OPCODE_UCMP158
+
+#define TGSI_OPCODE_LAST159
 
 #define TGSI_SAT_NONE0  /* do not saturate */
 #define TGSI_SAT_ZERO_ONE1  /* clamp to [0,1] */
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-10 Thread Bryan Cain
Since TGSI now has a UARL opcode that takes an integer as the source, it is
no longer necessary to hack around the lack of an integer ARL opcode using I2F.
UARL is only emitted when native integers are enabled; ARL is still used
otherwise.

Reviewed-by: Brian Paul 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
 1 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e2857ed..05d4d33 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
inst->function = NULL;

-   if (op == TGSI_OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
   this->num_address_regs = 1;

/* Update indirect addressing status used by TGSI */
@@ -724,16 +724,12 @@ void
 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0)
 {
-   st_src_reg tmp = get_temp(glsl_type::float_type);
+   int op = TGSI_OPCODE_ARL;
 
-   if (src0.type == GLSL_TYPE_INT)
-  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
-   else if (src0.type == GLSL_TYPE_UINT)
-  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
-   else
-  tmp = src0;
-   
-   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
+   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+  op = TGSI_OPCODE_UARL;
+
+   emit(NULL, op, dst, src0);
 }
 
 /**
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] glsl_to_tgsi: use UARL instead of I2F and ARL

2011-09-14 Thread Bryan Cain
I don't see any reason why this patch would make a difference, but since
it apparently does, I'll take a look at it and fix it.  What program is
that?

Bryan

On 09/14/2011 07:01 AM, Marek Olšák wrote:
> Bryan,
>
> This commit causes hardlocks on r600g (integers disabled).
>
> Maybe you can see better than me what's wrong.
>
> This is a side-by-side diff of the failing TGSI shader. The right-hand
> one is from Mesa master. The left-hand one is with the commit
> reverted.
> http://pastebin.com/raw.php?i=QXB3SZAE
>
> Thanks,
> Marek
>
> On Sat, Sep 10, 2011 at 7:36 PM, Bryan Cain  wrote:
>> Since TGSI now has a UARL opcode that takes an integer as the source, it is
>> no longer necessary to hack around the lack of an integer ARL opcode using 
>> I2F.
>> UARL is only emitted when native integers are enabled; ARL is still used
>> otherwise.
>>
>> Reviewed-by: Brian Paul 
>> ---
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   18 +++---
>>  1 files changed, 7 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index e2857ed..05d4d33 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -512,7 +512,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned 
>> op,
>>
>>inst->function = NULL;
>>
>> -   if (op == TGSI_OPCODE_ARL)
>> +   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
>>   this->num_address_regs = 1;
>>
>>/* Update indirect addressing status used by TGSI */
>> @@ -724,16 +724,12 @@ void
>>  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
>>st_dst_reg dst, st_src_reg src0)
>>  {
>> -   st_src_reg tmp = get_temp(glsl_type::float_type);
>> +   int op = TGSI_OPCODE_ARL;
>>
>> -   if (src0.type == GLSL_TYPE_INT)
>> -  emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
>> -   else if (src0.type == GLSL_TYPE_UINT)
>> -  emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
>> -   else
>> -  tmp = src0;
>> -
>> -   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
>> +   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
>> +  op = TGSI_OPCODE_UARL;
>> +
>> +   emit(NULL, op, dst, src0);
>>  }
>>
>>  /**
>> --
>> 1.7.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl_to_tgsi: implement ir_binop_all_equal and ir_binop_any_nequal for native integers

2011-09-19 Thread Bryan Cain
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  119 
 1 files changed, 85 insertions(+), 34 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 8921698..f68270d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1528,15 +1528,45 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) 
:
glsl_type::vec4_type);
- assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
- emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
- 
- /* After the dot-product, the value will be an integer on the
-  * range [0,4].  Zero becomes 1.0, and positive values become zero.
-  */
- emit_dp(ir, result_dst, temp, temp, vector_elements);
  
- if (result_dst.type == GLSL_TYPE_FLOAT) {
+ if (native_integers) {
+st_dst_reg temp_dst = st_dst_reg(temp);
+st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+
+emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+
+/* Emit 1-3 AND operations to combine the SEQ results. */
+switch (ir->operands[0]->type->vector_elements) {
+case 2:
+   break;
+case 3:
+   temp_dst.writemask = WRITEMASK_Y;
+   temp1.swizzle = SWIZZLE_;
+   temp2.swizzle = SWIZZLE_;
+   emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+   break;
+case 4:
+   temp_dst.writemask = WRITEMASK_X;
+   temp1.swizzle = SWIZZLE_;
+   temp2.swizzle = SWIZZLE_;
+   emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+   temp_dst.writemask = WRITEMASK_Y;
+   temp1.swizzle = SWIZZLE_;
+   temp2.swizzle = SWIZZLE_;
+   emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+}
+
+temp1.swizzle = SWIZZLE_;
+temp2.swizzle = SWIZZLE_;
+emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
+ } else {
+emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+
+/* After the dot-product, the value will be an integer on the
+ * range [0,4].  Zero becomes 1.0, and positive values become zero.
+ */
+emit_dp(ir, result_dst, temp, temp, vector_elements);
+
 /* Negating the result of the dot-product gives values on the range
  * [-4, 0].  Zero becomes 1.0, and negative values become zero.
  * This is achieved using SGE.
@@ -1544,11 +1574,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 st_src_reg sge_src = result_src;
 sge_src.negate = ~sge_src.negate;
 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, 
st_src_reg_for_float(0.0));
- } else {
-/* The TGSI negate flag doesn't work for integers, so use SEQ 0
- * instead.
- */
-emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, 
st_src_reg_for_int(0));
  }
   } else {
  emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
@@ -1561,30 +1586,56 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) 
:
glsl_type::vec4_type);
- assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
  emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
 
- /* After the dot-product, the value will be an integer on the
-  * range [0,4].  Zero stays zero, and positive values become 1.0.
-  */
- glsl_to_tgsi_instruction *const dp =
-   emit_dp(ir, result_dst, temp, temp, vector_elements);
- if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- result_dst.type == GLSL_TYPE_FLOAT) {
-/* The clamping to [0,1] can be done for free in the fragment
- * shader with a saturate.
- */
-dp->saturate = true;
- } else if (result_dst.type == GLSL_TYPE_FLOAT) {
-/* Negating the result of the dot-product gives values on the range
- * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
- * achieved using SLT.
- */
-st_src_reg slt_src = result_src;
-slt_src.negate = ~slt_src.negate;
-emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, 
st_src_reg_for_float(0.0));
+ if (native_integers) {
+st_dst_reg temp_dst = st_dst_reg(temp);
+st_src

Re: [Mesa-dev] [PATCH 6/6] glsl_to_tgsi: Use _mesa_generate_parameters_list_for_uniforms

2011-10-07 Thread Bryan Cain
On 10/07/2011 07:06 PM, Ian Romanick wrote:
> From: Ian Romanick 
>
> Signed-off-by: Ian Romanick 
> Cc: Bryan Cain 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  119 
> +---
>  1 files changed, 2 insertions(+), 117 deletions(-)
>

Reviewed-by: Bryan Cain 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: kill instruction if writemask=0 in eliminate_dead_code_advanced()

2011-10-09 Thread Bryan Cain
I don't think there's any reason we can't eliminate a dead instruction
when the writemask is zero.  I do wonder, though, why this patch
actually makes a difference.  There's an "if (!inst->dead_mask ||
!inst->dst.writemask)" three lines before the code visible in the patch
that makes it not kill the instruction if the writemask is zero.  I
don't remember why that's there, but if it weren't there, and the
writemask is zero, the dead_mask should also be zero, so it should be
handled by the "else if" block.

In short, I think that entire if/else if/else statement could use a look.

Bryan

On 10/07/2011 10:40 AM, Brian Paul wrote:
> From: Brian Paul 
>
> This fixes a bug where we'd wind up emitting an invalid instruction like
> MOVE R[0]., R[1];  - note the empty/zero writemask.  If we don't write to
> any dest register channels, cull the instruction.
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
>  1 files changed, 7 insertions(+), 1 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index d8ef8a3..44b1149 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -3776,8 +3776,14 @@ 
> glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>   iter.remove();
>   delete inst;
>   removed++;
> -  } else
> +  } else {
>   inst->dst.writemask &= ~(inst->dead_mask);
> + if (inst->dst.writemask == 0) {
> +iter.remove();
> +delete inst;
> +removed++;
> + }
> +  }
> }
>  
> ralloc_free(write_level);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: kill instruction if writemask=0 in eliminate_dead_code_advanced()

2011-10-09 Thread Bryan Cain
What does it do if there's no destination register?  In any case, I
don't think glsl_to_tgsi emits any ARLs of that form, so it shouldn't be
a problem.

Bryan

On 10/07/2011 01:06 PM, Marek Olšák wrote:
> I think ARL is allowed to have no destination register, right? In that
> case, there should be a special case not to eliminate ARLs.
>
> Marek
>
> On Fri, Oct 7, 2011 at 5:40 PM, Brian Paul  wrote:
>> From: Brian Paul 
>>
>> This fixes a bug where we'd wind up emitting an invalid instruction like
>> MOVE R[0]., R[1];  - note the empty/zero writemask.  If we don't write to
>> any dest register channels, cull the instruction.
>> ---
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
>>  1 files changed, 7 insertions(+), 1 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index d8ef8a3..44b1149 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -3776,8 +3776,14 @@ 
>> glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>>  iter.remove();
>>  delete inst;
>>  removed++;
>> -  } else
>> +  } else {
>>  inst->dst.writemask &= ~(inst->dead_mask);
>> + if (inst->dst.writemask == 0) {
>> +iter.remove();
>> +delete inst;
>> +removed++;
>> + }
>> +  }
>>}
>>
>>ralloc_free(write_level);
>> --
>> 1.7.3.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Short-circuit lower_if_to_cond_assign when MaxIfDepth is UINT_MAX.

2011-10-18 Thread Bryan Cain
Looks good to me.

Bryan

On 10/18/2011 05:20 PM, Kenneth Graunke wrote:
> Setting MaxIfDepth to UINT_MAX effectively means "don't lower anything."
>
> Explicitly checking for this common case allows us to avoid walking the
> IR, computing nesting levels, and so on.
>
> Signed-off-by: Kenneth Graunke 
> Cc: Bryan Cain 
> Cc: Ian Romanick 
> ---
>  src/glsl/lower_if_to_cond_assign.cpp |3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
>
> diff --git a/src/glsl/lower_if_to_cond_assign.cpp 
> b/src/glsl/lower_if_to_cond_assign.cpp
> index 7b89a15..2c5d561 100644
> --- a/src/glsl/lower_if_to_cond_assign.cpp
> +++ b/src/glsl/lower_if_to_cond_assign.cpp
> @@ -79,6 +79,9 @@ public:
>  bool
>  lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth)
>  {
> +   if (max_depth == UINT_MAX)
> +  return false;
> +
> ir_if_to_cond_assign_visitor v(max_depth);
>  
> visit_list_elements(&v, instructions);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: Add uniform_locations_assigned parameter to do_dead_code opt pass

2011-10-21 Thread Bryan Cain
If it's worth anything, the glsl_to_tgsi part is

Reviewed-by: Bryan Cain 

On 10/21/2011 01:49 PM, Ian Romanick wrote:
> From: Ian Romanick 
>
> Setting this flag prevents declarations of uniforms from being removed
> from the IR.  Since the IR is directly used by several API functions
> that query uniforms in shaders, uniform declarations cannot be removed
> after the locations have been set.  However, it should still be safe
> to reorder the declarations (this is not tested).
>
> Signed-off-by: Ian Romanick 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=41980
> Cc: Brian Paul 
> Cc: Bryan Cain 
> Cc: Vinson Lee 
> Cc: José Fonseca 
> Cc: Kenneth Graunke 
> ---
>  src/glsl/glsl_parser_extras.cpp|   23 +--
>  src/glsl/ir_optimization.h |6 --
>  src/glsl/linker.cpp|2 +-
>  src/glsl/main.cpp  |2 +-
>  src/glsl/opt_dead_code.cpp |   14 ++
>  src/glsl/test_optpass.cpp  |4 ++--
>  src/mesa/drivers/dri/i965/brw_shader.cpp   |3 ++-
>  src/mesa/main/ff_fragment_shader.cpp   |2 +-
>  src/mesa/program/ir_to_mesa.cpp|6 --
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |4 +++-
>  10 files changed, 49 insertions(+), 17 deletions(-)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] TGSI declarations missing type info

2011-11-13 Thread Bryan Cain
On 11/13/2011 09:06 AM, Dave Airlie wrote:
> Hi guys,
>
> Just been looking at llvmpipe integer support and it seems like we
> lose some information about the type of data stored into temporaries,
>
> after st_glsl_to_cpp we no longer know what type the temporaries are,
> and llvm would really like to know and I can't see any reason that
> TGSI doesn't contain the info. Having untyped temp decls means we'd
> have to allocate some sort of "union" via aliases I guess in llvmpipe
> for all temps so we can store int/float in them.
>
> I've attached a run of glsl-vs-loop from llvmpipe with integer opcodes
> forced on. (llvmpipe-int-test branch of my repo).
>
> Dave.

If you do add types to TGSI registers, it's worth noting that the
internal IR used by glsl_to_tgsi (glsl_to_tgsi_instruction) already the
types of all src and dst registers, and it's only lost when converting
that to TGSI.  However, it was only intended to be good enough to
determine whether to emit an integer or float instruction, so there
might be some mistakes remaining somewhere that would need to be corrected.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage

2011-11-16 Thread Bryan Cain
It's unfortunately been a while since I've done anything with
glsl_to_tgsi.  What are the "various functions that modify the TGSI IR
and try to propagate changes about that up to the gl_program"?  If I can
see where it is in the code, I can probably remember if there's a reason
it was done that way.

Bryan

On 11/16/2011 01:13 PM, Ian Romanick wrote:
> From: Ian Romanick 
>
> If glUniform1i and friends are going to dump data directly in
> driver-allocated, the pointers have to be updated when the storage
> moves.  This should fix the regressions seen with commit 7199096.
>
> I'm not sure if this is the only place that needs this treatment.  I'm
> a little uncertain about the various functions in st_glsl_to_tgsi that
> modify the TGSI IR and try to propagate changes about that up to the
> gl_program.  That seems sketchy to me.
>
> Signed-off-by: Ian Romanick 
> Cc: Vadim Girlin 
> Cc: Bryan Cain 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   14 ++
>  1 files changed, 14 insertions(+), 0 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 0bf6766..c8385bb 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4571,6 +4571,13 @@ st_translate_program(
> t->pointSizeOutIndex = -1;
> t->prevInstWrotePointSize = GL_FALSE;
>  
> +   for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
> +  struct gl_uniform_storage *const storage =
> +  &program->shader_program->UniformStorage[i];
> +
> +  _mesa_uniform_detach_all_driver_storage(storage);
> +   }
> +
> /*
>  * Declare input attributes.
>  */
> @@ -4797,6 +4804,13 @@ st_translate_program(
> t->insn[t->labels[i].branch_target]);
> }
>  
> +   /* This has to be done last.  Any operation the can cause
> +* prog->ParameterValues to get reallocated (e.g., anything that adds a
> +* program constant) has to happen before creating this linkage.
> +*/
> +   _mesa_associate_uniform_storage(ctx, program->shader_program,
> +proginfo->Parameters);
> +
>  out:
> if (t) {
>FREE(t->insn);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: fix transform feedback of unsubscripted gl_ClipDistance array

2012-06-17 Thread Bryan Cain

On 6/16/2012 5:43 PM, Marcin Slusarz wrote:

gl_ClipDistance needs special treatment in form of lowering pass
which transforms gl_ClipDistance representation from float[] to
vec4[]. There are 2 implementations - at glsl linker level (enabled
by LowerClipDistance option) and at glsl_to_tgsi level (enabled
unconditionally for gallium drivers). Second implementation is
incomplete - it does not take into account transform feedback (see
commit 642e5b413e0890b2070ba78fde42db381eaf02e5 "mesa: Fix transform
feedback of unsubscripted gl_ClipDistance array" for details).

There are 2 possible fixes:
- adding transform feedback support into glsl_to_tgsi version
- ripping gl_ClipDistance support from glsl_to_tgsi and enabling
   gl_ClipDistance lowering on glsl linker side

This patch implements 2nd option. All it does is:
- reverts most of the commit 59be691638200797583bce39a83f641d30d97492
   "st/mesa: add support for gl_ClipDistance"
- changes LowerClipDistance to true

Fixes Piglit tests "EXT_transform_feedback/builtin-varyings
gl_ClipDistance[{2,3,4,5,6,7,8}]-no-subscript" on nv50.
---


I can't say I know much about how transform feedback works, but is there 
a reason that the first fix would be difficult?  It seems like a waste 
to not take advantage of hardware support for clip distances because the 
current implementation isn't complete.


-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: fix transform feedback of unsubscripted gl_ClipDistance array

2012-06-18 Thread Bryan Cain
On Jun 17, 2012 6:55 PM, "Bryan Cain"  wrote:
>
> On 6/16/2012 5:43 PM, Marcin Slusarz wrote:
>>
>> gl_ClipDistance needs special treatment in form of lowering pass
>> which transforms gl_ClipDistance representation from float[] to
>> vec4[]. There are 2 implementations - at glsl linker level (enabled
>> by LowerClipDistance option) and at glsl_to_tgsi level (enabled
>> unconditionally for gallium drivers). Second implementation is
>> incomplete - it does not take into account transform feedback (see
>> commit 642e5b413e0890b2070ba78fde42db381eaf02e5 "mesa: Fix transform
>> feedback of unsubscripted gl_ClipDistance array" for details).
>>
>> There are 2 possible fixes:
>> - adding transform feedback support into glsl_to_tgsi version
>> - ripping gl_ClipDistance support from glsl_to_tgsi and enabling
>>   gl_ClipDistance lowering on glsl linker side
>>
>> This patch implements 2nd option. All it does is:
>> - reverts most of the commit 59be691638200797583bce39a83f641d30d97492
>>   "st/mesa: add support for gl_ClipDistance"
>> - changes LowerClipDistance to true
>>
>> Fixes Piglit tests "EXT_transform_feedback/builtin-varyings
>> gl_ClipDistance[{2,3,4,5,6,7,8}]-no-subscript" on nv50.
>> ---
>
>
> I can't say I know much about how transform feedback works, but is there
a reason that the first fix would be difficult?  It seems like a waste to
not take advantage of hardware support for clip distances because the
current implementation isn't complete.

Disregard this.  I finally realized what your patch is (and isn't) doing.

-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): docs: Update GL3.txt.

2012-07-11 Thread Bryan Cain
On 07/11/2012 12:24 AM, Eric Anholt wrote:
> Kenneth Graunke  writes:
>> inverse() has been done for a while.
> Does the inverse() builtin constant expression handling work for
> you?  It doesn't here.
>
>> None of us know what "highp change" means;
> GLSL 1.40 spec: "Make the default precision qualification for fragment
> shader be high." -- it was also on our task list.

Like the commit message said, precision qualifiers are entirely ignored
by the GLSL compiler - they don't even make it to the IR stage.  So
there's no such thing as a "default" here since it doesn't have a value
at all for any variable in the IR.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: set position before i instead of i->next in NV50LoweringPreSSA::visit

2012-07-17 Thread Bryan Cain
Fixes rendering glitches in Psychonauts such as Raz's eyes flickering white.
Fixes https://bugs.freedesktop.org/show_bug.cgi?id=51962.
---
 .../drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 39e0cfa..b688172 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -1034,7 +1034,7 @@ NV50LoweringPreSSA::visit(Instruction *i)
   bld.setPosition(i->prev, true);
else
if (i->next)
-  bld.setPosition(i->next, false);
+  bld.setPosition(i, false);
else
   bld.setPosition(i->bb, true);
 
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Support for EXT/ARB_geometry_shader4

2012-07-27 Thread Bryan Cain
Some of you already know about this from IRC, but recently I've been
working on adding support for GL_EXT_geometry_shader4 and
GL_ARB_geometry_shader4 (which are essentially identical) to Mesa.  A
significant amount of the required code was already there from Zack
Rusin's work on geometry shaders about 2 years ago (before the new GLSL
compiler was merged), so the bulk of my changes are adding support to
the GLSL compiler and glsl_to_tgsi, which is still not a small task.

Anyway, as of yesterday, all of the GLSL demos in mesa/demos that use
EXT/ARB_geometry_shader4 are working fully in my branch with softpipe. 
There are still some things that need doing and/or fixing before it
should be considered for merging into master, but for now, my geometry
shader work (including future updates) can be found in a branch of my
Mesa repository on GitHub at:
https://github.com/Plombo/mesa/tree/geometry-shaders .

It's worth noting that geometry shaders in GL 3.2 (GLSL 1.50) core are
slightly different than the geometry shaders in the EXT and ARB
extensions.  However, the main change in the core version is a change in
the way inputs are accessed which is rather TGSI-unfriendly, so when
GLSL 1.50 is implemented, we will have to lower GS inputs to the
extension way of doing things.  So all of this code is in fact necessary
for GL 3.2 core even though it only implements the extensions.

-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/16] Assorted gallium shader, sampler, sampler_view changes

2012-08-10 Thread Bryan Cain
On 08/09/2012 10:11 PM, Brian Paul wrote:
>
> The following patches are steps toward some gallium API clean-ups.
>
> 1. Eventually, replace
> pipe_context::bind_fragment/vertex/geometry/compute_sampler_states()
> with a single bind_sampler_states() entrypoint which takes a
> PIPE_SHADER_x to identify the shader stage and a 'start_slot' value
> like bind_compute_sampler_states() has.  The later seemed to be agreed
> upon a few weeks ago.
>
> 2. Similarly for pipe_context::set_{shader}_sampler_views().
>
> 3. Where possible, replace parallel arrays for
> vertex/fragment/geometry objects with a 2D array indexed by shader
> type.  For example:
>
> replace:
>struct pipe_sampler_state *vert_samplers[MAX_SAMPLERS];
>struct pipe_sampler_state *geom_samplers[MAX_SAMPLERS];
>struct pipe_sampler_state *frag_samplers[MAX_SAMPLERS];
> with:
>struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][MAX_SAMPLERS];
>
> 4. Add support for geometry shader stuff in a few places like the
> state tracker.
>
> I've touched about half the drivers so far.  There's a fair bit of
> work to be done before actually changing p_context.h
>
> -Brian

Hi, I just wanted to say thank you for working on this.  Getting
sampling to work in my geometry shaders branch was something I was
really not looking forward to, and once this work lands in master, it
should allow me to finish my geometry shader work at least a couple of
weeks before I could otherwise.

-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: fix typo

2011-04-23 Thread Bryan Cain
---
 src/glsl/linker.cpp |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 1749235..725a198 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1349,7 +1349,7 @@ assign_attribute_locations(gl_shader_program *prog, 
unsigned max_attribute_index
 
qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);
 
-   /* VERT_ATTRIB_GENERIC0 is a psdueo-alias for VERT_ATTRIB_POS.  It can only
+   /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can only
 * be explicitly assigned by via glBindAttribLocation.  Mark it as reserved
 * to prevent it from being automatically allocated below.
 */
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: fix more typos

2011-04-23 Thread Bryan Cain
---
 src/glsl/linker.cpp |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 725a198..255edc6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -510,7 +510,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program 
*prog,
  */
 if (input->type != output->type) {
/* There is a bit of a special case for gl_TexCoord.  This
-* built-in is unsized by default.  Appliations that variable
+* built-in is unsized by default.  Applications that variable
 * access it must redeclare it with a size.  There is some
 * language in the GLSL spec that implies the fragment shader
 * and vertex shader do not have to agree on this size.  Other
@@ -997,7 +997,7 @@ update_array_sizes(struct gl_shader_program *prog)
/* If this is a built-in uniform (i.e., it's backed by some
 * fixed-function state), adjust the number of state slots to
 * match the new array size.  The number of slots per array entry
-* is not known.  It seems saft to assume that the total number of
+* is not known.  It seems safe to assume that the total number of
 * slots is an integer multiple of the number of array elements.
 * Determine the number of slots per array element by dividing by
 * the old (total) size.
@@ -1163,7 +1163,7 @@ assign_uniform_locations(struct gl_shader_program *prog)
 
 
 /**
- * Find a contiguous set of available bits in a bitmask
+ * Find a contiguous set of available bits in a bitmask.
  *
  * \param used_mask Bits representing used (1) and unused (0) locations
  * \param needed_count  Number of contiguous bits needed.
@@ -1210,7 +1210,7 @@ assign_attribute_locations(gl_shader_program *prog, 
unsigned max_attribute_index
 * 1. Invalidate the location assignments for all vertex shader inputs.
 *
 * 2. Assign locations for inputs that have user-defined (via
-*glBindVertexAttribLocation) locatoins.
+*glBindVertexAttribLocation) locations.
 *
 * 3. Sort the attributes without assigned locations by number of slots
 *required in decreasing order.  Fragmentation caused by attribute
@@ -1610,7 +1610,7 @@ link_shaders(struct gl_context *ctx, struct 
gl_shader_program *prog)
break;
   }
 
-  /* Validate the inputs of each stage with the output of the preceeding
+  /* Validate the inputs of each stage with the output of the preceding
* stage.
*/
   for (unsigned i = prev + 1; i < MESA_SHADER_TYPES; i++) {
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] GLSL IR to TGSI translator

2011-04-25 Thread Bryan Cain
Hi,

In the last week or so, I've been working on a direct translator from
GLSL IR to TGSI that does not go through Mesa IR.  Although it is still
a work in progress, it is now working and very usable.  So before I go
on, here is a link to the branch I've pushed to GitHub:

https://github.com/Plombo/mesa/tree/glsl-130

My main objective with this work is to make GLSL 1.30 support feasible
on Gallium drivers.  From what I understand, it would be difficult or
impossible to implement integer-specific opcodes such as shifting and
bit masking in Mesa IR, since it only supports floats.  TGSI, on the
other hand, doesn't have this problem, and already supports most or all
of the functionality required by GLSL 1.30.

The translator started as a modified version of ir_to_mesa, and that
origin is still obvious from reading the code.  Many parts of ir_to_mesa
are still untouched - glsl_to_tgsi is still a long way away from
eliminating all traces of Mesa IR.  It also contains a significant
amount of code adapted from st_mesa_to_tgsi, but modified to generate
TGSI code from the glsl_to_tgsi_instruction class instead of using Mesa
IR.  (It actually still generates Mesa IR instructions, but that could
be safely removed at some point since the generated Mesa IR instructions
are not actually used for anything.)  I'm planning to push more of the
conversion to TGSI higher up in the stack in the future, although the
remaining remnants of Mesa IR (such as the Mesa IR opcodes used by most
of glsl_to_tgsi) aren't doing any harm.

Since the _mesa_optimize_program function is vital to generating
optimized code with ir_to_mesa, and it is not available when not using
Mesa IR, I've written some new optimization passes for
glsl_to_tgsi_visitor that perform dead code elimination and
consolidation of the temporary register space.  Although they are rather
simple, they do make a huge difference in the quality of the output.  As
an example, here is what it generates for the vertex shader in the
Mandelbrot GLSL demo from the Mesa demos repository:

VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[10]
DCL OUT[2], GENERIC[11]
DCL CONST[0..14]
DCL TEMP[0..4]
IMM FLT32 {2., 0.,-0.5000, 5.}
  0: MUL TEMP[0], CONST[4], IN[0].
  1: MAD TEMP[0], CONST[5], IN[0]., TEMP[0]
  2: MAD TEMP[0], CONST[6], IN[0]., TEMP[0]
  3: MAD TEMP[0], CONST[7], IN[0]., TEMP[0]
  4: MUL TEMP[1].xyz, CONST[12].xyzz, IN[1].
  5: MAD TEMP[1], CONST[13].xyzz, IN[1]., TEMP[1].xyzz
  6: MAD TEMP[1], CONST[14].xyzz, IN[1]., TEMP[1].xyzz
  7: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
  8: RSQ TEMP[2].x, TEMP[2].
  9: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].
 10: ADD TEMP[2].xyz, CONST[3].xyzz, -TEMP[0].xyzz
 11: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 12: RSQ TEMP[3].x, TEMP[3].
 13: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].
 14: MOV TEMP[3].xyz, -TEMP[2].xyzx
 15: MOV TEMP[0].xyz, -TEMP[0].xyzx
 16: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[3].xyzz
 17: MUL TEMP[4].xyz, TEMP[4]., TEMP[1].xyzz
 18: MUL TEMP[4].xyz, IMM[0]., TEMP[4].xyzz
 19: ADD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[4].xyzz
 20: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
 21: RSQ TEMP[4].x, TEMP[4].
 22: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].
 23: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
 24: MAX TEMP[0].x, TEMP[0]., IMM[0].
 25: POW TEMP[0].x, TEMP[0]., CONST[0].
 26: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz
 27: MAX TEMP[1].x, TEMP[1]., IMM[0].
 28: MUL TEMP[1].x, CONST[1]., TEMP[1].
 29: MAD TEMP[0], CONST[2]., TEMP[0]., TEMP[1].
 30: MOV OUT[2], TEMP[0].
 31: ADD TEMP[0], IN[2], IMM[0].
 32: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].
 33: MOV OUT[1].xyz, TEMP[0].xyzx
 34: MUL TEMP[0], CONST[8], IN[0].
 35: MAD TEMP[0], CONST[9], IN[0]., TEMP[0]
 36: MAD TEMP[0], CONST[10], IN[0]., TEMP[0]
 37: MAD TEMP[0], CONST[11], IN[0]., TEMP[0]
 38: MOV OUT[0], TEMP[0]
 39: END

Here is the same shader as generated by ir_to_mesa and st_mesa_to_tgsi
in Mesa master:

VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[10]
DCL OUT[2], GENERIC[11]
DCL CONST[0..14]
DCL TEMP[0..4]
IMM FLT32 {2., 0.,-0.5000, 5.}
  0: MUL TEMP[0], CONST[4], IN[0].
  1: MAD TEMP[0], CONST[5], IN[0]., TEMP[0]
  2: MAD TEMP[0], CONST[6], IN[0]., TEMP[0]
  3: MAD TEMP[0], CONST[7], IN[0]., TEMP[0]
  4: MUL TEMP[1].xyz, CONST[12].xyzz, IN[1].
  5: MAD TEMP[1].xyz, CONST[13].xyzz, IN[1]., TEMP[1].xyzz
  6: MAD TEMP[1].xyz, CONST[14].xyzz, IN[1]., TEMP[1].xyzz
  7: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
  8: RSQ TEMP[2].x, TEMP[2].
  9: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].
 10: ADD TEMP[2].xyz, CONST[3].xyzz, -TEMP[0].xyzz
 11: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 12: RSQ TEMP[3].x, TEMP[3].
 13: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].
 14: MOV TEMP[3].xyz, -TEMP[2].xyzx
 15

Re: [Mesa-dev] Status of VDPAU and XvMC state-trackers (was Re: Build error on current xvmc-r600 pip

2011-04-27 Thread Bryan Cain
[Sending this message to the list since I accidentally sent it to the list
earlier from a non-subscribed address.]

2011/4/26 Christian König 

> Am Dienstag, den 26.04.2011, 17:53 + schrieb youne...@gmail.com:
> > Hi Christian,
> >
> > Thanks for spending so much time on continuing this. I haven't really
> > touched it since you started, but someone else had some patches for
> > basic NV50 support. I don't recall who but I hope they can comment and
> > are interested in getting their changes merged. Also, your
> > implementation of interlaced MC breaks older chips that lack shader
> > control flow if I'm not mistaken, but that can probably be fixed
> > without much trouble.. Finally, someone else (Jimmy Rentz) had some
> > patches that implemented hardware decoding on NV40 chips, but they
> > were never merged into nouveau DRM and the pipe-video patches won't
> > apply anymore anyhow. Those changes required a bit of work in
> > pipe-video to support planar surfaces, but it worked quite well with
> > the old vl_compositor. Recently Ben Skeggs added HW decoder bits to
> > nouveau's DRM so if anyone is motivated enough to rework the userspace
> > side it will require proper planar surface support in pipe-video.
> > (This is just an FYI for anyone who is paying attention to
> > pipe-video.)
> >
> > Cheers.
> If I remember correctly Bryan Cain was working on getting this to work
> again on NV50, he had MC working, but was struggling with the iDCT code.
>
> I also stumbled over the issue of planar texture resources, and solved
> it by implementing an abstraction class that uses up to three separate
> textures to emulate the behaviour of an YV12 or NV12 texture. If a
> hardware driver has native support for planar buffers it should be easy
> to override the creation function and use a native buffer instead.
>
> So things like: native idct -> shader base mc or shader based idct ->
> native mc should now be easily possible. But there is still allot of
> work todo.
>
> Regards,
> Christian.
>
>
Yes, I got MC working on nv50 but couldn't get the iDCT to work properly.  I
still need to send in the patches for what I did accomplish.  Should I send
them to the mesa-dev mailing list and just label them as being for the
pipe-video branch?

Regards,
Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of VDPAU and XvMC state-trackers (was Re: Build error on current xvmc-r600 pip

2011-04-28 Thread Bryan Cain
2011/4/26 Christian König 

> Am Dienstag, den 26.04.2011, 17:53 + schrieb youne...@gmail.com:
> > Hi Christian,
> >
> > Thanks for spending so much time on continuing this. I haven't really
> > touched it since you started, but someone else had some patches for
> > basic NV50 support. I don't recall who but I hope they can comment and
> > are interested in getting their changes merged. Also, your
> > implementation of interlaced MC breaks older chips that lack shader
> > control flow if I'm not mistaken, but that can probably be fixed
> > without much trouble.. Finally, someone else (Jimmy Rentz) had some
> > patches that implemented hardware decoding on NV40 chips, but they
> > were never merged into nouveau DRM and the pipe-video patches won't
> > apply anymore anyhow. Those changes required a bit of work in
> > pipe-video to support planar surfaces, but it worked quite well with
> > the old vl_compositor. Recently Ben Skeggs added HW decoder bits to
> > nouveau's DRM so if anyone is motivated enough to rework the userspace
> > side it will require proper planar surface support in pipe-video.
> > (This is just an FYI for anyone who is paying attention to
> > pipe-video.)
> >
> > Cheers.
> If I remember correctly Bryan Cain was working on getting this to work
> again on NV50, he had MC working, but was struggling with the iDCT code.
>
> I also stumbled over the issue of planar texture resources, and solved
> it by implementing an abstraction class that uses up to three separate
> textures to emulate the behaviour of an YV12 or NV12 texture. If a
> hardware driver has native support for planar buffers it should be easy
> to override the creation function and use a native buffer instead.
>
> So things like: native idct -> shader base mc or shader based idct ->
> native mc should now be easily possible. But there is still allot of
> work todo.
>
> Regards,
> Christian.
>
>
Yes, I got MC working on nv50 but couldn't get the iDCT to work properly.  I
still need to send in the patches for what I did accomplish.  Should I send
them to the mesa-dev mailing list and just label them as being for the
pipe-video branch?

Regards,
Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GLSL IR to TGSI translator

2011-04-28 Thread Bryan Cain

On 4/27/2011 10:23 PM, Brian Paul wrote:

On Tue, Apr 26, 2011 at 12:26 AM, Bryan Cain  wrote:

Hi,

In the last week or so, I've been working on a direct translator from
GLSL IR to TGSI that does not go through Mesa IR.  Although it is still
a work in progress, it is now working and very usable.  So before I go
on, here is a link to the branch I've pushed to GitHub:

https://github.com/Plombo/mesa/tree/glsl-130

My main objective with this work is to make GLSL 1.30 support feasible
on Gallium drivers.  From what I understand, it would be difficult or
impossible to implement integer-specific opcodes such as shifting and
bit masking in Mesa IR, since it only supports floats.  TGSI, on the
other hand, doesn't have this problem, and already supports most or all
of the functionality required by GLSL 1.30.

Unfortunately, TGSI doesn't have everything we need yet.  There's
opcodes for binary AND, OR, XOR, etc. and a few integer operations,
but it's incomplete.  It shouldn't be a big deal to add what's missing
but it'll take a little time.

I think everyone agrees that we want to eventually ditch Mesa's IR.  I
_think_ that the only classic Mesa driver that uses Mesa IR and hasn't
been deprecated by a Gallium driver, or already weaned from Mesa IR is
swrast.  How much does the i965 driver still rely on swrast for
fallbacks?  Do the Intel people see need for a GLSL IR executor for
swrast?


I must not have noticed the integer functionality missing from TGSI.  I 
assume they're just the arithmetic opcodes?



The translator started as a modified version of ir_to_mesa, and that
origin is still obvious from reading the code.  Many parts of ir_to_mesa
are still untouched - glsl_to_tgsi is still a long way away from
eliminating all traces of Mesa IR.  It also contains a significant
amount of code adapted from st_mesa_to_tgsi, but modified to generate
TGSI code from the glsl_to_tgsi_instruction class instead of using Mesa
IR.  (It actually still generates Mesa IR instructions, but that could
be safely removed at some point since the generated Mesa IR instructions
are not actually used for anything.)  I'm planning to push more of the
conversion to TGSI higher up in the stack in the future, although the
remaining remnants of Mesa IR (such as the Mesa IR opcodes used by most
of glsl_to_tgsi) aren't doing any harm.

I finally found a little time to look over your code.  As you said,
it's basically a copy&  paste of the ir_to_mesa.cpp and
st_mesa_to_tgsi.c code at this time.  Do you plan to eliminate all
remnants of Mesa IR there before adding support for GLSL 1.30?  One
easy step would be to replace use of Mesa IR opcodes with TGSI opcodes
and add new TGSI opcodes for integer ops.


I do plan to eliminate the Mesa IR remnants, or the opcodes at the very 
least, before working on GLSL 1.30 support.  The main reason I haven't 
replaced the Mesa IR opcodes yet is _mesa_num_src_regs and 
_mesa_num_dst_regs.  Are there equivalents to these that work with TGSI 
opcodes?



Since the _mesa_optimize_program function is vital to generating
optimized code with ir_to_mesa, and it is not available when not using
Mesa IR, I've written some new optimization passes for
glsl_to_tgsi_visitor that perform dead code elimination and
consolidation of the temporary register space.  Although they are rather
simple, they do make a huge difference in the quality of the output.  As
an example, here is what it generates for the vertex shader in the
Mandelbrot GLSL demo from the Mesa demos repository:

VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[10]
DCL OUT[2], GENERIC[11]
DCL CONST[0..14]
DCL TEMP[0..4]
IMM FLT32 {2., 0.,-0.5000, 5.}
  0: MUL TEMP[0], CONST[4], IN[0].
  1: MAD TEMP[0], CONST[5], IN[0]., TEMP[0]
  2: MAD TEMP[0], CONST[6], IN[0]., TEMP[0]
  3: MAD TEMP[0], CONST[7], IN[0]., TEMP[0]
  4: MUL TEMP[1].xyz, CONST[12].xyzz, IN[1].
  5: MAD TEMP[1], CONST[13].xyzz, IN[1]., TEMP[1].xyzz
  6: MAD TEMP[1], CONST[14].xyzz, IN[1]., TEMP[1].xyzz
  7: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
  8: RSQ TEMP[2].x, TEMP[2].
  9: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].
  10: ADD TEMP[2].xyz, CONST[3].xyzz, -TEMP[0].xyzz
  11: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
  12: RSQ TEMP[3].x, TEMP[3].
  13: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].
  14: MOV TEMP[3].xyz, -TEMP[2].xyzx
  15: MOV TEMP[0].xyz, -TEMP[0].xyzx
  16: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[3].xyzz
  17: MUL TEMP[4].xyz, TEMP[4]., TEMP[1].xyzz
  18: MUL TEMP[4].xyz, IMM[0]., TEMP[4].xyzz
  19: ADD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[4].xyzz
  20: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
  21: RSQ TEMP[4].x, TEMP[4].
  22: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].
  23: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
  24: MAX TEMP[0].x, TEMP[0]., IMM[0].
  25: POW TEMP[0].x, TEMP[0

Re: [Mesa-dev] GLSL IR to TGSI translator

2011-05-02 Thread Bryan Cain
On 05/02/2011 11:55 AM, Ian Romanick wrote:
> On 04/27/2011 08:23 PM, Brian Paul wrote:
> > On Tue, Apr 26, 2011 at 12:26 AM, Bryan Cain 
> wrote:
> >> Hi,
> >>
> >> In the last week or so, I've been working on a direct translator from
> >> GLSL IR to TGSI that does not go through Mesa IR.  Although it is still
> >> a work in progress, it is now working and very usable.  So before I go
> >> on, here is a link to the branch I've pushed to GitHub:
> >>
> >> https://github.com/Plombo/mesa/tree/glsl-130
> >>
> >> My main objective with this work is to make GLSL 1.30 support feasible
> >> on Gallium drivers.  From what I understand, it would be difficult or
> >> impossible to implement integer-specific opcodes such as shifting and
> >> bit masking in Mesa IR, since it only supports floats.  TGSI, on the
> >> other hand, doesn't have this problem, and already supports most or all
> >> of the functionality required by GLSL 1.30.
>
> > Unfortunately, TGSI doesn't have everything we need yet.  There's
> > opcodes for binary AND, OR, XOR, etc. and a few integer operations,
> > but it's incomplete.  It shouldn't be a big deal to add what's missing
> > but it'll take a little time.
>
> > I think everyone agrees that we want to eventually ditch Mesa's IR.  I
> > _think_ that the only classic Mesa driver that uses Mesa IR and hasn't
> > been deprecated by a Gallium driver, or already weaned from Mesa IR is
> > swrast.  How much does the i965 driver still rely on swrast for
> > fallbacks?  Do the Intel people see need for a GLSL IR executor for
> > swrast?
>
> Right now i915, i965 (vertex shaders only), and r200 all use Mesa IR.
> All of the non-shader chips also use it for vertex shaders.  We're in
> the process of eliminating the use of Mesa IR in i965.  Mesa IR isn't a
> horrible fit for i915 or r200.  Once everything else works directly from
> GLSL IR, I'll probably evolve the use of Mesa IR to better match those
> targets.  It will basically become a (semi-)shared low-level IR used
> just by those drivers.
>
> I also need to finish changing the ARB_vertex_program (and friends)
> assemblers to generate GLSL IR directly.  It's mostly done.  There's
> really just a bunch of typing left.
>
> I don't think interpreter will go away from core Mesa any time soon.
> We'll still need it for fallbacks on a variety of chips and vertex
> shaders on non-shader GPUs.  That said, I don't think it needs to
> support any of the new functionality.  The only classic driver that is
> ever going to support 1.30+ is i965.  When we're on 1.30 shaders and
> would need a full software fallback, I think we'll opt for
> non-conformance instead.  I think there are only 3 or 4 places where we
> hit software fallbacks, and I think we can take that ding.
>
> >> The translator started as a modified version of ir_to_mesa, and that
> >> origin is still obvious from reading the code.  Many parts of
> ir_to_mesa
> >> are still untouched - glsl_to_tgsi is still a long way away from
> >> eliminating all traces of Mesa IR.  It also contains a significant
> >> amount of code adapted from st_mesa_to_tgsi, but modified to generate
> >> TGSI code from the glsl_to_tgsi_instruction class instead of using Mesa
> >> IR.  (It actually still generates Mesa IR instructions, but that could
> >> be safely removed at some point since the generated Mesa IR
> instructions
> >> are not actually used for anything.)  I'm planning to push more of the
> >> conversion to TGSI higher up in the stack in the future, although the
> >> remaining remnants of Mesa IR (such as the Mesa IR opcodes used by most
> >> of glsl_to_tgsi) aren't doing any harm.
>
> > I finally found a little time to look over your code.  As you said,
> > it's basically a copy & paste of the ir_to_mesa.cpp and
> > st_mesa_to_tgsi.c code at this time.  Do you plan to eliminate all
> > remnants of Mesa IR there before adding support for GLSL 1.30?  One
> > easy step would be to replace use of Mesa IR opcodes with TGSI opcodes
> > and add new TGSI opcodes for integer ops.
>
> Mesa IR and TGSI are, structurally, very similar.  It seems like
> starting with ir_to_mesa is the right plan.  That's how the i965
> fragment shader backend was written, and it's how we're planning to do
> the vertex shader backend.  At that point, it's better to change it to
> generate TGSI instructions / opcodes instead of Mesa IR.  Once you get a
> ba

Re: [Mesa-dev] Status of VDPAU and XvMC state-trackers (was Re: Build error on current xvmc-r600 pip

2011-05-05 Thread Bryan Cain
2011/4/27 Christian König 

> Regarding the not working idct on nvidia hardware I would guess that we
> use something that the driver currently doesn't supports, like multiple
> render targets, disabled opengl rasterisation rules, unsupported
> intermediate buffer format or something else.
>

Hi Christian,

I found out the other day that nv50 doesn't support disabled GL
rasterization rules, and doesn't plan to.  Could iDCT be changed to work
with GL rasterization rules enabled, or is there a reason that it can't or
shouldn't use them?

Regards,
Bryan.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of VDPAU and XvMC state-trackers (was Re: Build error on current xvmc-r600 pip

2011-05-06 Thread Bryan Cain
On 05/06/2011 03:59 AM, Christoph Bumiller wrote:
> On 06.05.2011 06:59, Bryan Cain wrote:
>> I found out the other day that nv50 doesn't support disabled GL
>> rasterization rules, and doesn't plan to.  Could iDCT be changed to
>> work with GL rasterization rules enabled, or is there a reason that it
>> can't or shouldn't use them?
> NV50 as in the hardware here. At least as far as we could find. I
> suppose adjusting the viewport translation by -0.5 might help, but I'd
> rather not. And if you want me to change the top-left rule into
> something else, you're completely out of luck.

I don't.  That's why I'm asking Christian to change the iDCT instead of
asking you to change the driver.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] GLSL IR int-to-float pass

2011-05-24 Thread Bryan Cain
Hi,

In the past few days, I've been working on native integer support in my
GLSL to TGSI translator.  Something that's come to my attention is that
supporting Gallium targets with and without integer support using a
single GLSL IR backend will more or less require a GLSL IR pass to
convert int, uint, and possibly bool variables and operations to floats.

Currently, this is done directly in the backend, in both ir_to_mesa and
st_glsl_to_tgsi.  However, the mod_to_fract and div_to_mul_rcp lowering
passes for GLSL IR need to know whether to lower integer modulus and
division operations to their corresponding float operations.  (They both
do this in Mesa master without asking the backend, but that will be easy
to change later.)  So a GLSL IR pass will be needed to do the type lowering.

Such a pass would also have the advantage of less duplicated
functionality between backends, since ir_to_mesa could also take
advantage of the pass to eliminate some code.

I'm more than willing to try writing such a pass myself if no one else
is interested in doing it, but I figure I should make sure there are no
objections before starting on it.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GLSL IR int-to-float pass

2011-05-25 Thread Bryan Cain
On 05/25/2011 08:41 AM, Keith Whitwell wrote:
> On Wed, 2011-05-25 at 09:32 -0400, Jerome Glisse wrote:
>> On Tue, May 24, 2011 at 8:09 PM, Bryan Cain  wrote:
>>> Hi,
>>>
>>> In the past few days, I've been working on native integer support in my
>>> GLSL to TGSI translator.  Something that's come to my attention is that
>>> supporting Gallium targets with and without integer support using a
>>> single GLSL IR backend will more or less require a GLSL IR pass to
>>> convert int, uint, and possibly bool variables and operations to floats.
>>>
>>> Currently, this is done directly in the backend, in both ir_to_mesa and
>>> st_glsl_to_tgsi.  However, the mod_to_fract and div_to_mul_rcp lowering
>>> passes for GLSL IR need to know whether to lower integer modulus and
>>> division operations to their corresponding float operations.  (They both
>>> do this in Mesa master without asking the backend, but that will be easy
>>> to change later.)  So a GLSL IR pass will be needed to do the type lowering.
>>>
>>> Such a pass would also have the advantage of less duplicated
>>> functionality between backends, since ir_to_mesa could also take
>>> advantage of the pass to eliminate some code.
>>>
>>> I'm more than willing to try writing such a pass myself if no one else
>>> is interested in doing it, but I figure I should make sure there are no
>>> objections before starting on it.
>>>
>>> Bryan
>> TGSI needs to grow type support (int, uint and possibly int8,16,32..)

TGSI actually already has opcodes for u32 and s32 operands.  For
example, UADD, IDIV, ISGE, AND, OR, SHL, MOD, UMOD, etc.

The problem is that the only driver that supports any of them currently,
as far as I know, is softpipe.  It will help if support for these
opcodes is added to the drivers for hardware with integer support.

> Or go away entirely...
>
> I'm not trying to impose a direction on this, but it seems like the GLSL
> IR->TGSI converter (once running) could be pushed down into the
> individual drivers and GLSL IR or a close cousin of it could become the
> gallium-level interface.  Then individual drivers could be modified to
> consume IR directly.
>
> Keith

The problem with trying to push it into the drivers is that Mesa insists
that uniforms must be in a prog_parameter list, which the GLSL IR
backend (i.e., the translator) has to set up.  I've already modified it
prog_parameter in my branch to support non-float parameters.  I
initially wanted to remove the dependency, but that idea died when I
realized just how much all of Mesa depends on it.


If someone actually does the work to remove the prog_parameter
dependency from Mesa, then we can consider moving the translator from
the state tracker into the drivers.

Bryan

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GLSL IR int-to-float pass

2011-05-25 Thread Bryan Cain
On 05/25/2011 12:48 PM, Jerome Glisse wrote:
> On Wed, May 25, 2011 at 9:41 AM, Keith Whitwell  wrote:
>> I'm not trying to impose a direction on this, but it seems like the GLSL
>> IR->TGSI converter (once running) could be pushed down into the
>> individual drivers and GLSL IR or a close cousin of it could become the
>> gallium-level interface.  Then individual drivers could be modified to
>> consume IR directly.
>>
>> Keith
> I am also in favor of getting rid of tgsi, i would prefer having the
> driver callback into mesa to set informations mesa needs from the
> shader, for instance that would allow driver to pick where they put
> attribute (might be a huge win on hw like r6xx or newer) and few
> others things like that.
>
> Cheers,
> Jerome

Even in that case, a GLSL int-to-float pass would still be useful for
drivers for hardware that doesn't support integers.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Status of the GLSL->TGSI translator

2011-06-15 Thread Bryan Cain
My work on the GLSL IR to TGSI translator I announced on the list this
April is now at the point where I think it is ready to be merged into
Mesa.  It is stable and doesn't regress any piglit tests on softpipe or
nv50.

It adds native integer support as required by GLSL 1.30, although it is
currently disabled for all drivers since GLSL 1.30 support is not
complete yet and most Gallium drivers haven't implemented the TGSI
integer opcodes.  (This would be a good time for Gallium driver
developers to add support for TGSI's integer opcodes, which are
currently only implemented in softpipe.)

Developing this necessitated significant changes elsewhere in Mesa, and
some small changes in Gallium.  This means that some of the commits in
my branch probably need to be reviewed by the developers of those
components.

If I had commit access to Mesa, I would create a branch for this work in
the main Mesa repository.  But since I am still waiting on my
freedesktop.org account to be created, I have pushed the latest version
to the "glsl-to-tgsi" branch of my personal Mesa repository on GitHub:

Git clone URL: git://github.com/Plombo/mesa.git
Web interface for viewing commits:
https://github.com/Plombo/mesa/commits/glsl-to-tgsi

Hopefully my freedesktop.org account will be created soon (I have
already had my account request approved), so that I can push this to a
branch in the central repository.

-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of the GLSL->TGSI translator

2011-06-15 Thread Bryan Cain
On 06/15/2011 04:59 PM, Kenneth Graunke wrote:
> Bryan,
>
> Thanks for your work on this!  I'm glad to see GLSL IR->TGSI happening.
>
> A few quick comments on "mesa,st/mesa: add native support for integers
> in shaders"...
>
> glsl_type::get_vec4_type(base) is equivalent to
> glsl_type::get_instance(base, 4, 1) except that it returns error_type
> instead of NULL.  You might want to use get_instance directly or
> implement get_vec4_type on top of it.

Thanks, I didn't notice get_instance.  I'll go ahead and make a commit
that removes get_vec4_type and uses get_instance directly instead.

>
> For the ir_unop_u2f, ir_unop_bit_not, ir_binop_lshift,
> ir_binop_rshift, ir_binop_bit_and, ir_binop_bit_xor, and
> ir_binop_bit_or cases...you don't need the glsl_version >= 130 check. 
> These were first introduced in GLSL 1.30 and should never occur in
> prior versions.  (If anything, I'd assert instead of emitting nothing.)

Actually, they already assert when the GLSL version is less than GLSL
1.30 in order to match the behavior of ir_to_mesa.  It might not be
obvious from reading that code the first time, but the break for each of
those cases is inside the "glsl_version >= 130" block.  When the GLSL
version is 120 or lower for any of those cases, it falls through to the
assertion after ir_binop_round_even.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of the GLSL->TGSI translator

2011-06-16 Thread Bryan Cain
On Thu, Jun 16, 2011 at 12:46 AM, Dave Airlie  wrote:

>  On Thu, Jun 16, 2011 at 3:22 PM, Dave Airlie  wrote:
> > On Thu, Jun 16, 2011 at 7:38 AM, Bryan Cain 
> wrote:
> >> My work on the GLSL IR to TGSI translator I announced on the list this
> >> April is now at the point where I think it is ready to be merged into
> >> Mesa.  It is stable and doesn't regress any piglit tests on softpipe or
> >> nv50.
> >
> > I just pulled it into master here, and got this on build on an F15 box
> > with gcc 4.6.0.
> >
> > g++ -c -o state_tracker/st_glsl_to_tgsi.o
> > state_tracker/st_glsl_to_tgsi.cpp -DFEATURE_GL=1 -D_GNU_SOURCE
> > -DPTHREADS -DHAVE_POSIX_MEMALIGN -DUSE_EXTERNAL_DXTN_LIB=1
> > -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
> > -DHAVE_ALIAS -DHAVE_XEXTPROTO_71 -DGALLIUM_LLVMPIPE
> > -D__STDC_CONSTANT_MACROS -DHAVE_LLVM=0x0208 -I../../include
> > -I../../src/glsl -I../../src/mesa -I../../src/mapi
> > -I../../src/gallium/include -I../../src/gallium/auxiliary
> > -I/usr/include  -DNDEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS
> > -D__STDC_CONSTANT_MACROS -g -O2 -Wall -fno-strict-aliasing -fPIC
> > -fvisibility=hidden
> > state_tracker/st_glsl_to_tgsi.cpp:392:70: error: call of overloaded
> > ‘st_src_reg(gl_register_file, int, NULL)’ is ambiguous
> > state_tracker/st_glsl_to_tgsi.cpp:392:70: note: candidates are:
> > state_tracker/st_glsl_to_tgsi.cpp:103:4: note:
> > st_src_reg::st_src_reg(gl_register_file, int, int)
> > state_tracker/st_glsl_to_tgsi.cpp:90:4: note:
> > st_src_reg::st_src_reg(gl_register_file, int, const glsl_type*)
> > gmake[2]: *** [state_tracker/st_glsl_to_tgsi.o] Error 1
>
> I fixed this by casting NULL to (const glsl_type *)NULL, but not sure
> what the proper answer is,
>
> With that I get 0 piglit regressions due to this on r600g on evergreen.
>
> Dave.
>

Hm, I never got that error with my version of g++ (I think 4.5).  It looks
like it just doesn't know whether NULL is a pointer or an integer (stupid
C++), so casting it to (const glsl_type *)NULL is the correct fix.  I'll
commit a fix for that when I get back to my computer at home.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of the GLSL->TGSI translator

2011-06-16 Thread Bryan Cain
On Thu, Jun 16, 2011 at 9:08 AM, Brian Paul  wrote:
>
>
> Looks like nice work, Bryan.
>
> Just a few minor questions/comments for now:
>
> 1. The st_fragment/vertex/geometry_program structs now have a glsl_to_tgsi
> field.  I did a grep, but I couldn't find where that field is assigned.  Can
> you clue me in?
>

It's assigned at the end of the get_mesa_program function in
st_glsl_to_tgsi.cpp.


>
> 2. The above mentioned program structs contains an old Mesa instruction
> program AND/OR(?) a GLSL IR.  Do both types of representations co-exist
> sometimes?  Perhaps you could update the comments on those structs to
> explain that.
>

They used to co-exist, because after my first commit, st_glsl_to_tgsi still
generated Mesa IR in addition to TGSI.  But I removed the Mesa IR generation
in "st/mesa: stop generating Mesa IR in st_glsl_to_tgsi", so now it has
either one or the other - glsl_to_tgsi_visitor for GLSL shaders, Mesa IR
programs for everything else.


>
> 3. Kind of a follow-on: for glDrawPixels and glBitmap we take the original
> program code (in Mesa form) and prepend extra instructions for fetching the
> fragment color or doing the fragment kill.  Do we always have the Mesa
> instructions for this?  It seems we don't normally want to generate Mesa
> instructions all the time but we still need them sometimes.
>

No, I didn't realize Mesa did that, and we don't have the Mesa instructions
for GLSL programs anymore.  I'm not sure what the right way to handle that
is.


>
> 4. At least one commit message is slightly mis-named: changes to the
> gallium/util/tgsi/ files were labeled as "softpipe".  Not a big deal, but
> maybe be more careful about that.
>

I thought only softpipe used tgsi_exec, but I'll keep in mind to be more
careful in the future.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Status of the GLSL->TGSI translator

2011-06-22 Thread Bryan Cain
On 06/16/2011 12:43 PM, Brian Paul wrote:
> On 06/16/2011 10:34 AM, Bryan Cain wrote:
>> On Thu, Jun 16, 2011 at 9:08 AM, Brian Paul > <mailto:bri...@vmware.com>> wrote:
>>
>>
>> Looks like nice work, Bryan.
>>
>> Just a few minor questions/comments for now:
>>
>> 1. The st_fragment/vertex/geometry_program structs now have a
>> glsl_to_tgsi field.  I did a grep, but I couldn't find where that
>> field is assigned.  Can you clue me in?
>>
>> It's assigned at the end of the get_mesa_program function in
>> st_glsl_to_tgsi.cpp.
>
> Thanks.  I was using grep glsl_to_tgsi *.[ch]   Duh.
>
>
>> 2. The above mentioned program structs contains an old Mesa
>> instruction program AND/OR(?) a GLSL IR.  Do both types of
>> representations co-exist sometimes?  Perhaps you could update the
>> comments on those structs to explain that.
>>
>> They used to co-exist, because after my first commit, st_glsl_to_tgsi
>> still generated Mesa IR in addition to TGSI.  But I removed the Mesa
>> IR generation in "st/mesa: stop generating Mesa IR in
>> st_glsl_to_tgsi", so now it has either one or the other -
>> glsl_to_tgsi_visitor for GLSL shaders, Mesa IR programs for everything
>> else.
>
> OK, can you update the comments with this info?
>
>
>> 3. Kind of a follow-on: for glDrawPixels and glBitmap we take the
>> original program code (in Mesa form) and prepend extra
>> instructions for fetching the fragment color or doing the fragment
>> kill.  Do we always have the Mesa instructions for this?  It seems
>> we don't normally want to generate Mesa instructions all the time
>> but we still need them sometimes.
>>
>> No, I didn't realize Mesa did that, and we don't have the Mesa
>> instructions for GLSL programs anymore.  I'm not sure what the
>> right way to handle that is.
>
> How hard would it be to edit the IR to insert the extra operations?
>
> For glBitmaps it's basically sample a texture and then do a
> conditional fragment kill.  For glDrawPixels we need to sample the
> texture representing the image, then apply the pixel transfer ops
> (scale/bias, table lookup, etc).  We generate the code for that in
> st_atom_pixeltransfer.c.  That program is then concatenated with the
> current fragment program with _mesa_combine_program().
>
> If we ever propogate GLSL IR through the gallium interface there's
> lots of places where we'll need to do other kinds of IR editing.
>
> -Brian
>

I've been working on this for the last few days, but there are some
things I still haven't figured out yet.

I've written a function to generate the shader for
glDrawPixels/glCopyPixels in the form of glsl_to_tgsi_instructions, and
I doubt I'll have any problems writing the code to merge those
instructions with an existing shader.However, for the shader to work
correctly, I need to set stfp->glsl_to_tgsi in st_fragment_program, but
all of the fragment program variables in st_atom_pixeltransfer.c and
st_cb_drawpixels.c have the type gl_program or gl_fragment_program. 
When do these become st_fragment_program so they can be processed in
st_program.c?  Will I need to move the glsl_to_tgsi attribute into
gl_fragment program and give it a name not specific to the state tracker?

My other question is that if I don't have this part done before the
merge window closes, would it still be possible to merge the GLSL->TGSI
translator for 7.11 tomorrow or Friday, and for me to fix the
glBitmap/glDrawPixels/glCopyPixels issue in the stable branch before
Mesa 7.11 is released?  It's the only remaining issue I know of with the
GLSL->TGSI translator, and even it's something of a corner case.

-Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Merging glsl-to-tgsi to master

2011-07-12 Thread Bryan Cain
On 07/11/2011 11:53 AM, Egon Ashrafinia wrote:
> Hello guys.
>
> 1 month ago, we talked about merging glsl-to-tgsi to master but it
> still not happend. What about now? I could compile and test it a bit.
> It works.
> Anyone who could do it? What does Bryan Cain say about it?

Hi Egon,

Last month, I was trying to get it merged before the 7.11 merge window
closed.  When that didn't happen, I decided to stop being in a hurry and
instead make some more improvements before trying again to get it merged
to master.  Also, Brian Paul pointed out an issue that I hadn't noticed
before where st/mesa needed the current fragment shader to be in Mesa IR
form for glBitmap, glDrawPixels, and glCopyPixels to work correctly.

All of that is done now, as I finished the glBitmap path on Sunday and
the DrawPixels/CopyPixels path on Saturday.  So now I need to fix a few
minor things like commit messages and one case where the old path
generates better code than glsl_to_tgsi.  The merge might also be
delayed a bit further if it's decided that it should wait until after
7.11 is released, as Ian's reply suggested.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Case where opt_copy_propagation doesn't do its job

2011-07-12 Thread Bryan Cain
It appears that the copy propagation pass in the GLSL compiler (in
opt_copy_propagation.cpp) doesn't do copy propagation when the
components of a variable are initialized separately, like this:

(declare (temporary ) vec4 vec_ctor)
(assign  (w) (var_ref vec_ctor)  (constant float (1.00)) )
(assign  (xyz) (var_ref vec_ctor)  (var_ref assignment_tmp@16) )
(assign  (xyzw) (var_ref gl_FragColor)  (var_ref vec_ctor) )

In the past, this wasn't visible in the Mesa IR output because Mesa IR
optimization seems to do the copy propagation.  However, glsl_to_tgsi
doesn't do copy propagation to output registers - in fact, I believe
this is the only case left where ir_to_mesa produces better code than
glsl_to_tgsi.  I'm not very enthusiastic about the idea adding to the
copy propagation pass in glsl_to_tgsi, since this case is something that
should really be optimized by the GLSL compiler before it reaches the IR
backend.

So, is there a reason why the GLSL copy propagation pass doesn't operate
per-component?

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Case where opt_copy_propagation doesn't do its job

2011-07-12 Thread Bryan Cain
On 07/12/2011 04:36 PM, Eric Anholt wrote:
> On Tue, 12 Jul 2011 15:44:12 -0500, Bryan Cain  wrote:
>> It appears that the copy propagation pass in the GLSL compiler (in
>> opt_copy_propagation.cpp) doesn't do copy propagation when the
>> components of a variable are initialized separately, like this:
>>
>> (declare (temporary ) vec4 vec_ctor)
>> (assign  (w) (var_ref vec_ctor)  (constant float (1.00)) )
>> (assign  (xyz) (var_ref vec_ctor)  (var_ref assignment_tmp@16) )
>> (assign  (xyzw) (var_ref gl_FragColor)  (var_ref vec_ctor) )
>>
>> In the past, this wasn't visible in the Mesa IR output because Mesa IR
>> optimization seems to do the copy propagation.  However, glsl_to_tgsi
>> doesn't do copy propagation to output registers - in fact, I believe
>> this is the only case left where ir_to_mesa produces better code than
>> glsl_to_tgsi.  I'm not very enthusiastic about the idea adding to the
>> copy propagation pass in glsl_to_tgsi, since this case is something that
>> should really be optimized by the GLSL compiler before it reaches the IR
>> backend.
>>
>> So, is there a reason why the GLSL copy propagation pass doesn't operate
>> per-component?
> opt_copy_propagation_elements does that, but it doesn't split up the
> assignment into multiple assignments (otherwise, imagine initializing
> vec_ctor once and using it many times -- how do you decide whether copy
> propagate or not, to avoid pessimizing the code?).
>
> I think the operation you're looking for isn't really copy propagation,
> but more like register coalescing where you see that vec_ctor doesn't
> get redefined and used after being assigned to gl_FragColor, and just
> store the two in the same place.  And yes, I think it would be useful to
> have something like that.

Oops, I don't have a very good vocabulary of compiler terms, and after
looking at the file I started to realize it wasn't copy propagation I
was looking for.  But you're right, that's a good idea, and maybe I can
try writing a pass for that.  If I'm thinking about this right, it would
just track which variables are only dereferenced once, and where that
dereference is in an assignment; then it would change the LHS of the
assignment(s) to the first variable and replace it with the new one.

Actually, if that were done for all variables, it would involve tracking
whether the second variable is used in between assignment and
dereference of the first variable.  So I think I'll just do it for
ir_var_out variables instead.  They're the only ones that are causing me
a problem, anyway.

> Either that or generate new ir_quadop_vector in copy propagation.

I don't understand how that solution would work.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Merging glsl-to-tgsi to master

2011-07-13 Thread Bryan Cain
On Tue, Jul 12, 2011 at 12:00 PM, Ian Romanick  wrote:

> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA1
>
> On 07/11/2011 09:53 AM, Egon Ashrafinia wrote:
> > Hello guys.
> >
> > 1 month ago, we talked about merging glsl-to-tgsi to master but it still
> > not happend. What about now? I could compile and test it a bit. It works.
> > Anyone who could do it? What does Bryan Cain say about it?
>
> One thing to consider is whether or not this will make it more difficult
> to cherry pick fixes to the 7.11 release branch.  Bryan has been doing
> some really cool work, and I'd like to see it get merged.  However, I
> also want 7.11 to ship on time and with as few bugs as possible.
> Anything that will make that more difficult should wait until August.
>

I don't think merging the glsl-to-tgsi branch would make it much more
difficult to cherry pick fixes to 7.11.  The main change in the branch is
the addition of a new file to st/mesa (st_glsl_to_tgsi.cpp) containing the
GLSL->TGSI translator.  That in and of itself doesn't make it any harder.

The only existing files with non-trivial changes that might affect
cherry-picking are uniforms.c and prog_parameter.c in core Mesa.  Several
other files in st/mesa and core Mesa have minor changes to make them work
with either the prog_parameter changes or to work without Mesa IR, but I
don't think those changes are enough to make cherry-picking difficult.
They're at least not worse than most changes in master in that regard.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Common Subexpression Elimination

2011-07-16 Thread Bryan Cain
On 07/16/2011 10:28 AM, Vincent wrote:
> Hi,
>
> I wrote an optimisation pass that perform CSE (that is, it spots
> expressions that appears twice or more, and factor them in a temporary
> to avoid recalculation).
> This is my first patch to Mesa, I would like to receive feedback :
> case where the algorithm does not work, crashes, ...
> I tried to follow coding style, using exec_list*, ralloced objects...
> The algorithm currently only spot binary expressions (that is x + y, x
> * y, ...) but unary expression (exp(x), ln(x)...) should follow soon.
>
> Regards,
> Vincent.

Hi Vincent,

For future reference, the diff you sent is currently reversed - the
lines you added are marked as "-" and the lines you removed are marked
as "+".  It's best if you send patches using git-send-email to avoid this.

Bryan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >