[Nouveau] [PATCH] nv50/ir: avoid messing up arg1 of PFETCH

2015-05-23 Thread Ilia Mirkin
There can be scenarios where the indirect arg of a PFETCH becomes
known, and so the code will attempt to propagate it. Use this
opportunity to just fold it into the first argument, and prevent the
load propagation pass from touching PFETCH further.

This fixes gs-input-array-vec4-index-rd.shader_test and
vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 72dd31e..98e3d1f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb)
   if (i-op == OP_CALL) // calls have args as sources, they must be in regs
  continue;
 
+  if (i-op == OP_PFETCH) // pfetch expects arg1 to be a reg
+ continue;
+
   if (i-srcExists(1))
  checkSwapSrc01(i);
 
@@ -581,6 +584,11 @@ ConstantFolding::expr(Instruction *i,
case OP_POPCNT:
   res.data.u32 = util_bitcount(a-data.u32  b-data.u32);
   break;
+   case OP_PFETCH:
+  // The two arguments to pfetch are logically added together. Normally
+  // the second argument will not be constant, but that can happen.
+  res.data.u32 = a-data.u32 + b-data.u32;
+  break;
default:
   return;
}
@@ -610,6 +618,8 @@ ConstantFolding::expr(Instruction *i,
  bld.setPosition(i, false);
  i-setSrc(1, bld.loadImm(NULL, res.data.u32));
   }
+   } else if (i-op == OP_PFETCH) {
+  // Leave PFETCH alone... we just folded its 2 args into 1.
} else {
   i-op = i-saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
}
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2] nv50/ir: avoid messing up arg1 of PFETCH

2015-05-23 Thread Ilia Mirkin
There can be scenarios where the indirect arg of a PFETCH becomes
known, and so the code will attempt to propagate it. Use this
opportunity to just fold it into the first argument, and prevent the
load propagation pass from touching PFETCH further.

This fixes gs-input-array-vec4-index-rd.shader_test and
vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org
---

v1 -> v2:
 - redo final section of ConstantFolding::expr using a switch, per tobijk

 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 72dd31e..b7fcd56 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb)
   if (i-op == OP_CALL) // calls have args as sources, they must be in regs
  continue;
 
+  if (i-op == OP_PFETCH) // pfetch expects arg1 to be a reg
+ continue;
+
   if (i-srcExists(1))
  checkSwapSrc01(i);
 
@@ -581,6 +584,11 @@ ConstantFolding::expr(Instruction *i,
case OP_POPCNT:
   res.data.u32 = util_bitcount(a-data.u32  b-data.u32);
   break;
+   case OP_PFETCH:
+  // The two arguments to pfetch are logically added together. Normally
+  // the second argument will not be constant, but that can happen.
+  res.data.u32 = a-data.u32 + b-data.u32;
+  break;
default:
   return;
}
@@ -595,7 +603,9 @@ ConstantFolding::expr(Instruction *i,
 
i-getSrc(0)-reg.data = res.data;
 
-   if (i-op == OP_MAD || i-op == OP_FMA) {
+   switch (i-op) {
+   case OP_MAD:
+   case OP_FMA: {
   i-op = OP_ADD;
 
   i-setSrc(1, i-getSrc(0));
@@ -610,8 +620,14 @@ ConstantFolding::expr(Instruction *i,
  bld.setPosition(i, false);
  i-setSrc(1, bld.loadImm(NULL, res.data.u32));
   }
-   } else {
+  break;
+   }
+   case OP_PFETCH:
+  // Leave PFETCH alone... we just folded its 2 args into 1.
+  break;
+   default:
   i-op = i-saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+  break;
}
i-subOp = 0;
 }
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] nv30: avoid doing extra work on clear and hitting unexpected states

2015-05-23 Thread Ilia Mirkin
Clearing can happen at a time when various state objects are incoherent
and not ready for a draw. Some of the validation functions don't handle
this well, so only flush the framebuffer state. This has the advantage
of also not doing extra work.

This works around some crashes that can happen when clearing.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/gallium/drivers/nouveau/nv30/nv30_clear.c  |  2 +-
 src/gallium/drivers/nouveau/nv30/nv30_context.h|  2 +-
 src/gallium/drivers/nouveau/nv30/nv30_draw.c   |  4 ++--
 src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 10 ++
 src/gallium/drivers/nouveau/nv30/nv30_vbo.c|  2 +-
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c 
b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index 1ab8929..83fd1fa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_framebuffer_state *fb = nv30-framebuffer;
uint32_t colr = 0, zeta = 0, mode = 0;
 
-   if (!nv30_state_validate(nv30, TRUE))
+   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, 
TRUE))
   return;
 
if (buffers  PIPE_CLEAR_COLOR  fb-nr_cbufs) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h 
b/src/gallium/drivers/nouveau/nv30/nv30_context.h
index 7b32aae..592cdbe 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -204,7 +204,7 @@ void
 nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
 
 boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl);
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);
 
 void
 nv30_state_release(struct nv30_context *nv30);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c 
b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index 3575c3d..38c31e9 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -129,7 +129,7 @@ nv30_render_draw_elements(struct vbuf_render *render,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
}
 
-   if (!nv30_state_validate(nv30, FALSE))
+   if (!nv30_state_validate(nv30, ~0, FALSE))
   return;
 
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -174,7 +174,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, 
unsigned start, uint nr)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
}
 
-   if (!nv30_state_validate(nv30, FALSE))
+   if (!nv30_state_validate(nv30, ~0, FALSE))
   return;
 
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c 
b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index 0f9d19d..86ac4f7 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -456,7 +456,7 @@ nv30_state_context_switch(struct nv30_context *nv30)
 }
 
 boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl)
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
 {
struct nouveau_screen *screen = nv30-screen-base;
struct nouveau_pushbuf *push = nv30-base.pushbuf;
@@ -481,14 +481,16 @@ nv30_state_validate(struct nv30_context *nv30, boolean 
hwtnl)
else
   validate = swtnl_validate_list;
 
-   if (nv30-dirty) {
+   mask = nv30-dirty;
+
+   if (mask) {
   while (validate-func) {
- if (nv30-dirty  validate-mask)
+ if (mask  validate-mask)
 validate-func(nv30);
  validate++;
   }
 
-  nv30-dirty = 0;
+  nv30-dirty = ~mask;
}
 
nouveau_pushbuf_bufctx(push, bctx);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c 
b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index 67ab829..d4e384b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -564,7 +564,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
if (nv30-vbo_user  !(nv30-dirty  (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
   nv30_update_user_vbufs(nv30);
 
-   nv30_state_validate(nv30, TRUE);
+   nv30_state_validate(nv30, ~0, TRUE);
if (nv30-draw_flags) {
   nv30_render_vbo(pipe, info);
   return;
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 2/2] nv30: fix clip plane uploads and enable changes

2015-05-23 Thread Ilia Mirkin
nv30_validate_clip depends on the rasterizer state. Also we should
upload all the new clip planes on change since next time the plane data
won't have changed, but the enables might.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c 
b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index 86ac4f7..a954dcc 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -272,15 +272,13 @@ nv30_validate_clip(struct nv30_context *nv30)
uint32_t clpd_enable = 0;
 
for (i = 0; i  6; i++) {
-  if (nv30-rast-pipe.clip_plane_enable  (1  i)) {
- if (nv30-dirty  NV30_NEW_CLIP) {
-BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
-PUSH_DATA (push, i);
-PUSH_DATAp(push, nv30-clip.ucp[i], 4);
- }
-
- clpd_enable |= 1  (1 + 4*i);
+  if (nv30-dirty  NV30_NEW_CLIP) {
+ BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
+ PUSH_DATA (push, i);
+ PUSH_DATAp(push, nv30-clip.ucp[i], 4);
   }
+  if (nv30-rast-pipe.clip_plane_enable  (1  i))
+ clpd_enable |= 2  (4*i);
}
 
BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
@@ -389,7 +387,7 @@ static struct state_validate hwtnl_validate_list[] = {
 { nv30_validate_stipple,   NV30_NEW_STIPPLE },
 { nv30_validate_scissor,   NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER },
 { nv30_validate_viewport,  NV30_NEW_VIEWPORT },
-{ nv30_validate_clip,  NV30_NEW_CLIP },
+{ nv30_validate_clip,  NV30_NEW_CLIP | NV30_NEW_RASTERIZER },
 { nv30_fragprog_validate,  NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST },
 { nv30_vertprog_validate,  NV30_NEW_VERTPROG | NV30_NEW_VERTCONST |
NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER },
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Fermi+ shader header docs

2015-05-21 Thread Ilia Mirkin
On Thu, May 21, 2015 at 10:05 AM, Robert Morell rmor...@nvidia.com wrote:
 Hi Ilia,

 On Sat, May 02, 2015 at 12:34:21PM -0400, Ilia Mirkin wrote:
 Hi,

 As I'm looking to add some support to nouveau for features like atomic
 counters and images, I'm running into some confusion about what the
 first word of the shader header means. Here is the definition as we
 have it today:

 [...]

 However I know that these are somewhat wrong. I've seen shaders that
 use gmem accesses (i.e. mov r0, [r0]) that just have the LMEM enable
 bit set (and they use no lmem). And I've seen additional bits set, esp
 relating to images, but I haven't spent enough time looking at all the
 variations to make sense of it yet. For example, I think that Fermi
 and Kepler+ have different meanings for some of the bits.

 Those look pretty close :)

 I was hoping you could just release the docs for the shader headers,
 or at least the first word of the shader header.

 We've posted the specification for the full Shader Program Header to our
 GPU documentation site here:

 ftp://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html

 I hope it helps clear things up.

Yep, just a few follow-up questions:

- SPH Type 1 and type 2 appear to be flipped wrt the tables -- "When
PS is used, field SphType in CommonWord0 must be set to 1; similarly,
when VTG is used, SphType in CommonWord0 must be set to 2." But the
"Table 1. SPH Type 1 Definition" is clearly meant for VTG and table 2
is clearly meant for PS...
- You skip over SassVersion -- what is that?
- You have a funny note in there -- Triangles generated by the
geometry shader always have all their edge flags set to TRUE -- that
is the *only* reference to edge flags in the whole document. Right now
we do some crazy thing to get edge flags right on fermi+ (and I think
we just get them wrong on tesla). Is there a way to emit edge flags
from vertex shader?
- To be clear: DoesLoadOrStore -- *any* load/store? Even LDC? ALD?

Thanks!

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles

2015-05-26 Thread Ilia Mirkin
One additional observation that I just made is that on GK208, the blob
apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID
wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx|
(i.e. absolute value), or even more paradoxically, shl 2; shr 2; which
removes the top *2* bits, rather than just the top 1. However I see no
such behaviour on GF108.

I'm going to test out tomorrow whether this is the cause of my GK208 woes.

On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Hello,

 I've been debugging a few different tessellation shader issues with
 nouveau, but let's start small. I see this issue on my GK208 with high
 frequency, and I *think* I've seen it once or twice on my GF108, but
 it's exceedingly rare, if it does happen. I don't have a GK10x to test
 on, unfortunately, but I assume it'll have the same issue as the
 GK208.

 The issue is this -- a bunch of triangles that should come out of the
 tessellator end up black. I also see a GPC0/TPC1/MP trap:
 MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in
 response to an interrupt and MP trap generated by the hardware, read
 out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see
 gf100_gr_trap_mp). I assume some of the tessellation evaluation
 invocations get killed, but I have no proof of this.

 I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]]

 I would imagine that's some floating point number ending up in the
 register instead of an address, but the fp32 value of it
 (1.35107421875) does not seem familiar.

 Ben pointed out that the 0x3facf000 is a channel address, not a value
 from the shader. Oops. So that theory completely doesn't hold water.
 Perhaps some buffer isn't big enough? This ends up using 9 output
 vertices per patch, with 2 vec4's each. I've tried playing with the
 per-warp stack size to no avail, but I didn't *entirely* know what I
 was doing either though.


 Even when all the triangles show up, I still see the error on the
 GK208, so I'm not sure if they're the same issue or not.

 Now, here's the fun part -- this is completely non-deterministic.
 Sometimes everything shows up on the GK208, other times I see holes,
 in varying locations. I'm fairly sure that the actual shader code is
 correct... so I'm doing something funny wrong. (And yeah, tons of
 missed optimization opportunities in this code, but let's not dwell on
 that.)

 This is the piglit test:

 http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test

 It should be noted that other piglit tests don't exhibit this error,
 however they also tend to be simpler. One key difference is that they
 don't change the patch size in TCS. I'm including a link to a text
 file with the tessellation control and evaluation shaders (decoded
 with nvdisasm which you're hopefully more familiar with), along with
 the shader headers that we generate.

 FTR, this is how I feed the raw shader opcode bytes into nvdisasm:

 perl -ane 'foreach (@F) { print pack I, hex($_) }'  tt; nvdisasm -b SM35 
 tt

 (for some reason it doesn't want to read from a pipe or even a fd).

 http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt

 My suspicion is that we're doing something wrong with the sched codes.
 We have an elaborate calculator, but... perhaps not elaborate enough?
 You can see it here:

 http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574

 The reason I think it's an error in sched codes is due to the TRAP
 memory location that I see -- could well be some stale value in the
 register and the value from S2R or VILD doesn't make it in there in
 time before the ALD reads it.

 If you should like to try this yourself, you can use
 https://github.com/imirkin/mesa/commits/gl4-integration-2 . This
 branch is good enough to run Unigine Heaven, but still has a lot of
 known shortcomings. (Both at the core and the nouveau levels.)

 Any advice or suggestions for debugging this would be greatly
 appreciated. And let me know if you'd like me to generate additional
 info on this. For example I can supply a full command trace that can
 be piped to demmt, if that's helpful.

 Thanks in advance,

   -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.

2015-07-07 Thread Ilia Mirkin
Lastly, from some discussions with ajax on IRC, it appears that DRI3
is half-baked at best wrt sync between server and client. I think we
should just disable it by default for now, until issues are ironed
out. (Rather than what this patch has, which is default-on for Xorg >=
some version.)

On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 The DRI option with the intel ddx can be used to indicate the following
  - whether dri is disabled
  - the dri version - dri1, dri2, dri3
  - the dri module name - doo_dri.so bar_dri.so

 I'm not sure how exactly it's supposed to work/works, and I believe
 most of that is due to legacy reasons. I'm just saying let's not do
 the whole thing - just the dri version would be great (as you
 suggested).

 -Emil


 On 4 July 2015 at 19:28, Ilia Mirkin imir...@alum.mit.edu wrote:
 Erm, that's nuts. I also don't really understand what they're talking
 about there... i915g vs i915? Anyways, I just meant the version
 numbers :)

 On Sat, Jul 4, 2015 at 2:23 PM, Emil Velikov emil.l.veli...@gmail.com 
 wrote:
 That would be great, as long as it does only that and does not go into
 the drivername territory. As the said driver ;-)

 A driver name to use can be provided instead
 of simple boolean value, which will be passed to the GL implementation for
 it to load the appropriate backend.

 -Emil

 On 4 July 2015 at 18:17, Ilia Mirkin imir...@alum.mit.edu wrote:
 IMO it'd be nice to keep this compatible with the intel driver, which
 has a DRI option, which can take the values 1, 2, 3. Obviously for
 nouveau, 1 makes no sense as that was dropped quite some time ago.

 See 
 http://cgit.freedesktop.org/xorg/driver/xf86-video-intel/tree/man/intel.man#n68

 On Mon, Jun 29, 2015 at 11:30 PM, Mario Kleiner
 mario.kleiner...@gmail.com wrote:
 X-Server versions older than 1.16.3 have bugs in their
 DRI3/Present implementation which impair nouveau, so
 it is better to stick to good old DRI2 by default on
 such servers. E.g., page flipping doesn't work at all
 under DRI3/Present with older servers, and use of
 extensions like OML_sync_control, SGI_video_sync or
 INTEL_swap_events also causes failure of Present.

 nouveau's glamor accel backend currently doesn't work under
 DRI2, so continue to use DRI3 whenever it is supported.

 Under the exa accel backend, DRI2 works just fine, so
 disable DRI3 and choose DRI2 by default when nouveau
 is built for X-Server < 1.16.3, and enable DRI3 if
 building on later X-Servers which work reasonably well
 under DRI3/Present.

 A new boolean xorg.conf Option "DRI3" allows enforcing or
 preventing use of DRI3/Present under EXA acceleration for
 testing.

 Also add a bit more output about status of Present and
 DRI3 to aid debugging.

 Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com
 ---
  man/nouveau.man|  6 ++
  src/nouveau_dri2.c | 11 ++-
  src/nv_const.h |  2 ++
  src/nv_driver.c| 17 +++--
  4 files changed, 33 insertions(+), 3 deletions(-)

 diff --git a/man/nouveau.man b/man/nouveau.man
 index 129bb7f..12cfbc0 100644
 --- a/man/nouveau.man
 +++ b/man/nouveau.man
 @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour 
 as defined in that
  specification.
  .br
  Default: 1.
 +.TP
 +.BI Option \*qDRI3\*q \*q boolean \*q
 +Enable the DRI3 extension under exa acceleration if supported by server.
 +A setting of off will only use DRI2 instead. Under glamor acceleration,
 +DRI3 is always enabled if supported. Default: on for XOrg >= 1.16.3, off 
 for
 +earlier versions.
  .SH SEE ALSO
  __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), 
 Xserver(__appmansuffix__), X(__miscmansuffix__)
  .SH AUTHORS
 diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
 index f22e319..d818976 100644
 --- a/src/nouveau_dri2.c
 +++ b/src/nouveau_dri2.c
 @@ -1130,7 +1130,16 @@ nouveau_dri3_screen_init(ScreenPtr screen)
 if (buf  stat(buf, render) == 0 
 master.st_mode == render.st_mode) {
 pNv-render_node = buf;
 -   return dri3_screen_init(screen, 
 nouveau_dri3_screen_info);
 +   if (dri3_screen_init(screen, nouveau_dri3_screen_info)) {
 +   xf86DrvMsg(pScrn-scrnIndex, X_INFO,
 +  DRI3 on EXA enabled\n);
 +   return TRUE;
 +   }
 +   else {
 +   xf86DrvMsg(pScrn-scrnIndex, X_WARNING,
 +  DRI3 on EXA initialization failed\n);
 +   return FALSE;
 +   }
 } else
 free(buf);
  #endif
 diff --git a/src/nv_const.h b/src/nv_const.h
 index f1b4e9b..df1e398 100644
 --- a/src/nv_const.h
 +++ b/src/nv_const.h
 @@ -18,6 +18,7 @@ typedef enum {
  OPTION_SWAP_LIMIT,
  OPTION_ASYNC_COPY,
  OPTION_ACCELMETHOD,
 +OPTION_DRI3,
  } NVOpts;


 @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions

[Nouveau] RFC: drop glamor from nouveau ddx

2015-07-07 Thread Ilia Mirkin
Ben,

Looks like the reality is that glamor is just not hooked up properly
in the nouveau DDX. Mainly it's missing DRI2, which in turn means no
core GL contexts, and probably lots of other issues. While this could
probably be fixed somehow, I doubt there's any advantage to using the
nouveau DDX over something like modesetting nowadays.

How would you feel about dropping glamor support from the nouveau ddx
and failing to load for GPUs that don't have EXA support (unless
AccelMethod = none is forced for them)? That way it'll fall back to
loading modesetting which should be properly set up for DRI2 and so
on.

Cheers,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x

2015-08-03 Thread Ilia Mirkin
On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com wrote:
 Hi,

 On 30-07-15 16:09, Ilia Mirkin wrote:

 FWIW this is a fail on nv50+ as well. See for example
 https://bugs.freedesktop.org/show_bug.cgi?id=91445

 My suspicion is that this is due to the lack of PUSH_KICK in the *Done
 exa handlers -- works fine with DRI2, but DRI3 has no synchronization
 and so the commands never get flushed out. Easily verified by sticking
 PUSH_KICK's everywhere.


 I do not believe that that is the problem, in my case it clearly
 seems to be a pitch / swizzle problem rather than a synchronization
 problem, here is what my desktop with gnome shell looks like when
 using DRI2:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg

 And this is what it looks like when using DRI3:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg

 The DRI2 screenshot is made with Mario's 2 patches on top of
 current master:

 http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html
 http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html

 And then adding Option DRI 2 to xorg.conf.

His patches should have defaulted it to DRI 2 I think, so this is
unnecessary. In fact you should have had to say DRI 3 to get DRI3
with his patches.
 --

 I've also tried disabling EXA using Option AccelMethod none,
 but that seems to also automatically disable all DRI, leading to
 software rendering.

 I discussed this with Ben this morning and he suggested that this
 is likely a Mesa issue since with DRI3 mesa rather than the ddx
 allocs the surfaces. I've tried disabling swizzling in the
 mesa code by forcing nv30_miptree_create() to always take
 the code path for linear textures, but that leads to the exact
 same result as before that change.

Ah yes. Very different problem indeed. I actually suspect it has to do
with swizzling. Look at the white pattern of the moon -- it's all in a
line. That means that it expected some locality and instead it got
drawn all on a line. If it were merely a stride problem, I'd expect to
see strips of the moon below and offset from one another.

So... take a look at nv30_miptree_from_handle -- I wonder if it can
now receive swizzled textures where it couldn't before.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] DisplayPort support with 750 Ti

2015-08-13 Thread Ilia Mirkin
Supposed to? Sure! :) DP is finicky in general, and Maxwell is a
fairly new generation that not a lot of people have tested or had
access to, so quite expected for things to go wrong. Can you file a
bug at bugs.freedesktop.org xorg -> Driver/nouveau with the output of
nouveau loaded with

drm.debug=0xe nouveau.debug=debug,VBIOS=trace

Hopefully that will give an idea of what's happening.

On Thu, Aug 13, 2015 at 12:57 AM, Tom Yan tom.t...@gmail.com wrote:
 Is nouveau supposed to support DisplayPort output as of Linux 4.1? I have
 an EVGA GeForce GTX 750 Ti. TMDS outputs (HDMI/DVI) works while
 DisplayPort output failed and delays initialization of nouveau. It
 seems that nouveau failed to read EDID properly through DisplayPort as
 well. The following are grep'd kernel messages of booting with
 DP/DP+DVI/DVI connected respectively:

 [tom@localhost ~]$ grep nouveau dp
 Aug 12 11:28:17 localhost kernel: fb: switching to nouveaufb from EFI VGA
 Aug 12 11:28:17 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 BOOT0  : 0x117000a2
 Aug 12 11:28:17 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 Chipset: GM107 (NV117)
 Aug 12 11:28:17 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 Family : NV110
 Aug 12 11:28:17 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 using image from PROM
 Aug 12 11:28:17 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 BIT signature found
 Aug 12 11:28:17 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 version 82.07.32.00.38
 Aug 12 11:28:17 localhost kernel: nouveau  [ PMC][:01:00.0]
 MSI interrupts enabled
 Aug 12 11:28:17 localhost kernel: nouveau  [ PFB][:01:00.0]
 RAM type: GDDR5
 Aug 12 11:28:17 localhost kernel: nouveau  [ PFB][:01:00.0]
 RAM size: 2048 MiB
 Aug 12 11:28:17 localhost kernel: nouveau  [ PFB][:01:00.0]
 ZCOMP: 0 tags
 Aug 12 11:28:19 localhost kernel: nouveau  [  PTHERM][:01:00.0]
 FAN control: PWM
 Aug 12 11:28:19 localhost kernel: nouveau  [  PTHERM][:01:00.0]
 fan management: automatic
 Aug 12 11:28:19 localhost kernel: nouveau  [  PTHERM][:01:00.0]
 internal sensor: yes
 Aug 12 11:28:19 localhost kernel: nouveau  [ CLK][:01:00.0]
 07: core 405 MHz memory 810 MHz
 Aug 12 11:28:19 localhost kernel: nouveau  [ CLK][:01:00.0]
 0f: core 270-1293 MHz memory 5400 MHz
 Aug 12 11:28:19 localhost kernel: nouveau  [ CLK][:01:00.0]
 --: core 405 MHz memory 810 MHz
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] VRAM: 2048 MiB
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] GART: 1048576 MiB
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] TMDS table version 2.0
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB version 4.0
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB outp 00:
 01000f02 00020030
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB outp 01:
 02000f00 
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB outp 02:
 08011f82 00020010
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB outp 03:
 02822fa6 04420010
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB outp 04:
 02022f62 00020010
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB conn 00: 1030
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB conn 01: 00010161
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] DCB conn 02: 2246
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] MM: using COPY
 for buffer copies
 Aug 12 11:28:19 localhost kernel: nouveau  [ DRM] allocated
 1280x1024 fb: 0x6, bo 880211e2dc00
 Aug 12 11:28:19 localhost kernel: fbcon: nouveaufb (fb0) is primary device
 Aug 12 11:29:02 localhost kernel: nouveau :01:00.0: fb0: nouveaufb
 frame buffer device
 Aug 12 11:29:02 localhost kernel: nouveau :01:00.0: registered
 panic notifier
 Aug 12 11:29:02 localhost kernel: [drm] Initialized nouveau 1.2.2
 20120801 for :01:00.0 on minor 0

 [tom@localhost ~]$ grep nouveau dp+dvi
 Aug 12 11:24:23 localhost kernel: fb: switching to nouveaufb from EFI VGA
 Aug 12 11:24:23 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 BOOT0  : 0x117000a2
 Aug 12 11:24:23 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 Chipset: GM107 (NV117)
 Aug 12 11:24:23 localhost kernel: nouveau  [  DEVICE][:01:00.0]
 Family : NV110
 Aug 12 11:24:23 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 using image from PROM
 Aug 12 11:24:23 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 BIT signature found
 Aug 12 11:24:23 localhost kernel: nouveau  [   VBIOS][:01:00.0]
 version 82.07.32.00.38
 Aug 12 11:24:23 localhost kernel: nouveau  [ PMC][:01:00.0]
 MSI interrupts enabled
 Aug 12 11:24:23 localhost kernel: nouveau  [ PFB][:01:00.0]
 RAM type: GDDR5
 Aug 12 11:24:23 localhost kernel: nouveau  [ PFB][:01:00.0]
 RAM size: 2048 MiB
 Aug 12 11:24:23 localhost kernel: nouveau  [ PFB][:01:00.0]
 ZCOMP: 0 tags
 Aug 12 11:24:24 localhost kernel: 

Re: [Nouveau] [PATCH] glsl: Extend lowering pass for gl_ClipDistance to support other arrays

2015-08-17 Thread Ilia Mirkin
I said this on IRC, but I'll say it here too:

(a) please regenerate this with -M (not in the general case, but it
makes sense here)
(b) this seems odd as there's no support for cull distance elsewhere
yet. should be part of a series that adds cull distance support. right
now there is none, so this is out of place.

On Mon, Aug 17, 2015 at 10:50 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 This will come in handy when we want to lower gl_CullDistance into
 gl_CullDistanceMESA.

 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/glsl/Makefile.sources|   2 +-
  src/glsl/ir_optimization.h   |   1 +
  src/glsl/lower_clip_distance.cpp | 574 
  src/glsl/lower_distance.cpp  | 606 
 +++
  4 files changed, 608 insertions(+), 575 deletions(-)
  delete mode 100644 src/glsl/lower_clip_distance.cpp
  create mode 100644 src/glsl/lower_distance.cpp

 diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
 index 0b77244..00ba480 100644
 --- a/src/glsl/Makefile.sources
 +++ b/src/glsl/Makefile.sources
 @@ -143,7 +143,7 @@ LIBGLSL_FILES = \
 loop_analysis.h \
 loop_controls.cpp \
 loop_unroll.cpp \
 -   lower_clip_distance.cpp \
 +   lower_distance.cpp \
 lower_const_arrays_to_uniforms.cpp \
 lower_discard.cpp \
 lower_discard_flow.cpp \
 diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
 index eef107e..fe62e74 100644
 --- a/src/glsl/ir_optimization.h
 +++ b/src/glsl/ir_optimization.h
 @@ -120,6 +120,7 @@ bool lower_variable_index_to_cond_assign(gl_shader_stage 
 stage,
  bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
  bool lower_const_arrays_to_uniforms(exec_list *instructions);
  bool lower_clip_distance(gl_shader *shader);
 +bool lower_cull_distance(gl_shader *shader);
  void lower_output_reads(unsigned stage, exec_list *instructions);
  bool lower_packing_builtins(exec_list *instructions, int op_mask);
  void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions);
 diff --git a/src/glsl/lower_clip_distance.cpp 
 b/src/glsl/lower_clip_distance.cpp
 deleted file mode 100644
 index 1ada215..000
 --- a/src/glsl/lower_clip_distance.cpp
 +++ /dev/null
 @@ -1,574 +0,0 @@
 -/*
 - * Copyright © 2011 Intel Corporation
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and associated documentation files (the Software),
 - * to deal in the Software without restriction, including without limitation
 - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 - * and/or sell copies of the Software, and to permit persons to whom the
 - * Software is furnished to do so, subject to the following conditions:
 - *
 - * The above copyright notice and this permission notice (including the next
 - * paragraph) shall be included in all copies or substantial portions of the
 - * Software.
 - *
 - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 - * DEALINGS IN THE SOFTWARE.
 - */
 -
 -/**
 - * \file lower_clip_distance.cpp
 - *
 - * This pass accounts for the difference between the way
 - * gl_ClipDistance is declared in standard GLSL (as an array of
 - * floats), and the way it is frequently implemented in hardware (as
 - * a pair of vec4s, with four clip distances packed into each).
 - *
 - * The declaration of gl_ClipDistance is replaced with a declaration
 - * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are
 - * translated to refer to gl_ClipDistanceMESA with the appropriate
 - * swizzling of array indices.  For instance:
 - *
 - *   gl_ClipDistance[i]
 - *
 - * is translated into:
 - *
 - *   gl_ClipDistanceMESA[i>>2][i&3]
 - *
 - * Since some hardware may not internally represent gl_ClipDistance as a pair
 - * of vec4's, this lowering pass is optional.  To enable it, set the
 - * LowerClipDistance flag in gl_shader_compiler_options to true.
 - */
 -
 -#include glsl_symbol_table.h
 -#include ir_rvalue_visitor.h
 -#include ir.h
 -#include program/prog_instruction.h /* For WRITEMASK_* */
 -
 -namespace {
 -
 -class lower_clip_distance_visitor : public ir_rvalue_visitor {
 -public:
 -   explicit lower_clip_distance_visitor(gl_shader_stage shader_stage)
 -  : progress(false), old_clip_distance_out_var(NULL),
 -old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL),
 -new_clip_distance_in_var(NULL), shader_stage(shader_stage)
 -   {
 -   }
 -
 -   

[Nouveau] [PATCH] fb/sddr3: add WR/CWL values seen on a GK208

2015-08-17 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---

Seen on my GK208. Trace available at

http://people.freedesktop.org/~imirkin/traces/gk208/gk208-mmiotrace.log.xz

Thanks to Roy for his assistance on finding the parameters. I tested
this on top of his patch "bios/rammap: Identify DLLoff for >=
GF100". [But not 100% sure if it was necessary.]

 drm/nouveau/nvkm/subdev/fb/sddr3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/fb/sddr3.c 
b/drm/nouveau/nvkm/subdev/fb/sddr3.c
index 037deeb..2410383 100644
--- a/drm/nouveau/nvkm/subdev/fb/sddr3.c
+++ b/drm/nouveau/nvkm/subdev/fb/sddr3.c
@@ -53,7 +53,7 @@ static const struct ramxlat
 ramddr3_wr[] = {
{ 5, 1 }, { 6, 2 }, { 7, 3 }, { 8, 4 }, { 10, 5 }, { 12, 6 },
/* the below are mentioned in some, but not all, ddr3 docs */
-   { 14, 7 }, { 16, 0 },
+   { 14, 7 }, { 15, 7 }, { 16, 0 },
{ -1 }
 };
 
@@ -61,7 +61,7 @@ static const struct ramxlat
 ramddr3_cwl[] = {
{ 5, 0 }, { 6, 1 }, { 7, 2 }, { 8, 3 },
/* the below are mentioned in some, but not all, ddr3 docs */
-   { 9, 4 },
+   { 9, 4 }, { 10, 5 },
{ -1 }
 };
 
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nv50, nvc0: take level into account when doing eng2d multi-layer blits

2015-08-15 Thread Ilia Mirkin
This fixes arb_get_texture_sub_image-get, and any situation where the 2d
engine was being used for multi-layer blits to a non-0 level.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 10.6 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_surface.c | 14 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 14 ++
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c 
b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index b1ae016..77df5ff 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -1387,18 +1387,24 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct 
pipe_blit_info *info)
 PUSH_DATA (push, info-dst.box.z + i);
  } else {
 const unsigned z = info-dst.box.z + i;
+const uint64_t address = dst-base.address +
+   dst-level[info-dst.level].offset +
+   z * dst-layer_stride;
 BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2);
-PUSH_DATAh(push, dst-base.address + z * dst-layer_stride);
-PUSH_DATA (push, dst-base.address + z * dst-layer_stride);
+PUSH_DATAh(push, address);
+PUSH_DATA (push, address);
  }
  if (src-layout_3d) {
 /* not possible because of depth tiling */
 assert(0);
  } else {
 const unsigned z = info-src.box.z + i;
+const uint64_t address = src-base.address +
+   src-level[info-src.level].offset +
+   z * src-layer_stride;
 BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2);
-PUSH_DATAh(push, src-base.address + z * src-layer_stride);
-PUSH_DATA (push, src-base.address + z * src-layer_stride);
+PUSH_DATAh(push, address);
+PUSH_DATA (push, address);
  }
  BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */
  PUSH_DATA (push, srcy  32);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 51a6f93..136a68c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1336,18 +1336,24 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct 
pipe_blit_info *info)
 PUSH_DATA (push, info-dst.box.z + i);
  } else {
 const unsigned z = info-dst.box.z + i;
+const uint64_t address = dst-base.address +
+   dst-level[info-dst.level].offset +
+   z * dst-layer_stride;
 BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
-PUSH_DATAh(push, dst-base.address + z * dst-layer_stride);
-PUSH_DATA (push, dst-base.address + z * dst-layer_stride);
+PUSH_DATAh(push, address);
+PUSH_DATA (push, address);
  }
  if (src-layout_3d) {
 /* not possible because of depth tiling */
 assert(0);
  } else {
 const unsigned z = info-src.box.z + i;
+const uint64_t address = src-base.address +
+   src-level[info-src.level].offset +
+   z * src-layer_stride;
 BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
-PUSH_DATAh(push, src-base.address + z * src-layer_stride);
-PUSH_DATA (push, src-base.address + z * src-layer_stride);
+PUSH_DATAh(push, address);
+PUSH_DATA (push, address);
  }
  BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
  PUSH_DATA (push, srcy  32);
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] [PATCH 1/2] nvc0/ir: detect AND/SHR pairs and convert into EXTBF

2015-08-18 Thread Ilia Mirkin
On Tue, Aug 18, 2015 at 9:57 PM, Matt Turner matts...@gmail.com wrote:
 On Tue, Aug 18, 2015 at 6:49 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Some shaders appear to extract bits using shift/and combos. Detect
 (some) of those and convert to EXTBF instead.

 What is EXTBF? Extract byte to float?

Extract Bitfield.


 I ask because Unigine Heaven has shaders that pack 3x byte-integers
 into one component of a vec4 and extracts them with shifts/ands and
 converts them to floats, and i965 could do the extraction and
 conversion in a single instruction. I'm curious if this is the same
 thing you're optimizing.

 I thought about adding an extract_byte(src, byte_num) operation, but
 i965's copy propagation caused me some headache and I shelved it.

Yes, I think it's the same shader... it's doing a texelFetch() and
then grabbing bytes 0, 1, 2 off that.

The generated shader code after the second patch does:

/*05d0*/   TLD.LL.P R0, R24, 0x0, 2D, 0x3;
/*05d8*/   TEXDEPBAR 0x0;
/*05e0*/   I2F.F32.U8 R2, R1;
/*05e8*/   FFMA.FTZ R2, R2, R15, R19;
/*05f0*/   I2F.F32.U8 R8, R1.B1;
/*05f8*/   FFMA.FTZ R8, R8, R15, R19;
/*0608*/   I2F.F32.U8 R1, R1.B2;

I'll let you guess what these things mean. TLD = texelfetch :)

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nvc0: make use of conservative depth info for forcing early z tests

2015-08-23 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---

Entirely untested as there are no piglit tests for this
functionality. Won't push until some appear, but wanted to get it out
there.

 .../drivers/nouveau/codegen/nv50_ir_driver.h   |  2 +-
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  3 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c|  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h|  2 +-
 .../drivers/nouveau/nvc0/nvc0_shader_state.c   |  5 
 .../drivers/nouveau/nvc0/nvc0_state_validate.c | 30 --
 6 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 2b9edcf..14acb60 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -140,7 +140,7 @@ struct nv50_ir_prog_info
   struct {
  unsigned numColourResults;
  bool writesDepth;
- bool earlyFragTests;
+ bool depthLayout;
  bool separateFragData;
  bool usesDiscard;
   } fp;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f153674..dcfa4c4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -947,6 +947,9 @@ void Source::scanProperty(const struct tgsi_full_property 
*prop)
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
   // we don't care
   break;
+   case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
+  info-prop.fp.depthLayout = prop-u[0].Data;
+  break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
   info-io.genUserClip = -1;
   break;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 12f1bb7..44d951b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -452,7 +452,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct 
nv50_ir_prog_info *info)
  fp-hdr[18] |= info-out[i].mask  info-out[i].slot[0];
}
 
-   fp-fp.early_z = info-prop.fp.earlyFragTests;
+   fp-fp.depth_layout = info-prop.fp.depthLayout;
 
return 0;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 390e0c7..fa14d68 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -44,7 +44,7 @@ struct nvc0_program {
   bool need_vertex_id;
} vp;
struct {
-  uint8_t early_z;
+  uint8_t depth_layout;
   uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
   uint8_t sample_interp;
} fp;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8f8ac2d..1c87714 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -113,11 +113,6 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
  return;
nvc0_program_update_context_state(nvc0, fp, 4);
 
-   if (fp-fp.early_z != nvc0-state.early_z_forced) {
-  nvc0-state.early_z_forced = fp-fp.early_z;
-  IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp-fp.early_z);
-   }
-
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
PUSH_DATA (push, 0x51);
PUSH_DATA (push, fp-code_base);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 47bd66d..609b3b8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -517,25 +517,51 @@ nvc0_validate_global_residents(struct nvc0_context *nvc0,
}
 }
 
+static bool
+nvc0_depth_layout_test_compatible(unsigned depth_layout, unsigned test)
+{
+   if (depth_layout  (test == PIPE_FUNC_ALWAYS || test == PIPE_FUNC_NEVER))
+  return true;
+   switch (depth_layout) {
+   case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
+  return true;
+   case TGSI_FS_DEPTH_LAYOUT_GREATER:
+  return test == PIPE_FUNC_GREATER || test == PIPE_FUNC_GEQUAL;
+   case TGSI_FS_DEPTH_LAYOUT_LESS:
+  return test == PIPE_FUNC_LESS || test == PIPE_FUNC_LEQUAL;
+   default:
+  return false;
+   }
+}
+
 static void
 nvc0_validate_derived_1(struct nvc0_context *nvc0)
 {
struct nouveau_pushbuf *push = nvc0-base.pushbuf;
+   struct nvc0_program *fp = nvc0-fragprog;
bool rasterizer_discard;
+   bool early_z = false;
 
if (nvc0-rast  nvc0-rast-pipe.rasterizer_discard) {
   rasterizer_discard = true;
} else {
   bool zs = nvc0-zsa 
  (nvc0-zsa-pipe.depth.enabled || nvc0-zsa-pipe.stencil[0].enabled);
-  rasterizer_discard = !zs 
- (!nvc0-fragprog || !nvc0-fragprog-hdr[18]);
+  rasterizer_discard = !zs  (!fp || !fp

[Nouveau] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used

2015-08-24 Thread Ilia Mirkin
The hardware only generates vertexid when vertices come from a VBO. This
fixes:

  vertexid-drawelements
  vertexid-drawarrays

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: 11.0 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 +
 src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 +
 src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 02dc367..eff4477 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
   case TGSI_SEMANTIC_VERTEXID:
  prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
  prog-vp.attrs[2] |= 
NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+ prog-vp.vertexid = 1;
  continue;
   default:
  break;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h 
b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 5d3ff56..f4e8e94 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,6 +76,7 @@ struct nv50_program {
   ubyte psiz;/* output slot of point size */
   ubyte bfc[2];  /* indices into varying for FFC (FP) or BFC (VP) */
   ubyte edgeflag;
+  ubyte vertexid;
   ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
   ubyte clpd_nr;
} vp;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b304a17..66dcf43 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,7 +503,8 @@ static struct state_validate {
 { nv50_validate_samplers,  NV50_NEW_SAMPLERS },
 { nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+   NV50_NEW_VERTPROG },
 { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 600b973..fb4305f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
unsigned i;
const unsigned n = MAX2(vertex-num_elements, nv50-state.num_vtxelts);
 
+   /* A vertexid is not generated for inline data uploads. Have to use a
+* VBO. This check must come after the vertprog has been validated,
+* otherwise vertexid may be unset.
+*/
+   assert(nv50-vertprog-translated);
+   if (nv50-vertprog-vp.vertexid)
+  nv50-vbo_push_hint = 0;
+
if (unlikely(vertex-need_conversion))
   nv50-vbo_fifo = ~0;
else
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used

2015-08-24 Thread Ilia Mirkin
On Mon, Aug 24, 2015 at 11:57 AM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:


 On 24.08.2015 17:51, Ilia Mirkin wrote:

 The hardware only generates vertexid when vertices come from a VBO. This
 fixes:

vertexid-drawelements
vertexid-drawarrays

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: 11.0 mesa-sta...@lists.freedesktop.org
 ---
   src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 +
   src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 +
   src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++-
   src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 
   4 files changed, 12 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c
 b/src/gallium/drivers/nouveau/nv50/nv50_program.c
 index 02dc367..eff4477 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
 @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info
 *info)
 case TGSI_SEMANTIC_VERTEXID:
prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog-vp.attrs[2] |=
 NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
 + prog-vp.vertexid = 1;
continue;
 default:
break;
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h
 b/src/gallium/drivers/nouveau/nv50/nv50_program.h
 index 5d3ff56..f4e8e94 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
 @@ -76,6 +76,7 @@ struct nv50_program {
 ubyte psiz;/* output slot of point size */
 ubyte bfc[2];  /* indices into varying for FFC (FP) or BFC
 (VP) */
 ubyte edgeflag;
 +  ubyte vertexid;
 ubyte clpd[2]; /* output slot of clip distance[i]'s 1st
 component */
 ubyte clpd_nr;
  } vp;
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 index b304a17..66dcf43 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 @@ -503,7 +503,8 @@ static struct state_validate {
   { nv50_validate_samplers,  NV50_NEW_SAMPLERS },
   { nv50_stream_output_validate, NV50_NEW_STRMOUT |
  NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG
 },
 -{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
 +{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
 +   NV50_NEW_VERTPROG },
   { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
   };
   #define validate_list_len (sizeof(validate_list) /
 sizeof(validate_list[0]))
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 index 600b973..fb4305f 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 @@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context
 *nv50)
  unsigned i;
  const unsigned n = MAX2(vertex-num_elements,
 nv50-state.num_vtxelts);
   +   /* A vertexid is not generated for inline data uploads. Have to use
 a
 +* VBO. This check must come after the vertprog has been validated,
 +* otherwise vertexid may be unset.
 +*/
 +   assert(nv50-vertprog-translated);
 +   if (nv50-vertprog-vp.vertexid)
 +  nv50-vbo_push_hint = 0;
 +
  if (unlikely(vertex-need_conversion))
 nv50-vbo_fifo = ~0;
  else

 LGTM!

Thanks. I was a little torn on whether to do it this way (which
penalizes someone switching vertex programs while keeping the vertex
setup the same), as opposed to doing it the other way, which would
penalize every program that uses vertexid (by calling
nv50_vertex_arrays_validate twice for those).

Ideally I'd do it in a way that penalized neither, but that's just not
going to happen :)
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] nvc0/ir: detect AND/SHR pairs and convert into EXTBF

2015-08-18 Thread Ilia Mirkin
Some shaders appear to extract bits using shift/and combos. Detect
(some) of those and convert to EXTBF instead.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 66 +++---
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 3841c33..b0e74f0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1023,27 +1023,53 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
imm0, int s)
 
case OP_AND:
{
-  CmpInstruction *cmp = i-getSrc(t)-getInsn()-asCmp();
-  if (!cmp || cmp-op == OP_SLCT || cmp-getDef(0)-refCount()  1)
- return;
-  if (!prog-getTarget()-isOpSupported(cmp-op, TYPE_F32))
- return;
-  if (imm0.reg.data.f32 != 1.0)
- return;
-  if (i-getSrc(t)-getInsn()-dType != TYPE_U32)
- return;
+  Instruction *src = i-getSrc(t)-getInsn();
+  ImmediateValue imm1;
+  if (imm0.reg.data.u32 == 0) {
+ i-op = OP_MOV;
+ i-setSrc(0, new_ImmediateValue(prog, 0u));
+ i-src(0).mod = Modifier(0);
+ i-setSrc(1, NULL);
+  } else if (imm0.reg.data.u32 == ~0U) {
+ i-op = i-src(t).mod.getOp();
+ if (t) {
+i-setSrc(0, i-getSrc(t));
+i-src(0).mod = i-src(t).mod;
+ }
+ i-setSrc(1, NULL);
+  } else if (src-asCmp()) {
+ CmpInstruction *cmp = src-asCmp();
+ if (!cmp || cmp-op == OP_SLCT || cmp-getDef(0)-refCount()  1)
+return;
+ if (!prog-getTarget()-isOpSupported(cmp-op, TYPE_F32))
+return;
+ if (imm0.reg.data.f32 != 1.0)
+return;
+ if (cmp-dType != TYPE_U32)
+return;
 
-  i-getSrc(t)-getInsn()-dType = TYPE_F32;
-  if (i-src(t).mod != Modifier(0)) {
- assert(i-src(t).mod == Modifier(NV50_IR_MOD_NOT));
- i-src(t).mod = Modifier(0);
- cmp-setCond = inverseCondCode(cmp-setCond);
-  }
-  i-op = OP_MOV;
-  i-setSrc(s, NULL);
-  if (t) {
- i-setSrc(0, i-getSrc(t));
- i-setSrc(t, NULL);
+ cmp-dType = TYPE_F32;
+ if (i-src(t).mod != Modifier(0)) {
+assert(i-src(t).mod == Modifier(NV50_IR_MOD_NOT));
+i-src(t).mod = Modifier(0);
+cmp-setCond = inverseCondCode(cmp-setCond);
+ }
+ i-op = OP_MOV;
+ i-setSrc(s, NULL);
+ if (t) {
+i-setSrc(0, i-getSrc(t));
+i-setSrc(t, NULL);
+ }
+  } else if (prog-getTarget()-isOpSupported(OP_EXTBF, TYPE_U32) 
+ src-op == OP_SHR 
+ src-src(1).getImmediate(imm1) 
+ i-src(t).mod == Modifier(0) 
+ util_is_power_of_two(imm0.reg.data.u32 + 1)) {
+ // low byte = offset, high byte = width
+ uint32_t ext = (util_last_bit(imm0.reg.data.u32)  8) | 
imm1.reg.data.u32;
+ i-op = OP_EXTBF;
+ i-setSrc(0, src-getSrc(0));
+ i-setSrc(1, new_ImmediateValue(prog, ext));
   }
}
   break;
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 2/2] nvc0/ir: detect i2f/i2i which operate on specific bytes/words

2015-08-18 Thread Ilia Mirkin
Some Unigine shaders have been observed to unpack bytes out of 32-bit
integers and convert them to floats. I2F/I2I can handle this sort of
thing directly. Detect the handleable situations.

This misses 16-bit word capabilities in nv50, but I haven't seen shaders
that would actually make use of that.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp |  1 +
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |  2 +
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  |  4 ++
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 79 --
 4 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index f06056f..8f15429 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i)
 
code[0] |= typeSizeofLog2(dType)  10;
code[0] |= typeSizeofLog2(i-sType)  12;
+   code[1] |= i-subOp  12;
 
if (isSignedIntType(dType))
   code[0] |= 0x4000;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index ef5c87d..6e22788 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F()
emitField(0x31, 1, (insn-op == OP_ABS) || insn-src(0).mod.abs());
emitCC   (0x2f);
emitField(0x2d, 1, (insn-op == OP_NEG) || insn-src(0).mod.neg());
+   emitField(0x29, 2, insn-subOp);
emitRND  (0x27, rnd, -1);
emitField(0x0d, 1, isSignedType(insn-sType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn-sType)));
@@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I()
emitField(0x31, 1, (insn-op == OP_ABS) || insn-src(0).mod.abs());
emitCC   (0x2f);
emitField(0x2d, 1, (insn-op == OP_NEG) || insn-src(0).mod.neg());
+   emitField(0x29, 2, insn-subOp);
emitField(0x0d, 1, isSignedType(insn-sType));
emitField(0x0c, 1, isSignedType(insn-dType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn-sType)));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 5703712..6bf5219 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
   code[0] |= util_logbase2(typeSizeof(dType))  20;
   code[0] |= util_logbase2(typeSizeof(i-sType))  23;
 
+  // for 8/16 source types, the byte/word is in subOp. word 1 is
+  // represented as 2.
+  code[1] |= i-subOp  0x17;
+
   if (sat)
  code[0] |= 0x20;
   if (abs)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index b0e74f0..e37420c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1312,7 +1312,8 @@ private:
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
-   void handleCVT(Instruction *);
+   void handleCVT_NEG(Instruction *);
+   void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
 
BuildUtil bld;
@@ -1563,12 +1564,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop)
 // nv50:
 //  F2I(NEG(I2F(ABS(SET
 void
-AlgebraicOpt::handleCVT(Instruction *cvt)
+AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
 {
+   Instruction *insn = cvt-getSrc(0)-getInsn();
if (cvt-sType != TYPE_F32 ||
cvt-dType != TYPE_S32 || cvt-src(0).mod != Modifier(0))
   return;
-   Instruction *insn = cvt-getSrc(0)-getInsn();
if (!insn || insn-op != OP_NEG || insn-dType != TYPE_F32)
   return;
if (insn-src(0).mod != Modifier(0))
@@ -1598,6 +1599,74 @@ AlgebraicOpt::handleCVT(Instruction *cvt)
delete_Instruction(prog, cvt);
 }
 
+// Some shaders extract packed bytes out of words and convert them to
+// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
+// nv50 for word sizes.
+//
+// CVT(EXTBF(x, byte/word))
+// CVT(AND(bytemask, x))
+// CVT(AND(bytemask, SHR(x, 8/16/24)))
+void
+AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
+{
+   Instruction *insn = cvt-getSrc(0)-getInsn();
+   ImmediateValue imm0, imm1;
+   Value *arg = NULL;
+   unsigned width, offset;
+   if ((cvt-sType != TYPE_U32  cvt-sType != TYPE_S32) || !insn)
+  return;
+   if (insn-op == OP_EXTBF  insn-src(1).getImmediate(imm0)) {
+  width = (imm0.reg.data.u32  8)  0xff;
+  offset = imm0.reg.data.u32  0xff;
+  arg = insn-getSrc(0);
+
+  if (width != 8  width != 16)
+ return;
+  if (width == 8

[Nouveau] Constbuf uploads on G80 and GF100+

2015-06-29 Thread Ilia Mirkin
Hello,

It seems that NVIDIA GPUs, at least starting with G80, have an
optimized path for the sequence

draw;
update consts;
draw;
update consts;
etc

Whereby it will start processing draw2 before draw1 is done. To do
this, it appears there's some magic constbuf cache on the chip which
buffers the updates to the right draw, eventually serializing them all
out to memory as if it were all done serially.

In order to make it into this magic constbuf cache, there are special
constbuf upload entrypoints, on GF100 they are method 0x2390 and the
associated methods that come right before it.

However in order for it to all work out as one might hope, the CB
settings that were in place when the CB was bound (via method 0x2410)
have to match the ones used for upload, specifically the address. So
if you have a CB at address 0x1000 of size 0x1000, and you decide to
update its data at 0x800, it appears that you have to use that same
initial 0x1000 as the base and 0x800 as the offset. If you use an
address of 0x1800, it won't notice that the CB is bound.

This is easy enough to handle. But what do you do when some genius
wants to have two overlapping buffers, and updates the overlapping
area? For example

glBufferData(GL_UNIFORM_BUFFER, 0x1000)
glBindBufferRange(GL_UNIFORM_BUFFER, 1, buf, 0, 0x200);
glBindBufferRange(GL_UNIFORM_BUFFER, 2, buf, 0x100, 0x200);

and then try to do a glMapBufferRange(buf, 0x100 - 0x1000) or something.

Is there a way to handle it while playing nice with the CB update
buffer mechanism, or do you have to give up and do a serialize (method
0x110) followed by a memory barrier (0x21c)? Or do you just pick
whichever one you like, as long as any were bound and it's good?

Also, on G80-era GPUs the constbuf upload process is a bit different,
where it wants the uploads to go to a specific binding point. How
should the overlapping situation be handled there?

Thanks for any info on this!

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] fbcon/nv11-: correctly account for ring space usage

2015-06-29 Thread Ilia Mirkin
The RING_SPACE macro accounts for how much space is used up, so it's
important to ask it for the right amount. Incorrect accounting of this
can cause page faults down the line as writes are attempted outside of
the ring.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: sta...@vger.kernel.org
---
 drm/nouveau/nv04_fbcon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drm/nouveau/nv04_fbcon.c b/drm/nouveau/nv04_fbcon.c
index 4ef602c..495c576 100644
--- a/drm/nouveau/nv04_fbcon.c
+++ b/drm/nouveau/nv04_fbcon.c
@@ -203,7 +203,7 @@ nv04_fbcon_accel_init(struct fb_info *info)
if (ret)
return ret;
 
-   if (RING_SPACE(chan, 49)) {
+   if (RING_SPACE(chan, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) {
nouveau_fbcon_gpu_lockup(info);
return 0;
}
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] fbcon/g80: reduce PUSH_SPACE alloc, fire ring on accel init

2015-06-29 Thread Ilia Mirkin
Only 58 words get written to the ring, not 59. Also, normalize the accel
init wrt nvc0 and nv04 fbcon impls by firing the ring at accel init time
rather than waiting until later.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 drm/nouveau/nv50_fbcon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drm/nouveau/nv50_fbcon.c b/drm/nouveau/nv50_fbcon.c
index 394c89a..901130b 100644
--- a/drm/nouveau/nv50_fbcon.c
+++ b/drm/nouveau/nv50_fbcon.c
@@ -188,7 +188,7 @@ nv50_fbcon_accel_init(struct fb_info *info)
if (ret)
return ret;
 
-   ret = RING_SPACE(chan, 59);
+   ret = RING_SPACE(chan, 58);
if (ret) {
nouveau_fbcon_gpu_lockup(info);
return ret;
@@ -252,6 +252,7 @@ nv50_fbcon_accel_init(struct fb_info *info)
OUT_RING(chan, info->var.yres_virtual);
OUT_RING(chan, upper_32_bits(fb->vma.offset));
OUT_RING(chan, lower_32_bits(fb->vma.offset));
+   FIRE_RING(chan);
 
return 0;
 }
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] What are the restrictions around loading indirect constbuf values

2015-06-29 Thread Ilia Mirkin
On Thu, Jun 25, 2015 at 10:41 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Hello,

 We recently tracked down a bug on Tesla GPUs (i.e. G80-GT218) whereby
 it appears that instructions like

 0028: b5000409 08000780 add rn f32 $r2 $r2 neg c0[$a1]
 0040: b500060d 08004780 add rn f32 $r3 $r3 neg c0[$a1+0x4]

 or with nvdisasm:

 .headerflags@EF_CUDA_SM12 EF_CUDA_PTX_SM(EF_CUDA_SM12)
 /**/ FADD R2, R2, -c[0x0][A1+0x0];  /* 0x08000780b5000409 
 */
 /*0008*/ FADD R3, R3, -c[0x0][A1+0x1];  /* 0x08004780b500060d 
 */

 don't appear to execute properly. However just MOV'ing the values into
 registers works fine. This was observed on a G92 chip. See bug
 https://bugs.freedesktop.org/show_bug.cgi?id=91056.

 I was hoping you could save me some time and let me know what
 instructions can load things like c0[$a1+4] (or maybe it's only in
 combination with the modifier?), and which Tesla-family GPU's have
 those restrictions.

Hm, there's something more subtle going on here. Please disregard. A
simple shader on my GT215 for both vertex and fragment demonstrates
that those instructions work at least some of the time. (I didn't have
a nv50-era card plugged in when I was asking the question, so I
couldn't check for myself.) Perhaps there's something more subtle
going on here, like non-uniformity across execution units...

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Add Option DRI to allow selection of maximum DRI level.

2015-07-29 Thread Ilia Mirkin
On Wed, Jul 29, 2015 at 6:54 AM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 Allow user to select the maximum level of DRI implementation
 to use, DRI2 or DRI3.

 exa accel supports both DRI2 and, if the kernel supports
 rendernodes, also DRI3. However, DRI3 still seems to have
 some bugs on current implementations, and additionally it
 doesn't work well at all for X-Servers older than 1.16.3
 due to X-Server bugs. Therefore we default to DRI2 on exa,
 but allow the user to enable DRI3 with this new option.

 nouveau's glamor accel backend currently doesn't work under
 DRI2 at all, so we continue to use DRI3 whenever it is
 supported and ignore this new option for now.

 Also add a bit more output about status of Present and
 DRI3 to aid debugging.

 Note: This was originally meant to be a boolean parameter,
   to just select between DRI3 on and off, but changed
   here to a DRI level to make it consistent with the
   same option in the released Intel-ddx.

 Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com
 Cc: Ilia Mirkin imir...@alum.mit.edu
 Cc: Emil Velikov emil.l.veli...@gmail.com
 Cc: Martin Peres martin.pe...@free.fr
 Cc: Ben Skeggs bske...@redhat.com
 ---
  man/nouveau.man  |  6 ++
  src/nouveau_dri2.c   | 11 ++-
  src/nouveau_glamor.c |  2 +-
  src/nv_const.h   |  2 ++
  src/nv_driver.c  | 30 --
  src/nv_type.h|  1 +
  6 files changed, 48 insertions(+), 4 deletions(-)

 diff --git a/man/nouveau.man b/man/nouveau.man
 index 129bb7f..3d5a428 100644
 --- a/man/nouveau.man
 +++ b/man/nouveau.man
 @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as 
 defined in that
  specification.
  .br
  Default: 1.
 +.TP
 +.BI Option \*qDRI\*q \*q integer \*q
 +Define the maximum level of DRI to enable. Valid values are 2 or 3.
 +exa acceleration will honor the maximum level if it is supported.
 +Under glamor acceleration DRI3 is always enabled if supported,
 +as glamor currently does not support DRI2. Default: 2 on exa, 3 on glamor.
  .SH SEE ALSO
  __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), 
 Xserver(__appmansuffix__), X(__miscmansuffix__)
  .SH AUTHORS
 diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
 index ce6f53e..81ee9be 100644
 --- a/src/nouveau_dri2.c
 +++ b/src/nouveau_dri2.c
 @@ -1134,7 +1134,16 @@ nouveau_dri3_screen_init(ScreenPtr screen)
 if (buf  stat(buf, render) == 0 
 master.st_mode == render.st_mode) {
 pNv-render_node = buf;
 -   return dri3_screen_init(screen, nouveau_dri3_screen_info);
 +   if (dri3_screen_init(screen, nouveau_dri3_screen_info)) {
 +   xf86DrvMsg(pScrn-scrnIndex, X_INFO,
 +  DRI3 on EXA enabled\n);
 +   return TRUE;
 +   }
 +   else {
 +   xf86DrvMsg(pScrn-scrnIndex, X_WARNING,
 +  DRI3 on EXA initialization failed\n);
 +   return FALSE;
 +   }
 } else
 free(buf);
  #endif
 diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c
 index b8bca17..a8e9206 100644
 --- a/src/nouveau_glamor.c
 +++ b/src/nouveau_glamor.c
 @@ -240,7 +240,7 @@ nouveau_glamor_init(ScreenPtr screen)
 screen-SharePixmapBacking = nouveau_glamor_share_pixmap_backing;
 screen-SetSharedPixmapBacking = 
 nouveau_glamor_set_shared_pixmap_backing;

 -   xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised\n);
 +   xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised with 
 DRI3\n);
 pNv-Flush = nouveau_glamor_flush;
 return TRUE;
  }
 diff --git a/src/nv_const.h b/src/nv_const.h
 index f1b4e9b..3f18d23 100644
 --- a/src/nv_const.h
 +++ b/src/nv_const.h
 @@ -18,6 +18,7 @@ typedef enum {
  OPTION_SWAP_LIMIT,
  OPTION_ASYNC_COPY,
  OPTION_ACCELMETHOD,
 +OPTION_DRI,
  } NVOpts;


 @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions[] = {
  { OPTION_SWAP_LIMIT,   SwapLimit,OPTV_INTEGER,   {0}, FALSE },
  { OPTION_ASYNC_COPY,   AsyncUTSDFS,  OPTV_BOOLEAN,   {0}, FALSE },
  { OPTION_ACCELMETHOD,  AccelMethod,  OPTV_STRING,{0}, FALSE },
 +{ OPTION_DRI,  DRI,  OPTV_INTEGER,   {0}, FALSE },
  { -1,   NULL,   OPTV_NONE,  {0}, FALSE }
  };

 diff --git a/src/nv_driver.c b/src/nv_driver.c
 index 4218e4f..b284d96 100644
 --- a/src/nv_driver.c
 +++ b/src/nv_driver.c
 @@ -1095,6 +1095,25 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 pNv-ce_enabled =
 xf86ReturnOptValBool(pNv-Options, OPTION_ASYNC_COPY, FALSE);

 +   /* Define maximum allowed level of DRI implementation to use.
 +* We default to DRI2 on EXA for now, as DRI3 still has some
 +* problems. However, the max_dri_level can be only honored
 +* by EXA, as GLAMOR only

Re: [Nouveau] [PATCH 2/2] Add Option DRI to allow selection of maximum DRI level. (v2)

2015-07-29 Thread Ilia Mirkin
Series is Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

I'll let it sit for a day or so in case others have feedback.

On Wed, Jul 29, 2015 at 8:39 AM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 Allow user to select the maximum level of DRI implementation
 to use, DRI2 or DRI3.

 exa accel supports both DRI2 and, if the kernel supports
 rendernodes, also DRI3. However, DRI3 still seems to have
 some bugs on current implementations, and additionally it
 doesn't work well at all for X-Servers older than 1.16.3
 due to X-Server bugs. Therefore we default to DRI2 on exa,
 but allow the user to enable DRI3 with this new option.

 nouveau's glamor accel backend currently doesn't work under
 DRI2 at all, so we continue to use DRI3 whenever it is
 supported and ignore this new option for now.

 Also add a bit more output about status of Present and
 DRI3 to aid debugging.

 Note: This was originally meant to be a boolean parameter,
   to just select between DRI3 on and off, but changed
   here to a DRI level to make it consistent with the
   same option in the released Intel-ddx.

 v2: Use fixed up Bool return type of nouveau_present_init().

 Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com
 Cc: Ilia Mirkin imir...@alum.mit.edu
 Cc: Emil Velikov emil.l.veli...@gmail.com
 Cc: Martin Peres martin.pe...@free.fr
 Cc: Ben Skeggs bske...@redhat.com
 ---
  man/nouveau.man  |  6 ++
  src/nouveau_dri2.c   | 11 ++-
  src/nouveau_glamor.c |  2 +-
  src/nv_const.h   |  2 ++
  src/nv_driver.c  | 30 --
  src/nv_type.h|  1 +
  6 files changed, 48 insertions(+), 4 deletions(-)

 diff --git a/man/nouveau.man b/man/nouveau.man
 index 129bb7f..3d5a428 100644
 --- a/man/nouveau.man
 +++ b/man/nouveau.man
 @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as 
 defined in that
  specification.
  .br
  Default: 1.
 +.TP
 +.BI Option \*qDRI\*q \*q integer \*q
 +Define the maximum level of DRI to enable. Valid values are 2 or 3.
 +exa acceleration will honor the maximum level if it is supported.
 +Under glamor acceleration DRI3 is always enabled if supported,
 +as glamor currently does not support DRI2. Default: 2 on exa, 3 on glamor.
  .SH SEE ALSO
  __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), 
 Xserver(__appmansuffix__), X(__miscmansuffix__)
  .SH AUTHORS
 diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
 index ce6f53e..81ee9be 100644
 --- a/src/nouveau_dri2.c
 +++ b/src/nouveau_dri2.c
 @@ -1134,7 +1134,16 @@ nouveau_dri3_screen_init(ScreenPtr screen)
 if (buf  stat(buf, render) == 0 
 master.st_mode == render.st_mode) {
 pNv-render_node = buf;
 -   return dri3_screen_init(screen, nouveau_dri3_screen_info);
 +   if (dri3_screen_init(screen, nouveau_dri3_screen_info)) {
 +   xf86DrvMsg(pScrn-scrnIndex, X_INFO,
 +  DRI3 on EXA enabled\n);
 +   return TRUE;
 +   }
 +   else {
 +   xf86DrvMsg(pScrn-scrnIndex, X_WARNING,
 +  DRI3 on EXA initialization failed\n);
 +   return FALSE;
 +   }
 } else
 free(buf);
  #endif
 diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c
 index b8bca17..a8e9206 100644
 --- a/src/nouveau_glamor.c
 +++ b/src/nouveau_glamor.c
 @@ -240,7 +240,7 @@ nouveau_glamor_init(ScreenPtr screen)
 screen-SharePixmapBacking = nouveau_glamor_share_pixmap_backing;
 screen-SetSharedPixmapBacking = 
 nouveau_glamor_set_shared_pixmap_backing;

 -   xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised\n);
 +   xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised with 
 DRI3\n);
 pNv-Flush = nouveau_glamor_flush;
 return TRUE;
  }
 diff --git a/src/nv_const.h b/src/nv_const.h
 index f1b4e9b..3f18d23 100644
 --- a/src/nv_const.h
 +++ b/src/nv_const.h
 @@ -18,6 +18,7 @@ typedef enum {
  OPTION_SWAP_LIMIT,
  OPTION_ASYNC_COPY,
  OPTION_ACCELMETHOD,
 +OPTION_DRI,
  } NVOpts;


 @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions[] = {
  { OPTION_SWAP_LIMIT,   SwapLimit,OPTV_INTEGER,   {0}, FALSE },
  { OPTION_ASYNC_COPY,   AsyncUTSDFS,  OPTV_BOOLEAN,   {0}, FALSE },
  { OPTION_ACCELMETHOD,  AccelMethod,  OPTV_STRING,{0}, FALSE },
 +{ OPTION_DRI,  DRI,  OPTV_INTEGER,   {0}, FALSE },
  { -1,   NULL,   OPTV_NONE,  {0}, FALSE }
  };

 diff --git a/src/nv_driver.c b/src/nv_driver.c
 index 4218e4f..514a8bc 100644
 --- a/src/nv_driver.c
 +++ b/src/nv_driver.c
 @@ -1095,6 +1095,25 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 pNv-ce_enabled =
 xf86ReturnOptValBool(pNv-Options, OPTION_ASYNC_COPY, FALSE);

 +   /* Define maximum allowed level of DRI implementation

Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x

2015-08-03 Thread Ilia Mirkin
On Mon, Aug 3, 2015 at 1:31 PM, Hans de Goede hdego...@redhat.com wrote:
 Hi,


 On 03-08-15 17:36, Ilia Mirkin wrote:

 On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com wrote:

 Hi,

 On 30-07-15 16:09, Ilia Mirkin wrote:


 FWIW this is a fail on nv50+ as well. See for example
 https://bugs.freedesktop.org/show_bug.cgi?id=91445

 My suspicion is that this is due to the lack of PUSH_KICK in the *Done
 exa handlers -- works fine with DRI2, but DRI3 has no synchronization
 and so the commands never get flushed out. Easily verified by sticking
 PUSH_KICK's everywhere.



 I do not believe that that is the problem, in my case it clearly
 seems to be a pitch / swizzle problem rather than a synchronization
 problem, here is what my desktop with gnome shell looks like when
 using DRI2:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg

 And this is what it looks like when using DRI3:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg

 The DRI2 screenshot is made with Mario's 2 patches on top of
 current master:

 http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html
 http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html

 And then adding Option "DRI" "2" to xorg.conf.


 His patches should have defaulted it to DRI 2 I think, so this is
 unnecessary. In fact you should have had to say DRI 3 to get DRI3
 with his patches.
   --


 I've also tried disabling EXA using Option AccelMethod none,
 but that seems to also automatically disable all DRI, leading to
 software rendering.

 I discussed this with Ben this morning and he suggested that this
 is likely a Mesa issue since with DRI3 mesa rather than the ddx
 allocs the surfaces. I've tried disabling swizzling in the
 mesa code by forcing nv30_miptree_create() to always take
 the code path for linear textures, but that leads to the exact
 same result as before that change.


 Ah yes. Very different problem indeed. I actually suspect it has to do
 with swizzling. Look at the white pattern of the moon -- it's all in a
 line. That means that it expected some locality and instead it got
 drawn all on a line. If it were merely a stride problem, I'd expect to
 see strips of the moon below and offset from one another.

 So... take a look at nv30_miptree_from_handle -- I wonder if it can
 now receive swizzled textures where it couldn't before.


 Ok, that does go in the direction I am expecting the problem to be,
 but I'm afraid I'm going to need a bit more guidance, what exactly
 am I looking for in that function / which knobs should I try to
 vary / play with to maybe fix this ?

Unfortunately this is playing near (or past) the limits of my
knowledge as well. My understanding is that DRI3 passes pixmaps around
with dma-buf, aka bo_from_handle. DRI2 uses some other mechanism
which is not that (I think it just copies stuff around).

Now on nv50+, bo's have tile flags (and memtype and probably other
annoyances). The tile flags indicate the specific tiling mechanism
used on that bo (i.e. do you do 32x32 tiles? 32x64? etc). Take a look
at the nouveau_bo_new() call in nv50_miptree.c -- note how it takes a
bo config argument. This bo config can then later be retrieved using
some other syscall.

However on nv30 there appears to not be any such thing. The
nouveau_bo_new call just passes in NULL for creating the bo, which
means that there's no way to recover the "are you swizzled"
information after-the-fact.

Presumably you should create a nv04 bo config section in the union,
and just pass the single swizzled bit through. I'm not sure what, if
anything, is required on the kernel side for that. I don't think
there's any optionality in how the swizzling is done for pre-nv50.

Note that in the nv30_miptree logic, mt->swizzled implies that
mt->uniform_pitch = 0, but the level pitch is set properly (again,
see nv30_miptree_create).

Hope this sheds some light and doesn't cause you to go in the wrong
direction -- please take everything I say with a grain of salt -- I'm
often a bit off on some of the details.

Cheers,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x

2015-08-11 Thread Ilia Mirkin
On Mon, Aug 10, 2015 at 8:47 AM, Hans de Goede hdego...@redhat.com wrote:
 Hi,


 On 03-08-15 20:09, Ilia Mirkin wrote:

 On Mon, Aug 3, 2015 at 1:31 PM, Hans de Goede hdego...@redhat.com wrote:

 Hi,


 On 03-08-15 17:36, Ilia Mirkin wrote:


 On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com
 wrote:


 Hi,

 On 30-07-15 16:09, Ilia Mirkin wrote:



 FWIW this is a fail on nv50+ as well. See for example
 https://bugs.freedesktop.org/show_bug.cgi?id=91445

 My suspicion is that this is due to the lack of PUSH_KICK in the *Done
 exa handlers -- works fine with DRI2, but DRI3 has no synchronization
 and so the commands never get flushed out. Easily verified by sticking
 PUSH_KICK's everywhere.




 I do not believe that that is the problem, in my case it clearly
 seems to be a pitch / swizzle problem rather than a synchronization
 problem, here is what my desktop with gnome shell looks like when
 using DRI2:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg

 And this is what it looks like when using DRI3:

 https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg

 The DRI2 screenshot is made with Mario's 2 patches on top of
 current master:

 http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html
 http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html

 And then adding Option "DRI" "2" to xorg.conf.



 His patches should have defaulted it to DRI 2 I think, so this is
 unnecessary. In fact you should have had to say DRI 3 to get DRI3
 with his patches.
--



 I've also tried disabling EXA using Option AccelMethod none,
 but that seems to also automatically disable all DRI, leading to
 software rendering.

 I discussed this with Ben this morning and he suggested that this
 is likely a Mesa issue since with DRI3 mesa rather than the ddx
 allocs the surfaces. I've tried disabling swizzling in the
 mesa code by forcing nv30_miptree_create() to always take
 the code path for linear textures, but that leads to the exact
 same result as before that change.



 Ah yes. Very different problem indeed. I actually suspect it has to do
 with swizzling. Look at the white pattern of the moon -- it's all in a
 line. That means that it expected some locality and instead it got
 drawn all on a line. If it were merely a stride problem, I'd expect to
 see strips of the moon below and offset from one another.

 So... take a look at nv30_miptree_from_handle -- I wonder if it can
 now receive swizzled textures where it couldn't before.



 Ok, that does go in the direction I am expecting the problem to be,
 but I'm afraid I'm going to need a bit more guidance, what exactly
 am I looking for in that function / which knobs should I try to
 vary / play with to maybe fix this ?


 Unfortunately this is playing near (or past) the limits of my
 knowledge as well. My understanding is that DRI3 passes pixmaps around
 with dma-buf, aka bo_from_handle. DRI2 uses some other mechanism
 which is not that (I think it just copies stuff around).

 Now on nv50+, bo's have tile flags (and memtype and probably other
 annoyances). The tile flags indicate the specific tiling mechanism
 used on that bo (i.e. do you do 32x32 tiles? 32x64? etc). Take a look
 at the nouveau_bo_new() call in nv50_miptree.c -- note how it takes a
 bo config argument. This bo config can then later be retrieved using
 some other syscall.

 However on nv30 there appears to not be any such thing. The
 nouveau_bo_new call just passes in NULL for creating the bo, which
 means that there's no way to recover the "are you swizzled"
 information after-the-fact.

 Presumably you should create a nv04 bo config section in the union,


 That already exists, and indeed gets set by the nouveau_allocate_surface
 function from src/nv_accel_common.c from the ddx,

 and just pass the single swizzled bit through. I'm not sure what, if
 anything, is required on the kernel side for that. I don't think
 there's any optionality in how the swizzling is done for pre-nv50.

 Note that in the nv30_miptree logic, mt->swizzled implies that
 mt->uniform_pitch = 0, but the level pitch is set properly (again,
 see nv30_miptree_create).

 Hope this sheds some light and doesn't cause you to go in the wrong
 direction -- please take everything I say with a grain of salt -- I'm
 often a bit off on some of the details.


 Thanks this was helpful, I do feel we are getting somewhere, but I do
 need a bit more help.

 I've added some debug printf's to nv30_miptree.c, nv30_miptree_create
 and nv30_miptree_from_handle, where the latter is only used when using
 dri2 (e.g. in the working case).

 Doing a diff between a log of starting gnome-shell with dri2 vs dri3
 results in this:

 --- mesa.log.dri2   2015-08-10 14:18:03.182712022 +0200
 +++ mesa.log.dri3   2015-08-10 14:18:33.26338 +0200
 @@ -1,8 +1,8 @@
  nv30_miptree_create 512x32 uniform_pitch 0 usage 0 flags 0
 -nv30_miptree_from_handle 1x1 uniform_pitch 1024 usage 0 flags 0

Re: [Nouveau] Odd text behavior on Websites and others

2015-08-11 Thread Ilia Mirkin
On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote:
 GeForce GTX 745 is a NVIDIA card in the NV117 (GM107) Family...
 The update was made using DNF (Yum) in my daily update procedure using
 the Fedora 22 Update repository.
 I am not familiar with details I just can report what happens right
 now...

OK, there's no EXA support for maxwell, so you're using glamor. Before
kernel 4.1, unless you had extracted your own ctxsw firmware, you
didn't have acceleration at all, that was likely the change that
triggered the issue.

The glamor integration in nouveau is, sadly, broken. But it's unclear
whether that's the cause of your issue. You can either add

Option "NoAccel" "true"

to the device section in your xorg.conf, which will disable 2d
acceleration and bring you back to the same state you were in before,
or you can add

Driver "modesetting"

(or uninstall xf86-video-nouveau) which will use the modesetting
driver which has a non-broken glamor integration. You may still get
the same issues though, as they could just be generic
mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one else
has been interested in debugging/fixing issues).

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Odd text behavior on Websites and others

2015-08-11 Thread Ilia Mirkin
No, you probably want something in /etc/X11... a lot of the time it's
split up into a bunch of separate files in like /etc/X11/xorg.conf.d
or something. You should consult your distro documentation for how to
make it happen.

On Tue, Aug 11, 2015 at 11:38 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote:
 Thanks - the only xorg.conf I found is -
  /usr/share/abrt/conf.d/plugins/xorg.conf
 Is this the file to be edited?

 --
 Rudolf Künzli rudolf.kun...@gmail.com
 On Tue, 2015-08-11 at 10:56 -0400, Ilia Mirkin wrote:
 On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli 
 rudolf.kun...@gmail.com wrote:
   GeForce GTX 745 is a NVIDIA card in the NV117 (GM107) Family...
  The update was made using DNF (Yum) in my daily update procedure
  using
  the Fedora 22 Update repository.
  I am not familiar with details I just can report what happens right
  now...

 OK, there's no EXA support for maxwell, so you're using glamor.
 Before
 kernel 4.1, unless you had extracted your own ctxsw firmware, you
 didn't have acceleration at all, that was likely the change that
 triggered the issue.

 The glamor integration in nouveau is, sadly, broken. But it's unclear
 whether that's the cause of your issue. You can either add

 Option NoAccel true

 to the device section in your xorg.conf, which will disable 2d
 acceleration and bring you back to the same state you were in before,
 or you can add

 Driver modesetting

 (or uninstall xf86-video-nouveau) which will use the modesetting
 driver which has a non-broken glamor integration. You may still get
 the same issues though, as they could just be generic
 mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one else
 has been interested in debugging/fixing issues).

   -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Odd text behavior on Websites and others

2015-08-11 Thread Ilia Mirkin
Add a file to /etc/X11/xorg.conf.d, named anything-you-want.conf, which contains

Section "Device"
  Driver "modesetting"
EndSection

Hopefully that should do it.

On Tue, Aug 11, 2015 at 6:03 PM, Rudolf Künzli rudolf.kun...@gmail.com wrote:
 I don't have a file /etc/X11/xorg.conf but a folder
 /etc/X11/xorg.conf.d

 [rudolf@mephisto xorg.conf.d]$ ls -la /etc/X11/xorg.conf.d
 total 12
 drwxr-xr-x. 2 root root 4096 Aug  4 08:25 .
 drwxr-xr-x. 6 root root 4096 May 27 11:40 ..
 -rw-r--r--. 1 root root  265 Apr 21 17:06 00-keyboard.conf

 Then a folder /usr/share/X11/xorg.conf.d with several config files but
 I don't see which one to be edited...

 [rudolf@mephisto xorg.conf.d]$ ls -la /usr/share/X11/xorg.conf.d
 total 32
 drwxr-xr-x. 2 root root 4096 Jul 31 18:33 .
 drwxr-xr-x. 7 root root 4096 May 27 11:40 ..
 -rw-r--r--. 1 root root 1099 Jul 15 10:20 10-evdev.conf
 -rw-r--r--. 1 root root 1350 Jul 15 10:20 10-quirks.conf
 -rw-r--r--. 1 root root 2827 May  1 08:23 50-synaptics.conf
 -rw-r--r--. 1 root root  115 May 15 14:49 50-vmmouse.conf
 -rw-r--r--. 1 root root 1385 Mar 20 00:31 50-wacom.conf
 -rw-r--r--. 1 root root  789 Jul 13 00:54 90-libinput.conf

 Any other place to look for. find didn't help...

 I guess I'll have to run x config as root to get a xorg.conf which I
 can edit later...

 --
 Rudolf Künzli rudolf.kun...@gmail.com
 On Tue, 2015-08-11 at 11:42 -0400, Ilia Mirkin wrote:
 No, you probably want something in /etc/X11... a lot of the time it's
 split up into a bunch of separate files in like /etc/X11/xorg.conf.d
 or something. You should consult your distro documentation for how to
 make it happen.

 On Tue, Aug 11, 2015 at 11:38 AM, Rudolf Künzli 
 rudolf.kun...@gmail.com wrote:
  Thanks - the only xorg.conf I found is -
   /usr/share/abrt/conf.d/plugins/xorg.conf
  Is this the file to be edited?
 
  --
  Rudolf Künzli rudolf.kun...@gmail.com
  On Tue, 2015-08-11 at 10:56 -0400, Ilia Mirkin wrote:
   On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli 
   rudolf.kun...@gmail.com wrote:
 GeForce GTX 745 is a NVIDIA card in the NV117 (GM107)
 Family...
The update was made using DNF (Yum) in my daily update
procedure
using
the Fedora 22 Update repository.
I am not familiar with details I just can report what happens
right
now...
  
   OK, there's no EXA support for maxwell, so you're using glamor.
   Before
   kernel 4.1, unless you had extracted your own ctxsw firmware, you
   didn't have acceleration at all, that was likely the change that
   triggered the issue.
  
   The glamor integration in nouveau is, sadly, broken. But it's
   unclear
   whether that's the cause of your issue. You can either add
  
   Option NoAccel true
  
   to the device section in your xorg.conf, which will disable 2d
   acceleration and bring you back to the same state you were in
   before,
   or you can add
  
   Driver modesetting
  
   (or uninstall xf86-video-nouveau) which will use the modesetting
   driver which has a non-broken glamor integration. You may still
   get
   the same issues though, as they could just be generic
   mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one
   else
   has been interested in debugging/fixing issues).
  
 -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Take shift in crtc positions for ZaphodHeads configs into account.

2015-08-06 Thread Ilia Mirkin
I don't understand this patch (what are all these masks? how are they
used?), and don't want to invest the time required to do so.

However Mario is probably the sole serious user of ZaphodHeads, and if
it fixes issues for him, probably fixes issues for others who try and
give up with ZaphodHeads. Any objections if I just push this out?

On Sat, Jun 27, 2015 at 8:33 PM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 In multi-x-screen ZaphodHeads configurations, there isn't a
 one-to-one mapping of kernel provided drmmode crtc index
 to the index of the corresponding xf86Crtc inside the
 xf86CrtcConfig crtc array anymore, ie. for kernel provided
 drmmode-mode_res-crtcs[i], the i'th crtc won't correspond
 to the xf86Crtc in the i'th slot of the x-screens xf86CrtcConfig
 anymore, once ZaphodHeads has only selected a subset of all crtcs
 of a graphics card for a given x-screen, instead of all crtcs.

 This breaks the mapping of bit positions in the bit masks returned
 in kencoder->possible_crtcs and kencoder->possible_clones. A 1 bit
 in position i of those masks allows use of the kernel's i'th crtc for
 the given kencoder. The X-Server's dix code checks those bit masks
 for valid xf86Output - xf86Crtc assignments, assuming that the i'th
 slot xf86CrtcConfigPtr config->crtc[i] corresponds to bit i in the
 xf86Output->possible_crtcs bitmask, and bails if the bitmask doesn't
 allow the specified assignment of crtc to output. If ZaphodHeads
 breaks the assumption of bit i - crtc slot i this ends in failure.

 Take this shift of crtc index positions wrt. encoder bitmask bit
 positions into account by bit-shifting positions accordingly when
 assigning encoder-possible_crtcs to output-possible_crtcs, so
 the proper indices match up again for validation by the dix.

 This problem wasn't apparent last year when testing the ZaphodHeads
 support on some Kepler cards, as apparently the encoder-possible_crtcs
 bitmasks returned for those cards by the kernel just had all 4
 lsb bits set for all tested encoders/output, so each of the cards 4
 crtcs could go with each output and things worked by chance.

 The current code breaks, e.g., on 2010 MacBookPro with nv50, where
 one crtc is hardwired to the internal lvds panel, and one crtc
 is hardwired to the external DP connector, resulting in a failure
 where dual-display on single-x-screen works fine, but assigning
 each output to a separate x-screen via ZaphodHeads fails due to
 the mismatched encoder-possible_crtcs bitmasks.

 This patch fixes the problem.

 Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com
 ---
  src/drmmode_display.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)

 diff --git a/src/drmmode_display.c b/src/drmmode_display.c
 index c30cb3a..3679482 100644
 --- a/src/drmmode_display.c
 +++ b/src/drmmode_display.c
 @@ -1214,7 +1214,7 @@ drmmode_zaphod_match(ScrnInfoPtr pScrn, const char *s, 
 char *output_name)
  }

  static unsigned int
 -drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num)
 +drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int 
 crtcshift)
  {
 NVPtr pNv = NVPTR(pScrn);
 xf86OutputPtr output;
 @@ -1296,8 +1296,8 @@ drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr 
 drmmode, int num)
 output-subpixel_order = subpixel_conv_table[koutput-subpixel];
 output-driver_private = drmmode_output;

 -   output->possible_crtcs = kencoder->possible_crtcs;
 -   output->possible_clones = kencoder->possible_clones;
 +   output->possible_crtcs = kencoder->possible_crtcs >> crtcshift;
 +   output->possible_clones = kencoder->possible_clones >> crtcshift;

 output-interlaceAllowed = true;
 output-doubleScanAllowed = true;
 @@ -1421,6 +1421,7 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, int fd, int 
 cpp)
 NVEntPtr pNVEnt = NVEntPriv(pScrn);
 int i;
 unsigned int crtcs_needed = 0;
 +   int crtcshift;

 drmmode = xnfalloc(sizeof *drmmode);
 drmmode-fd = fd;
 @@ -1444,8 +1445,9 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, int fd, int 
 cpp)
 }

 xf86DrvMsg(pScrn-scrnIndex, X_INFO, Initializing outputs ...\n);
 +   crtcshift = ffs(pNVEnt-assigned_crtcs ^ 0x) - 1;

Mario, any objections if I touch this up as

~pNVEnt->assigned_crtcs? XOR with ~0 is not a pattern I've seen a lot.

 for (i = 0; i  drmmode-mode_res-count_connectors; i++)
 -   crtcs_needed += drmmode_output_init(pScrn, drmmode, i);
 +   crtcs_needed += drmmode_output_init(pScrn, drmmode, i, 
 crtcshift);

 xf86DrvMsg(pScrn-scrnIndex, X_INFO,
%d crtcs needed for screen.\n, crtcs_needed);
 --
 1.9.1

 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org

Re: [Nouveau] [REGRESSION] nouveau: Crash in gk104_fifo_intr_runlist()

2015-08-09 Thread Ilia Mirkin
Alexandre, could you take a look? 0xbad* generally comes from bad mmio
reads.
On Aug 9, 2015 1:08 PM, Eric Biggers ebigge...@gmail.com wrote:

 Hi,

 I am testing Linux v4.2-rc5 and I am sporadically getting crashes shortly
 after
 startup in gk104_fifo_intr_runlist().  What I've found is that the 'mask'
 value
 read from offset 0x2a00 comes back as '0xbad0da00'.  This causes the 'engn'
 variable to be assigned the value 9, which is invalid; then wake_up() is
 called
 on an uninitialized waitqueue which causes the crash.

 Reverting commit 1addc12648521d (drm/nouveau/fifo/gk104: kick channels
 when
 deactivating them) seemed to make the problem go away, although I can't
 be 100%
 sure because the problem is sporadic.

 Attached an example of the kernel log up to the crash.

 Eric

 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Fermi+ shader header docs

2015-08-14 Thread Ilia Mirkin
And as I've just started looking at GM107 traces to fix up
tessellation shader attribute address calculations, I noticed the
following unknown bits in CommonWord3 of TCP shaders:

PB: 0x0021   GM107_3D.SP[0x2].SELECT = { ENABLE | PROGRAM = TCP }
PB: 0x0830   GM107_3D.SP[0x2].START_ID = 0x830
HEADER:
0x04210861   0 = { SPH = VTG | VERSION = 3 | KIND = TCP | GMEM_STORE | SASS_VERS
0x0600   1 = { LMEM_POS_ALLOC = 0 | PATCH_ATTRIBUTES = 6 }
0x0300   2 = { LMEM_NEG_ALLOC = 0 | THREADS_PER_PRIM = 3 }
0x6000   3 = { WARP_CSTACK_SIZE = 0 | 0x6000 }
0xff00   4 = { MIN_OUT_READ_SLOT = 0 | MAX_OUT_READ_SLOT = 0xff }
0xf000   ATTR_EN_0 = 0xf000
0x   ATTR_EN_1 = 0
0x   ATTR_EN_2 = 0
0x   ATTR_EN_3 = 0
0x   ATTR_EN_4 = 0
0x   ATTR_EN_5 = { 0 }
0x   11 = 0
0x   12 = 0
0xf000   EXPORT_EN_0 = { HPOS = 0xf }
0x   EXPORT_EN_1 = 0
0x   EXPORT_EN_2 = 0
0x   EXPORT_EN_3 = 0
0x   EXPORT_EN_4 = 0
0x   EXPORT_EN_5 = { CLIP_DISTANCE = 0 | UNK12 = 0 }
0x   19 = 0

Anything that we need to also be setting?

  -ilia

On Mon, Jun 22, 2015 at 9:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 And an additional question: I have a trace here where a reserved bit
 from CommonWord0 is set. Is that just random values that aren't
 cleared by the driver, or does it have some significance? Here is the
 full shader:

 HEADER:
 0x06040461   0 = { SPH = VTG | VERSION = 3 | KIND = VP_B |
 SASS_VERSION = 2 | LDST_ENABLE | SO_MASK = 0 | 0x200 }
 0x   1 = { LMEM_POS_ALLOC = 0 | PATCH_ATTRIBUTES = 0 }
 0x   2 = { LMEM_NEG_ALLOC = 0 | THREADS_PER_PRIM = 0 }
 0x   3 = { WARP_CSTACK_SIZE = 0 | OUTPUT_PRIM = 0 }
 0x   4 = { MAX_OUTPUT_VERTS = 0 | MIN_OUT_READ_SLOT = 0 |
 MAX_OUT_READ_SLOT = 0 }
 0x   ATTR_EN_0 = 0
 0x   ATTR_EN_1 = 0
 0x   ATTR_EN_2 = 0
 0x   ATTR_EN_3 = 0
 0x   ATTR_EN_4 = 0
 0x   ATTR_EN_5 = { 0 }
 0x   11 = 0
 0x   12 = 0
 0x0001f000   EXPORT_EN_0 = { HPOS = 0xf | 0x1 }
 0x   EXPORT_EN_1 = 0
 0x   EXPORT_EN_2 = 0
 0x   EXPORT_EN_3 = 0
 0x   EXPORT_EN_4 = 0
 0x   EXPORT_EN_5 = { CLIP_DISTANCE = 0 | UNK12 = 0 }
 0x   19 = 0
 CODE:
 : a01088b0 08bcb810 sched 0x2c 0x22 0x4 0x28 0x4 0x2e 0x2f
 0008: 0b1ffc1e 5b601c07 set $p0 0x1 ge u32 0x0 c0[0x3858]
 0010: 103c 1200 $p0 bra 0x38
 0018: 0a1c0002 64c03c07 mov b32 $r0 c0[0x3850]
 0020: 0a9c0006 64c03c07 mov b32 $r1 c0[0x3854]
 0028: 001c cc80 ld b32 $r0 cg g[$r0d]
 0030: 041c003c 1200 bra 0x40

 0038: 7f9c0002 e4c03c00  C  mov b32 $r0 0x0

 0040: 9c108010 090c8c10  C  sched 0x4 0x20 0x4 0x27 0x4 0x23 0x43
 0048: 001c2802 e5c0 cvt rn f32 $r0 u32 $r0
 0050: 341c0006 64c03c00 mov b32 $r1 c0[0x1a0]
 0058: 349c000a 64c03c00 mov b32 $r2 c0[0x1a4]
 0060: 351c000e 64c03c00 mov b32 $r3 c0[0x1a8]
 0068: 359c0012 64c03c00 mov b32 $r4 c0[0x1ac]
 0070: 381ffc06 7f03fc00 st b32 a[0x70] $r1 0x0 0x0
 0078: 3a1ffc0a 7f03fc00 st b32 a[0x74] $r2 0x0 0x0
 0080: 3c110d0c 0801 sched 0x43 0x43 0x4 0x4f 0x0 0x0 0x0
 0088: 3c1ffc0e 7f03fc00 st b32 a[0x78] $r3 0x0 0x0
 0090: 3e1ffc12 7f03fc00 st b32 a[0x7c] $r4 0x0 0x0
 0098: 401ffc02 7f03fc00 st b32 a[0x80] $r0 0x0 0x0
 00a0: 001c003c 1800 exit

 00a8: fc1c003c 12007fff  C  bra 0xa8
 00b0: 001c3c02 8580 nop
 00b8: 001c3c02 8580 nop

 On Sat, May 23, 2015 at 5:35 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Thu, May 21, 2015 at 11:32 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Thu, May 21, 2015 at 10:05 AM, Robert Morell rmor...@nvidia.com wrote:
 Hi Ilia,

 On Sat, May 02, 2015 at 12:34:21PM -0400, Ilia Mirkin wrote:
 Hi,

 As I'm looking to add some support to nouveau for features like atomic
 counters and images, I'm running into some confusion about what the
 first word of the shader header means. Here is the definition as we
 have it today:

 [...]

 However I know that these are somewhat wrong. I've seen shaders that
 use gmem accesses (i.e. mov r0, [r0]) that just have the LMEM enable
 bit set (and they use no lmem). And I've seen additional bits set, esp
 relating to images, but I haven't spent enough time looking at all the
 variations to make sense of it yet. For example, I think that Fermi
 and Kepler+ have different meanings for some of the bits.

 Those look pretty close :)

 I was hoping you could just release the docs for the shader headers,
 or at least the first word of the shader header.

 We've posted the specification for the full Shader Program Header to our
 GPU documentation site here:

 ftp://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html

 I hope it helps clear things up.

 Yep, just a few

[Nouveau] [PATCH v2] nvc0: bind a fake tess control program when there isn't one available

2015-08-15 Thread Ilia Mirkin
Apparently this is necessary in order for tess factors to work in a tess
eval program without a tess control program bound. Probably because it
uses the fake program's shader header to work out the number of patch
constants.

Fixes vs-tes-tessinner-tessouter-inputs

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---

v1 -> v2: improve some of the error handling

 src/gallium/drivers/nouveau/nvc0/nvc0_context.c  |  8 
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h  |  3 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c  | 17 +
 src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c |  7 ++-
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 84f8db6..01080d0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -132,6 +132,9 @@ nvc0_context_unreference_resources(struct nvc0_context 
*nvc0)
   pipe_resource_reference(res, NULL);
}
util_dynarray_fini(nvc0-global_residents);
+
+   if (nvc0-tcp_empty)
+  nvc0-base.pipe.delete_tcs_state(nvc0-base.pipe, nvc0-tcp_empty);
 }
 
 static void
@@ -326,6 +329,11 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
 
/* shader builtin library is per-screen, but we need a context for m2mf */
nvc0_program_library_upload(nvc0);
+   nvc0_program_init_tcp_empty(nvc0);
+   if (!nvc0-tcp_empty)
+  goto out_err;
+   /* set the empty tctl prog on next draw in case one is never set */
+   nvc0-dirty |= NVC0_NEW_TCTLPROG;
 
/* add permanently resident buffers to bufctxts */
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index f449942..df1a891 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -128,6 +128,8 @@ struct nvc0_context {
struct nvc0_program *fragprog;
struct nvc0_program *compprog;
 
+   struct nvc0_program *tcp_empty;
+
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
uint16_t constbuf_valid[6];
@@ -227,6 +229,7 @@ void nvc0_program_destroy(struct nvc0_context *, struct 
nvc0_program *);
 void nvc0_program_library_upload(struct nvc0_context *);
 uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
 uint32_t label);
+void nvc0_program_init_tcp_empty(struct nvc0_context *);
 
 /* nvc0_query.c */
 void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 4941831..e9975ce 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -22,6 +22,8 @@
 
 #include pipe/p_defines.h
 
+#include tgsi/tgsi_ureg.h
+
 #include nvc0/nvc0_context.h
 
 #include codegen/nv50_ir_driver.h
@@ -803,3 +805,18 @@ nvc0_program_symbol_offset(const struct nvc0_program 
*prog, uint32_t label)
  return prog-code_base + base + syms[i].offset;
return prog-code_base; /* no symbols or symbol not found */
 }
+
+void
+nvc0_program_init_tcp_empty(struct nvc0_context *nvc0)
+{
+   struct ureg_program *ureg;
+
+   ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+   if (!ureg)
+  return;
+
+   ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1);
+   ureg_END(ureg);
+
+   nvc0-tcp_empty = ureg_create_shader_and_destroy(ureg, nvc0-base.pipe);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8aa127a..8f8ac2d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -148,8 +148,13 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
   PUSH_DATA (push, tp-num_gprs);
} else {
-  BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+  tp = nvc0-tcp_empty;
+  /* not a whole lot we can do to handle this failure */
+  if (!nvc0_program_validate(nvc0, tp))
+ assert(!unable to validate empty tcp);
+  BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
   PUSH_DATA (push, 0x20);
+  PUSH_DATA (push, tp-code_base);
}
nvc0_program_update_context_state(nvc0, tp, 1);
 }
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [REGRESSION] nouveau: Crash in gk104_fifo_intr_runlist()

2015-08-11 Thread Ilia Mirkin
I'm guessing that optimus is the operative difference, not the
specific chip. Basically something that can be put to sleep via
ACPI...

On Tue, Aug 11, 2015 at 11:53 PM, Alexandre Courbot gnu...@gmail.com wrote:
 Sending the revert patch to Dave after receiving his green light for
 this, and will investigate the issue on my side. I should be able to find a
 gk107 somewhere...

 On Wed, Aug 12, 2015 at 12:35 PM, Alexandre Courbot gnu...@gmail.com wrote:
 Mmm in that case it is probably best to revert that commit for the
 time being. It was targeting GM20B (and maybe other Maxwells too) so
 reverting it should not hurt anyone at the moment. I think Ben is on
 holidays for now, is there anyone else who can send a pull request to
 Dave Airlie for this? We don't want 4.2 to ship with a crash every
 other reboot...

 On Wed, Aug 12, 2015 at 10:01 AM, Eric Biggers ebigge...@gmail.com wrote:
 Hi,

 I think I've done about 10 reboots with the commit reverted and I never
 experienced the crash.  But with 4.2.0-rc6 I get the crash on about every
 other reboot.

 Probably relevant: the computer on which the crash occurs has two GPUs (one
 Intel and one Nvidia).  The Intel one is actually being used, whereas I
 presume the Nvidia one is being automatically disabled shortly after boot,
 perhaps when the crash occurs...

 Eric

 On Mon, Aug 10, 2015 at 11:28 PM, Alexandre Courbot gnu...@gmail.com
 wrote:

 Indeed, and I am actually surprised to see one here. I will
 double-check that patch.

 Eric, would you be able to give an estimate of the repro rate for this
 issue? More testing with and without the patch would be welcome, it'd
 be good to know whether it is actually the culprit or not.

 On Mon, Aug 10, 2015 at 2:28 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
  Alexandre, could you take a look? 0xbad* generally comes from bad mmio
  reads.
 
  On Aug 9, 2015 1:08 PM, Eric Biggers ebigge...@gmail.com wrote:
 
  Hi,
 
  I am testing Linux v4.2-rc5 and I am sporadically getting crashes
  shortly
  after
  startup in gk104_fifo_intr_runlist().  What I've found is that the
  'mask'
  value
  read from offset 0x2a00 comes back as '0xbad0da00'.  This causes the
  'engn'
  variable to be assigned the value 9, which is invalid; then wake_up()
  is
  called
  on an uninitialized waitqueue which causes the crash.
 
  Reverting commit 1addc12648521d (drm/nouveau/fifo/gk104: kick channels
  when
  deactivating them) seemed to make the problem go away, although I
  can't
  be 100%
  sure because the problem is sporadic.
 
  Attached an example of the kernel log up to the crash.
 
  Eric
 
  ___
  Nouveau mailing list
  Nouveau@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/nouveau
 
 


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.

2015-07-24 Thread Ilia Mirkin
Here's an example of what happens with DRI3:
https://bugs.freedesktop.org/show_bug.cgi?id=91445

I'm not equipped to figure out why.

On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 On 07/07/2015 09:51 PM, Ilia Mirkin wrote:

 Lastly, from some discussions with ajax on IRC, it appears that DRI3
 is half-baked at best wrt sync between server and client. I think we
 should just disable it by default for now, until issues are ironed
 out. (Rather than what this patch has, which is default-on for Xorg >=
 some version.)


 What are the remaining known trouble spots wrt. sync? It seems to work
 pretty well at least for single gpu + unredirected fullscreen windows (==
 kms page flipping can be used for Presents). That's the use case i usually
 test very obsessively, as it matters very much for my type of applications,
 but other than that i only lightly test it via regular desktop use, so
 maybe that's where problems remain?

 We can disable it by default on exa - intel and amd/radeon drivers also
 disable by default. However, on gpus >= maxwell only glamor accel is
 supported and glamor on nouveau is either dri3/present or no hw accel at all
 afaics.

 Btw. there are also a few patches made by Chris Wilson floating on the
 mailing list since around january, some are reviewed and tested by myself,
 but not included in xorg master. Might be good for people to have a look at
 them and maybe get them into xorg 1.18?


 On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com
 wrote:

 The DRI option with the intel ddx can be used to indicate the following
   - whether dri is disabled
   - the dri version - dri1, dri2, dri3
   - the dri module name - doo_dri.so bar_dri.so

 I'm not sure how exactly it's supposed to work/works, and I believe
 most of that is due to legacy reasons. I'm just saying let's not do
 the whole thing - just the dri version would be great (as you
 suggested).


 I can change that to allow selection between 2 and 3 - at least for exa,
 on glamor the parameter 2 would either need to get ignored or it would
 completely disable hw acceleration. I went for consistency with the ati ddx
 because i found the intel variant too confusing. I think it changed multiple
 times during the last year.

 thanks,
 -mario


 -Emil


 On 4 July 2015 at 19:28, Ilia Mirkin imir...@alum.mit.edu wrote:

 Erm, that's nuts. I also don't really understand what they're talking
 about there... i915g vs i915? Anyways, I just meant the version
 numbers :)

 On Sat, Jul 4, 2015 at 2:23 PM, Emil Velikov emil.l.veli...@gmail.com
 wrote:

 That would be great, as long as it does only that and does not go into
 the drivername territory. As the said driver ;-)

 A driver name to use can be provided instead
 of simple boolean value, which will be passed to the GL implementation
 for
 it to load the appropriate backend.

 -Emil

 On 4 July 2015 at 18:17, Ilia Mirkin imir...@alum.mit.edu wrote:

 IMO it'd be nice to keep this compatible with the intel driver, which
 has a DRI option, which can take the values 1, 2, 3. Obviously for
 nouveau, 1 makes no sense as that was dropped quite some time ago.

 See
 http://cgit.freedesktop.org/xorg/driver/xf86-video-intel/tree/man/intel.man#n68

 On Mon, Jun 29, 2015 at 11:30 PM, Mario Kleiner
 mario.kleiner...@gmail.com wrote:

 X-Server versions older than 1.16.3 have bugs in their
 DRI3/Present implementation which impair nouveau, so
 it is better to stick to good old DRI2 by default on
 such servers. E.g., page flipping doesn't work at all
 under DRI3/Present with older servers, and use of
 extensions like OML_sync_control, SGI_video_sync or
 INTEL_swap_events also causes failure of Present.

 nouveau's glamor accel backend currently doesn't work under
 DRI2, so continue to use DRI3 whenever it is supported.

 Under the exa accel backend, DRI2 works just fine, so
 disable DRI3 and choose DRI2 by default when nouveau
 is built for X-Server < 1.16.3, and enable DRI3 if
 building on later X-Servers which work reasonably well
 under DRI3/Present.

 A new boolean xorg.conf Option DRI3 allows to enforce or
 prevent use of DRI3/Present under EXA acceleration for
 testing.

 Also add a bit more output about status of Present and
 DRI3 to aid debugging.

 Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com

 ---
   man/nouveau.man|  6 ++
   src/nouveau_dri2.c | 11 ++-
   src/nv_const.h |  2 ++
   src/nv_driver.c| 17 +++--
   4 files changed, 33 insertions(+), 3 deletions(-)

 diff --git a/man/nouveau.man b/man/nouveau.man
 index 129bb7f..12cfbc0 100644
 --- a/man/nouveau.man
 +++ b/man/nouveau.man
 @@ -125,6 +125,12 @@ that relies on correct presentation timing
 behaviour as defined in that
   specification.
   .br
   Default: 1.
 +.TP
 +.BI Option \*qDRI3\*q \*q boolean \*q
 +Enable the DRI3 extension under exa acceleration if supported by
 server.
 +A setting of off will only use DRI2 instead. Under glamor

Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles

2015-07-24 Thread Ilia Mirkin
Indeed, this fixed the original issue on the GK208. Additionally it
seems like starting with GK104 the mechanism for indirect offsets for
ALD/AST changed and an AL2P instruction must now be used to determine
the indirect or physical offset. Once nouveau was adjusted to do
this, all MEM_OUT_OF_BOUNDS errors with tessellation shaders are gone.

On Thu, Jul 23, 2015 at 2:36 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 I think I figured out what was going on. Will re-check on the GK208,
 but on a GF108 the random blue splotches in Unigine Heaven are gone
 now. Turns out that with an instruction like

 /*00d0*/   ALD.128 R0, a[0x70], R0;
/* 0x7ecc381ffc02 */

 The hardware will internally split it up into roughly

 ALD R0, a[0x70], R0
 ALD R1, a[0x74], R0
 ALD R2, a[0x78], R0
 ALD R3, a[0x7c], R0

 Of course the first one of those overwrites R0, which makes the
 subsequent loads be full of fail. Adding a hazard in our RA for the
 indirect argument resolves the issue.

   -ilia


 On Tue, May 26, 2015 at 7:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 One additional observation that I just made is that on GK208, the blob
 apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID
 wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx|
 (i.e. absolute value), or even more paradoxically, shl 2; shr 2; which
 removes the top *2* bits, rather than just the top 1. However I see no
 such behaviour on GF108.

 I'm going to test out tomorrow whether this is the cause of my GK208 woes.

 On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Hello,

 I've been debugging a few different tessellation shader issues with
 nouveau, but let's start small. I see this issue on my GK208 with high
 frequency, and I *think* I've seen it once or twice on my GF108, but
 it's exceedingly rare, if it does happen. I don't have a GK10x to test
 on, unfortunately, but I assume it'll have the same issue as the
 GK208.

 The issue is this -- a bunch of triangles that should come out of the
 tessellator end up black. I also see a GPC0/TPC1/MP trap:
 MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in
 response to an interrupt and MP trap generated by the hardware, read
 out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see
 gf100_gr_trap_mp). I assume some of the tessellation evaluation
 invocations get killed, but I have no proof of this.

 I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]]

 I would imagine that's some floating point number ending up in the
 register instead of an address, but the fp32 value of it
 (1.35107421875) does not seem familiar.

 Ben pointed out that the 0x3facf000 is a channel address, not a value
 from the shader. Oops. So that theory completely doesn't hold water.
 Perhaps some buffer isn't big enough? This ends up using 9 output
 vertices per patch, with 2 vec4's each. I've tried playing with the
 per-warp stack size to no avail, but I didn't *entirely* know what I
 was doing either though.


 Even when all the triangles show up, I still see the error on the
 GK208, so I'm not sure if they're the same issue or not.

 Now, here's the fun part -- this is completely non-deterministic.
 Sometimes everything shows up on the GK208, other times I see holes,
 in varying locations. I'm fairly sure that the actual shader code is
 correct... so I'm doing something funny wrong. (And yeah, tons of
 missed optimization opportunities in this code, but let's not dwell on
 that.)

 This is the piglit test:

 http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test

 It should be noted that other piglit tests don't exhibit this error,
 however they also tend to be simpler. One key difference is that they
 don't change the patch size in TCS. I'm including a link to a text
 file with the tessellation control and evaluation shaders (decoded
 with nvdisasm which you're hopefully more familiar with), along with
 the shader headers that we generate.

 FTR, this is how I feed the raw shader opcode bytes into nvdisasm:

 perl -ane 'foreach (@F) { print pack "I", hex($_) }' > tt; nvdisasm -b 
 SM35 tt

 (for some reason it doesn't want to read from a pipe or even a fd).

 http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt

 My suspicion is that we're doing something wrong with the sched codes.
 We have an elaborate calculator, but... perhaps not elaborate enough?
 You can see it here:

 http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574

 The reason I think it's an error in sched codes is due to the TRAP
 memory location that I see -- could well be some stale value in the
 register and the value from S2R or VILD doesn't make it in there in
 time before the ALD reads it.

 If you should like to try this yourself, you can use
 https://github.com/imirkin/mesa

Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles

2015-07-23 Thread Ilia Mirkin
I think I figured out what was going on. Will re-check on the GK208,
but on a GF108 the random blue splotches in Unigine Heaven are gone
now. Turns out that with an instruction like

/*00d0*/   ALD.128 R0, a[0x70], R0;
   /* 0x7ecc381ffc02 */

The hardware will internally split it up into roughly

ALD R0, a[0x70], R0
ALD R1, a[0x74], R0
ALD R2, a[0x78], R0
ALD R3, a[0x7c], R0

Of course the first one of those overwrites R0, which makes the
subsequent loads be full of fail. Adding a hazard in our RA for the
indirect argument resolves the issue.

  -ilia


On Tue, May 26, 2015 at 7:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 One additional observation that I just made is that on GK208, the blob
 apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID
 wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx|
 (i.e. absolute value), or even more paradoxically, shl 2; shr 2; which
 removes the top *2* bits, rather than just the top 1. However I see no
 such behaviour on GF108.

 I'm going to test out tomorrow whether this is the cause of my GK208 woes.

 On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Hello,

 I've been debugging a few different tessellation shader issues with
 nouveau, but let's start small. I see this issue on my GK208 with high
 frequency, and I *think* I've seen it once or twice on my GF108, but
 it's exceedingly rare, if it does happen. I don't have a GK10x to test
 on, unfortunately, but I assume it'll have the same issue as the
 GK208.

 The issue is this -- a bunch of triangles that should come out of the
 tessellator end up black. I also see a GPC0/TPC1/MP trap:
 MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in
 response to an interrupt and MP trap generated by the hardware, read
 out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see
 gf100_gr_trap_mp). I assume some of the tessellation evaluation
 invocations get killed, but I have no proof of this.

 I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]]

 I would imagine that's some floating point number ending up in the
 register instead of an address, but the fp32 value of it
 (1.35107421875) does not seem familiar.

 Ben pointed out that the 0x3facf000 is a channel address, not a value
 from the shader. Oops. So that theory completely doesn't hold water.
 Perhaps some buffer isn't big enough? This ends up using 9 output
 vertices per patch, with 2 vec4's each. I've tried playing with the
 per-warp stack size to no avail, but I didn't *entirely* know what I
 was doing either though.


 Even when all the triangles show up, I still see the error on the
 GK208, so I'm not sure if they're the same issue or not.

 Now, here's the fun part -- this is completely non-deterministic.
 Sometimes everything shows up on the GK208, other times I see holes,
 in varying locations. I'm fairly sure that the actual shader code is
 correct... so I'm doing something funny wrong. (And yeah, tons of
 missed optimization opportunities in this code, but let's not dwell on
 that.)

 This is the piglit test:

 http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test

 It should be noted that other piglit tests don't exhibit this error,
 however they also tend to be simpler. One key difference is that they
 don't change the patch size in TCS. I'm including a link to a text
 file with the tessellation control and evaluation shaders (decoded
 with nvdisasm which you're hopefully more familiar with), along with
 the shader headers that we generate.

 FTR, this is how I feed the raw shader opcode bytes into nvdisasm:

 perl -ane 'foreach (@F) { print pack "I", hex($_) }' > tt; nvdisasm -b SM35 
 tt

 (for some reason it doesn't want to read from a pipe or even a fd).

 http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt

 My suspicion is that we're doing something wrong with the sched codes.
 We have an elaborate calculator, but... perhaps not elaborate enough?
 You can see it here:

 http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574

 The reason I think it's an error in sched codes is due to the TRAP
 memory location that I see -- could well be some stale value in the
 register and the value from S2R or VILD doesn't make it in there in
 time before the ALD reads it.

 If you should like to try this yourself, you can use
 https://github.com/imirkin/mesa/commits/gl4-integration-2 . This
 branch is good enough to run Unigine Heaven, but still has a lot of
 known shortcomings. (Both at the core and the nouveau levels.)

 Any advice or suggestions for debugging this would be greatly
 appreciated. And let me know if you'd like me to generate additional
 info on this. For example I can supply a full command trace that can
 be piped to demmt, if that's helpful.

 Thanks in advance

Re: [Nouveau] [PATCH] configure: remove unneeded AC_SUBST statements

2015-07-21 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

On Tue, Jul 21, 2015 at 5:51 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 The variables are already set/substituted by the PKG_CHECK_MODULES
 macro.

 Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
 ---
  configure.ac | 4 
  1 file changed, 4 deletions(-)

 diff --git a/configure.ac b/configure.ac
 index 03563c1..6048c5a 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -82,8 +82,6 @@ XORG_DRIVER_CHECK_EXT(DRI2, [dri2proto = 2.6])
  # Checks for pkg-config packages
  PKG_CHECK_MODULES(LIBDRM, [libdrm = 2.4.60])
  PKG_CHECK_MODULES(LIBDRM_NOUVEAU, [libdrm_nouveau = 2.4.25])
 -AC_SUBST(LIBDRM_NOUVEAU_CFLAGS)
 -AC_SUBST(LIBDRM_NOUVEAU_LIBS)

  PKG_CHECK_MODULES(XORG, [xorg-server = 1.8] xproto fontsproto libdrm 
 $REQUIRED_MODULES)
  PKG_CHECK_MODULES(XEXT, [xextproto = 7.0.99.1],
 @@ -100,8 +98,6 @@ if test x$LIBUDEV = xyes; then
 AC_DEFINE(HAVE_LIBUDEV, 1, [libudev support])
  fi
  AM_CONDITIONAL(LIBUDEV, [ test x$LIBUDEV = xyes ] )
 -AC_SUBST([LIBUDEV_CFLAGS])
 -AC_SUBST([LIBUDEV_LIBS])

  # Checks for header files.
  AC_HEADER_STDC
 --
 2.4.4

 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.

2015-07-14 Thread Ilia Mirkin
On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 On 07/07/2015 09:51 PM, Ilia Mirkin wrote:

 Lastly, from some discussions with ajax on IRC, it appears that DRI3
 is half-baked at best wrt sync between server and client. I think we
 should just disable it by default for now, until issues are ironed
 out. (Rather than what this patch has, which is default-on for Xorg >=
 some version.)


 What are the remaining known trouble spots wrt. sync? It seems to work
 pretty well at least for single gpu + unredirected fullscreen windows (==
 kms page flipping can be used for Presents). That's the use case i usually
 test very obsessively, as it matters very much for my type of applications,
 but other than that i only lightly test it via regular desktop use, so
 maybe that's where problems remain?

Adam is the one who actually understands it... I was just asking
questions and the answer was "that's broken". A user was using DRI3
with EXA (nouveau DDX), and was seeing render fail in the form of
stale things on the screen. The nouveau DDX does a lot of implicit
sync stuff... it just emits commands into a pushbuf without kicking
it. However the libdrm code has cleverness to kick out any pushbufs if
you do a nouveau_bo_wait and that bo has been referenced.

HOWEVER if you have a pixmap and you share it using the fd thing, and
another process makes a (shared) bo out of it, and then does a
nouveau_bo_wait, that in no way will cause the DDX to kick its
pushbuf. Apparently there's some sort of sync thing that's supposed to
happen, but that's entirely unimplemented for DRI3. Unfortunately I
can't provide more details than that, as my knowledge of X internals
is quite limited. Some sort of DRI drawable or GLX drawable or ...
something.

I have no proof that this is the cause of the issue the user was
seeing, in fact it's just as likely to be something else. However this
seems like a pretty significant issue to me.


 We can disable it by default on exa - intel and amd/radeon drivers also
 disable by default. However, on gpus >= maxwell only glamor accel is
 supported and glamor on nouveau is either dri3/present or no hw accel at all
 afaics.

You probably saw my patches to just remove glamor from nouveau :) That
integration doesn't support DRI2, which in turn means no core contexts
(due to lack of GLX_ARB_create_context_profile), and a slew of other
issues. Seemed easier to just tell people to go use modesetting, which
gets all of these things right(er).


 Btw. there are also a few patches made by Chris Wilson floating on the
 mailing list since around january, some are reviewed and tested by myself,
 but not included in xorg master. Might be good for people to have a look at
 them and maybe get them into xorg 1.18?

I would not oppose the reviewing of Chris's patches ;) However I'm in
no position to evaluate them myself.



 On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com
 wrote:

 The DRI option with the intel ddx can be used to indicate the following
   - whether dri is disabled
   - the dri version - dri1, dri2, dri3
   - the dri module name - doo_dri.so bar_dri.so

 I'm not sure how exactly it's supposed to work/works, and I believe
 most of that is due to legacy reasons. I'm just saying let's not do
 the whole thing - just the dri version would be great (as you
 suggested).


 I can change that to allow selection between 2 and 3 - at least for exa,
 on glamor the parameter 2 would either need to get ignored or it would
 completely disable hw acceleration. I went for consistency with the ati ddx
 because i found the intel variant too confusing. I think it changed multiple
 times during the last year.

Bleargh. The ati ddx option name is much newer. Has it seen a release
yet? It'd be really nice to get all the DDX's to just agree on
something, instead of having different-but-similar options which
confuse everyone.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.

2015-07-14 Thread Ilia Mirkin
On Tue, Jul 14, 2015 at 3:00 PM, Mario Kleiner
mario.kleiner...@gmail.com wrote:
 On 07/14/2015 05:26 PM, Ilia Mirkin wrote:

 On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner
 mario.kleiner...@gmail.com wrote:

 On 07/07/2015 09:51 PM, Ilia Mirkin wrote:


 Lastly, from some discussions with ajax on IRC, it appears that DRI3
 is half-baked at best wrt sync between server and client. I think we
 should just disable it by default for now, until issues are ironed
 out. (Rather than what this patch has, which is default-on for Xorg >=
 some version.)



 What are the remaining known trouble spots wrt. sync? It seems to work
 pretty well at least for single gpu + unredirected fullscreen windows (==
 kms page flipping can be used for Presents). That's the use case i usually
 test very obsessively, as it matters very much for my type of
 applications,
 but other than that i only lightly test it via regular desktop use, so
 maybe that's where problems remain?


 Adam is the one who actually understands it... I was just asking
 questions and the answer was that's broken. A user was using DRI3
 with EXA (nouveau DDX), and was seeing render fail in the form of
 stale things on the screen. The nouveau DDX does a lot of implicit
 sync stuff... it just emits commands into a pushbuf without kicking
 it. However the libdrm code has cleverness to kick out any pushbufs if
 you do a nouveau_bo_wait and that bo has been referenced.

 HOWEVER if you have a pixmap and you share it using the fd thing, and
 another process makes a (shared) bo out of it, and then does a
 nouveau_bo_wait, that in no way will cause the DDX to kick its
 pushbuf. Apparently there's some sort of sync thing that's supposed to


 Although then i'd expect the other process to hang in nouveau_bo_wait? There
 probably isn't a specific fdo bug for this?

No, because it's blissfully unaware of the fact that there's anything
to wait on for the bo (since it hasn't been kicked out by the process
and submitted to the kernel), and thus decides that the bo is ready to
use.


 happen, but that's entirely unimplemented for DRI3. Unfortunately I
 can't provide more details than that, as my knowledge of X internals
 is quite limited. Some sort of DRI drawable or GLX drawable or ...
 something.

 I have no proof that this is the cause of the issue the user was
 seeing, in fact it's just as likely to be something else. However this
 seems like a pretty significant issue to me.


 Ok, thanks for the explanation. But would this problem then be limited to
 exa + nouveau? glamor uses opengl and thereby mesa's and i think mesa gets
 sync right afaik, at least in the glx/dri3 backend. Didn't look at egl
 backend though. At least i didn't see any such corruption?

 I'll change the patch to default to DRI 2 then for exa, and glamor will
 ignore the DRI parameter anyway and stick to DRI3.

TBH I have no idea how glamor works. Presumably it may have similar
issues, but perhaps it just sync's left and right so the problems
remain unseen. There's a block handler or something like that which
tends to flush things (in nouveau as well IIRC). Although that just
flushes out dirty pixmaps, not sure what causes something to get onto
the dirty pixmap list. That might still end up not flushing pending
commands out.



 We can disable it by default on exa - intel and amd/radeon drivers also
 disable by default. However, on gpus >= maxwell only glamor accel is
 supported and glamor on nouveau is either dri3/present or no hw accel at
 all
 afaics.


 You probably saw my patches to just remove glamor from nouveau :) That
 integration doesn't support DRI2, which in turn means no core contexts
 (due to lack of GLX_ARB_create_context_profile), and a slew of other
 issues. Seemed easier to just tell people to go use modesetting, which
 gets all of these things right(er).


 Hm, a total removal would at least make me and my users rather unhappy atm.,
 as without glamor no hw accel at all on >= maxwell. modesetting and nouveau
 are not on par feature-wise atm. E.g., modesetting as of the next xorg 1.18
 server only provides pageflipping via dri3/present and without pageflipping
 it is game over for many of my use cases on nouveau-kms. Also modesetting
 currently completely lacks ZaphodHeads support. Without ZaphodHeads i can't
 have a page-flipped fullscreen window on one subset of outputs and a regular
 desktop on another subset, something that is needed for
 neuroscience/medical/vr applications.

 So i'd rather like to preserve the choice of glamor.

 What makes glamor + dri2 difficult to support in nouveau vs. other drivers?

Lack of having been done, and a lack of desire by all parties involved
to do it. As-is the glamor integration is, unfortunately, quite
broken.

I have an EXA impl for maxwell in the works, although I haven't made
progress on it in months. It's missing some sort of
nouveau_scratch_data() style call to make a temp bo to put coordinates
in so that we can do draws on the 3d engine.

I

[Nouveau] [PATCH] avoid build fail without COMPOSITE

2015-07-14 Thread Ilia Mirkin
---
 src/nouveau_dri2.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
index f22e319..4398559 100644
--- a/src/nouveau_dri2.c
+++ b/src/nouveau_dri2.c
@@ -142,6 +142,7 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr 
pDraw, RegionPtr pRegio
NVPtr pNv = NVPTR(xf86ScreenToScrn(pScreen));
RegionPtr pCopyClip;
GCPtr pGC;
+   PixmapPtr pPix;
DrawablePtr src_draw, dst_draw;
Bool translate = FALSE;
int off_x = 0, off_y = 0;
@@ -170,9 +171,13 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr 
pDraw, RegionPtr pRegio
}
 
if (translate && pDraw->type == DRAWABLE_WINDOW) {
-   PixmapPtr pPix = get_drawable_pixmap(pDraw);
-   off_x = pDraw->x - pPix->screen_x;
-   off_y = pDraw->y - pPix->screen_y;
+   off_x = pDraw->x;
+   off_y = pDraw->y;
+#ifdef COMPOSITE
+   pPix = get_drawable_pixmap(pDraw);
+   off_x -= pPix->screen_x;
+   off_y -= pPix->screen_y;
+#endif
}
 
pGC = GetScratchGC(pDraw->depth, pScreen);
@@ -194,8 +199,8 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr 
pDraw, RegionPtr pRegio
if (extents->x1 == 0 && extents->y1 == 0 &&
extents->x2 == pDraw->width &&
extents->y2 == pDraw->height) {
-   PixmapPtr fpix = get_drawable_pixmap(dst_draw);
-   struct nouveau_bo *bo = nouveau_pixmap_bo(fpix);
+   pPix = get_drawable_pixmap(dst_draw);
+   struct nouveau_bo *bo = nouveau_pixmap_bo(pPix);
if (bo)
nouveau_bo_wait(bo, NOUVEAU_BO_RD, pNv->client);
}
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] avoid build fail without COMPOSITE

2015-07-14 Thread Ilia Mirkin
Well, I don't pretend to know anything about X, but this is the commit
that added the code in question:

commit 297fd0d0755bda698be1d0b30cc60a41d7673c0b
Author: Dave Airlie airl...@redhat.com
Date:   Tue Oct 16 16:15:16 2012 +1000

nouveau/dri2: fix pixmap/window offset calcs.

This should fix prime rendering under kwin, and not break it under the
others.

Signed-off-by: Dave Airlie airl...@redhat.com

diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
index 71cff26..7bd0b3a 100644
--- a/src/nouveau_dri2.c
+++ b/src/nouveau_dri2.c
@@ -165,9 +165,9 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen,
DrawablePtr pDraw, RegionPtr pRegio
translate = TRUE;

if (translate && pDraw->type == DRAWABLE_WINDOW) {
-   WindowPtr pWin = (WindowPtr)pDraw;
-   off_x = pWin->origin.x;
-   off_y = pWin->origin.y;
+   PixmapPtr pPix = get_drawable_pixmap(pDraw);
+   off_x = pDraw->x - pPix->screen_x;
+   off_y = pDraw->y - pPix->screen_y;
}

pGC = GetScratchGC(pDraw->depth, pScreen);


Now I sort of assume that pDraw->x == pWin->origin.x. But... who knows.

  -ilia


On Tue, Jul 14, 2015 at 5:46 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 On 14 July 2015 at 22:17, Ilia Mirkin imir...@alum.mit.edu wrote:
 ---
  src/nouveau_dri2.c | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

 diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c
 index f22e319..4398559 100644
 --- a/src/nouveau_dri2.c
 +++ b/src/nouveau_dri2.c
 @@ -142,6 +142,7 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr 
 pDraw, RegionPtr pRegio
 NVPtr pNv = NVPTR(xf86ScreenToScrn(pScreen));
 RegionPtr pCopyClip;
 GCPtr pGC;
 +   PixmapPtr pPix;
 DrawablePtr src_draw, dst_draw;
 Bool translate = FALSE;
 int off_x = 0, off_y = 0;
 @@ -170,9 +171,13 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, 
 DrawablePtr pDraw, RegionPtr pRegio
 }

 if (translate && pDraw->type == DRAWABLE_WINDOW) {
 -   PixmapPtr pPix = get_drawable_pixmap(pDraw);
 -   off_x = pDraw->x - pPix->screen_x;
 -   off_y = pDraw->y - pPix->screen_y;
 +   off_x = pDraw->x;
 +   off_y = pDraw->y;
 +#ifdef COMPOSITE
 +   pPix = get_drawable_pixmap(pDraw);
 +   off_x -= pPix->screen_x;
 +   off_y -= pPix->screen_y;
 +#endif
 If I understand things correctly both ati and the intel ddx seems set
 the offsets to zero when composite is missing.
 I doubt that many people build xserver without it though :-)

 -Emil
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params

2015-07-13 Thread Ilia Mirkin
Any one which, after using a geometry shader, enables an extra clip
distance. i.e. none.

On Mon, Jul 13, 2015 at 4:16 AM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:
 What piglit test does this fix?

 On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: mesa-sta...@lists.freedesktop.org
 ---

 Even though in practice a geometry program will never be using UCP's,
 we still were revalidating (aka recompiling) the program when more
 clip planes became enabled (which also are used for regular clip
 distances).

 This seems like it should have led to massive fail, but I guess you
 don't change the number of clip planes when using geometry shaders.
 But I'm going to put this through a full piglit run just in case
 there's something I'm missing.

  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 index 785e52e..11f2b10 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
nvc0_vertprog_validate(nvc0);
 else
 if (likely(vp == nvc0->gmtyprog))
 -  nvc0_vertprog_validate(nvc0);
 +  nvc0_gmtyprog_validate(nvc0);
 else
nvc0_tevlprog_validate(nvc0);
  }
 --
 2.3.6

 ___
 mesa-dev mailing list
 mesa-...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev




 --
 Best regards,
 Samuel Pitoiset.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params

2015-07-13 Thread Ilia Mirkin
This was, btw, introduced in commit 3a8ae6ac243b (nvc0: adapt to new
clip state). Back then there was no real geometry support yet.

On Mon, Jul 13, 2015 at 2:05 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Any one which, after using a geometry shader, enables an extra clip
 distance. i.e. none.

 On Mon, Jul 13, 2015 at 4:16 AM, Samuel Pitoiset
 samuel.pitoi...@gmail.com wrote:
 What piglit test does this fix?

 On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: mesa-sta...@lists.freedesktop.org
 ---

 Even though in practice a geometry program will never be using UCP's,
 we still were revalidating (aka recompiling) the program when more
 clip planes became enabled (which also are used for regular clip
 distances).

 This seems like it should have led to massive fail, but I guess you
 don't change the number of clip planes when using geometry shaders.
 But I'm going to put this through a full piglit run just in case
 there's something I'm missing.

  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 index 785e52e..11f2b10 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
nvc0_vertprog_validate(nvc0);
 else
 if (likely(vp == nvc0->gmtyprog))
 -  nvc0_vertprog_validate(nvc0);
 +  nvc0_gmtyprog_validate(nvc0);
 else
nvc0_tevlprog_validate(nvc0);
  }
 --
 2.3.6

 ___
 mesa-dev mailing list
 mesa-...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev




 --
 Best regards,
 Samuel Pitoiset.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used

2015-08-24 Thread Ilia Mirkin
Edge flag stuff is annoying. Pretty sure only blender uses it. shade
model = flat should get fixed on nv50 before edge flags, since blender
uses that too, and it produces much worse visual artifacts.

I'm having second thoughts about this patch. I think I'm going to go
back to my previous approach of just calling
nv50_vertex_arrays_validate when vbo_fifo && vertexid. I suspect that
vertexid usage with small draws from client buffers is next to
inexistent, no need to re-emit this stuff so often.

On Mon, Aug 24, 2015 at 4:07 PM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:
 Reviewed-by: Samuel Pitoiset samuel.pitoi...@gmail.com

 This fix is simpler than I expected. What about the edge flag stuff now?
 :)


 On 08/24/2015 05:51 PM, Ilia Mirkin wrote:

 The hardware only generates vertexid when vertices come from a VBO. This
 fixes:

vertexid-drawelements
vertexid-drawarrays

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: 11.0 mesa-sta...@lists.freedesktop.org
 ---
   src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 +
   src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 +
   src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++-
   src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 
   4 files changed, 12 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c
 b/src/gallium/drivers/nouveau/nv50/nv50_program.c
 index 02dc367..eff4477 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
 @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info
 *info)
 case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |=
 NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
 + prog->vp.vertexid = 1;
continue;
 default:
break;
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h
 b/src/gallium/drivers/nouveau/nv50/nv50_program.h
 index 5d3ff56..f4e8e94 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
 @@ -76,6 +76,7 @@ struct nv50_program {
 ubyte psiz;/* output slot of point size */
 ubyte bfc[2];  /* indices into varying for FFC (FP) or BFC
 (VP) */
 ubyte edgeflag;
 +  ubyte vertexid;
 ubyte clpd[2]; /* output slot of clip distance[i]'s 1st
 component */
 ubyte clpd_nr;
  } vp;
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 index b304a17..66dcf43 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
 @@ -503,7 +503,8 @@ static struct state_validate {
   { nv50_validate_samplers,  NV50_NEW_SAMPLERS },
   { nv50_stream_output_validate, NV50_NEW_STRMOUT |
  NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG
 },
 -{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
 +{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
 +   NV50_NEW_VERTPROG },
   { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
   };
   #define validate_list_len (sizeof(validate_list) /
 sizeof(validate_list[0]))
 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 index 600b973..fb4305f 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
 @@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context
 *nv50)
  unsigned i;
  const unsigned n = MAX2(vertex-num_elements,
 nv50-state.num_vtxelts);
   +   /* A vertexid is not generated for inline data uploads. Have to use
 a
 +* VBO. This check must come after the vertprog has been validated,
 +* otherwise vertexid may be unset.
 +*/
 +   assert(nv50->vertprog->translated);
 +   if (nv50->vertprog->vp.vertexid)
 +  nv50->vbo_push_hint = 0;
 +
  if (unlikely(vertex-need_conversion))
 nv50-vbo_fifo = ~0;
  else


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Documentation request for MP warp error 0x10

2015-10-26 Thread Ilia Mirkin
On Fri, Oct 2, 2015 at 6:14 PM, Robert Morell <rmor...@nvidia.com> wrote:
> Hi Ilia,
>
> On Fri, Oct 02, 2015 at 06:05:21PM -0400, Ilia Mirkin wrote:
>> Hi Robert,
>>
>> Thanks for the quick response! That goes in line with my observations
>> which is that these things happen when using an ATOM/RED instruction.
>> I've checked and rechecked that I'm generating ops with identical bits
>> as what the proprietary driver does, however (and nvdisasm prints
>> identical output). Could you advise what the proper way of indicating
>> that the memory is "global" to the op? I'm sure I'm just missing
>> something simple. If you show me what to look for in SM35 I can
>> probably find it on my own for SM20/SM30/SM50.
>
> Unfortunately this isn't something I know a lot about, so I'm going to
> have do some research and get back to you, hopefully within a few days.

Hi Robert,

Were you able to find any further information out about this? Happy to
provide with any traces or additional details as to what I'm doing
(and which is failing).

Thanks,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] pci: enable c800 magic for Lenovo Y510P

2015-10-27 Thread Ilia Mirkin
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70354#c75
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

Unclear if we want this. Someone with the same vendor/subvendor pci
ids didn't have any issues with nouveau at all:

https://bugs.launchpad.net/ubuntu/+source/compiz/+bug/1327624

[they had other issues though]

Not sure if this will do more harm than good.

 drm/nouveau/nvkm/engine/device/pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drm/nouveau/nvkm/engine/device/pci.c 
b/drm/nouveau/nvkm/engine/device/pci.c
index e8eb14e..c5fc909 100644
--- a/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drm/nouveau/nvkm/engine/device/pci.c
@@ -259,6 +259,12 @@ nvkm_device_pci_10de_0df4[] = {
 };
 
 static const struct nvkm_device_pci_vendor
+nvkm_device_pci_10de_0fcd[] = {
+   { 0x17aa, 0x3801, NULL, { .War00C800_0 = true } }, /* Lenovo Y510P */
+   {}
+};
+
+static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_0fd2[] = {
{ 0x1028, 0x0595, "GeForce GT 640M LE" },
{ 0x1028, 0x05b2, "GeForce GT 640M LE" },
@@ -1349,7 +1355,7 @@ nvkm_device_pci_10de[] = {
{ 0x0fc6, "GeForce GTX 650" },
{ 0x0fc8, "GeForce GT 740" },
{ 0x0fc9, "GeForce GT 730" },
-   { 0x0fcd, "GeForce GT 755M" },
+   { 0x0fcd, "GeForce GT 755M", nvkm_device_pci_10de_0fcd },
{ 0x0fce, "GeForce GT 640M LE" },
{ 0x0fd1, "GeForce GT 650M" },
{ 0x0fd2, "GeForce GT 640M", nvkm_device_pci_10de_0fd2 },
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nvc0: respect edgeflag attribute width

2015-10-23 Thread Ilia Mirkin
The edgeflag comes in as ubyte with glEdgeFlagPointer but as float with
plain immediate glEdgeFlag. Avoid reading bytes that weren't meant for
the edgeflag in the pointer case.

Fixes intermittent failures with gl-2.0-edgeflag piglit (and valgrind
complaints about reading uninitialized memory).

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 8b23a48..efadeeb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -27,6 +27,7 @@ struct push_context {
struct {
   bool enabled;
   bool value;
+  uint8_t width;
   unsigned stride;
   const uint8_t *data;
} edgeflag;
@@ -100,6 +101,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct 
nvc0_context *nvc0,
struct nv04_resource *buf = nv04_resource(vb->buffer);
 
ctx->edgeflag.stride = vb->stride;
+   ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
if (buf) {
   unsigned offset = vb->buffer_offset + ve->src_offset;
   ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
@@ -139,8 +141,9 @@ prim_restart_search_i32(const uint32_t *elts, unsigned 
push, uint32_t index)
 static inline bool
 ef_value(const struct push_context *ctx, uint32_t index)
 {
-   float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
-   return *pf ? true : false;
+   static const uint64_t zero = 0;
+   uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+   return !!memcmp(pf, &zero, ctx->edgeflag.width);
 }
 
 static inline bool
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] drm/nouveau: Fix pre-nv50 pageflip events (v2)

2015-11-09 Thread Ilia Mirkin
On Mon, Nov 9, 2015 at 7:57 AM, Mario Kleiner
<mario.kleiner...@gmail.com> wrote:
> From: Daniel Vetter <daniel.vet...@ffwll.ch>
>
> Apparently pre-nv50 pageflip events happen before the actual vblank
> period. Therefore that functionality got semi-disabled in
>
> commit af4870e406126b7ac0ae7c7ce5751f25ebe60f28
> Author: Mario Kleiner <mario.kleiner...@gmail.com>
> Date:   Tue May 13 00:42:08 2014 +0200
>
> drm/nouveau/kms/nv04-nv40: fix pageflip events via special case.
>
> Unfortunately that hack got uprooted in
>
> commit cc1ef118fc099295ae6aabbacc8af94d8d8885eb
> Author: Thierry Reding <tred...@nvidia.com>
> Date:   Wed Aug 12 17:00:31 2015 +0200
>
> drm/irq: Make pipe unsigned and name consistent
>
> Triggering a warning when trying to sample the vblank timestamp for a
> non-existing pipe. There's a few ways to fix this:
>
> - Open-code the old behaviour, which just enshrines this slight
>   breakage of the userspace ABI.
>
> - Revert Mario's commit and again inflict broken timestamps, again not
>   pretty.
>
> - Fix this for real by delaying the pageflip TS until the next vblank
>   interrupt, thereby making it accurate.
>
> This patch implements the third option. Since having a page flip
> interrupt that happens when the pageflip gets armed and not when it
> completes in the next vblank seems to be fairly common (older i915 hw
> works very similarly) create a new helper to arm vblank events for
> such drivers.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=106431
> Cc: Thierry Reding <tred...@nvidia.com>
> Cc: Mario Kleiner <mario.kleiner...@gmail.com>
> Cc: Ben Skeggs <bske...@redhat.com>
> Cc: Ilia Mirkin <imir...@alum.mit.edu>
>
> v2 (mario): Integrate my own review comments into Daniels patch.
>- Fix function prototypes in drmP.h
>- Add missing vblank_put() for pageflip completion without
>  pageflip event.
>- Initialize sequence number for queued pageflip event to avoid
>  trouble in drm_handle_vblank_events().
>- Remove dead code and spelling fix.
>
> Signed-off-by: Daniel Vetter <daniel.vet...@intel.com>
> Reviewed-by: Mario Kleiner <mario.kleiner...@gmail.com>

Without commenting on the actual patch, a few points of procedure:

(a) If you're sending the patch, you're supposed to add your
Signed-off-by. So you'd keep Daniel's and add yours.
(b) Since this is triggering warns for real people in real situations,
tack on a

Cc: sta...@vger.kernel.org # v4.3

Cheers,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] llvm TGSI backend (WIP) questions

2015-11-13 Thread Ilia Mirkin
On Fri, Nov 13, 2015 at 3:42 PM, Emil Velikov <emil.l.veli...@gmail.com> wrote:
> On 13 November 2015 at 14:38, Ilia Mirkin <imir...@alum.mit.edu> wrote:
>> On Fri, Nov 13, 2015 at 9:25 AM, Emil Velikov <emil.l.veli...@gmail.com> 
>> wrote:
>>> Hello Hans,
>>>
>>> Not to muddy the waters or anything, have you thought about the NIR
>>> integration that Rob was thinking about ?
>>> I'm pretty sure he'll be happy to have extra people helping him out.
>>
>> How would that in any way plug into llvm or nouveau? There's no OpenCL
>> C -> NIR, and there's no NIR -> nv50 IR...
>>
> I thought that you've been (remotely) exploring the latter
> possibility. Isn't that the case ?

Not to my knowledge. I did look at doing SPIR -> nv50 ir (not to be
confused with SPIR-V), but that was ~1.5y ago. I got stuck in control
flow and llvm ir frustration. The fact that I had to go out-of-ssa
didn't help. At this point I don't see any upside to using NIR.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] help with push

2015-11-02 Thread Ilia Mirkin
See libdrm's pushbuf.c -- iirc push->cur points to a GART-mapped bo.

http://cgit.freedesktop.org/mesa/drm/tree/nouveau/pushbuf.c#n682

nouveau_pushbuf_data(push, NULL, 0, 0);
nouveau_bo_ref(bo, &nvpb->bo);
nouveau_bo_ref(NULL, &bo);

nvpb->bgn = nvpb->bo->map;
nvpb->ptr = nvpb->bgn;
push->cur = nvpb->bgn;
push->end = push->cur + (nvpb->bo->size / 4);
push->end -= 2 + push->rsvd_kick; /* space for suffix */

Not sure what problem you're trying to solve.

  -ilia

On Mon, Nov 2, 2015 at 12:31 PM, Daniel Melo Jorge da Cunha
 wrote:
> Hi, sorry if I misunderstood everything...
>
> In the file src/gallium/drivers/nouveau/nv30/nv30_screen.c there are loads of
> PUSH_DATA which is basically *push->curr = data;
>
> I'm thinking that somehow push->curr is the bo->map = drm_mmap(...)
> that is called in nouveau_bo_map. But I cannot see how they are linked...
> Because when nouveau_bo_map calls nouveau_bo_wait
> push = cli_push_get(client, bo) returns NULL...
>
> Is push->curr the region of memory that we send data to the card?
> If so, how is it mapped?
> Has bo->map something to do with it? If so, how are they linked?
>
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] help with push

2015-11-02 Thread Ilia Mirkin
E are you sure?

nv30_screen_create starts with a bunch of stuff init'ing objects, and then does:

   BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
   PUSH_DATA (push, screen->eng3d->handle);

And as you can see in nv30_winsys.h:

static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
   PUSH_SPACE(push, size + 1);
   PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
}

and PUSH_SPACE in turn calls nouveau_pushbuf_space.

  -ilia

On Mon, Nov 2, 2015 at 1:36 PM, Daniel Melo Jorge da Cunha
<dmjcu...@gmail.com> wrote:
> But at the time the mesa3d file
> src/gallium/drivers/nouveau/nv30/nv30_screen.c
> is called and when the various PUSH_DATA begin to be called there is not yet
> a call to nouveau_pushbuf_space. So it would generate a seg fault in
> push->curr. Again, sorry for the confusion and thanks for the reply.
> Awaiting
> for an answer if possible. Thanks in advance.
>
> 2015-11-02 14:44 GMT-03:00 Ilia Mirkin <imir...@alum.mit.edu>:
>>
>> See libdrm's pushbuf.c -- iirc push->cur points to a GART-mapped bo.
>>
>> http://cgit.freedesktop.org/mesa/drm/tree/nouveau/pushbuf.c#n682
>>
>> nouveau_pushbuf_data(push, NULL, 0, 0);
>> nouveau_bo_ref(bo, &nvpb->bo);
>> nouveau_bo_ref(NULL, &bo);
>>
>> nvpb->bgn = nvpb->bo->map;
>> nvpb->ptr = nvpb->bgn;
>> push->cur = nvpb->bgn;
>> push->end = push->cur + (nvpb->bo->size / 4);
>> push->end -= 2 + push->rsvd_kick; /* space for suffix */
>>
>> Not sure what problem you're trying to solve.
>>
>>   -ilia
>>
>> On Mon, Nov 2, 2015 at 12:31 PM, Daniel Melo Jorge da Cunha
>> <dmjcu...@gmail.com> wrote:
>> > Hi, sorry if I misunderstood everything...
>> >
>> > In the file src/gallium/drivers/nouveau/nv30/nv30_screen.c there is
>> > loans of
>> > PUSH_DATA which is basically *push->curr = data;
>> >
>> > I'm thinking that somehow push->curr is the bo->map = drm_mmap(...)
>> > that is called in nouveau_bo_map. But I cannot see how they are
>> > linked...
>> > Because when nouveau_bo_map calls nouveau_bo_wait
>> > push = cli_push_get(client, bo) returns NULL...
>> >
>> > Is push->curr the region of memory that we send data to the card?
>> > If so, how is it mapped?
>> > Has bo->map something to do with it? If so, how are they linked?
>> >
>> > ___
>> > Nouveau mailing list
>> > Nouveau@lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/nouveau
>> >
>
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] disp: activate dual link TMDS links only when possible

2015-11-03 Thread Ilia Mirkin
From: Hauke Mehrtens <ha...@hauke-m.de>

Without this patch a pixel clock rate above 165 MHz on a TMDS link is
assumed to be dual link. This is true for DVI, but not for HDMI. HDMI
supports no dual link, but it supports pixel clock rates above 165 MHz.
Only activate Dual Link mode when it is actually possible.

Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de>
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nv50_display.c   | 8 
 drm/nouveau/nvkm/engine/disp/gf119.c | 2 +-
 drm/nouveau/nvkm/engine/disp/nv50.c  | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
index c053c50..93bcfdf 100644
--- a/drm/nouveau/nv50_display.c
+++ b/drm/nouveau/nv50_display.c
@@ -1961,10 +1961,10 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct 
drm_display_mode *umode,
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_TMDS:
if (nv_encoder->dcb->sorconf.link & 1) {
-   if (mode->clock < 165000)
-   proto = 0x1;
-   else
-   proto = 0x5;
+   proto = 0x1;
+   if (mode->clock >= 165000 &&
+   nv_encoder->dcb->duallink_possible)
+   proto |= 0x4;
} else {
proto = 0x2;
}
diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c 
b/drm/nouveau/nvkm/engine/disp/gf119.c
index 186fd3a..8691b68 100644
--- a/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (pclk >= 165000 && outp->info.duallink_possible)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c 
b/drm/nouveau/nvkm/engine/disp/nv50.c
index 32e73a9..ceecd0e 100644
--- a/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (pclk >= 165000 && outp->info.duallink_possible)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler

2015-11-03 Thread Ilia Mirkin
Some Fermi's apparently also allow 297MHz clocks, so create a parameter
which allows end-users to set it themselves until we have a reliable way
to determine the board's maximum pixel clocks.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nouveau_connector.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c
index 4c8f6ef..f5806eb 100644
--- a/drm/nouveau/nouveau_connector.c
+++ b/drm/nouveau/nouveau_connector.c
@@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: 
enabled)");
 int nouveau_duallink = 1;
 module_param_named(duallink, nouveau_duallink, int, 0400);
 
+MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)");
+int nouveau_hdmimhz = 0;
+module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400);
+
 struct nouveau_encoder *
 find_encoder(struct drm_connector *connector, int type)
 {
@@ -815,6 +819,17 @@ get_tmds_link_bandwidth(struct drm_connector *connector)
struct nouveau_drm *drm = nouveau_drm(connector->dev);
struct dcb_output *dcb = nv_connector->detected_encoder->dcb;
 
+   if (drm_detect_hdmi_monitor(nv_connector->edid)) {
+   if (nouveau_hdmimhz > 0)
+   return nouveau_hdmimhz * 1000;
+   /* Note: these limits are conservative, some Fermi's
+* can do 297 MHz. Unclear how this can be determined.
+*/
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+   return 297000;
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+   return 225000;
+   }
if (dcb->location != DCB_LOC_ON_CHIP ||
drm->device.info.chipset >= 0x46)
return 165000;
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nvkm: add/remove 0's to make 7 (or 9)-nibble constants use 8 nibbles

2015-11-04 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nvkm/engine/gr/ctxgk20a.c | 2 +-
 drm/nouveau/nvkm/subdev/fb/ramgk104.c | 8 
 drm/nouveau/nvkm/subdev/therm/nv40.c  | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c 
b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index ddaa16a..ad0a6cf 100644
--- a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -55,7 +55,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct 
gf100_grctx *info)
 
gk104_grctx_generate_rop_active_fbps(gr);
 
-   nvkm_mask(device, 0x5044b0, 0x800, 0x800);
+   nvkm_mask(device, 0x5044b0, 0x0800, 0x0800);
 
gf100_gr_wait_idle(gr);
 
diff --git a/drm/nouveau/nvkm/subdev/fb/ramgk104.c 
b/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 0d20563..2614365 100644
--- a/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -216,11 +216,11 @@ r1373f4_fini(struct gk104_ramfuc *fuc)
ram_wr32(fuc, 0x1373ec, tmp | (v1 << 16));
ram_mask(fuc, 0x1373f0, (~ram->mode & 3), 0x);
if (ram->mode == 2) {
-   ram_mask(fuc, 0x1373f4, 0x0003, 0x2);
-   ram_mask(fuc, 0x1373f4, 0x1100, 0x0);
+   ram_mask(fuc, 0x1373f4, 0x0003, 0x0002);
+   ram_mask(fuc, 0x1373f4, 0x1100, 0x);
} else {
-   ram_mask(fuc, 0x1373f4, 0x0003, 0x1);
-   ram_mask(fuc, 0x1373f4, 0x0001, 0x0);
+   ram_mask(fuc, 0x1373f4, 0x0003, 0x0001);
+   ram_mask(fuc, 0x1373f4, 0x0001, 0x);
}
ram_mask(fuc, 0x10f800, 0x0030, (v0 ^ v1) << 4);
 }
diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c 
b/drm/nouveau/nvkm/subdev/therm/nv40.c
index 6326fdc..2c92ffb 100644
--- a/drm/nouveau/nvkm/subdev/therm/nv40.c
+++ b/drm/nouveau/nvkm/subdev/therm/nv40.c
@@ -107,7 +107,7 @@ nv40_fan_pwm_ctrl(struct nvkm_therm *therm, int line, bool 
enable)
 {
struct nvkm_subdev *subdev = >subdev;
struct nvkm_device *device = subdev->device;
-   u32 mask = enable ? 0x8000 : 0x000;
+   u32 mask = enable ? 0x8000 : 0x;
if  (line == 2) nvkm_mask(device, 0x0010f0, 0x8000, mask);
else if (line == 9) nvkm_mask(device, 0x0015f4, 0x8000, mask);
else {
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] HDMI pixel clock limits

2015-11-04 Thread Ilia Mirkin
In the meanwhile we're pushing out a change that just blanket allows
225MHz on Fermi and 297MHz on Kepler, with a kernel option override
available. At least one GF106 user claims to have working 297MHz with
proprietary drivers (and with nouveau in presence of the patches):

https://bugs.freedesktop.org/show_bug.cgi?id=91236

Having an accurate way to auto-detect this would be ideal though, as
higher bandwidth monitors are becoming more ubiquitous.

  -ilia


On Mon, Oct 26, 2015 at 1:35 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote:
> Hello,
>
> Various HDMI versions enable higher and higher pixel clocks. However
> individual GPUs are not required to support the maximum pixel clock
> supported by the spec in order to be compliant. It appears that some
> GPUs max out at 225MHz while others at 297MHz (while others still, I
> assume, are limited to 165MHz, esp among the older ones).
>
> We've been unable to find this in the VBIOS (I had a thought that it
> was in the table pointed to by the 'T' table, but we have a
> counterexample to that). Could you suggest a way to find this
> information either from the VBIOS or based on the GPU? Are there
> differences between regular HDMI and DP -> HDMI (passive)?
>
> This is becoming more and more relevant as 2560x1440/3840x2160
> displays are becoming more common, while (dual-link) DVI-D is on its way
> out.
>
> Thanks,
>
>   -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH mesa 0/5] nouveau: codegen: Make use of double immediates

2015-11-06 Thread Ilia Mirkin
Hi Hans,

All pushed. I made a few additional fixes and improvements to fp64
immediate handling along the way, but all your commits were fine
as-is. (Except that they enabled fp64 immediates on nv50 implicitly
which is wrong -- there are no immediate-taking variants on nv50, so I
fixed that glitch. But only the G200 can do fp64 in the first place,
and nouveau doesn't actually expose it. Corner case of a corner case
:) )

Thanks for taking care of this... it was a small bit of fp64 which I
always felt bad about not having finished up. (But not bad enough to
actually finish it myself.)

Cheers,

  -ilia


On Thu, Nov 5, 2015 at 8:32 AM, Hans de Goede  wrote:
> Hi All,
>
> This series implements using double immediates in the nouveau codegen code.
>
> This turns the following (nvc0) code:
>   1: mov u32 $r2 0x00000000 (8)
>   2: mov u32 $r3 0x3fe00000 (8)
>   3: add f64 $r0d $r0d $r2d (8)
>
> Into:
>   1: add f64 $r0d $r0d 0.50 (8)
>
> This has been tested with the 2 double shader tests which I just sent to
> the piglit list. On a gk208 (gk110 / SM35) card, and by checking the output
> of nouveau_compiler with both nvdisasm and envydis on gf100 / gk104 / gm107.
>
> Regards,
>
> Hans
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Documentation request for MP warp error 0x10

2015-11-06 Thread Ilia Mirkin
On Fri, Nov 6, 2015 at 4:19 PM, Robert Morell <rmor...@nvidia.com> wrote:
> On Fri, Nov 06, 2015 at 04:15:29PM -0500, Ilia Mirkin wrote:
>> In order for ATOM.*/RED.* to work, the addresses in question must
>> *NOT* be inside of the 16MB local/shared windows. So if I'm getting
>> that error, the address must be inside.
>
> Yes, that's my understanding.
>
>> If so, this may be a reasonable explanation for what I'm seeing --
>
> Cool, I'm happy it helps.

Looks like we were setting LOCAL_BASE (0x077c) to 0, which was
effectively shadowing the low 16M of g[] space, which is where our
buffers were ending up too. Setting it to some high-up far-off land
makes everything work!

Obviously I'll need some cleverer way to deal with this, but looks
like it's all exactly as you described. Documentation does wonders :)
Looks like I should be able to make progress on my atomics/ssbo work
now.

Thanks again,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Nouveau for FreeBSD

2015-11-04 Thread Ilia Mirkin
On Wed, Nov 4, 2015 at 3:38 AM, C Bergström  wrote:
> To bring this conversation back on track - where would someone start
> *exactly* to port this to another OS? What kernel dependencies are
> there?

drivers/gpu/drm/nouveau/{nvkm,nvif,usif} can be dropped in wholesale
with just a small handful of shims and helper functions. This is the
code that presents the GPU's internals in a fairly generation-agnostic
way, and also provides some high-level functionality.

drivers/gpu/drm/nouveau/* needs to be ported to the other OS's
infrastructure. If the OS also has a port of linux's DRM and TTM
infrastructures, a lot of that will be able to be taken wholesale.
This is the code that interacts with the core above. Decodes ioctls,
manages memory, performs kernel-side modesetting, etc.

Cheers,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] disp: activate dual link TMDS links only when possible

2015-11-03 Thread Ilia Mirkin
On Tue, Nov 3, 2015 at 7:02 PM, Ben Skeggs <skeg...@gmail.com> wrote:
> On 11/04/2015 08:41 AM, Ilia Mirkin wrote:
>> From: Hauke Mehrtens <ha...@hauke-m.de>
>>
>> Without this patch a pixel clock rate above 165 MHz on a TMDS link is
>> assumed to be dual link. This is true for DVI, but not for HDMI. HDMI
>> supports no dual link, but it supports pixel clock rates above 165 MHz.
>> Only activate Dual Link mode when it is actual possible.
>>
>> Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de>
>> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
>> ---
>>  drm/nouveau/nv50_display.c   | 8 
>>  drm/nouveau/nvkm/engine/disp/gf119.c | 2 +-
>>  drm/nouveau/nvkm/engine/disp/nv50.c  | 2 +-
>>  3 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
>> index c053c50..93bcfdf 100644
>> --- a/drm/nouveau/nv50_display.c
>> +++ b/drm/nouveau/nv50_display.c
>> @@ -1961,10 +1961,10 @@ nv50_sor_mode_set(struct drm_encoder *encoder, 
>> struct drm_display_mode *umode,
>>   switch (nv_encoder->dcb->type) {
>>   case DCB_OUTPUT_TMDS:
>>   if (nv_encoder->dcb->sorconf.link & 1) {
>> - if (mode->clock < 165000)
>> - proto = 0x1;
>> - else
>> - proto = 0x5;
>> + proto = 0x1;
>> + if (mode->clock >= 165000 &&
>> + nv_encoder->dcb->duallink_possible)
>> + proto |= 0x4;
> This is a somewhat flaky condition, given that one could plug a
> single-link HDMI monitor into a duallink-capable TMDS connector.
>
> Still, it's an improvement :)

Yeah, FWIW I thought of that (for the second patch too). All this
stuff is pretty fragile. But... what are you gonna do. Is there some
other way of telling whether we're on HDMI or DVI?

>
>>   } else {
>>   proto = 0x2;
>>   }
>> diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c 
>> b/drm/nouveau/nvkm/engine/disp/gf119.c
>> index 186fd3a..8691b68 100644
>> --- a/drm/nouveau/nvkm/engine/disp/gf119.c
>> +++ b/drm/nouveau/nvkm/engine/disp/gf119.c
>> @@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, 
>> u32 pclk, u32 *conf)
>>   switch (outp->info.type) {
>>   case DCB_OUTPUT_TMDS:
>>   *conf = (ctrl & 0x0f00) >> 8;
>> - if (pclk >= 165000)
>> + if (pclk >= 165000 && outp->info.duallink_possible)
>>   *conf |= 0x0100;
> I think it might be more robust to key this off the SOR protocol, rather
> than duplicating the condition above.

You mean disp->sor.lvdsconf? What do I do with that? Or did you have
something else in mind?

>
>>   break;
>>   case DCB_OUTPUT_LVDS:
>> diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c 
>> b/drm/nouveau/nvkm/engine/disp/nv50.c
>> index 32e73a9..ceecd0e 100644
>> --- a/drm/nouveau/nvkm/engine/disp/nv50.c
>> +++ b/drm/nouveau/nvkm/engine/disp/nv50.c
>> @@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, 
>> u32 pclk, u32 *conf)
>>   switch (outp->info.type) {
>>   case DCB_OUTPUT_TMDS:
>>   *conf = (ctrl & 0x0f00) >> 8;
>> - if (pclk >= 165000)
>> + if (pclk >= 165000 && outp->info.duallink_possible)
>>   *conf |= 0x0100;
> Same here.
>
>>   break;
>>   case DCB_OUTPUT_LVDS:
>>
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] kms: no need to check for empty edid before drm_detect_hdmi_monitor

2015-11-03 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nv50_display.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
index bdaba91..d9cba87 100644
--- a/drm/nouveau/nv50_display.c
+++ b/drm/nouveau/nv50_display.c
@@ -773,7 +773,6 @@ nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool 
update)
 */
if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON ||
 (nv_connector->underscan == UNDERSCAN_AUTO &&
- nv_connector->edid &&
  drm_detect_hdmi_monitor(nv_connector->edid {
u32 bX = nv_connector->underscan_hborder;
u32 bY = nv_connector->underscan_vborder;
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 1/2] disp: activate dual link TMDS links only when possible

2015-11-03 Thread Ilia Mirkin
From: Hauke Mehrtens <ha...@hauke-m.de>

Without this patch a pixel clock rate above 165 MHz on a TMDS link is
assumed to be dual link. This is true for DVI, but not for HDMI. HDMI
supports no dual link, but it supports pixel clock rates above 165 MHz.
Only activate Dual Link mode when it is actually possible and requested.

Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de>
[imirkin: check for hdmi monitor for computing proto, use sor ctrl to
 enable extra config bit]
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nv50_display.c   | 18 ++
 drm/nouveau/nvkm/engine/disp/gf119.c |  2 +-
 drm/nouveau/nvkm/engine/disp/nv50.c  |  2 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
index c053c50..57781fd 100644
--- a/drm/nouveau/nv50_display.c
+++ b/drm/nouveau/nv50_display.c
@@ -1961,10 +1961,20 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct 
drm_display_mode *umode,
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_TMDS:
if (nv_encoder->dcb->sorconf.link & 1) {
-   if (mode->clock < 165000)
-   proto = 0x1;
-   else
-   proto = 0x5;
+   proto = 0x1;
+   /* Only enable dual-link if:
+*  - DCB says we can
+*  - Need to (i.e. rate > 165MHz)
+*  - Not an HDMI monitor, since there's no dual-link
+*on HDMI. Of course in order to determine that,
+*we need the EDID. So if no EDID, just let it
+*slide.
+*/
+   if (mode->clock >= 165000 &&
+   nv_encoder->dcb->duallink_possible &&
+   (!nv_connector->edid ||
+!drm_detect_hdmi_monitor(nv_connector->edid)))
+   proto |= 0x4;
} else {
proto = 0x2;
}
diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c 
b/drm/nouveau/nvkm/engine/disp/gf119.c
index 186fd3a..f031466 100644
--- a/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (*conf == 5)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c 
b/drm/nouveau/nvkm/engine/disp/nv50.c
index 32e73a9..4226d21 100644
--- a/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (*conf == 5)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler

2015-11-03 Thread Ilia Mirkin
Some Fermi's apparently also allow 297MHz clocks, so create a parameter
which allows end-users to set it themselves until we have a reliable way
to determine the board's maximum pixel clocks.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nouveau_connector.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c
index 4c8f6ef..57bea79 100644
--- a/drm/nouveau/nouveau_connector.c
+++ b/drm/nouveau/nouveau_connector.c
@@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: 
enabled)");
 int nouveau_duallink = 1;
 module_param_named(duallink, nouveau_duallink, int, 0400);
 
+MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)");
+int nouveau_hdmimhz = 0;
+module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400);
+
 struct nouveau_encoder *
 find_encoder(struct drm_connector *connector, int type)
 {
@@ -809,12 +813,23 @@ nouveau_connector_get_modes(struct drm_connector 
*connector)
 }
 
 static unsigned
-get_tmds_link_bandwidth(struct drm_connector *connector)
+get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi)
 {
struct nouveau_connector *nv_connector = nouveau_connector(connector);
struct nouveau_drm *drm = nouveau_drm(connector->dev);
struct dcb_output *dcb = nv_connector->detected_encoder->dcb;
 
+   if (hdmi) {
+   if (nouveau_hdmimhz > 0)
+   return nouveau_hdmimhz * 1000;
+   /* Note: these limits are conservative, some Fermi's
+* can do 297 MHz. Unclear how this can be determined.
+*/
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+   return 297000;
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+   return 225000;
+   }
if (dcb->location != DCB_LOC_ON_CHIP ||
drm->device.info.chipset >= 0x46)
return 165000;
@@ -835,6 +850,7 @@ nouveau_connector_mode_valid(struct drm_connector 
*connector,
struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
unsigned min_clock = 25000, max_clock = min_clock;
unsigned clock = mode->clock;
+   bool hdmi;
 
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_LVDS:
@@ -847,8 +863,11 @@ nouveau_connector_mode_valid(struct drm_connector 
*connector,
max_clock = 40;
break;
case DCB_OUTPUT_TMDS:
-   max_clock = get_tmds_link_bandwidth(connector);
-   if (nouveau_duallink && nv_encoder->dcb->duallink_possible)
+   hdmi = !nv_connector->edid ||
+   drm_detect_hdmi_monitor(nv_connector->edid);
+   max_clock = get_tmds_link_bandwidth(connector, hdmi);
+   if (!hdmi && nouveau_duallink &&
+   nv_encoder->dcb->duallink_possible)
max_clock *= 2;
break;
case DCB_OUTPUT_ANALOG:
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v3 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler

2015-11-03 Thread Ilia Mirkin
Some Fermi's apparently also allow 297MHz clocks, so create a parameter
which allows end-users to set it themselves until we have a reliable way
to determine the board's maximum pixel clocks.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nouveau_connector.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c
index 4c8f6ef..8dd384b 100644
--- a/drm/nouveau/nouveau_connector.c
+++ b/drm/nouveau/nouveau_connector.c
@@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: 
enabled)");
 int nouveau_duallink = 1;
 module_param_named(duallink, nouveau_duallink, int, 0400);
 
+MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)");
+int nouveau_hdmimhz = 0;
+module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400);
+
 struct nouveau_encoder *
 find_encoder(struct drm_connector *connector, int type)
 {
@@ -809,12 +813,23 @@ nouveau_connector_get_modes(struct drm_connector 
*connector)
 }
 
 static unsigned
-get_tmds_link_bandwidth(struct drm_connector *connector)
+get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi)
 {
struct nouveau_connector *nv_connector = nouveau_connector(connector);
struct nouveau_drm *drm = nouveau_drm(connector->dev);
struct dcb_output *dcb = nv_connector->detected_encoder->dcb;
 
+   if (hdmi) {
+   if (nouveau_hdmimhz > 0)
+   return nouveau_hdmimhz * 1000;
+   /* Note: these limits are conservative, some Fermi's
+* can do 297 MHz. Unclear how this can be determined.
+*/
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+   return 297000;
+   if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+   return 225000;
+   }
if (dcb->location != DCB_LOC_ON_CHIP ||
drm->device.info.chipset >= 0x46)
return 165000;
@@ -835,6 +850,7 @@ nouveau_connector_mode_valid(struct drm_connector 
*connector,
struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
unsigned min_clock = 25000, max_clock = min_clock;
unsigned clock = mode->clock;
+   bool hdmi;
 
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_LVDS:
@@ -847,8 +863,10 @@ nouveau_connector_mode_valid(struct drm_connector 
*connector,
max_clock = 40;
break;
case DCB_OUTPUT_TMDS:
-   max_clock = get_tmds_link_bandwidth(connector);
-   if (nouveau_duallink && nv_encoder->dcb->duallink_possible)
+   hdmi = drm_detect_hdmi_monitor(nv_connector->edid);
+   max_clock = get_tmds_link_bandwidth(connector, hdmi);
+   if (!hdmi && nouveau_duallink &&
+   nv_encoder->dcb->duallink_possible)
max_clock *= 2;
break;
case DCB_OUTPUT_ANALOG:
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v3 1/2] disp: activate dual link TMDS links only when possible

2015-11-03 Thread Ilia Mirkin
From: Hauke Mehrtens <ha...@hauke-m.de>

Without this patch a pixel clock rate above 165 MHz on a TMDS link is
assumed to be dual link. This is true for DVI, but not for HDMI. HDMI
supports no dual link, but it supports pixel clock rates above 165 MHz.
Only activate Dual Link mode when it is actually possible and requested.

Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de>
[imirkin: check for hdmi monitor for computing proto, use sor ctrl to
 enable extra config bit]
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nv50_display.c   | 15 +++
 drm/nouveau/nvkm/engine/disp/gf119.c |  2 +-
 drm/nouveau/nvkm/engine/disp/nv50.c  |  2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
index c053c50..bdaba91 100644
--- a/drm/nouveau/nv50_display.c
+++ b/drm/nouveau/nv50_display.c
@@ -1961,10 +1961,17 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct 
drm_display_mode *umode,
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_TMDS:
if (nv_encoder->dcb->sorconf.link & 1) {
-   if (mode->clock < 165000)
-   proto = 0x1;
-   else
-   proto = 0x5;
+   proto = 0x1;
+   /* Only enable dual-link if:
+*  - Need to (i.e. rate > 165MHz)
+*  - DCB says we can
+*  - Not an HDMI monitor, since there's no dual-link
+*on HDMI.
+*/
+   if (mode->clock >= 165000 &&
+   nv_encoder->dcb->duallink_possible &&
+   !drm_detect_hdmi_monitor(nv_connector->edid))
+   proto |= 0x4;
} else {
proto = 0x2;
}
diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c 
b/drm/nouveau/nvkm/engine/disp/gf119.c
index 186fd3a..f031466 100644
--- a/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (*conf == 5)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c 
b/drm/nouveau/nvkm/engine/disp/nv50.c
index 32e73a9..4226d21 100644
--- a/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 
pclk, u32 *conf)
switch (outp->info.type) {
case DCB_OUTPUT_TMDS:
*conf = (ctrl & 0x0f00) >> 8;
-   if (pclk >= 165000)
+   if (*conf == 5)
*conf |= 0x0100;
break;
case DCB_OUTPUT_LVDS:
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Nouveau for FreeBSD

2015-11-03 Thread Ilia Mirkin
Nouveau kernel module has a largely os-agnostic "core" component (called
nvkm/nvif now) which encompasses the actual operation of the GPU. The drm
wrapper around it provides the relevant interfaces for KMS/ioctls/etc. Any
port would want the ioctl bits as well, since that's what the userspace
mesa/ddx components rely on.

That said, I'm not aware of any serious effort to port nouveau to any other
OS.

  -ilia

On Wed, Nov 4, 2015 at 2:08 AM,  wrote:

> Is anyone actually and or actively working on this?
> Github.com/pathscale/pscnv is totally bitrot but waaay more portable base.
> Nouveau made hard Linux assumptions that will be difficult to overcome
> afaik.
>
>
>
> *From: *Curtis Hamilton
> *Sent: *Wednesday, November 4, 2015 08:06
> *To: *nouveau@lists.freedesktop.org
> *Subject: *[Nouveau] Nouveau for FreeBSD
>
> Any progress on the FreeBSD front?
>
>
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
>
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Documentation request for MP warp error 0x10

2015-11-06 Thread Ilia Mirkin
On Fri, Nov 6, 2015 at 3:59 PM, Robert Morell <rmor...@nvidia.com> wrote:
> On Fri, Oct 02, 2015 at 06:05:21PM -0400, Ilia Mirkin wrote:
>> Could you advise what the proper way of indicating
>> that the memory is "global" to the op? I'm sure I'm just missing
>> something simple. If you show me what to look for in SM35 I can
>> probably find it on my own for SM20/SM30/SM50.
>
> Sorry again for the delay.  Here's what I've been able to find out about
> the generic thread address space (used by the SMs) and what types of
> memory it contains.  Hopefully this clears things up.
>
>
> Local memory is a per-thread space.
> Shared memory is a per-CTA space (compute shaders only).
>
> LDL and STL instructions access local memory with a zero offset.
> LDS, LDSLK, STS, and STSCUL instructions access shared memory with a zero
> offset.
>
> LD, ST, RED, ATOM, and CCTL.D instructions access the generic thread address
> space, which is layered on top of the channel's virtual address space.
>
> In the generic thread address space, there are 16MB windows into local and
> shared memory; everything not in a Local or Shared address window accesses
> global virtual memory.
>
> The local window offset within the generic thread address space is determined
> by the SetShaderLocalMemoryWindow class method (offset 0x77c in classes *97 
> and
> *c0).
>
> The shared window offset within the generic thread address space is determined
> by the SetShaderSharedMemoryWindow class method (offset 0x214 in classes *c0).
>
> For both methods, the offset is in bytes, but the window must be aligned to a
> 16MB boundary (so the lower 24 bits of the data must be zero). The upper 32
> bits of the windows are hard-coded to 0 (so they must be placed within the
> lower 4GB of address space).
>
> Generally, it is expected that software will reserve ranges in the global
> virtual address space where these windows will be placed. (Otherwise anything
> mapped there will be inaccessible to shaders.)
>
> For graphics shaders, the shared address space logic does not exist, so there
> is no need to reserve virtual memory for it.

Hi Robert, thanks so much for getting back to me. I believe I've
understood what you've said, but please confirm:

In order for ATOM.*/RED.* to work, the addresses in question must
*NOT* be inside of the 16MB local/shared windows. So if I'm getting
that error, the address must be inside.

If so, this may be a reasonable explanation for what I'm seeing --
while I knew about the local/shared windows, I didn't realize that the
windows were 16MB-sized.

Thanks again,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] pci: enable c800 magic for Medion Erazer X7827

2015-10-31 Thread Ilia Mirkin
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91557
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nvkm/engine/device/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drm/nouveau/nvkm/engine/device/pci.c 
b/drm/nouveau/nvkm/engine/device/pci.c
index e8eb14e..20318f4 100644
--- a/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drm/nouveau/nvkm/engine/device/pci.c
@@ -678,6 +678,7 @@ nvkm_device_pci_10de_1189[] = {
 static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_1199[] = {
{ 0x1458, 0xd001, "GeForce GTX 760" },
+   { 0x1462, 0x1106, "GeForce GTX 780M", { .War00C800_0 = true } }, /* 
Medion Erazer X7827 */
{}
 };
 
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] gr: document mp error 0x10

2015-10-07 Thread Ilia Mirkin
NVIDIA provided the documentation for mp error 0x10, INVALID_ADDR_SPACE,
which apparently happens when trying to use an atomic operation on
local or shared memory (instead of global memory).

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nvkm/engine/gr/gf100.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c 
b/drm/nouveau/nvkm/engine/gr/gf100.c
index f1358a5..dda7a7d 100644
--- a/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -882,6 +882,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = {
{ 0x0d, "GPR_OUT_OF_BOUNDS" },
{ 0x0e, "MEM_OUT_OF_BOUNDS" },
{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+   { 0x10, "INVALID_ADDR_SPACE" },
{ 0x11, "INVALID_PARAM" },
{}
 };
-- 
2.4.9

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 2/2] gr: add FERMI_COMPUTE_B class to GF110+

2015-10-07 Thread Ilia Mirkin
GF110+ supports both the A and B compute classes, make sure to accept
both.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nvkm/engine/gr/gf110.c | 1 +
 drm/nouveau/nvkm/engine/gr/gf117.c | 1 +
 drm/nouveau/nvkm/engine/gr/gf119.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drm/nouveau/nvkm/engine/gr/gf110.c 
b/drm/nouveau/nvkm/engine/gr/gf110.c
index d131874..d081ee4 100644
--- a/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -98,6 +98,7 @@ gf110_gr = {
{ -1, -1, FERMI_B, &gf100_fermi },
{ -1, -1, FERMI_C, &gf100_fermi },
{ -1, -1, FERMI_COMPUTE_A },
+   { -1, -1, FERMI_COMPUTE_B },
{}
}
 };
diff --git a/drm/nouveau/nvkm/engine/gr/gf117.c 
b/drm/nouveau/nvkm/engine/gr/gf117.c
index 28483d8..d8e8af4 100644
--- a/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -135,6 +135,7 @@ gf117_gr = {
{ -1, -1, FERMI_B, &gf100_fermi },
{ -1, -1, FERMI_C, &gf100_fermi },
{ -1, -1, FERMI_COMPUTE_A },
+   { -1, -1, FERMI_COMPUTE_B },
{}
}
 };
diff --git a/drm/nouveau/nvkm/engine/gr/gf119.c 
b/drm/nouveau/nvkm/engine/gr/gf119.c
index 9811a72..01faf9a 100644
--- a/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -189,6 +189,7 @@ gf119_gr = {
{ -1, -1, FERMI_B, &gf100_fermi },
{ -1, -1, FERMI_C, &gf100_fermi },
{ -1, -1, FERMI_COMPUTE_A },
+   { -1, -1, FERMI_COMPUTE_B },
{}
}
 };
-- 
2.4.9

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] gem: return only valid domain when there's only one

2015-10-19 Thread Ilia Mirkin
On nv50+, we restrict the valid domains to just the one where the buffer
was originally created. However after the buffer is evicted to system
memory, we might move it back to a different domain that was not
originally valid. When sharing the buffer and retrieving its GEM_INFO
data, we still want the domain that will be valid for this buffer in a
pushbuf, not the one where it currently happens to be.

This resolves fdo#92504 and several others. These are due to suspend
evicting all buffers, making it more likely that they temporarily end up
in the wrong place.

Cc: sta...@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92504
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nouveau_gem.c | 5 +++--
 lib/include/nvif/os.h | 6 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c
index ce74ab1..a108cc3 100644
--- a/drm/nouveau/nouveau_gem.c
+++ b/drm/nouveau/nouveau_gem.c
@@ -229,11 +229,12 @@ nouveau_gem_info(struct drm_file *file_priv, struct 
drm_gem_object *gem,
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nvkm_vma *vma;
 
-   if (nvbo->bo.mem.mem_type == TTM_PL_TT)
+   if (is_power_of_2(nvbo->valid_domains))
+   rep->domain = nvbo->valid_domains;
+   else if (nvbo->bo.mem.mem_type == TTM_PL_TT)
rep->domain = NOUVEAU_GEM_DOMAIN_GART;
else
rep->domain = NOUVEAU_GEM_DOMAIN_VRAM;
-
rep->offset = nvbo->bo.offset;
if (cli->vm) {
vma = nouveau_bo_vma_find(nvbo, cli->vm);
diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
index 552ecf7..2df3048 100644
--- a/lib/include/nvif/os.h
+++ b/lib/include/nvif/os.h
@@ -135,6 +135,12 @@ typedef dma_addr_t resource_size_t;
 
 #define IS_ENABLED(x) IS_ENABLED_##x
 
+static inline bool
+is_power_of_2(unsigned long n)
+{
+   return (n != 0 && ((n & (n - 1)) == 0));
+}
+
 static inline int
 order_base_2(u64 base)
 {
-- 
2.4.10

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nvc0: do upload-time fixups for interpolation parameters

2015-10-20 Thread Ilia Mirkin
WIP: only support SM35, need to add SM20 and SM50 support

Unfortunately flatshading is an all-or-nothing proposition on nvc0,
while GL 3.0 calls for the ability to selectively specify explicit
interpolation parameters on gl_Color/gl_SecondaryColor which would
override the flatshading setting. This allows us to fix up the
interpolation settings after shader generation based on rasterizer
settings.

While we're at it, we can add support for dynamically forcing all
(non-flat) shader inputs to be interpolated per-sample, which allows
st/mesa to not generate variants for these.

Fixes the remaining failing glsl-1.30/execution/interpolation piglits.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

Incomplete as per above. Wanted to get it out there in case there was
any feedback. This will only work on GK110/GK208 as-is.

 .../drivers/nouveau/codegen/nv50_ir_driver.h   |  5 +++
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++--
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 14 +---
 .../drivers/nouveau/codegen/nv50_ir_target.cpp | 39 +-
 .../drivers/nouveau/codegen/nv50_ir_target.h   | 21 
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 23 -
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h|  6 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  2 +-
 .../drivers/nouveau/nvc0/nvc0_shader_state.c   | 16 -
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |  3 --
 .../drivers/nouveau/nvc0/nvc0_state_validate.c |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h   |  2 +-
 12 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 14acb60..2f5654f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -99,6 +99,7 @@ struct nv50_ir_prog_info
   uint8_t sourceRep;  /* NV50_PROGRAM_IR */
   const void *source;
   void *relocData;
+  void *interpData;
   struct nv50_ir_prog_symbol *syms;
   uint16_t numSyms;
} bin;
@@ -198,6 +199,10 @@ extern void nv50_ir_relocate_code(void *relocData, 
uint32_t *code,
   uint32_t libPos,
   uint32_t dataPos);
 
+extern void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+  bool force_per_sample, bool flatshade);
+
 /* obtain code that will be shared among programs */
 extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 8f15429..d712c9c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
 }
 
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+  bool force_persample_interp, bool flatshade)
+{
+   int ipa = entry->ipa;
+   int reg = entry->reg;
+   int loc = entry->loc;
+
+   if (flatshade &&
+   (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+  ipa = NV50_IR_INTERP_FLAT;
+  reg = 0xff;
+   } else if (force_persample_interp &&
+  (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+  (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+  ipa |= NV50_IR_INTERP_CENTROID;
+   }
+   code[loc + 1] &= ~(0xf << 19);
+   code[loc + 1] |= (ipa & 0x3) << 21;
+   code[loc + 1] |= (ipa & 0xc) << (19 - 2);
+   code[loc + 0] &= ~(0xff << 23);
+   code[loc + 0] |= reg << 23;
+}
+
 void
 CodeEmitterGK110::emitINTERP(const Instruction *i)
 {
@@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
if (i->saturate)
   code[1] |= 1 << 18;
 
-   if (i->op == OP_PINTERP)
+   if (i->op == OP_PINTERP) {
   srcId(i->src(1), 23);
-   else
+  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+   } else {
   code[0] |= 0xff << 23;
+  addInterp(i->ipa, 0xff, interpApply);
+   }
 
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 21099d5..0489ef8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1057,7 +1057,7 @@ bool Source::scanDeclaration(const struct 
tgsi_full_declaration *decl)
default:
   

Re: [Nouveau] 4K ~ 3840x2160 resolution and frame buffer support

2015-10-08 Thread Ilia Mirkin
HDMI with nouveau is limited to 165 MHz unfortunately. This is
incorrect (the hardware can do more), but nevertheless, that's the
present limit. If you can connect the screen over dual-link DVI or DP,
you should be able to get the full resolution.

On Thu, Oct 8, 2015 at 5:18 PM, James Lehman <ja...@akrobiz.com> wrote:
> Thank you for your quick reply.
>
> I have a Gigabyte GT-730 2GB card hooked directly via a high quality HDMI
> cable to a 65 inch Samsung curved screen LCD 4K monitor.
>
> I'm not sure what I am looking for in dmesg and xorg log.
>
> Please advise.
>
> I have built the 3.19.8 kernel and enabled frame buffer support.
>
> I can see 7 out of the 8 penguins when I boot.
>
> My ultimate goal is to be able to boot to a 3840x2160 32 bit (frame buffer)
> console.
>
> Then go into Xfce, or not, as needed.
>
> I like to write frame buffer apps that run in the console.
>
> Thank you.
>
>
>
> On 10/08/2015 04:44 PM, Ilia Mirkin wrote:
>>
>> If you only get 1920x1080 in X, chances are that nouveau doesn't
>> believe it can do 3840x2160 for one reason or another. There are a
>> number of reasons why this might be the case, please provide dmesg,
>> xorg log, and information on how the monitor is connected (including
>> any A->B type adapters).
>>
>>-ilia
>>
>> On Thu, Oct 8, 2015 at 4:21 PM, James Lehman <ja...@akrobiz.com> wrote:
>>>
>>> Hello. I hope this is in the right place.
>>>
>>> I just built a new machine and installed Xubuntu and I'm still figuring
>>> things out.
>>>
>>> I'm interested in working with The Linux Frame Buffer.
>>>
>>> Many years ago, I started a project called ezfb and I would like to
>>> continue
>>> developing it on a machine capable of 4K.
>>>
>>> Is it possible to have a frame buffer that does 3840x2160 ?
>>>
>>> With nouveau installed as my video driver, I can get to a frame buffer
>>> console of 1920x1080, but I am limited to that resolution in Xfce as
>>> well.
>>>
>>> With a proprietary nVidia driver installed I can get to 4K in Xfce, but
>>> my
>>> frame buffer is 640x480, 4 bit color. When I try fbset to change
>>> anything,
>>> it doesn't work.
>>>
>>> I have not even bothered to try my code to see if any of it can actually
>>> work in the frame buffers I get.
>>>
>>> Still scratching my head
>>>
>>> Thank you for your time.
>>>
>>> James.
>>>
>>>
>>> ___
>>> Nouveau mailing list
>>> Nouveau@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/nouveau
>
>
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] 4K ~ 3840x2160 resolution and frame buffer support

2015-10-08 Thread Ilia Mirkin
If you only get 1920x1080 in X, chances are that nouveau doesn't
believe it can do 3840x2160 for one reason or another. There are a
number of reasons why this might be the case, please provide dmesg,
xorg log, and information on how the monitor is connected (including
any A->B type adapters).

  -ilia

On Thu, Oct 8, 2015 at 4:21 PM, James Lehman <ja...@akrobiz.com> wrote:
> Hello. I hope this is in the right place.
>
> I just built a new machine and installed Xubuntu and I'm still figuring
> things out.
>
> I'm interested in working with The Linux Frame Buffer.
>
> Many years ago, I started a project called ezfb and I would like to continue
> developing it on a machine capable of 4K.
>
> Is it possible to have a frame buffer that does 3840x2160 ?
>
> With nouveau installed as my video driver, I can get to a frame buffer
> console of 1920x1080, but I am limited to that resolution in Xfce as well.
>
> With a proprietary nVidia driver installed I can get to 4K in Xfce, but my
> frame buffer is 640x480, 4 bit color. When I try fbset to change anything,
> it doesn't work.
>
> I have not even bothered to try my code to see if any of it can actually
> work in the frame buffers I get.
>
> Still scratching my head
>
> Thank you for your time.
>
> James.
>
>
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Ilia Mirkin
On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
> This patch looks fine except that it should be a bit more normalized. I
> mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for
> PUSH_SPACE calls, sometimes you add it sometimes not.

Meh. We need to get our error checking situation straight, but this
isn't the patch to do it in.

>
> Did you run a full piglit test this time ? :)

Nope, but I ran a full piglit before this patch. Almost took down my
box. Probably won't be running it again for this patch.

>
> See my comment below.
>
>
> On 10/10/2015 11:09 AM, Ilia Mirkin wrote:
>>
>> We still have to push everything out, might as well kick earlier and
>> flip pushbufs when we know we'll need it. This resolves some issues with
>> the new policy of making sure that we always leave a bit of room at the
>> end for fences.
>>
>> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>   src/gallium/drivers/nouveau/nv50/nv50_shader_state.c |  9 ++---
>>   src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16
>> +++-
>>   src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20
>> +---
>>   3 files changed, 10 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
>> b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
>> index fdde11f..941555f 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
>> @@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
>>  PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
>>   }
>>   while (words) {
>> -   unsigned nr;
>> -
>> -   if (!PUSH_SPACE(push, 16))
>> -  break;
>> -   nr = PUSH_AVAIL(push);
>> -   assert(nr >= 16);
>> -   nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
>> +   unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
>>   +   PUSH_SPACE(push, nr + 3);
>
>
> This PUSH_SPACE call doesn't seem to be needed for me because
> NV50_PUSH_EXPLICIT_SPACE_CHECKING is not set and the following BEGIN_XXX
> calls will allocate space.

I want to ensure that both of the below commands are in the same
batch. Not sure if it's necessary, but... don't want to find out. They
were in the same batch before. And this batch stuff is what was
causing the M2MF errors I was seeing earlier.

>
>
>>  BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
>>  PUSH_DATA (push, (start << 8) | b);
>>  BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
>> b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
>> index be51407..9a3fd1e 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
>> @@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
>>  PUSH_DATA (push, 0);
>>while (count) {
>> -  unsigned nr;
>> -
>> -  if (!PUSH_SPACE(push, 16))
>> - break;
>> -  nr = PUSH_AVAIL(push);
>> -  assert(nr >= 16);
>> -  nr = MIN2(count, nr - 1);
>> -  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
>> +  unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
>>   BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
>> PUSH_DATAp(push, src, nr);
>> @@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
>>  nouveau_pushbuf_validate(push);
>>while (words) {
>> -  unsigned nr;
>> -
>> -  nr = PUSH_AVAIL(push);
>> -  nr = MIN2(nr - 7, words);
>> -  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
>> +  unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
>>   +  PUSH_SPACE(push, nr + 7);
>> BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
>> PUSH_DATAh(push, bo->offset + base);
>> PUSH_DATA (push, bo->offset + base);
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
>> index aaec60a..d459dd6 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
>> @@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
>>  nouve

Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Ilia Mirkin
On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
>
> On 10/10/2015 09:42 PM, Ilia Mirkin wrote:
>>
>> On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
>> <samuel.pitoi...@gmail.com> wrote:
>>>
>>> This patch looks fine except that it should be a bit more normalized. I
>>> mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for
>>> PUSH_SPACE calls, sometimes you add it sometimes not.
>>
>> Meh. We need to get our error checking situation straight, but this
>> isn't the patch to do it in.
>
>
> Yeah, but this needs to be clarified.

What does?
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] [PATCH] nouveau: avoid emitting new fences unnecessarily

2015-10-10 Thread Ilia Mirkin
On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
> Does this fix those texelFetch piglit tests ? Or is it the second patch ?

This patch "fixes" the initial texelFetch piglit failures. However it
creates some fresh texelFetch piglit failures -- that test is
interesting because it does a lot of draws with minimal state changes
between them. Those ones are fixed by the second patch. But really
these are all different problems, which interact with each other in
frustrating ways.

>
> Anyway, this patch is :
>
> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
>
>
> On 10/10/2015 08:12 AM, Ilia Mirkin wrote:
>>
>> Right now we emit on every kick, but this is only necessary if something
>> will ever be able to observe that the fence completed. If there are no
>> refs, leave the fence alone and emit it another day.
>>
>> This also happens to work around an issue for the kick handler -- a kick
>> can be a result of e.g. nouveau_bo_wait or explicit kick, or it can be
>> due to lack of space in the pushbuf. We want the emit to happen in the
>> current batch, so we want there to always be enough space. However an
>> explicit kick could take the reserved space for the implicitly-triggered
>> kick's fence emission if it happened right after. With the new mechanism,
>> hopefully there's no way to cause two fences to be emitted into the same
>> reserved space.
>>
>> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
>> Cc: mesa-sta...@lists.freedesktop.org
>> Fixes: 47d11990b (nouveau: make sure there's always room to emit a fence)
>> ---
>>   src/gallium/drivers/nouveau/nouveau_fence.c | 12 +---
>>   1 file changed, 9 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c
>> b/src/gallium/drivers/nouveau/nouveau_fence.c
>> index ee4e08d..18b1592 100644
>> --- a/src/gallium/drivers/nouveau/nouveau_fence.c
>> +++ b/src/gallium/drivers/nouveau/nouveau_fence.c
>> @@ -190,8 +190,10 @@ nouveau_fence_wait(struct nouveau_fence *fence)
>>  /* wtf, someone is waiting on a fence in flush_notify handler? */
>>  assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
>>   -   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
>> +   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
>> +  PUSH_SPACE(screen->pushbuf, 8);
>> nouveau_fence_emit(fence);
>> +   }
>>if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
>> if (nouveau_pushbuf_kick(screen->pushbuf,
>> screen->pushbuf->channel))
>> @@ -224,8 +226,12 @@ nouveau_fence_wait(struct nouveau_fence *fence)
>>   void
>>   nouveau_fence_next(struct nouveau_screen *screen)
>>   {
>> -   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
>> -  nouveau_fence_emit(screen->fence.current);
>> +   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
>> +  if (screen->fence.current->ref > 1)
>> + nouveau_fence_emit(screen->fence.current);
>> +  else
>> + return;
>> +   }
>>nouveau_fence_ref(NULL, &screen->fence.current);
>>
>
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Ilia Mirkin
On Sat, Oct 10, 2015 at 4:21 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
>
> On 10/10/2015 09:58 PM, Ilia Mirkin wrote:
>>
>> On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset
>> <samuel.pitoi...@gmail.com> wrote:
>>>
>>>
>>> On 10/10/2015 09:42 PM, Ilia Mirkin wrote:
>>>>
>>>> On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
>>>> <samuel.pitoi...@gmail.com> wrote:
>>>>>
>>>>> This patch looks fine except that it should be a bit more normalized. I
>>>>> mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same
>>>>> for
>>>>> PUSH_SPACE calls, sometimes you add it sometimes not.
>>>>
>>>> Meh. We need to get our error checking situation straight, but this
>>>> isn't the patch to do it in.
>>>
>>>
>>> Yeah, but this needs to be clarified.
>>
>> What does?
>
>
> I mean, we should either use PUSH_SPACE everywhere or not at all, and always
> break (or not) when PUSH_SPACE fails.
> That's really a minor issue.

It's actually a major issue. Error-handling is practically
non-existent. There are a couple of spots here and there, but it
doesn't really scale up. I guess I (semi-)accidentally removed a
couple of spots that error checked, but, again, meh. Doing this for
real will require some careful thought.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nouveau: make sure there's always room to emit a fence

2015-10-05 Thread Ilia Mirkin
I started seeing a lot of situations on nv30 where fence emission
wouldn't fit into the previous buffer (causing assertions). This ensures
that whenever checking for space, we always leave a bit of extra room
for the fence emission commands. Adjusts the nv30 and nvc0 fence
emission logic to bypass the space checking as well.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nouveau_winsys.h   | 2 ++
 src/gallium/drivers/nouveau/nv30/nv30_screen.c | 4 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 ++-
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h 
b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 389a229..a44fd3e 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
 static inline bool
 PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
 {
+   /* Provide a buffer so that fences always have room to be emitted */
+   size += 8;
if (PUSH_AVAIL(push) < size)
   return nouveau_pushbuf_space(push, size, 0, 0) == 0;
return true;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 39267b3..335c163 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -347,7 +347,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, 
uint32_t *sequence)
 
*sequence = ++screen->base.fence.sequence;
 
-   BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
+   assert(PUSH_AVAIL(push) >= 3);
+   PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
+  (2 /* size */ << 18) | (7 /* subchan */ << 13));
PUSH_DATA (push, 0);
PUSH_DATA (push, *sequence);
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6012ff6..812b246 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -388,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 
*sequence)
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
 
+   assert(PUSH_AVAIL(push) >= 5);
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 32da76c..afd91e6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -537,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 
*sequence)
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
 
-   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+   assert(PUSH_AVAIL(push) >= 5);
+   PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
PUSH_DATA (push, *sequence);
-- 
2.4.9

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Ilia Mirkin
We still have to push everything out, might as well kick earlier and
flip pushbufs when we know we'll need it. This resolves some issues with
the new policy of making sure that we always leave a bit of room at the
end for fences.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_shader_state.c |  9 ++---
 src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20 +---
 3 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index fdde11f..941555f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
 }
 while (words) {
-   unsigned nr;
-
-   if (!PUSH_SPACE(push, 16))
-  break;
-   nr = PUSH_AVAIL(push);
-   assert(nr >= 16);
-   nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+   unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
 
+   PUSH_SPACE(push, nr + 3);
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (start << 8) | b);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c 
b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index be51407..9a3fd1e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
PUSH_DATA (push, 0);
 
while (count) {
-  unsigned nr;
-
-  if (!PUSH_SPACE(push, 16))
- break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 1);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+  unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
 
   BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
   PUSH_DATAp(push, src, nr);
@@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
 
while (words) {
-  unsigned nr;
-
-  nr = PUSH_AVAIL(push);
-  nr = MIN2(nr - 7, words);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+  unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
 
+  PUSH_SPACE(push, nr + 7);
   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, bo->offset + base);
   PUSH_DATA (push, bo->offset + base);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index aaec60a..d459dd6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
 
while (count) {
-  unsigned nr;
+  unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
 
-  if (!PUSH_SPACE(push, 16))
+  if (!PUSH_SPACE(push, nr + 9))
  break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 9);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
 
   BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
   PUSH_DATAh(push, dst->offset + offset);
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
 
while (count) {
-  unsigned nr;
+  unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));
 
-  if (!PUSH_SPACE(push, 16))
+  if (!PUSH_SPACE(push, nr + 10))
  break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 8);
-  nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
 
   BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, dst->offset + offset);
@@ -571,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
PUSH_DATA (push, bo->offset + base);
 
while (words) {
-  unsigned nr = PUSH_AVAIL(push);
-  nr = MIN2(nr, words);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+  unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);
 
   PUSH_SPACE(push, nr + 2);
   PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
-- 
2.4.9

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nouveau: avoid emitting new fences unnecessarily

2015-10-10 Thread Ilia Mirkin
Right now we emit on every kick, but this is only necessary if something
will ever be able to observe that the fence completed. If there are no
refs, leave the fence alone and emit it another day.

This also happens to work around an issue for the kick handler -- a kick
can be a result of e.g. nouveau_bo_wait or explicit kick, or it can be
due to lack of space in the pushbuf. We want the emit to happen in the
current batch, so we want there to always be enough space. However an
explicit kick could take the reserved space for the implicitly-triggered
kick's fence emission if it happened right after. With the new mechanism,
hopefully there's no way to cause two fences to be emitted into the same
reserved space.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
Cc: mesa-sta...@lists.freedesktop.org
Fixes: 47d11990b (nouveau: make sure there's always room to emit a fence)
---
 src/gallium/drivers/nouveau/nouveau_fence.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c 
b/src/gallium/drivers/nouveau/nouveau_fence.c
index ee4e08d..18b1592 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -190,8 +190,10 @@ nouveau_fence_wait(struct nouveau_fence *fence)
/* wtf, someone is waiting on a fence in flush_notify handler? */
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
 
-   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+  PUSH_SPACE(screen->pushbuf, 8);
   nouveau_fence_emit(fence);
+   }
 
if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
   if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
@@ -224,8 +226,12 @@ nouveau_fence_wait(struct nouveau_fence *fence)
 void
 nouveau_fence_next(struct nouveau_screen *screen)
 {
-   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
-  nouveau_fence_emit(screen->fence.current);
+   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
+  if (screen->fence.current->ref > 1)
+ nouveau_fence_emit(screen->fence.current);
+  else
+ return;
+   }
 
nouveau_fence_ref(NULL, >fence.current);
 
-- 
2.4.9

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] RFC: drop glamor from nouveau ddx

2015-07-07 Thread Ilia Mirkin
On Tue, Jul 7, 2015 at 5:05 PM, Ben Skeggs skeg...@gmail.com wrote:
 On 8 July 2015 at 06:06, Ilia Mirkin imir...@alum.mit.edu wrote:
 Ben,

 Looks like the reality is that glamor is just not hooked up properly
 in the nouveau DDX. Mainly it's missing DRI2, which in turn means no
 core GL contexts, and probably lots of other issues. While this could
 probably be fixed somehow, I doubt there's any advantage to using the
 nouveau DDX over something like modesetting nowadays.

 How would you feel about dropping glamor support from the nouveau ddx
 and failing to load for GPUs that don't have EXA support (unless
 AccelMode = none is forced for them). That way it'll fall back to
 loading modesetting which should be properly set up for DRI2 and so
 on.
 I have no objections to this.  In fact, in Fedora at least (I floated
 the idea in #nouveau a while back too), in the near future I plan on
 having the DDX fail to load on all GPUs where modesetting+glamor can
 be used (unless overridden by a config option).

IMHO that's a little strong (I assume you mean nv50+ here?). In fact
I'm planning to complete my Maxwell EXA impl. The current reality is
that modesetting+glamor doesn't render correctly at least on maxwell,
but possibly others as well. The EXA paths are very well tested and
are stable. I think not relying on mesa in the DDX is a nice advantage
too.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] nouveau/compiler: fix trivial compiler warnings

2015-07-08 Thread Ilia Mirkin
Compiler is wrong.

On Wed, Jul 8, 2015 at 2:27 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 nouveau_compiler.c: In function ‘main’:
 nouveau_compiler.c:216:27: warning: ‘code’ may be used uninitialized in
 this function [-Wmaybe-uninitialized]
printf(%08x , code[i / 4]);
^
 nouveau_compiler.c:215:4: warning: ‘size’ may be used uninitialized in
 this function [-Wmaybe-uninitialized]
 for (i = 0; i  size; i += 4) {

 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/gallium/drivers/nouveau/nouveau_compiler.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c 
 b/src/gallium/drivers/nouveau/nouveau_compiler.c
 index 8660498..ca128b5 100644
 --- a/src/gallium/drivers/nouveau/nouveau_compiler.c
 +++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
 @@ -144,7 +144,7 @@ main(int argc, char *argv[])
 const char *filename = NULL;
 FILE *f;
 char text[65536] = {0};
 -   unsigned size, *code;
 +   unsigned size = 0, *code = NULL;

 for (i = 1; i  argc; i++) {
if (!strcmp(argv[i], -a))
 --
 2.4.5

 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] CUDA fixed VA allocations and sparse mappings

2015-07-07 Thread Ilia Mirkin
On Mon, Jul 6, 2015 at 8:42 PM, Andrew Chew ac...@nvidia.com wrote:
 Hello,

 I am currently looking into ways to support fixed virtual address allocations
 and sparse mappings in nouveau, as a step towards supporting CUDA.

 CUDA requires that the GPU virtual address for a given buffer match the
 CPU virtual address.  Therefore, when mapping a CUDA buffer, we have to have
 a way of specifying a particular virtual address to map to (we would ask that
 the CPU virtual address be used).  Currently, as I understand it, the 
 allocator
 implemented in nvkm/core/mm.c, used to provision virtual addresses, doesn't
 allow for this (but it's very easy to modify the allocator slightly to allow
 for this, which I have done locally in my experiments).

 In addition, the CUDA use case typically involves allocating a big chunk of
 address space ahead of time as a way to reserve that chunk for future CUDA
 use.  It then maps individual buffers into that address space as needed.
 Currently, the virtual address allocation is done during buffer mapping, so
 in order to support these sparse mappings, it seems to me that the virtual
 address allocation and buffer mapping need to be decoupled into separate
 operations.

 My current strawman proposal for supporting this is to introduce two new 
 ioctls
 DRM_IOCTL_NOUVEAU_AS_ALLOC and DRM_IOCTL_NOUVEAU_AS_FREE, that look roughly
 like this:

 #define NOUVEAU_AS_ALLOC_FLAGS_FIXED_OFFSET 0x1
 struct drm_nouveau_as_alloc {
 uint64_t pages; /* in, pages */
 uint32_t page_size; /* in, bytes */
 uint32_t flags; /* in */
 uint64_t offset;/* in/out, byte address */
 };

 struct drm_nouveau_as_free {
 uint64_t offset;/* in, byte address */
 };

 These ioctls just call into the allocator to allocate a range of addresses,
 resulting in a struct nvkm_vma that tracks that allocation (or releases the
 struct nvkm_vma back into the virtual address pool in the case of the free
 ioctl).  If NOUVEAU_AS_ALLOC_FLAGS_FIXED_OFFSET is set, offset specifies the
 requested virtual address.  Otherwise, an arbitrary address will be
 allocated.

Well, this can't just be an address space. You still need bo's, if
this is to work with nouveau -- it has to know when to swap things in
and out, when they're used, etc. (and/or move between VRAM and GART
and system/swap). I suspect that your target here are the GK20A and
GM20B chips which don't have dedicated VRAM, but the ioctl's need to
work for everything.

Would it be sufficient to extend NOUVEAU_GEM_NEW or create a
NOUVEAU_GEM_NEW_FIXED or something? IOW, why do you have to separate the
concept of a GEM object and a VM allocation?


 In addition to this, a way to map/unmap buffers is needed.  Ordinarily, one
 would just use DRM_IOCTL_PRIME_FD_TO_HANDLE to import and map a dmabuf into
 gem.  However, this ioctl will try to grab the virtual address range for this
 buffer, which will fail in the CUDA case since the virtual address range
 has been reserved ahead of time.  So we perhaps introduce a set of ioctls
 to map/unmap buffers on top of an already existing virtual address allocation.

My suggestion above is an alternative to this, right? I think dmabufs
tend to be used for sharing between devices. I suspect there's more
going on here that I don't understand though -- I assume the CUDA
use-case is similar to the HSA use-case -- being able to build up data
structures that point to one another on the CPU and then process them
on the GPU? Can you detail a specific use-case perhaps, including the
interactions with the GPU and its address space?

Jérôme, I believe you were doing the HSA kernel implementation.
Perhaps you'd have some feedback on this proposal?

Cheers,

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [RFC PATCH 00/11] Implement ARB_cull_distance

2015-07-08 Thread Ilia Mirkin
On Wed, Jul 8, 2015 at 4:04 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:


 On 25.05.2015 17:07, Ilia Mirkin wrote:

 On Mon, May 25, 2015 at 9:40 AM, Tobias Klausmann
 tobias.johannes.klausm...@mni.thm.de wrote:

 On 25.05.2015 07:17, Dave Airlie wrote:

 On 25 May 2015 at 08:11, Marek Olšák mar...@gmail.com wrote:

 It's the same on Radeon. There are 2x ClipOrCullDistance output
 vectors and a mask saying it should clip or cull or do nothing.

 Marek

 My thinking was gallium should have a single semantic and a mask in
 the shader definition maybe.

 though it doesn't solve the does nvidia do the right thing with
 cull[0] and clip[0], and what is the right thing.

 Dave.


 I'm still convinced that both clip[0] and cull[0] should be possible.
 Plus i
 have written a shader_test for this a while ago which you pushed to
 piglit
 (fs-cull-and-clip-distance-different.shader_test). If i remember right
 nvidia passed that test just fine.

 My take (and note that I last read the extension many months ago) is
 that you're supposed to figure out the max gl_ClipDistance[] written,
 and then write all your cull distances above that. So if you, e.g.,
 have something like

 gl_ClipDistance[5] = 1;
 gl_CullDistance[0] = 1;

 Then it would decide that there are 6 clip distances (or if there's an
 explicit out float gl_ClipDistance[n], then use that), and 1 cull
 distance. In the TGSI, I'm thinking this might look approximately like

 PROPERTY CULL_MASK (16)
 DCL OUT[0], CLIPDIST[0]
 DCL OUT[1], CLIPDIST[1]
 MOV OUT[1].y, 1 (clip distance[5])
 MOV OUT[1].z, 1 (cull distance[0])

 Then basically you'd have

 (rast->clip_enable & shader->actual_clip_writes_mask) | cull_mask =
 the enabled distances
 cull_mask = cull mask

 This would work *very* well for nouveau, not sure how suitable it is
 for other hardware.

 Cheers,

-ilia

 I wonder where this step should be implemented after all. It was brought up
 that llvmpipe already supports cull_distance (it does!), so maybe we should
 implement this in the drivers to evade llvmpipe breakage. Any suggestions
 appreciated :)

 Tobias

I believe that the later feedback from Brian was that my approach was
a bad one and we should use CULLDIST instead, which also reflects how
GL has it. However it's important to specify *somewhere* how many clip
distances are used since it all gets lowered into the 2x vec4. It
might be annoying to derive it from writes to CLIPDIST[0/1].xyzw dest
masks. Although nouveau might already do that anyways...

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] nouveau/compiler: fix trivial compiler warnings

2015-07-08 Thread Ilia Mirkin
I don't mind telling people that the compiler is wrong :)

On Wed, Jul 8, 2015 at 3:53 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:


 On 08.07.2015 21:42, Emil Velikov wrote:

 On 8 July 2015 at 20:34, Tobias Klausmann
 tobias.johannes.klausm...@mni.thm.de wrote:

 Mh i'm not aware of me ever changed the nouveau_compiler. But i'm happy
 to
 see this made you laugh, so it has something positive at least... :/

 Story time:
 This particular compiler warning has been brought up (incl here) four
 or five times. Each time, Ilia feels reluctant about the fix as the
 (gcc) compiler gets it wrong.

 Personally I do not see a problem with explicitly initialising the
 variable at this instance, yet I'm curious for how long Ilia will say
 no to this (type of) patch(es) :-P

 No offence, I just find it funny.
 Emil

 Oh i did even answer in a thread for a patch from Martin where he propose
 the same change (even with the same prefix :D). Ilia maybe you should take
 this after all, as it seems you are haunted by this :P
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 2/2] nv50/ir: fix a compiler warning with debug-only code

2015-07-08 Thread Ilia Mirkin
I suspect the issue is actually that u_debug.h isn't included. It
defines assert to be debug_assert, which in turn is

#define debug_assert(expr) (void)(0 && (expr))

which should cause the relevant var to be seen as used.


On Wed, Jul 8, 2015 at 3:40 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:


 On 08.07.2015 21:34, Emil Velikov wrote:

 On 8 July 2015 at 19:27, Tobias Klausmann
 tobias.johannes.klausm...@mni.thm.de wrote:

 codegen/nv50_ir_emit_nv50.cpp: In member function
 ‘void nv50_ir::CodeEmitterNV50::emitLOAD(const nv50_ir::Instruction*)’:
 codegen/nv50_ir_emit_nv50.cpp:620:12: warning: unused variable ‘offset’
   [-Wunused-variable]
  int32_t offset = i-getSrc(0)-reg.data.offset;

 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
   src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 5 -
   1 file changed, 4 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
 index 67ea6df..86b16f2 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
 @@ -616,8 +616,11 @@ CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
   void
   CodeEmitterNV50::emitLOAD(const Instruction *i)
   {
 -   DataFile sf = i-src(0).getFile();
 +#ifdef DEBUG
  int32_t offset = i-getSrc(0)-reg.data.offset;
 +#endif
 +

 assert is (normally) guarded by NDEBUG. Mesa/gallium has an in-house
 replacement, which (not 100% sure) should be fine as well.

 -Emil

 As far as i can see it in u_debug.h assert (debug_assert) is guarded by
 DEBUG as the above change...

 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] remove glamor support

2015-07-11 Thread Ilia Mirkin
If you want glamor, just use modesetting instead.
---
 configure.ac  |  11 ---
 man/nouveau.man   |   3 +-
 src/Makefile.am   |   2 -
 src/drmmode_display.c |   7 --
 src/nouveau_glamor.c  | 253 --
 src/nouveau_glamor.h  |  33 ---
 src/nouveau_present.c |  29 +-
 src/nouveau_wfb.c |   8 +-
 src/nouveau_xv.c  |   4 -
 src/nv_driver.c   |  16 
 src/nv_type.h |   1 -
 11 files changed, 3 insertions(+), 364 deletions(-)
 delete mode 100644 src/nouveau_glamor.c
 delete mode 100644 src/nouveau_glamor.h

diff --git a/configure.ac b/configure.ac
index 03563c1..9c77f94 100644
--- a/configure.ac
+++ b/configure.ac
@@ -140,17 +140,6 @@ if test x$have_list_h = xyes; then
#include list.h])
 fi
 
-AC_CHECK_HEADERS([glamor.h],[found_glamor_header=yes],[found_glamor_header=no],
-[#include xorg-server.h])
-AC_MSG_CHECKING([whether to include GLAMOR support])
-if test x$found_glamor_header = xyes  pkg-config --exists xorg-server = 
1.15.99.901
-then
-   AC_DEFINE(HAVE_GLAMOR, 1, [Build support for glamor acceleration])
-   AC_MSG_RESULT([yes])
-else
-   AC_MSG_RESULT([no])
-fi
-
 AC_CONFIG_FILES([
Makefile
src/Makefile
diff --git a/man/nouveau.man b/man/nouveau.man
index 129bb7f..c39c113 100644
--- a/man/nouveau.man
+++ b/man/nouveau.man
@@ -81,8 +81,7 @@ are supported:
 Enable or disable the HW cursor.  Default: on.
 .TP
 .BI Option \*qAccelMethod\*q \*q string \*q
-Specify the acceleration method. One of \*qnone\*q, \*qexa\*q, or
-\*qglamor\*q. Default: exa, except for GMxxx which default to glamor.
+Specify the acceleration method. One of \*qnone\*q, or \*qexa\*q. Default: exa.
 .TP
 .BI Option \*qNoAccel\*q \*q boolean \*q
 Disable or enable acceleration.  Default: acceleration is enabled.
diff --git a/src/Makefile.am b/src/Makefile.am
index 9d39a00..1e04ddf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,7 +35,6 @@ nouveau_drv_la_SOURCES = \
 nouveau_copy90b5.c \
 nouveau_copya0b5.c \
 nouveau_exa.c nouveau_xv.c nouveau_dri2.c \
-nouveau_glamor.c \
 nouveau_present.c \
 nouveau_sync.c \
 nouveau_wfb.c \
@@ -123,7 +122,6 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \
 shader/Makefile \
 nouveau_local.h \
 nouveau_copy.h \
-nouveau_glamor.h \
 nouveau_present.h \
 nouveau_sync.h \
 nv_const.h \
diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index cd13820..6495961 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -42,8 +42,6 @@
 #include libudev.h
 #endif
 
-#include nouveau_glamor.h
-
 static Bool drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height);
 typedef struct {
 int fd;
@@ -107,8 +105,6 @@ static inline struct nouveau_pixmap *
 drmmode_pixmap(PixmapPtr ppix)
 {
NVPtr pNv = NVPTR(xf86ScreenToScrn(ppix-drawable.pScreen));
-   if (pNv-AccelMethod == GLAMOR)
-   return nouveau_glamor_pixmap_get(ppix);
return nouveau_pixmap(ppix);
 }
 
@@ -1393,9 +1389,6 @@ drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int 
height)
   crtc-rotation, crtc-x, crtc-y);
}
 
-   if (pNv-AccelMethod == GLAMOR)
-   nouveau_glamor_create_screen_resources(scrn-pScreen);
-
if (old_fb_id)
drmModeRmFB(drmmode-fd, old_fb_id);
nouveau_bo_ref(NULL, old_bo);
diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c
deleted file mode 100644
index b8bca17..000
--- a/src/nouveau_glamor.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the Software),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs 

[Nouveau] [PATCH 2/2] remove maxwell support for now

2015-07-11 Thread Ilia Mirkin
There is no EXA acceleration, user better off with modesetting.
---
 src/nv_driver.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/nv_driver.c b/src/nv_driver.c
index a5ffbce..16a9029 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -389,7 +389,6 @@ NVHasKMS(struct pci_device *pci_dev, struct 
xf86_platform_device *platform_dev)
case 0xe0:
case 0xf0:
case 0x100:
-   case 0x110:
break;
default:
xf86DrvMsg(-1, X_ERROR, Unknown chipset: NV%02x\n, chipset);
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] RFC: drop glamor from nouveau ddx

2015-07-07 Thread Ilia Mirkin
On Tue, Jul 7, 2015 at 5:16 PM, Ben Skeggs skeg...@gmail.com wrote:
 On 8 July 2015 at 07:09, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Tue, Jul 7, 2015 at 5:05 PM, Ben Skeggs skeg...@gmail.com wrote:
 On 8 July 2015 at 06:06, Ilia Mirkin imir...@alum.mit.edu wrote:
 Ben,

 Looks like the reality is that glamor is just not hooked up properly
 in the nouveau DDX. Mainly it's missing DRI2, which in turn means no
 core GL contexts, and probably lots of other issues. While this could
 probably be fixed somehow, I doubt there's any advantage to using the
 nouveau DDX over something like modesetting nowadays.

 How would you feel about dropping glamor support from the nouveau ddx
 and failing to load for GPUs that don't have EXA support (unless
 AccelMode = none is forced for them). That way it'll fall back to
 loading modesetting which should be properly set up for DRI2 and so
 on.
 I have no objections to this.  In fact, in Fedora at least (I floated
 the idea in #nouveau a while back too), in the near future I plan on
 having the DDX fail to load on all GPUs where modesetting+glamor can
 be used (unless overridden by a config option).

 IMHO that's a little strong (I assume you mean nv50+ here?). In fact
 I'm planning to complete my Maxwell EXA impl. The current reality is
 that modesetting+glamor doesn't render correctly at least on maxwell,
 but possibly others as well. The EXA paths are very well tested and
 are stable. I think not relying on mesa in the DDX is a nice advantage
 too.
 The reality is that this is what people will be using when the big
 switch to wayland by default happens, and the idea is to limit the
 number of codepaths we have to care about and maintain.

Yeah, in 100 years when it actually works :p

 I'm aware
 that there are a few rendering issues left in glamor (I'm not sure if
 it's glamor, or our 3D driver having some bugs), but we should
 probably just fix those :)

I'm all for fixing bugs in the 3D drivers. But until that happens, no
reason that users should suffer. [I also definitely know some people
specially don't have mesa installed so that they get 2d accel but no
3d accel since nouveau can be so hang-y.]


 I believe Dave plans on doing something similar in Intel/Radeon too.

I think those kernel drivers tend to be a lot more stable. The nouveau
kernel driver hangs left and right though under any actual 3d load.
And yeah, the 2d accel is all 3d anyways, but it's a lot less code
and it's been way more tested.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] CUDA fixed VA allocations and sparse mappings

2015-07-07 Thread Ilia Mirkin
On Tue, Jul 7, 2015 at 8:07 PM, C Bergström cbergst...@pathscale.com wrote:
 On Wed, Jul 8, 2015 at 6:58 AM, Ben Skeggs skeg...@gmail.com wrote:
 On 8 July 2015 at 09:53, C Bergström cbergst...@pathscale.com wrote:
 regarding
 
 Fixed address allocations weren't going to be part of that, but I see
 that it makes sense for a variety of use cases.  One question I have
 here is how this is intended to work where the RM needs to make some
 of these allocations itself (for graphics context mapping, etc), how
 should potential conflicts with user mappings be handled?
 
 As an initial implementation you can probably assume that the GPU
 offloading is in exclusive mode. Basically that the CUDA or OpenACC
 code has full ownership of the card. The Tesla cards don't even have a
 video out on them. To complicate this even more - some offloading code
 has very long running kernels and even worse - may critically depend
 on using the full available GPU ram. (Large matrix sizes and soon big
 Fortran arrays or complex data types)
 This doesn't change that, to setup the graphics engine, the driver
 needs to map various system-use data structures into the channel's
 address space *somewhere* :)

 I'm not sure I follow exactly what you mean, but I think the answer is
 - don't setup the graphics engine if you're in compute mode. Doing
 that, iiuc, will at least provide a start to support for compute.
 Anyone who argues that graphics+compute is critical to have working at
 the same time is probably a 1%.

On NVIDIA GPUs, compute _is_ part of the graphics engine... aka PGRAPH.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] nv50, nvc0: enable at least one color RT if alphatest is enabled

2015-07-10 Thread Ilia Mirkin
Fixes the following piglits:
  fbo-alphatest-nocolor
  fbo-alphatest-nocolor-ff

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: mesa-sta...@lists.freedesktop.org
---
The nv50 bits need testing, only have a GK208 on-hand. Will be sure to test 
before pushing.

 src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 18 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 18 ++
 2 files changed, 36 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 116bf4b..ead4b29 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50,
nv50_fp_linkage_validate(nv50);
 }
 
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nv50_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nv50_validate_derived_2(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50-base.pushbuf;
+
+   if (nv50-zsa  nv50-zsa-pipe.alpha.enabled 
+   nv50-framebuffer.nr_cbufs == 0) {
+  nv50_fb_set_null_rt(push, 0);
+  BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+  PUSH_DATA (push, (076543210  4) | 1);
+   }
+}
+
 static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
@@ -456,6 +473,7 @@ static struct state_validate {
 { nv50_gp_linkage_validate,NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
 { nv50_validate_derived_rs,NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
 { nv50_validate_clip,  NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
 { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c52399a..785e52e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -535,6 +535,23 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
}
 }
 
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nvc0_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nvc0_validate_derived_2(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0-base.pushbuf;
+
+   if (nvc0-zsa  nvc0-zsa-pipe.alpha.enabled 
+   nvc0-framebuffer.nr_cbufs == 0) {
+  nvc0_fb_set_null_rt(push, 0);
+  BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+  PUSH_DATA (push, (076543210  4) | 1);
+   }
+}
+
 static void
 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
 {
@@ -597,6 +614,7 @@ static struct state_validate {
 { nvc0_fragprog_validate,  NVC0_NEW_FRAGPROG },
 { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
+{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
 { nvc0_validate_clip,  NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
NVC0_NEW_VERTPROG |
NVC0_NEW_TEVLPROG |
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH v2] pmu: fix queued messages while getting no IRQ

2015-11-14 Thread Ilia Mirkin
On Sat, Nov 14, 2015 at 1:44 PM, Karol Herbst  wrote:
> While stress-testing the reclocking code I found that, rarely (1 out of
> 20,000+ requests), we don't get any IRQ in nvkm_pmu_intr.
>
> This means we have a queued message on the pmu, but nouveau doesn't read it 
> and
> waits infinitely in nvkm_pmu_send:
> if (reply) {
> wait_event(pmu->recv.wait, (pmu->recv.process == 0));
>
> therefore let us use wait_event_timeout with a 1s timeout frame and just check
> whether there is a message queued and handle it if there is one.
>
> Return -ETIMEDOUT whenever we timed out and there is no message queued or when
> we hit another timeout while trying to read the message without getting any 
> IRQ
>
> The benefit of not using wait_event is, that we don't have a kworker waiting
> on an event, which makes it easier to reload the module at runtime, which 
> helps
> me developing on nouveau on my laptop a lot, because I don't need to reboot
> anymore
>
> Nevertheless, we shouldn't use wait_event here, because we can't guarantee any
> answer at all, can we?
>
> v2: moved it into a new function
>
> Signed-off-by: Karol Herbst 
> ---
>  drm/nouveau/nvkm/subdev/pmu/base.c | 43 
> --
>  1 file changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c 
> b/drm/nouveau/nvkm/subdev/pmu/base.c
> index 6b2007f..fafbe2a 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/base.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/base.c
> @@ -43,6 +43,41 @@ nvkm_pmu_handle_reclk_request(struct work_struct *work)
> nvkm_clk_pmu_reclk_request(clk, pmu->intr.data[0]);
>  }
>
> +static int
> +wait_for_pmu_reply(struct nvkm_pmu *pmu, u32 reply[2])
> +{
> +   struct nvkm_subdev *subdev = >subdev;
> +   struct nvkm_device *device = subdev->device;
> +   unsigned long jiffies = msecs_to_jiffies(1000);
> +
> +   if (!wait_event_timeout(pmu->recv.wait, (pmu->recv.process == 0), 
> jiffies)) {
> +   u32 addr = nvkm_rd32(device, 0x10a4cc);
> +   nvkm_error(subdev, "wait on reply timed out\n");
> +
> +   if (addr != nvkm_rd32(device, 0x10a4c8)) {
> +   nvkm_error(subdev, "found queued message without 
> getting an interrupt\n");
> +   schedule_work(>recv.work);
> +
> +   if (!wait_event_timeout(pmu->recv.wait, 
> (pmu->recv.process == 0), jiffies)) {
> +   nvkm_error(subdev, "failed to repair PMU 
> state\n");
> +   goto reply_error;
> +   }
> +   } else

Not sure whether kernel style dictates this, but I really hate these
"hanging" else's... both sides should have brackets if either one
does.

> +   goto reply_error;
> +   }
> +
> +   reply[0] = pmu->recv.data[0];
> +   reply[1] = pmu->recv.data[1];
> +   mutex_unlock(>mutex);
> +   return 0;
> +
> +reply_error:
> +   reply[0] = 0;
> +   reply[1] = 0;
> +   mutex_unlock(>mutex);
> +   return -ETIMEDOUT;
> +}
> +
>  int
>  nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2],
>   u32 process, u32 message, u32 data0, u32 data1)
> @@ -88,12 +123,8 @@ nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2],
> nvkm_wr32(device, 0x10a580, 0x);
>
> /* wait for reply, if requested */
> -   if (reply) {
> -   wait_event(pmu->recv.wait, (pmu->recv.process == 0));
> -   reply[0] = pmu->recv.data[0];
> -   reply[1] = pmu->recv.data[1];
> -   mutex_unlock(>mutex);
> -   }
> +   if (reply)
> +   return wait_for_pmu_reply(pmu, reply);

Having one function lock and another unlock is a disaster waiting to
happen. Perhaps make wait_for_pmu_reply not handle the unlock and
instead do

int ret = 0;
if (reply)
  ret = wait_for_pmu_reply()

return ret;

Additionally leaving the reply[] filling in this function would allow
you to avoid annoying error handling and goto's in the other function.

>
> return 0;
>  }
> --
> 2.6.3
>
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] llvm TGSI backend (WIP) questions

2015-11-13 Thread Ilia Mirkin
On Fri, Nov 13, 2015 at 9:25 AM, Emil Velikov  wrote:
> Hello Hans,
>
> Not to muddy the waters or anything, have you thought about the NIR
> integration that Rob was thinking about ?
> I'm pretty sure he'll be happy to have extra people helping him out.

How would that in any way plug into llvm or nouveau? There's no OpenCL
C -> NIR, and there's no NIR -> nv50 IR...

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Mesa-dev] gallium state tracker calls calloc for 0 sizes arrays ?

2015-08-27 Thread Ilia Mirkin
On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher alexdeuc...@gmail.com wrote:
 On Thu, Aug 27, 2015 at 1:55 PM, Hans de Goede hdego...@redhat.com wrote:
 Hi,

 On 27-08-15 15:46, Marek Olšák wrote:

 On Thu, Aug 27, 2015 at 3:09 PM, Hans de Goede hdego...@redhat.com
 wrote:

 Hi All,

 While debugging: https://bugzilla.redhat.com/show_bug.cgi?id=1008089

 I made an apitrace recording of a single slide transition
 animation, and since I suspected memory corruption replayed
 it using ElectricFence + glretrace, this finds a 0 sized array
 allocation at src/mesa/state_tracker/st_glsl_to_tgsi.cpp: 5565:

 if (proginfo-Parameters) {
t-constants = (struct ureg_src *)
   calloc(proginfo-Parameters-NumParameters,
 sizeof(t-constants[0]));

 And if I protect the code against that one, another one at 5618:

 t-immediates = (struct ureg_src *)
calloc(program-num_immediates, sizeof(struct ureg_src));

 With the regular glibc malloc these both succeed as it actually
 returns a valid memory address (posix says it may also return NULL)

 I believe that the fragment program in question comes from:

 src/mesa/main/state.c update_program() and then from the

 else if (ctx-FragmentProgram._MaintainTexEnvProgram) {
/* Use fragment program generated from fixed-function state */

 }

 block.

 Interestingly enough if I allow malloc(0) to proceed from ElectricFence,
 then the glretrace runs fine, and even renders correctly, where as
 running the same gl command stream from libreoffice impress leads
 to misrendering on nv3c.

 So 2 questions:

 1) Is it normal / expected for st_translate_program() to get called
 with an empty but not NULL proginfo-Parameters resp. num_immediates == 0
 ?

 If not where would I begin to look for finding the culprit of this ?


 Yes, it's normal.


 OK, thanks for the clear answer on this.

 2) Since the glretrace does work outside of libreoffice impress, I think
 it may have something to do with the visual chosen by libreoffice
 impress,
 is there an easy way to find out what visual lo is choosing?


 No, it's not because of the visual. It seems to me that libreoffice
 changed the behavior of malloc and calloc.


 I'm pretty sure that this is not libreoffice changing malloc / calloc,
 it links normally to libc, and the same slide transition works fine
 with an nv84 card which also has a gallium based mesa driver.

 I really believe this is due to libreoffice doing something opengl
 related differently than glretrace, be it the visual or something else
 back buffer related ...


 Does libreoffice use llvm?  I have vague recollections of there being
 issues with llvm and libreoffice in the past because radeonsi uses
 llvm as well.

FWIW the nv30 gallium driver will only use llvm as part of 'draw' when
falling back to the swtnl path. This should be extremely rare. But
easy enough to build mesa with --disable-gallium-llvm to double-check
(or what was the env var? DRAW_USE_LLVM=0 or something along those
lines).

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] Loading vbios on OF

2015-08-28 Thread Ilia Mirkin
Hey Ben,

So with the following totally-hack-patch below, I get OF to load (but
I have to force it, checksum fails). Of note is the following:

-r--r--r-- 1 root root 2403 Aug 28 09:31
/proc/device-tree/pci@0,f000/NVDA,Parent@10/NVDA,BMP

I'm not sure why you require the vbios fetches to be multiples of 4
bytes, but that messes things up here. Also I'm not sure where to get
this bios size from in the first place, perhaps we should just add a
->size() callback? Don't all the backends (except pramin) know how
much vbios they have?

diff --git a/drm/nouveau/nvkm/subdev/bios/image.c b/drm/nouveau/nvkm/subdev/bios
/image.c
index 74b14cf..ce0b549 100644
--- a/drm/nouveau/nvkm/subdev/bios/image.c
+++ b/drm/nouveau/nvkm/subdev/bios/image.c
@@ -47,11 +47,17 @@ nvbios_imagen(struct nvkm_bios *bios, struct nvbios_image *i
mage)
return false;
}

-   if (!(data = nvbios_pcirTp(bios, image-base, ver, hdr, pcir)))
-   return false;
-   image-size = pcir.image_size;
-   image-type = pcir.image_type;
-   image-last = pcir.last;
+   if (!(data = nvbios_pcirTp(bios, image-base, ver, hdr, pcir))) {
+   nvkm_warn(subdev, PCIR section missing\n);
+   image-size = 2403;
+   image-type = 0;
+   image-last = true;
+return true;
+   } else {
+   image-size = pcir.image_size;
+   image-type = pcir.image_type;
+   image-last = pcir.last;
+   }

if (image-type != 0x70) {
if (!(data = nvbios_npdeTp(bios, image-base, npde)))
diff --git a/drm/nouveau/nvkm/subdev/bios/shadow.c
b/drm/nouveau/nvkm/subdev/bios/shadow.c
index 792f017..b7a2249 100644
--- a/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -45,7 +45,7 @@ shadow_fetch(struct nvkm_bios *bios, struct shadow
*mthd, u32 upto)
u32 read = mthd-func-read(data, start, limit - start, bios);
bios-size = start + read;
}
-   return bios-size = limit;
+   return bios-size = upto;
 }

 static int
@@ -55,7 +55,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32
offset, struct shadow *mthd)
struct nvbios_image image;
int score = 1;

-   if (!shadow_fetch(bios, mthd, offset + 0x1000)) {
+   if (!shadow_fetch(bios, mthd, offset + 0x400)) {
nvkm_debug(subdev, %08x: header fetch failed\n, offset);
return 0;
}
diff --git a/drm/nouveau/nvkm/subdev/bios/shadowof.c
b/drm/nouveau/nvkm/subdev/bios/shadowof.c
index 29a37f0..066bc1f 100644
--- a/drm/nouveau/nvkm/subdev/bios/shadowof.c
+++ b/drm/nouveau/nvkm/subdev/bios/shadowof.c
@@ -22,6 +22,7 @@
  */
 #include priv.h

+#include core/pci.h

 #if defined(__powerpc__)
 struct priv {
@@ -33,7 +34,9 @@ static u32
 of_read(void *data, u32 offset, u32 length, struct nvkm_bios *bios)
 {
struct priv *priv = data;
-   if (offset + length = priv-size) {
+printk(KERN_ERR offset: %d, length: %d, size: %d\n, offset, length,
priv-size);
+   if (offset = priv-size) {
+  length = min_t(u32, length, priv-size - offset);
memcpy_fromio(bios-data + offset, priv-data + offset, length);
return length;
}
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] nv3x libreoffice impress opengl animations not working

2015-08-28 Thread Ilia Mirkin
On Fri, Aug 28, 2015 at 4:54 AM, Hans de Goede hdego...@redhat.com wrote:
 Hi,

 On 27-08-15 20:19, Ilia Mirkin wrote:

 On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher alexdeuc...@gmail.com
 wrote:


 snip

 2) Since the glretrace does work outside of libreoffice impress, I
 think
 it may have something to do with the visual chosen by libreoffice
 impress,
 is there an easy way to find out what visual lo is choosing?



 No, it's not because of the visual. It seems to me that libreoffice
 changed the behavior of malloc and calloc.



 I'm pretty sure that this is not libreoffice changing malloc / calloc,
 it links normally to libc, and the same slide transition works fine
 with an nv84 card which also has a gallium based mesa driver.

 I really believe this is due to libreoffice doing something opengl
 related differently than glretrace, be it the visual or something else
 back buffer related ...


 Does libreoffice use llvm?  I have vague recollections of there being
 issues with llvm and libreoffice in the past because radeonsi uses
 llvm as well.


 FWIW the nv30 gallium driver will only use llvm as part of 'draw' when
 falling back to the swtnl path. This should be extremely rare. But
 easy enough to build mesa with --disable-gallium-llvm to double-check
 (or what was the env var? DRAW_USE_LLVM=0 or something along those
 lines).


 I've tried building with --disable-gallium-llvm, this does not help,
 this is not really surprising since on Fedora both libreoffice and
 mesa use the system llvm, so there should be no problems with them
 expecting different llvm versions.

 I've done some further debugging adding some debug printf-s to the
 texture creation paths for nv3x, this bit is interesting, glretrace
 does:

 nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0
 nv30_miptree_create 1350x863 uniform_pitch 5440 usage 0 flags 0 bind 1
 target 2

 So it gets a texture from a handle, which I believe is the child-window
 in which the animation will be shown, and then create another texture
 with the same dimensions to serve as back buffer I presume.

 ooimpress however does this:

 nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0
 nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind a
 target 2
 nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind 1
 target 2

 Notice how it is creating 2 (back?) buffers and they are twice the size of
 the sheet area of impress to which the animation gets rendered.

bind a = rt/sampler view, bind 1 = depth/stencil. However nv3x doesn't
do NPOT textures... so those sizes are a bit odd. Perhaps there's some
logic that attempts to round-up-to-nearest-POT size, but instead
multiplies width by 2?


 I believe this is a clue to the root cause of the problem, but after this
 I'm sorta stuck. Anyone got any hints on how to debug this further / where
 to look ?

 Thanks  Regards,

 Hans
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] nv3x libreoffice impress opengl animations not working

2015-08-31 Thread Ilia Mirkin
On Mon, Aug 31, 2015 at 8:58 AM, Hans de Goede <hdego...@redhat.com> wrote:
> Hi,
>
>
> On 28-08-15 11:02, Ilia Mirkin wrote:
>>
>> On Fri, Aug 28, 2015 at 4:54 AM, Hans de Goede <hdego...@redhat.com>
>> wrote:
>>>
>>> Hi,
>>>
>>> On 27-08-15 20:19, Ilia Mirkin wrote:
>>>>
>>>>
>>>> On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher <alexdeuc...@gmail.com>
>>>> wrote:
>>>
>>>
>>>
>>> 
>>>
>>>>>>>> 2) Since the glretrace does work outside of libreoffice impress, I
>>>>>>>> think
>>>>>>>> it may have something to do with the visual chosen by libreoffice
>>>>>>>> impress,
>>>>>>>> is there an easy way to find out what visual lo is choosing?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> No, it's not because of the visual. It seems to me that libreoffice
>>>>>>> changed the behavior of malloc and calloc.
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> I'm pretty sure that this is not libreoffice changing malloc / calloc,
>>>>>> it links normally to libc, and the same slide transition works fine
>>>>>> with an nv84 card which also has a gallium based mesa driver.
>>>>>>
>>>>>> I really believe this is due to libreoffice doing something opengl
>>>>>> related differently than glretrace, be it the visual or something else
>>>>>> back buffer related ...
>>>>>>
>>>>>
>>>>> Does libreoffice use llvm?  I have vague recollections of there being
>>>>> issues with llvm and libreoffice in the past because radeonsi uses
>>>>> llvm as well.
>>>>
>>>>
>>>>
>>>> FWIW the nv30 gallium driver will only use llvm as part of 'draw' when
>>>> falling back to the swtnl path. This should be extremely rare. But
>>>> easy enough to build mesa with --disable-gallium-llvm to double-check
>>>> (or what was the env var? DRAW_USE_LLVM=0 or something along those
>>>> lines).
>>>
>>>
>>>
>>> I've tried building with --disable-gallium-llvm, this does not help,
>>> this is not really surprising since on Fedora both libreoffice and
>>> mesa use the system llvm, so there should be no problems with them
>>> expecting different llvm versions.
>>>
>>> I've done some further debugging adding some debug printf-s to the
>>> texture creation paths for nv3x, this bit is interesting, glretrace
>>> does:
>>>
>>> nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0
>>> nv30_miptree_create 1350x863 uniform_pitch 5440 usage 0 flags 0 bind 1
>>> target 2
>>>
>>> So it gets a texture from a handle, which I believe is the child-window
>>> in which the animation will be shown, and then create another texture
>>> with the same dimensions to serve as back buffer I presume.
>>>
>>> ooimpress however does this:
>>>
>>> nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0
>>> nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind a
>>> target 2
>>> nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind 1
>>> target 2
>>>
>>> Notice how it is creating 2 (back?) buffers and they are twice the size
>>> of
>>> the "sheet" area of impress to which the animation gets rendered.
>>
>>
>> bind a = rt/sampler view, bind 1 = depth/stencil. However nv3x doesn't
>> do NPOT textures... so those sizes are a bit odd. Perhaps there's some
>> logic that attempts to round-up-to-nearest-POT size, but instead
>> multiplies width by 2?
>
>
> Ok, some debugging / poking at thing further I now know where the multiply
> by 2 comes from, the pipe_resource *tmpl passed into nv30_miptree_create
> has templ->nr_samples = 4, and nv30_miptree_create has:
>
>switch (tmpl->nr_samples) {
>case 4:
>   mt->ms_mode = 0x4000;
>   mt->ms_x = 1;
>   mt->ms_y = 1;
>   break;
>case 2:
>   mt->ms_mode = 0x3000;
>   mt->ms_x = 1;
>   mt->ms_y = 0;
>   break;
>default:
>   mt->ms_mode = 0x;
>   mt->ms_x = 0;
>   

[Nouveau] [PATCH] gr/nv04: fix big endian setting on gr context

2015-08-31 Thread Ilia Mirkin
Broken since "gr: convert user classes to new-style nvkm_object"

Tested on a PPC64 G5 + NV34

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 drm/nouveau/nvkm/engine/gr/nv04.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drm/nouveau/nvkm/engine/gr/nv04.c 
b/drm/nouveau/nvkm/engine/gr/nv04.c
index 426ba00..85c5b7f 100644
--- a/drm/nouveau/nvkm/engine/gr/nv04.c
+++ b/drm/nouveau/nvkm/engine/gr/nv04.c
@@ -1048,11 +1048,11 @@ nv04_gr_object_bind(struct nvkm_object *object, struct 
nvkm_gpuobj *parent,
if (ret == 0) {
nvkm_kmap(*pgpuobj);
nvkm_wo32(*pgpuobj, 0x00, object->oclass);
-   nvkm_wo32(*pgpuobj, 0x04, 0x);
-   nvkm_wo32(*pgpuobj, 0x08, 0x);
 #ifdef __BIG_ENDIAN
-   nvkm_mo32(*pgpuobj, 0x08, 0x0008, 0x0008);
+   nvkm_mo32(*pgpuobj, 0x00, 0x0008, 0x0008);
 #endif
+   nvkm_wo32(*pgpuobj, 0x04, 0x);
+   nvkm_wo32(*pgpuobj, 0x08, 0x);
nvkm_wo32(*pgpuobj, 0x0c, 0x);
nvkm_done(*pgpuobj);
}
-- 
2.4.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


<    2   3   4   5   6   7   8   9   10   11   >