[Nouveau] [PATCH] nv50/ir: avoid messing up arg1 of PFETCH
There can be scenarios where the indirect arg of a PFETCH becomes known, and so the code will attempt to propagate it. Use this opportunity to just fold it into the first argument, and prevent the load propagation pass from touching PFETCH further. This fixes gs-input-array-vec4-index-rd.shader_test and vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 72dd31e..98e3d1f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb) if (i-op == OP_CALL) // calls have args as sources, they must be in regs continue; + if (i-op == OP_PFETCH) // pfetch expects arg1 to be a reg + continue; + if (i-srcExists(1)) checkSwapSrc01(i); @@ -581,6 +584,11 @@ ConstantFolding::expr(Instruction *i, case OP_POPCNT: res.data.u32 = util_bitcount(a-data.u32 b-data.u32); break; + case OP_PFETCH: + // The two arguments to pfetch are logically added together. Normally + // the second argument will not be constant, but that can happen. + res.data.u32 = a-data.u32 + b-data.u32; + break; default: return; } @@ -610,6 +618,8 @@ ConstantFolding::expr(Instruction *i, bld.setPosition(i, false); i-setSrc(1, bld.loadImm(NULL, res.data.u32)); } + } else if (i-op == OP_PFETCH) { + // Leave PFETCH alone... we just folded its 2 args into 1. } else { i-op = i-saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */ } -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2] nv50/ir: avoid messing up arg1 of PFETCH
There can be scenarios where the indirect arg of a PFETCH becomes known, and so the code will attempt to propagate it. Use this opportunity to just fold it into the first argument, and prevent the load propagation pass from touching PFETCH further. This fixes gs-input-array-vec4-index-rd.shader_test and vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org --- v1 - v2: - redo final section of ConstantFolding::expr using a switch, per tobijk .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 20 ++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 72dd31e..b7fcd56 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -236,6 +236,9 @@ LoadPropagation::visit(BasicBlock *bb) if (i-op == OP_CALL) // calls have args as sources, they must be in regs continue; + if (i-op == OP_PFETCH) // pfetch expects arg1 to be a reg + continue; + if (i-srcExists(1)) checkSwapSrc01(i); @@ -581,6 +584,11 @@ ConstantFolding::expr(Instruction *i, case OP_POPCNT: res.data.u32 = util_bitcount(a-data.u32 b-data.u32); break; + case OP_PFETCH: + // The two arguments to pfetch are logically added together. Normally + // the second argument will not be constant, but that can happen. + res.data.u32 = a-data.u32 + b-data.u32; + break; default: return; } @@ -595,7 +603,9 @@ ConstantFolding::expr(Instruction *i, i-getSrc(0)-reg.data = res.data; - if (i-op == OP_MAD || i-op == OP_FMA) { + switch (i-op) { + case OP_MAD: + case OP_FMA: { i-op = OP_ADD; i-setSrc(1, i-getSrc(0)); @@ -610,8 +620,14 @@ ConstantFolding::expr(Instruction *i, bld.setPosition(i, false); i-setSrc(1, bld.loadImm(NULL, res.data.u32)); } - } else { + break; + } + case OP_PFETCH: + // Leave PFETCH alone... 
we just folded its 2 args into 1. + break; + default: i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */ + break; } i->subOp = 0; } -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] nv30: avoid doing extra work on clear and hitting unexpected states
Clearing can happen at a time when various state objects are incoherent and not ready for a draw. Some of the validation functions don't handle this well, so only flush the framebuffer state. This has the advantage of also not doing extra work. This works around some crashes that can happen when clearing. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- src/gallium/drivers/nouveau/nv30/nv30_clear.c | 2 +- src/gallium/drivers/nouveau/nv30/nv30_context.h| 2 +- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 4 ++-- src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 10 ++ src/gallium/drivers/nouveau/nv30/nv30_vbo.c| 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c index 1ab8929..83fd1fa 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c @@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers, struct pipe_framebuffer_state *fb = nv30-framebuffer; uint32_t colr = 0, zeta = 0, mode = 0; - if (!nv30_state_validate(nv30, TRUE)) + if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE)) return; if (buffers PIPE_CLEAR_COLOR fb-nr_cbufs) { diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h index 7b32aae..592cdbe 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_context.h +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h @@ -204,7 +204,7 @@ void nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); boolean -nv30_state_validate(struct nv30_context *nv30, boolean hwtnl); +nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl); void nv30_state_release(struct nv30_context *nv30); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 3575c3d..38c31e9 100644 --- 
a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -129,7 +129,7 @@ nv30_render_draw_elements(struct vbuf_render *render, NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); } - if (!nv30_state_validate(nv30, FALSE)) + if (!nv30_state_validate(nv30, ~0, FALSE)) return; BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); @@ -174,7 +174,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); } - if (!nv30_state_validate(nv30, FALSE)) + if (!nv30_state_validate(nv30, ~0, FALSE)) return; BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c index 0f9d19d..86ac4f7 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c @@ -456,7 +456,7 @@ nv30_state_context_switch(struct nv30_context *nv30) } boolean -nv30_state_validate(struct nv30_context *nv30, boolean hwtnl) +nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl) { struct nouveau_screen *screen = nv30-screen-base; struct nouveau_pushbuf *push = nv30-base.pushbuf; @@ -481,14 +481,16 @@ nv30_state_validate(struct nv30_context *nv30, boolean hwtnl) else validate = swtnl_validate_list; - if (nv30-dirty) { + mask = nv30-dirty; + + if (mask) { while (validate-func) { - if (nv30-dirty validate-mask) + if (mask validate-mask) validate-func(nv30); validate++; } - nv30-dirty = 0; + nv30-dirty = ~mask; } nouveau_pushbuf_bufctx(push, bctx); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c index 67ab829..d4e384b 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c @@ -564,7 +564,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv30-vbo_user !(nv30-dirty (NV30_NEW_VERTEX | 
NV30_NEW_ARRAYS))) nv30_update_user_vbufs(nv30); - nv30_state_validate(nv30, TRUE); + nv30_state_validate(nv30, ~0, TRUE); if (nv30-draw_flags) { nv30_render_vbo(pipe, info); return; -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 2/2] nv30: fix clip plane uploads and enable changes
nv30_validate_clip depends on the rasterizer state. Also we should upload all the new clip planes on change since next time the plane data won't have changed, but the enables might. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c index 86ac4f7..a954dcc 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c @@ -272,15 +272,13 @@ nv30_validate_clip(struct nv30_context *nv30) uint32_t clpd_enable = 0; for (i = 0; i 6; i++) { - if (nv30-rast-pipe.clip_plane_enable (1 i)) { - if (nv30-dirty NV30_NEW_CLIP) { -BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5); -PUSH_DATA (push, i); -PUSH_DATAp(push, nv30-clip.ucp[i], 4); - } - - clpd_enable |= 1 (1 + 4*i); + if (nv30-dirty NV30_NEW_CLIP) { + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5); + PUSH_DATA (push, i); + PUSH_DATAp(push, nv30-clip.ucp[i], 4); } + if (nv30-rast-pipe.clip_plane_enable (1 i)) + clpd_enable |= 2 (4*i); } BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); @@ -389,7 +387,7 @@ static struct state_validate hwtnl_validate_list[] = { { nv30_validate_stipple, NV30_NEW_STIPPLE }, { nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER }, { nv30_validate_viewport, NV30_NEW_VIEWPORT }, -{ nv30_validate_clip, NV30_NEW_CLIP }, +{ nv30_validate_clip, NV30_NEW_CLIP | NV30_NEW_RASTERIZER }, { nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST }, { nv30_vertprog_validate, NV30_NEW_VERTPROG | NV30_NEW_VERTCONST | NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER }, -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Fermi+ shader header docs
On Thu, May 21, 2015 at 10:05 AM, Robert Morell rmor...@nvidia.com wrote: Hi Ilia, On Sat, May 02, 2015 at 12:34:21PM -0400, Ilia Mirkin wrote: Hi, As I'm looking to add some support to nouveau for features like atomic counters and images, I'm running into some confusion about what the first word of the shader header means. Here is the definition as we have it today: [...] However I know that these are somewhat wrong. I've seen shaders that use gmem accesses (i.e. mov r0, [r0]) that just have the LMEM enable bit set (and they use no lmem). And I've seen additional bits set, esp relating to images, but I haven't spent enough time looking at all the variations to make sense of it yet. For example, I think that Fermi and Kepler+ have different meanings for some of the bits. Those look pretty close :) I was hoping you could just release the docs for the shader headers, or at least the first word of the shader header. We've posted the specification for the full Shader Program Header to our GPU documentation site here: ftp://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html I hope it helps clear things up. Yep, just a few follow-up questions: - SPH Type 1 and type 2 appear to be flipped wrt the tables -- When PS is used, field SphType in CommonWord0 must be set to 1; similarly, when VTG is used, SphType in CommonWord0 must be set to 2. But the Table 1. SPH Type 1 Definition is clearly meant for VTG and table 2 is clearly meant for PS... - You skip over SassVersion -- what is that? - You have a funny note in there -- Triangles generated by the geometry shader always have all their edge flags set to TRUE -- that is the *only* reference to edge flags in the whole document. Right now we do some crazy thing to get edge flags right on fermi+ (and I think we just get them wrong on tesla). Is there a way to emit edge flags from vertex shader? - To be clear: DoesLoadOrStore -- *any* load/store? Even LDC? ALD? Thanks! 
-ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles
One additional observation that I just made is that on GK208, the blob apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx| (i.e. absolute value), or even more paradoxically, shl 2; shr 2; which removes the top *2* bits, rather than just the top 1. However I see no such behaviour on GF108. I'm going to test out tomorrow whether this is the cause of my GK208 woes. On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Hello, I've been debugging a few different tessellation shader issues with nouveau, but let's start small. I see this issue on my GK208 with high frequency, and I *think* I've seen it once or twice on my GF108, but it's exceedingly rare, if it does happen. I don't have a GK10x to test on, unfortunately, but I assume it'll have the same issue as the GK208. The issue is this -- a bunch of triangles that should come out of the tessellator end up black. I also see a GPC0/TPC1/MP trap: MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in response to a interrupt and MP trap generated by the hardware, read out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see gf100_gr_trap_mp). I assume some of the tessellation evaluation invocations get killed, but I have no proof of this. I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]] I would imagine that's some floating point number ending up in the register instead of an address, but the fp32 value of it (1.35107421875) does not seem familiar. Ben pointed out that the 0x3facf000 is a channel address, not a value from the shader. Oops. So that theory completely doesn't hold water. Perhaps some buffer isn't big enough? This ends up using 9 output vertices per patch, with 2 vec4's each. I've tried playing with the per-warp stack size to no avail, but I didn't *entirely* know what I was doing either though. 
Even when all the triangles show up, I still see the error on the GK208, so I'm not sure if they're the same issue or not. Now, here's the fun part -- this is completely non-deterministic. Sometimes everything shows up on the GK208, other times I see holes, in varying locations. I'm fairly sure that the actual shader code is correct... so I'm doing something funny wrong. (And yeah, tons of missed optimization opportunities in this code, but let's not dwell on that.) This is the piglit test: http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test It should be noted that other piglit tests don't exhibit this error, however they also tend to be simpler. One key difference is that they don't change the patch size in TCS. I'm including a link to a text file with the tessellation control and evaluation shaders (decoded with nvdisasm which you're hopefully more familiar with), along with the shader headers that we generate. FTR, this is how I feed the raw shader opcode bytes into nvdisasm: perl -ane 'foreach (@F) { print pack I, hex($_) }' tt; nvdisasm -b SM35 tt (for some reason it doesn't want to read from a pipe or even a fd). http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt My suspicion is that we're doing something wrong with the sched codes. We have an elaborate calculator, but... perhaps not elaborate enough? You can see it here: http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574 The reason I think it's an error in sched codes is due to the TRAP memory location that I see -- could well be some stale value in the register and the value from S2R or VILD doesn't make it in there in time before the ALD reads it. If you should like to try this yourself, you can use https://github.com/imirkin/mesa/commits/gl4-integration-2 . This branch is good enough to run Unigine Heaven, but still has a lot of known shortcomings. 
(Both at the core and the nouveau levels.) Any advice or suggestions for debugging this would be greatly appreciated. And let me know if you'd like me to generate additional info on this. For example I can supply a full command trace that can be piped to demmt, if that's helpful. Thanks in advance, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.
Lastly, from some discussions with ajax on IRC, it appears that DRI3 is half-baked at best wrt sync between server and client. I think we should just disable it by default for now, until issues are ironed out. (Rather than what this patch has, which is default-on for Xorg some version.) On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com wrote: The DRI option with the intel ddx can be used to indicate the following - whether dri is disabled - the dri version - dri1, dri2, dri3 - the dri module name - doo_dri.so bar_dri.so I'm not sure how exactly it's supposed to work/works, and I believe most of that is due to legacy reasons. I'm just saying let's not do the whole thing - just the dri version would be great (as you suggested). -Emil On 4 July 2015 at 19:28, Ilia Mirkin imir...@alum.mit.edu wrote: Erm, that's nuts. I also don't really understand what they're talking about there... i915g vs i915? Anyways, I just meant the version numbers :) On Sat, Jul 4, 2015 at 2:23 PM, Emil Velikov emil.l.veli...@gmail.com wrote: That would be great, as long as it does only that and does not go into the drivername territory. As the said driver ;-) A driver name to use can be provided instead of simple boolean value, which will be passed to the GL implementation for it to load the appropriate backend. -Emil On 4 July 2015 at 18:17, Ilia Mirkin imir...@alum.mit.edu wrote: IMO it'd be nice to keep this compatible with the intel driver, which has a DRI option, which can take the values 1, 2, 3. Obviously for nouveau, 1 makes no sense as that was dropped quite some time ago. See http://cgit.freedesktop.org/xorg/driver/xf86-video-intel/tree/man/intel.man#n68 On Mon, Jun 29, 2015 at 11:30 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: X-Server versions older than 1.16.3 have bugs in their DRI3/Present implementation which impair nouveau, so it is better to stick to good old DRI2 by default on such servers. 
E.g., page flipping doesn't work at all under DRI3/Present with older servers, and use of extensions like OML_sync_control, SGI_video_sync or INTEL_swap_events also causes failure of Present. nouveau's glamor accel backend currently doesn't work under DRI2, so continue to use DRI3 whenever it is supported. Under the exa accel backend, DRI2 works just fine, so disable DRI3 and choose DRI2 by default when nouveau is built for X-Server < 1.16.3, and enable DRI3 if building on later X-Servers which work reasonably well under DRI3/Present. A new boolean xorg.conf Option DRI3 allows to enforce or prevent use of DRI3/Present under EXA acceleration for testing. Also add a bit more output about status of Present and DRI3 to aid debugging. Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com --- man/nouveau.man| 6 ++ src/nouveau_dri2.c | 11 ++- src/nv_const.h | 2 ++ src/nv_driver.c| 17 +++-- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/man/nouveau.man b/man/nouveau.man index 129bb7f..12cfbc0 100644 --- a/man/nouveau.man +++ b/man/nouveau.man @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as defined in that specification. .br Default: 1. +.TP +.BI Option \*qDRI3\*q \*q boolean \*q +Enable the DRI3 extension under exa acceleration if supported by server. +A setting of off will only use DRI2 instead. Under glamor acceleration, +DRI3 is always enabled if supported. Default: on for XOrg >= 1.16.3, off for +earlier versions. 
.SH SEE ALSO __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), Xserver(__appmansuffix__), X(__miscmansuffix__) .SH AUTHORS diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index f22e319..d818976 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -1130,7 +1130,16 @@ nouveau_dri3_screen_init(ScreenPtr screen) if (buf stat(buf, render) == 0 master.st_mode == render.st_mode) { pNv-render_node = buf; - return dri3_screen_init(screen, nouveau_dri3_screen_info); + if (dri3_screen_init(screen, nouveau_dri3_screen_info)) { + xf86DrvMsg(pScrn-scrnIndex, X_INFO, + DRI3 on EXA enabled\n); + return TRUE; + } + else { + xf86DrvMsg(pScrn-scrnIndex, X_WARNING, + DRI3 on EXA initialization failed\n); + return FALSE; + } } else free(buf); #endif diff --git a/src/nv_const.h b/src/nv_const.h index f1b4e9b..df1e398 100644 --- a/src/nv_const.h +++ b/src/nv_const.h @@ -18,6 +18,7 @@ typedef enum { OPTION_SWAP_LIMIT, OPTION_ASYNC_COPY, OPTION_ACCELMETHOD, +OPTION_DRI3, } NVOpts; @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions
[Nouveau] RFC: drop glamor from nouveau ddx
Ben, Looks like the reality is that glamor is just not hooked up properly in the nouveau DDX. Mainly it's missing DRI2, which in turn means no core GL contexts, and probably lots of other issues. While this could probably be fixed somehow, I doubt there's any advantage to using the nouveau DDX over something like modesetting nowadays. How would you feel about dropping glamor support from the nouveau ddx and failing to load for GPUs that don't have EXA support (unless AccelMode = none is forced for them)? That way it'll fall back to loading modesetting which should be properly set up for DRI2 and so on. Cheers, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x
On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com wrote: Hi, On 30-07-15 16:09, Ilia Mirkin wrote: FWIW this is a fail on nv50+ as well. See for example https://bugs.freedesktop.org/show_bug.cgi?id=91445 My suspicion is that this is due to the lack of PUSH_KICK in the *Done exa handlers -- works fine with DRI2, but DRI3 has no synchronization and so the commands never get flushed out. Easily verified by sticking PUSH_KICK's everywhere. I do not believe that that is the problem, in my case it clearly seems to be a pitch / swizzle problem rather than a synchronization problem, here is what my desktop with gnome shell looks like when using DRI2: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg And this is what it looks like when using DRI3: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg The DRI2 screenshot is made with Mario's 2 patches on top of current master: http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html And then adding Option DRI 2 to xorg.conf. His patches should have defaulted it to DRI 2 I think, so this is unnecessary. In fact you should have had to say DRI 3 to get DRI3 with his patches. -- I've also tried disabling EXA using Option AccelMethod none, but that seems to also automatically disable all DRI, leading to software rendering. I discussed this with Ben this morning and he suggested that this is likely a Mesa issue since with DRI3 mesa rather than the ddx allocs the surfaces. I've tried disabling swizzling in the mesa code by forcing nv30_miptree_create() to always take the code path for linear textures, but that leads to the exact same result as before that change. Ah yes. Very different problem indeed. I actually suspect it has to do with swizzling. Look at the white pattern of the moon -- it's all in a line. That means that it expected some locality and instead it got drawn all on a line. 
If it were merely a stride problem, I'd expect to see strips of the moon below and offset from one another. So... take a look at nv30_miptree_from_handle -- I wonder if it can now receive swizzled textures where it couldn't before. -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] DisplayPort support with 750 Ti
Supposed to? Sure! :) DP is finicky in general, and Maxwell is a fairly new generation that not a lot of people have tested or had access to, so quite expected for things to go wrong. Can you file a bug at bugs.freedesktop.org xorg - Driver/nouveau with the output of nouveau loaded with drm.debug=0xe nouveau.debug=debug,VBIOS=trace Hopefully that will give an idea of what's happening. On Thu, Aug 13, 2015 at 12:57 AM, Tom Yan tom.t...@gmail.com wrote: Is nouveau supposed support DisplayPort output as of Linux 4.1? I have an EVGA GeForce GTX 750 Ti. TMDS outputs (HDMI/DVI) works while DisplayPort output failed and delays initialization of nouveau. It seems that nouveau failed to read EDID properly through DisplayPort as well. The following are grep'd kernel messages of booting with DP/DP+DVI/DVI connected respectively: [tom@localhost ~]$ grep nouveau dp Aug 12 11:28:17 localhost kernel: fb: switching to nouveaufb from EFI VGA Aug 12 11:28:17 localhost kernel: nouveau [ DEVICE][:01:00.0] BOOT0 : 0x117000a2 Aug 12 11:28:17 localhost kernel: nouveau [ DEVICE][:01:00.0] Chipset: GM107 (NV117) Aug 12 11:28:17 localhost kernel: nouveau [ DEVICE][:01:00.0] Family : NV110 Aug 12 11:28:17 localhost kernel: nouveau [ VBIOS][:01:00.0] using image from PROM Aug 12 11:28:17 localhost kernel: nouveau [ VBIOS][:01:00.0] BIT signature found Aug 12 11:28:17 localhost kernel: nouveau [ VBIOS][:01:00.0] version 82.07.32.00.38 Aug 12 11:28:17 localhost kernel: nouveau [ PMC][:01:00.0] MSI interrupts enabled Aug 12 11:28:17 localhost kernel: nouveau [ PFB][:01:00.0] RAM type: GDDR5 Aug 12 11:28:17 localhost kernel: nouveau [ PFB][:01:00.0] RAM size: 2048 MiB Aug 12 11:28:17 localhost kernel: nouveau [ PFB][:01:00.0] ZCOMP: 0 tags Aug 12 11:28:19 localhost kernel: nouveau [ PTHERM][:01:00.0] FAN control: PWM Aug 12 11:28:19 localhost kernel: nouveau [ PTHERM][:01:00.0] fan management: automatic Aug 12 11:28:19 localhost kernel: nouveau [ PTHERM][:01:00.0] internal sensor: yes Aug 12 
11:28:19 localhost kernel: nouveau [ CLK][:01:00.0] 07: core 405 MHz memory 810 MHz Aug 12 11:28:19 localhost kernel: nouveau [ CLK][:01:00.0] 0f: core 270-1293 MHz memory 5400 MHz Aug 12 11:28:19 localhost kernel: nouveau [ CLK][:01:00.0] --: core 405 MHz memory 810 MHz Aug 12 11:28:19 localhost kernel: nouveau [ DRM] VRAM: 2048 MiB Aug 12 11:28:19 localhost kernel: nouveau [ DRM] GART: 1048576 MiB Aug 12 11:28:19 localhost kernel: nouveau [ DRM] TMDS table version 2.0 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB version 4.0 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB outp 00: 01000f02 00020030 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB outp 01: 02000f00 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB outp 02: 08011f82 00020010 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB outp 03: 02822fa6 04420010 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB outp 04: 02022f62 00020010 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB conn 00: 1030 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB conn 01: 00010161 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] DCB conn 02: 2246 Aug 12 11:28:19 localhost kernel: nouveau [ DRM] MM: using COPY for buffer copies Aug 12 11:28:19 localhost kernel: nouveau [ DRM] allocated 1280x1024 fb: 0x6, bo 880211e2dc00 Aug 12 11:28:19 localhost kernel: fbcon: nouveaufb (fb0) is primary device Aug 12 11:29:02 localhost kernel: nouveau :01:00.0: fb0: nouveaufb frame buffer device Aug 12 11:29:02 localhost kernel: nouveau :01:00.0: registered panic notifier Aug 12 11:29:02 localhost kernel: [drm] Initialized nouveau 1.2.2 20120801 for :01:00.0 on minor 0 [tom@localhost ~]$ grep nouveau dp+dvi Aug 12 11:24:23 localhost kernel: fb: switching to nouveaufb from EFI VGA Aug 12 11:24:23 localhost kernel: nouveau [ DEVICE][:01:00.0] BOOT0 : 0x117000a2 Aug 12 11:24:23 localhost kernel: nouveau [ DEVICE][:01:00.0] Chipset: GM107 (NV117) Aug 12 11:24:23 localhost kernel: nouveau [ DEVICE][:01:00.0] Family 
: NV110 Aug 12 11:24:23 localhost kernel: nouveau [ VBIOS][:01:00.0] using image from PROM Aug 12 11:24:23 localhost kernel: nouveau [ VBIOS][:01:00.0] BIT signature found Aug 12 11:24:23 localhost kernel: nouveau [ VBIOS][:01:00.0] version 82.07.32.00.38 Aug 12 11:24:23 localhost kernel: nouveau [ PMC][:01:00.0] MSI interrupts enabled Aug 12 11:24:23 localhost kernel: nouveau [ PFB][:01:00.0] RAM type: GDDR5 Aug 12 11:24:23 localhost kernel: nouveau [ PFB][:01:00.0] RAM size: 2048 MiB Aug 12 11:24:23 localhost kernel: nouveau [ PFB][:01:00.0] ZCOMP: 0 tags Aug 12 11:24:24 localhost kernel:
Re: [Nouveau] [PATCH] glsl: Extend lowering pass for gl_ClipDistance to support other arrays
I said this on IRC, but I'll say it here too: (a) please regenerate this with -M (not in the general case, but it makes sense here) (b) this seems odd as there's no support for cull distance elsewhere yet. should be part of a series that adds cull distance support. right now there is none, so this is out of place. On Mon, Aug 17, 2015 at 10:50 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: This will come in handy when we want to lower gl_CullDistance into gl_CullDistanceMESA. Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de --- src/glsl/Makefile.sources| 2 +- src/glsl/ir_optimization.h | 1 + src/glsl/lower_clip_distance.cpp | 574 src/glsl/lower_distance.cpp | 606 +++ 4 files changed, 608 insertions(+), 575 deletions(-) delete mode 100644 src/glsl/lower_clip_distance.cpp create mode 100644 src/glsl/lower_distance.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0b77244..00ba480 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -143,7 +143,7 @@ LIBGLSL_FILES = \ loop_analysis.h \ loop_controls.cpp \ loop_unroll.cpp \ - lower_clip_distance.cpp \ + lower_distance.cpp \ lower_const_arrays_to_uniforms.cpp \ lower_discard.cpp \ lower_discard_flow.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index eef107e..fe62e74 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -120,6 +120,7 @@ bool lower_variable_index_to_cond_assign(gl_shader_stage stage, bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool lower_const_arrays_to_uniforms(exec_list *instructions); bool lower_clip_distance(gl_shader *shader); +bool lower_cull_distance(gl_shader *shader); void lower_output_reads(unsigned stage, exec_list *instructions); bool lower_packing_builtins(exec_list *instructions, int op_mask); void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions); diff --git a/src/glsl/lower_clip_distance.cpp 
b/src/glsl/lower_clip_distance.cpp deleted file mode 100644 index 1ada215..000 --- a/src/glsl/lower_clip_distance.cpp +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_clip_distance.cpp - * - * This pass accounts for the difference between the way - * gl_ClipDistance is declared in standard GLSL (as an array of - * floats), and the way it is frequently implemented in hardware (as - * a pair of vec4s, with four clip distances packed into each). - * - * The declaration of gl_ClipDistance is replaced with a declaration - * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are - * translated to refer to gl_ClipDistanceMESA with the appropriate - * swizzling of array indices. 
For instance: - * - * gl_ClipDistance[i] - * - * is translated into: - * - * gl_ClipDistanceMESA[i2][i3] - * - * Since some hardware may not internally represent gl_ClipDistance as a pair - * of vec4's, this lowering pass is optional. To enable it, set the - * LowerClipDistance flag in gl_shader_compiler_options to true. - */ - -#include glsl_symbol_table.h -#include ir_rvalue_visitor.h -#include ir.h -#include program/prog_instruction.h /* For WRITEMASK_* */ - -namespace { - -class lower_clip_distance_visitor : public ir_rvalue_visitor { -public: - explicit lower_clip_distance_visitor(gl_shader_stage shader_stage) - : progress(false), old_clip_distance_out_var(NULL), -old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL), -new_clip_distance_in_var(NULL), shader_stage(shader_stage) - { - } - -
[Nouveau] [PATCH] fb/sddr3: add WR/CWL values seen on a GK208
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- Seen on my GK208. Trace available at http://people.freedesktop.org/~imirkin/traces/gk208/gk208-mmiotrace.log.xz Thanks to Roy for his assistance in finding the parameters. I tested this on top of his patch "bios/rammap: Identify DLLoff for >= GF100". [But not 100% sure if it was necessary.] drm/nouveau/nvkm/subdev/fb/sddr3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/fb/sddr3.c b/drm/nouveau/nvkm/subdev/fb/sddr3.c index 037deeb..2410383 100644 --- a/drm/nouveau/nvkm/subdev/fb/sddr3.c +++ b/drm/nouveau/nvkm/subdev/fb/sddr3.c @@ -53,7 +53,7 @@ static const struct ramxlat ramddr3_wr[] = { { 5, 1 }, { 6, 2 }, { 7, 3 }, { 8, 4 }, { 10, 5 }, { 12, 6 }, /* the below are mentioned in some, but not all, ddr3 docs */ - { 14, 7 }, { 16, 0 }, + { 14, 7 }, { 15, 7 }, { 16, 0 }, { -1 } }; @@ -61,7 +61,7 @@ static const struct ramxlat ramddr3_cwl[] = { { 5, 0 }, { 6, 1 }, { 7, 2 }, { 8, 3 }, /* the below are mentioned in some, but not all, ddr3 docs */ - { 9, 4 }, + { 9, 4 }, { 10, 5 }, { -1 } }; -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nv50, nvc0: take level into account when doing eng2d multi-layer blits
This fixes arb_get_texture_sub_image-get, and any situation where the 2d engine was being used for multi-layer blits to a non-0 level. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 10.6 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_surface.c | 14 ++ src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 14 ++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index b1ae016..77df5ff 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -1387,18 +1387,24 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) PUSH_DATA (push, info-dst.box.z + i); } else { const unsigned z = info-dst.box.z + i; +const uint64_t address = dst-base.address + + dst-level[info-dst.level].offset + + z * dst-layer_stride; BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2); -PUSH_DATAh(push, dst-base.address + z * dst-layer_stride); -PUSH_DATA (push, dst-base.address + z * dst-layer_stride); +PUSH_DATAh(push, address); +PUSH_DATA (push, address); } if (src-layout_3d) { /* not possible because of depth tiling */ assert(0); } else { const unsigned z = info-src.box.z + i; +const uint64_t address = src-base.address + + src-level[info-src.level].offset + + z * src-layer_stride; BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2); -PUSH_DATAh(push, src-base.address + z * src-layer_stride); -PUSH_DATA (push, src-base.address + z * src-layer_stride); +PUSH_DATAh(push, address); +PUSH_DATA (push, address); } BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */ PUSH_DATA (push, srcy 32); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 51a6f93..136a68c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1336,18 +1336,24 @@ nvc0_blit_eng2d(struct 
nvc0_context *nvc0, const struct pipe_blit_info *info) PUSH_DATA (push, info-dst.box.z + i); } else { const unsigned z = info-dst.box.z + i; +const uint64_t address = dst-base.address + + dst-level[info-dst.level].offset + + z * dst-layer_stride; BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2); -PUSH_DATAh(push, dst-base.address + z * dst-layer_stride); -PUSH_DATA (push, dst-base.address + z * dst-layer_stride); +PUSH_DATAh(push, address); +PUSH_DATA (push, address); } if (src-layout_3d) { /* not possible because of depth tiling */ assert(0); } else { const unsigned z = info-src.box.z + i; +const uint64_t address = src-base.address + + src-level[info-src.level].offset + + z * src-layer_stride; BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2); -PUSH_DATAh(push, src-base.address + z * src-layer_stride); -PUSH_DATA (push, src-base.address + z * src-layer_stride); +PUSH_DATAh(push, address); +PUSH_DATA (push, address); } BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */ PUSH_DATA (push, srcy 32); -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] [PATCH 1/2] nvc0/ir: detect AND/SHR pairs and convert into EXTBF
On Tue, Aug 18, 2015 at 9:57 PM, Matt Turner matts...@gmail.com wrote: On Tue, Aug 18, 2015 at 6:49 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Some shaders appear to extract bits using shift/and combos. Detect (some) of those and convert to EXTBF instead. What is EXTBF? Extract byte to float? Extract Bitfield. I ask because Unigine Heaven has shaders that pack 3x byte-integers into one component of a vec4 and extracts them with shifts/ands and converts them to floats, and i965 could do the extraction and conversion in a single instruction. I'm curious if this is the same thing you're optimizing. I thought about adding an extract_byte(src, byte_num) operation, but i965's copy propagation caused me some headache and I shelved it. Yes, I think it's the same shader... it's doing a texelFetch() and then grabbing bytes 0, 1, 2 off that. The generated shader code after the second patch does: /*05d0*/ TLD.LL.P R0, R24, 0x0, 2D, 0x3; /*05d8*/ TEXDEPBAR 0x0; /*05e0*/ I2F.F32.U8 R2, R1; /*05e8*/ FFMA.FTZ R2, R2, R15, R19; /*05f0*/ I2F.F32.U8 R8, R1.B1; /*05f8*/ FFMA.FTZ R8, R8, R15, R19; /*0608*/ I2F.F32.U8 R1, R1.B2; I'll let you guess what these things mean. TLD = texelfetch :) -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nvc0: make use of conservative depth info for forcing early z tests
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- Entirely untested as there are no piglit tests for this functionality. Won't push until some appear, but wanted to get it out there. .../drivers/nouveau/codegen/nv50_ir_driver.h | 2 +- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 3 +++ src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_program.h| 2 +- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 5 .../drivers/nouveau/nvc0/nvc0_state_validate.c | 30 -- 6 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 2b9edcf..14acb60 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -140,7 +140,7 @@ struct nv50_ir_prog_info struct { unsigned numColourResults; bool writesDepth; - bool earlyFragTests; + bool depthLayout; bool separateFragData; bool usesDiscard; } fp; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index f153674..dcfa4c4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -947,6 +947,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: // we don't care break; + case TGSI_PROPERTY_FS_DEPTH_LAYOUT: + info-prop.fp.depthLayout = prop-u[0].Data; + break; case TGSI_PROPERTY_VS_PROHIBIT_UCPS: info-io.genUserClip = -1; break; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 12f1bb7..44d951b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -452,7 +452,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) fp-hdr[18] |= info-out[i].mask 
info-out[i].slot[0]; } - fp-fp.early_z = info-prop.fp.earlyFragTests; + fp-fp.depth_layout = info-prop.fp.depthLayout; return 0; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 390e0c7..fa14d68 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -44,7 +44,7 @@ struct nvc0_program { bool need_vertex_id; } vp; struct { - uint8_t early_z; + uint8_t depth_layout; uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; uint8_t sample_interp; } fp; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 8f8ac2d..1c87714 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -113,11 +113,6 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) return; nvc0_program_update_context_state(nvc0, fp, 4); - if (fp-fp.early_z != nvc0-state.early_z_forced) { - nvc0-state.early_z_forced = fp-fp.early_z; - IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp-fp.early_z); - } - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); PUSH_DATA (push, 0x51); PUSH_DATA (push, fp-code_base); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 47bd66d..609b3b8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -517,25 +517,51 @@ nvc0_validate_global_residents(struct nvc0_context *nvc0, } } +static bool +nvc0_depth_layout_test_compatible(unsigned depth_layout, unsigned test) +{ + if (depth_layout (test == PIPE_FUNC_ALWAYS || test == PIPE_FUNC_NEVER)) + return true; + switch (depth_layout) { + case TGSI_FS_DEPTH_LAYOUT_UNCHANGED: + return true; + case TGSI_FS_DEPTH_LAYOUT_GREATER: + return test == PIPE_FUNC_GREATER || test == PIPE_FUNC_GEQUAL; + case TGSI_FS_DEPTH_LAYOUT_LESS: + 
return test == PIPE_FUNC_LESS || test == PIPE_FUNC_LEQUAL; + default: + return false; + } +} + static void nvc0_validate_derived_1(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0-base.pushbuf; + struct nvc0_program *fp = nvc0-fragprog; bool rasterizer_discard; + bool early_z = false; if (nvc0-rast nvc0-rast-pipe.rasterizer_discard) { rasterizer_discard = true; } else { bool zs = nvc0-zsa (nvc0-zsa-pipe.depth.enabled || nvc0-zsa-pipe.stencil[0].enabled); - rasterizer_discard = !zs - (!nvc0-fragprog || !nvc0-fragprog-hdr[18]); + rasterizer_discard = !zs (!fp || !fp
[Nouveau] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used
The hardware only generates vertexid when vertices come from a VBO. This fixes: vertexid-drawelements vertexid-drawarrays Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 11.0 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 + src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 + src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 02dc367..eff4477 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + prog-vp.vertexid = 1; continue; default: break; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 5d3ff56..f4e8e94 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,6 +76,7 @@ struct nv50_program { ubyte psiz;/* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; + ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; } vp; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b304a17..66dcf43 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,7 +503,8 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | 
NV50_NEW_GMTYPROG }, -{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, +{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | + NV50_NEW_VERTPROG }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 600b973..fb4305f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex-num_elements, nv50-state.num_vtxelts); + /* A vertexid is not generated for inline data uploads. Have to use a +* VBO. This check must come after the vertprog has been validated, +* otherwise vertexid may be unset. +*/ + assert(nv50-vertprog-translated); + if (nv50-vertprog-vp.vertexid) + nv50-vbo_push_hint = 0; + if (unlikely(vertex-need_conversion)) nv50-vbo_fifo = ~0; else -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used
On Mon, Aug 24, 2015 at 11:57 AM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: On 24.08.2015 17:51, Ilia Mirkin wrote: The hardware only generates vertexid when vertices come from a VBO. This fixes: vertexid-drawelements vertexid-drawarrays Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 11.0 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 + src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 + src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 02dc367..eff4477 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog-vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + prog-vp.vertexid = 1; continue; default: break; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 5d3ff56..f4e8e94 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,6 +76,7 @@ struct nv50_program { ubyte psiz;/* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; + ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; } vp; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b304a17..66dcf43 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,7 +503,8 @@ static struct 
state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, -{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, +{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | + NV50_NEW_VERTPROG }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 600b973..fb4305f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex-num_elements, nv50-state.num_vtxelts); + /* A vertexid is not generated for inline data uploads. Have to use a +* VBO. This check must come after the vertprog has been validated, +* otherwise vertexid may be unset. +*/ + assert(nv50-vertprog-translated); + if (nv50-vertprog-vp.vertexid) + nv50-vbo_push_hint = 0; + if (unlikely(vertex-need_conversion)) nv50-vbo_fifo = ~0; else LGTM! Thanks. I was a little torn on whether to do it this way (which penalizes someone switching vertex programs while keeping the vertex setup the same), as opposed to do it the other way, which would penalize every program that uses vertexid (by calling nv50_vertex_arrays_validate twice for those). Ideally I'd do it in a way that penalized neither, but that's just not going to happen :) ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] nvc0/ir: detect AND/SHR pairs and convert into EXTBF
Some shaders appear to extract bits using shift/and combos. Detect (some) of those and convert to EXTBF instead. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 66 +++--- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 3841c33..b0e74f0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1023,27 +1023,53 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue imm0, int s) case OP_AND: { - CmpInstruction *cmp = i-getSrc(t)-getInsn()-asCmp(); - if (!cmp || cmp-op == OP_SLCT || cmp-getDef(0)-refCount() 1) - return; - if (!prog-getTarget()-isOpSupported(cmp-op, TYPE_F32)) - return; - if (imm0.reg.data.f32 != 1.0) - return; - if (i-getSrc(t)-getInsn()-dType != TYPE_U32) - return; + Instruction *src = i-getSrc(t)-getInsn(); + ImmediateValue imm1; + if (imm0.reg.data.u32 == 0) { + i-op = OP_MOV; + i-setSrc(0, new_ImmediateValue(prog, 0u)); + i-src(0).mod = Modifier(0); + i-setSrc(1, NULL); + } else if (imm0.reg.data.u32 == ~0U) { + i-op = i-src(t).mod.getOp(); + if (t) { +i-setSrc(0, i-getSrc(t)); +i-src(0).mod = i-src(t).mod; + } + i-setSrc(1, NULL); + } else if (src-asCmp()) { + CmpInstruction *cmp = src-asCmp(); + if (!cmp || cmp-op == OP_SLCT || cmp-getDef(0)-refCount() 1) +return; + if (!prog-getTarget()-isOpSupported(cmp-op, TYPE_F32)) +return; + if (imm0.reg.data.f32 != 1.0) +return; + if (cmp-dType != TYPE_U32) +return; - i-getSrc(t)-getInsn()-dType = TYPE_F32; - if (i-src(t).mod != Modifier(0)) { - assert(i-src(t).mod == Modifier(NV50_IR_MOD_NOT)); - i-src(t).mod = Modifier(0); - cmp-setCond = inverseCondCode(cmp-setCond); - } - i-op = OP_MOV; - i-setSrc(s, NULL); - if (t) { - i-setSrc(0, i-getSrc(t)); - i-setSrc(t, NULL); + cmp-dType = TYPE_F32; + if (i-src(t).mod != Modifier(0)) { 
+assert(i-src(t).mod == Modifier(NV50_IR_MOD_NOT)); +i-src(t).mod = Modifier(0); +cmp-setCond = inverseCondCode(cmp-setCond); + } + i-op = OP_MOV; + i-setSrc(s, NULL); + if (t) { +i-setSrc(0, i-getSrc(t)); +i-setSrc(t, NULL); + } + } else if (prog-getTarget()-isOpSupported(OP_EXTBF, TYPE_U32) + src-op == OP_SHR + src-src(1).getImmediate(imm1) + i-src(t).mod == Modifier(0) + util_is_power_of_two(imm0.reg.data.u32 + 1)) { + // low byte = offset, high byte = width + uint32_t ext = (util_last_bit(imm0.reg.data.u32) 8) | imm1.reg.data.u32; + i-op = OP_EXTBF; + i-setSrc(0, src-getSrc(0)); + i-setSrc(1, new_ImmediateValue(prog, ext)); } } break; -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 2/2] nvc0/ir: detect i2f/i2i which operate on specific bytes/words
Some Unigine shaders have been observed to unpack bytes out of 32-bit integers and convert them to floats. I2F/I2I can handle this sort of thing directly. Detect the handleable situations. This misses 16-bit word capabilities in nv50, but I haven't seen shaders that would actually make use of that. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 2 + .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 4 ++ .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 79 -- 4 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index f06056f..8f15429 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i) code[0] |= typeSizeofLog2(dType) 10; code[0] |= typeSizeofLog2(i-sType) 12; + code[1] |= i-subOp 12; if (isSignedIntType(dType)) code[0] |= 0x4000; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index ef5c87d..6e22788 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F() emitField(0x31, 1, (insn-op == OP_ABS) || insn-src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn-op == OP_NEG) || insn-src(0).mod.neg()); + emitField(0x29, 2, insn-subOp); emitRND (0x27, rnd, -1); emitField(0x0d, 1, isSignedType(insn-sType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn-sType))); @@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I() emitField(0x31, 1, (insn-op == OP_ABS) || insn-src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn-op == OP_NEG) || insn-src(0).mod.neg()); + 
emitField(0x29, 2, insn-subOp); emitField(0x0d, 1, isSignedType(insn-sType)); emitField(0x0c, 1, isSignedType(insn-dType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn-sType))); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 5703712..6bf5219 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i) code[0] |= util_logbase2(typeSizeof(dType)) 20; code[0] |= util_logbase2(typeSizeof(i-sType)) 23; + // for 8/16 source types, the byte/word is in subOp. word 1 is + // represented as 2. + code[1] |= i-subOp 0x17; + if (sat) code[0] |= 0x20; if (abs) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index b0e74f0..e37420c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1312,7 +1312,8 @@ private: void handleRCP(Instruction *); void handleSLCT(Instruction *); void handleLOGOP(Instruction *); - void handleCVT(Instruction *); + void handleCVT_NEG(Instruction *); + void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); BuildUtil bld; @@ -1563,12 +1564,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop) // nv50: // F2I(NEG(I2F(ABS(SET void -AlgebraicOpt::handleCVT(Instruction *cvt) +AlgebraicOpt::handleCVT_NEG(Instruction *cvt) { + Instruction *insn = cvt-getSrc(0)-getInsn(); if (cvt-sType != TYPE_F32 || cvt-dType != TYPE_S32 || cvt-src(0).mod != Modifier(0)) return; - Instruction *insn = cvt-getSrc(0)-getInsn(); if (!insn || insn-op != OP_NEG || insn-dType != TYPE_F32) return; if (insn-src(0).mod != Modifier(0)) @@ -1598,6 +1599,74 @@ AlgebraicOpt::handleCVT(Instruction *cvt) delete_Instruction(prog, cvt); } +// Some shaders extract packed bytes out of words and 
convert them to +// e.g. float. The Fermi+ CVT instruction can extract those directly, as can +// nv50 for word sizes. +// +// CVT(EXTBF(x, byte/word)) +// CVT(AND(bytemask, x)) +// CVT(AND(bytemask, SHR(x, 8/16/24))) +void +AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt) +{ + Instruction *insn = cvt-getSrc(0)-getInsn(); + ImmediateValue imm0, imm1; + Value *arg = NULL; + unsigned width, offset; + if ((cvt-sType != TYPE_U32 cvt-sType != TYPE_S32) || !insn) + return; + if (insn-op == OP_EXTBF insn-src(1).getImmediate(imm0)) { + width = (imm0.reg.data.u32 8) 0xff; + offset = imm0.reg.data.u32 0xff; + arg = insn-getSrc(0); + + if (width != 8 width != 16) + return; + if (width == 8
[Nouveau] Constbuf uploads on G80 and GF100+
Hello, It seems that NVIDIA GPUs, at least starting with G80, have an optimized path for the sequence draw; update consts; draw; update consts; etc., whereby it will start processing draw2 before draw1 is done. To do this, it appears there's some magic constbuf cache on the chip which buffers the updates to the right draw, eventually serializing them all out to memory as if it were all done serially. In order to make it into this magic constbuf cache, there are special constbuf upload entrypoints; on GF100 they are method 0x2390 and the associated methods that come right before it. However, in order for it to all work out as one might hope, the CB settings that were in place when the CB was bound (via method 0x2410) have to match the ones used for upload, specifically the address. So if you have a CB at address 0x1000 of size 0x1000, and you decide to update its data at 0x800, it appears that you have to use that same initial 0x1000 as the base and 0x800 as the offset. If you use an address of 0x1800, it won't notice that the CB is bound. This is easy enough to handle. But what do you do when some genius wants to have two overlapping buffers, and updates the overlapping area? For example: glBufferData(GL_UNIFORM_BUFFER, 0x1000); glBindBufferRange(GL_UNIFORM_BUFFER, 1, buf, 0, 0x200); glBindBufferRange(GL_UNIFORM_BUFFER, 2, buf, 0x100, 0x200); and then try to do a glMapBufferRange(buf, 0x100 - 0x1000) or something. Is there a way to handle it while playing nice with the CB update buffer mechanism, or do you have to give up and do a serialize (method 0x110) followed by a memory barrier (0x21c)? Or do you just pick whichever one you like, as long as any were bound and it's good? Also, on G80-era GPUs the constbuf upload process is a bit different, where it wants the uploads to go to a specific binding point. How should the overlapping situation be handled there? Thanks for any info on this! 
-ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] fbcon/nv11-: correctly account for ring space usage
The RING_SPACE macro accounts for how much space is used up, so it's important to ask it for the right amount. Incorrect accounting of this can cause page faults down the line as writes are attempted outside of the ring. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: sta...@vger.kernel.org --- drm/nouveau/nv04_fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drm/nouveau/nv04_fbcon.c b/drm/nouveau/nv04_fbcon.c index 4ef602c..495c576 100644 --- a/drm/nouveau/nv04_fbcon.c +++ b/drm/nouveau/nv04_fbcon.c @@ -203,7 +203,7 @@ nv04_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - if (RING_SPACE(chan, 49)) { + if (RING_SPACE(chan, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) { nouveau_fbcon_gpu_lockup(info); return 0; } -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] fbcon/g80: reduce PUSH_SPACE alloc, fire ring on accel init
Only 58 words get written to the ring, not 59. Also, normalize the accel init wrt nvc0 and nv04 fbcon impls by firing the ring at accel init time rather than waiting until later. Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- drm/nouveau/nv50_fbcon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drm/nouveau/nv50_fbcon.c b/drm/nouveau/nv50_fbcon.c index 394c89a..901130b 100644 --- a/drm/nouveau/nv50_fbcon.c +++ b/drm/nouveau/nv50_fbcon.c @@ -188,7 +188,7 @@ nv50_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - ret = RING_SPACE(chan, 59); + ret = RING_SPACE(chan, 58); if (ret) { nouveau_fbcon_gpu_lockup(info); return ret; @@ -252,6 +252,7 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->var.yres_virtual); OUT_RING(chan, upper_32_bits(fb->vma.offset)); OUT_RING(chan, lower_32_bits(fb->vma.offset)); + FIRE_RING(chan); return 0; } -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] What are the restrictions around loading indirect constbuf values
On Thu, Jun 25, 2015 at 10:41 AM, Ilia Mirkin imir...@alum.mit.edu wrote: Hello, We recently tracked down a bug on Tesla GPUs (i.e. G80-GT218) whereby it appears that instructions like 0028: b5000409 08000780 add rn f32 $r2 $r2 neg c0[$a1] 0040: b500060d 08004780 add rn f32 $r3 $r3 neg c0[$a1+0x4] or with nvdisasm: .headerflags @"EF_CUDA_SM12 EF_CUDA_PTX_SM(EF_CUDA_SM12)" /*0000*/ FADD R2, R2, -c[0x0][A1+0x0]; /* 0x08000780b5000409 */ /*0008*/ FADD R3, R3, -c[0x0][A1+0x1]; /* 0x08004780b500060d */ don't appear to execute properly. However, just MOV'ing the values into registers works fine. This was observed on a G92 chip. See bug https://bugs.freedesktop.org/show_bug.cgi?id=91056. I was hoping you could save me some time and let me know what instructions can load things like c0[$a1+4] (or maybe it's only in combination with the modifier?), and which Tesla-family GPUs have those restrictions. Hm, there's something more subtle going on here. Please disregard. A simple shader on my GT215 for both vertex and fragment demonstrates that those instructions work at least some of the time. (I didn't have an nv50-era card plugged in when I was asking the question, so I couldn't check for myself.) Perhaps there's something more subtle going on here, like non-uniformity across execution units... -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Add Option DRI to allow selection of maximum DRI level.
On Wed, Jul 29, 2015 at 6:54 AM, Mario Kleiner mario.kleiner...@gmail.com wrote: Allow user to select the maximum level of DRI implementation to use, DRI2 or DRI3. exa accel supports both DRI2 and, if the kernel supports rendernodes, also DRI3. However, DRI3 still seems to have some bugs on current implementations, and additionally it doesn't work well at all for X-Servers older than 1.16.3 due to X-Server bugs. Therefore we default to DRI2 on exa, but allow the user to enable DRI3 with this new option. nouveau's glamor accel backend currently doesn't work under DRI2 at all, so we continue to use DRI3 whenever it is supported and ignore this new option for now. Also add a bit more output about status of Present and DRI3 to aid debugging. Note: This was originally meant to be a boolean parameter, to just select between DRI3 on and off, but changed here to a DRI level to make it consistent with the same option in the released Intel-ddx. Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com Cc: Ilia Mirkin imir...@alum.mit.edu Cc: Emil Velikov emil.l.veli...@gmail.com Cc: Martin Peres martin.pe...@free.fr Cc: Ben Skeggs bske...@redhat.com --- man/nouveau.man | 6 ++ src/nouveau_dri2.c | 11 ++- src/nouveau_glamor.c | 2 +- src/nv_const.h | 2 ++ src/nv_driver.c | 30 -- src/nv_type.h| 1 + 6 files changed, 48 insertions(+), 4 deletions(-) diff --git a/man/nouveau.man b/man/nouveau.man index 129bb7f..3d5a428 100644 --- a/man/nouveau.man +++ b/man/nouveau.man @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as defined in that specification. .br Default: 1. +.TP +.BI Option \*qDRI\*q \*q integer \*q +Define the maximum level of DRI to enable. Valid values are 2 or 3. +exa acceleration will honor the maximum level if it is supported. +Under glamor acceleration DRI3 is always enabled if supported, +as glamor currently does not support DRI2. Default: 2 on exa, 3 on glamor. 
.SH SEE ALSO __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), Xserver(__appmansuffix__), X(__miscmansuffix__) .SH AUTHORS diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index ce6f53e..81ee9be 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -1134,7 +1134,16 @@ nouveau_dri3_screen_init(ScreenPtr screen) if (buf stat(buf, render) == 0 master.st_mode == render.st_mode) { pNv-render_node = buf; - return dri3_screen_init(screen, nouveau_dri3_screen_info); + if (dri3_screen_init(screen, nouveau_dri3_screen_info)) { + xf86DrvMsg(pScrn-scrnIndex, X_INFO, + DRI3 on EXA enabled\n); + return TRUE; + } + else { + xf86DrvMsg(pScrn-scrnIndex, X_WARNING, + DRI3 on EXA initialization failed\n); + return FALSE; + } } else free(buf); #endif diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c index b8bca17..a8e9206 100644 --- a/src/nouveau_glamor.c +++ b/src/nouveau_glamor.c @@ -240,7 +240,7 @@ nouveau_glamor_init(ScreenPtr screen) screen-SharePixmapBacking = nouveau_glamor_share_pixmap_backing; screen-SetSharedPixmapBacking = nouveau_glamor_set_shared_pixmap_backing; - xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised\n); + xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised with DRI3\n); pNv-Flush = nouveau_glamor_flush; return TRUE; } diff --git a/src/nv_const.h b/src/nv_const.h index f1b4e9b..3f18d23 100644 --- a/src/nv_const.h +++ b/src/nv_const.h @@ -18,6 +18,7 @@ typedef enum { OPTION_SWAP_LIMIT, OPTION_ASYNC_COPY, OPTION_ACCELMETHOD, +OPTION_DRI, } NVOpts; @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions[] = { { OPTION_SWAP_LIMIT, SwapLimit,OPTV_INTEGER, {0}, FALSE }, { OPTION_ASYNC_COPY, AsyncUTSDFS, OPTV_BOOLEAN, {0}, FALSE }, { OPTION_ACCELMETHOD, AccelMethod, OPTV_STRING,{0}, FALSE }, +{ OPTION_DRI, DRI, OPTV_INTEGER, {0}, FALSE }, { -1, NULL, OPTV_NONE, {0}, FALSE } }; diff --git a/src/nv_driver.c b/src/nv_driver.c index 4218e4f..b284d96 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -1095,6 +1095,25 @@ 
NVPreInit(ScrnInfoPtr pScrn, int flags) pNv-ce_enabled = xf86ReturnOptValBool(pNv-Options, OPTION_ASYNC_COPY, FALSE); + /* Define maximum allowed level of DRI implementation to use. +* We default to DRI2 on EXA for now, as DRI3 still has some +* problems. However, the max_dri_level can be only honored +* by EXA, as GLAMOR only
Re: [Nouveau] [PATCH 2/2] Add Option DRI to allow selection of maximum DRI level. (v2)
Series is Reviewed-by: Ilia Mirkin imir...@alum.mit.edu I'll let it sit for a day or so in case others have feedback. On Wed, Jul 29, 2015 at 8:39 AM, Mario Kleiner mario.kleiner...@gmail.com wrote: Allow user to select the maximum level of DRI implementation to use, DRI2 or DRI3. exa accel supports both DRI2 and, if the kernel supports rendernodes, also DRI3. However, DRI3 still seems to have some bugs on current implementations, and additionally it doesn't work well at all for X-Servers older than 1.16.3 due to X-Server bugs. Therefore we default to DRI2 on exa, but allow the user to enable DRI3 with this new option. nouveau's glamor accel backend currently doesn't work under DRI2 at all, so we continue to use DRI3 whenever it is supported and ignore this new option for now. Also add a bit more output about status of Present and DRI3 to aid debugging. Note: This was originally meant to be a boolean parameter, to just select between DRI3 on and off, but changed here to a DRI level to make it consistent with the same option in the released Intel-ddx. v2: Use fixed up Bool return type of nouveau_present_init(). Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com Cc: Ilia Mirkin imir...@alum.mit.edu Cc: Emil Velikov emil.l.veli...@gmail.com Cc: Martin Peres martin.pe...@free.fr Cc: Ben Skeggs bske...@redhat.com --- man/nouveau.man | 6 ++ src/nouveau_dri2.c | 11 ++- src/nouveau_glamor.c | 2 +- src/nv_const.h | 2 ++ src/nv_driver.c | 30 -- src/nv_type.h| 1 + 6 files changed, 48 insertions(+), 4 deletions(-) diff --git a/man/nouveau.man b/man/nouveau.man index 129bb7f..3d5a428 100644 --- a/man/nouveau.man +++ b/man/nouveau.man @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as defined in that specification. .br Default: 1. +.TP +.BI Option \*qDRI\*q \*q integer \*q +Define the maximum level of DRI to enable. Valid values are 2 or 3. +exa acceleration will honor the maximum level if it is supported. 
+Under glamor acceleration DRI3 is always enabled if supported, +as glamor currently does not support DRI2. Default: 2 on exa, 3 on glamor. .SH SEE ALSO __xservername__(__appmansuffix__), __xconfigfile__(__filemansuffix__), Xserver(__appmansuffix__), X(__miscmansuffix__) .SH AUTHORS diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index ce6f53e..81ee9be 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -1134,7 +1134,16 @@ nouveau_dri3_screen_init(ScreenPtr screen) if (buf stat(buf, render) == 0 master.st_mode == render.st_mode) { pNv-render_node = buf; - return dri3_screen_init(screen, nouveau_dri3_screen_info); + if (dri3_screen_init(screen, nouveau_dri3_screen_info)) { + xf86DrvMsg(pScrn-scrnIndex, X_INFO, + DRI3 on EXA enabled\n); + return TRUE; + } + else { + xf86DrvMsg(pScrn-scrnIndex, X_WARNING, + DRI3 on EXA initialization failed\n); + return FALSE; + } } else free(buf); #endif diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c index b8bca17..a8e9206 100644 --- a/src/nouveau_glamor.c +++ b/src/nouveau_glamor.c @@ -240,7 +240,7 @@ nouveau_glamor_init(ScreenPtr screen) screen-SharePixmapBacking = nouveau_glamor_share_pixmap_backing; screen-SetSharedPixmapBacking = nouveau_glamor_set_shared_pixmap_backing; - xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised\n); + xf86DrvMsg(scrn-scrnIndex, X_INFO, [GLAMOR] initialised with DRI3\n); pNv-Flush = nouveau_glamor_flush; return TRUE; } diff --git a/src/nv_const.h b/src/nv_const.h index f1b4e9b..3f18d23 100644 --- a/src/nv_const.h +++ b/src/nv_const.h @@ -18,6 +18,7 @@ typedef enum { OPTION_SWAP_LIMIT, OPTION_ASYNC_COPY, OPTION_ACCELMETHOD, +OPTION_DRI, } NVOpts; @@ -34,6 +35,7 @@ static const OptionInfoRec NVOptions[] = { { OPTION_SWAP_LIMIT, SwapLimit,OPTV_INTEGER, {0}, FALSE }, { OPTION_ASYNC_COPY, AsyncUTSDFS, OPTV_BOOLEAN, {0}, FALSE }, { OPTION_ACCELMETHOD, AccelMethod, OPTV_STRING,{0}, FALSE }, +{ OPTION_DRI, DRI, OPTV_INTEGER, {0}, FALSE }, { -1, NULL, OPTV_NONE, {0}, FALSE } }; diff 
--git a/src/nv_driver.c b/src/nv_driver.c index 4218e4f..514a8bc 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -1095,6 +1095,25 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) pNv-ce_enabled = xf86ReturnOptValBool(pNv-Options, OPTION_ASYNC_COPY, FALSE); + /* Define maximum allowed level of DRI implementation
Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x
On Mon, Aug 3, 2015 at 1:31 PM, Hans de Goede hdego...@redhat.com wrote: Hi, On 03-08-15 17:36, Ilia Mirkin wrote: On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com wrote: Hi, On 30-07-15 16:09, Ilia Mirkin wrote: FWIW this is a fail on nv50+ as well. See for example https://bugs.freedesktop.org/show_bug.cgi?id=91445 My suspicion is that this is due to the lack of PUSH_KICK in the *Done exa handlers -- works fine with DRI2, but DRI3 has no synchronization and so the commands never get flushed out. Easily verified by sticking PUSH_KICK's everywhere. I do not believe that that is the problem, in my case it clearly seems to be a pitch / swizzle problem rather than a synchronization problem, here is what my desktop with gnome shell looks like when using DRI2: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg And this is what it looks like when using DRI3: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg The DRI2 screenshot is made with Mario's 2 patches on top of current master: http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html And then adding Option "DRI" "2" to xorg.conf. His patches should have defaulted it to DRI 2 I think, so this is unnecessary. In fact you should have had to say DRI 3 to get DRI3 with his patches. -- I've also tried disabling EXA using Option "AccelMethod" "none", but that seems to also automatically disable all DRI, leading to software rendering. I discussed this with Ben this morning and he suggested that this is likely a Mesa issue since with DRI3 mesa rather than the ddx allocs the surfaces. I've tried disabling swizzling in the mesa code by forcing nv30_miptree_create() to always take the code path for linear textures, but that leads to the exact same result as before that change. Ah yes. Very different problem indeed. I actually suspect it has to do with swizzling.
Look at the white pattern of the moon -- it's all in a line. That means that it expected some locality and instead it got drawn all on a line. If it were merely a stride problem, I'd expect to see strips of the moon below and offset from one another. So... take a look at nv30_miptree_from_handle -- I wonder if it can now receive swizzled textures where it couldn't before. Ok, that does go in the direction I am expecting the problem to be, but I'm afraid I'm going to need a bit more guidance, what exactly am I looking for in that function / which knobs should I try to vary / play with to maybe fix this? Unfortunately this is playing near (or past) the limits of my knowledge as well. My understanding is that DRI3 passes pixmaps around with dma-buf, aka bo_from_handle. DRI2 uses some other mechanism which is not that (I think it just copies stuff around). Now on nv50+, bo's have tile flags (and memtype and probably other annoyances). The tile flags indicate the specific tiling mechanism used on that bo (i.e. do you do 32x32 tiles? 32x64? etc). Take a look at the nouveau_bo_new() call in nv50_miptree.c -- note how it takes a bo config argument. This bo config can then later be retrieved using some other syscall. However on nv30 there appears to not be any such thing. The nouveau_bo_new call just passes in NULL for creating the bo, which means that there's no way to recover the "are you swizzled" information after-the-fact. Presumably you should create a nv04 bo config section in the union, and just pass the single swizzled bit through. I'm not sure what, if anything, is required on the kernel side for that. I don't think there's any optionality in how the swizzling is done for pre-nv50. Note that in the nv30_miptree logic, mt->swizzled implies that mt->uniform_pitch = 0, but the level pitch is set properly (again, see nv30_miptree_create).
Hope this sheds some light and doesn't cause you to go in the wrong direction -- please take everything I say with a grain of salt -- I'm often a bit off on some of the details. Cheers, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] enable dri3 support without glamor causes gnome-shell regression on nv4x
On Mon, Aug 10, 2015 at 8:47 AM, Hans de Goede hdego...@redhat.com wrote: Hi, On 03-08-15 20:09, Ilia Mirkin wrote: On Mon, Aug 3, 2015 at 1:31 PM, Hans de Goede hdego...@redhat.com wrote: Hi, On 03-08-15 17:36, Ilia Mirkin wrote: On Mon, Aug 3, 2015 at 9:02 AM, Hans de Goede hdego...@redhat.com wrote: Hi, On 30-07-15 16:09, Ilia Mirkin wrote: FWIW this is a fail on nv50+ as well. See for example https://bugs.freedesktop.org/show_bug.cgi?id=91445 My suspicion is that this is due to the lack of PUSH_KICK in the *Done exa handlers -- works fine with DRI2, but DRI3 has no synchronization and so the commands never get flushed out. Easily verified by sticking PUSH_KICK's everywhere. I do not believe that that is the problem, in my case it clearly seems to be a pitch / swizzle problem rather than a synchronization problem, here is what my desktop with gnome shell looks like when using DRI2: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-good.jpg And this is what it looks like when using DRI3: https://fedorapeople.org/~jwrdegoede/nv46-gnome-shell-bad.jpg The DRI2 screenshot is made with Mario's 2 patches on top of current master: http://lists.freedesktop.org/archives/nouveau/2015-July/021740.html http://lists.freedesktop.org/archives/nouveau/2015-July/021741.html And then adding Option "DRI" "2" to xorg.conf. His patches should have defaulted it to DRI 2 I think, so this is unnecessary. In fact you should have had to say DRI 3 to get DRI3 with his patches. -- I've also tried disabling EXA using Option "AccelMethod" "none", but that seems to also automatically disable all DRI, leading to software rendering. I discussed this with Ben this morning and he suggested that this is likely a Mesa issue since with DRI3 mesa rather than the ddx allocs the surfaces. I've tried disabling swizzling in the mesa code by forcing nv30_miptree_create() to always take the code path for linear textures, but that leads to the exact same result as before that change. Ah yes.
Very different problem indeed. I actually suspect it has to do with swizzling. Look at the white pattern of the moon -- it's all in a line. That means that it expected some locality and instead it got drawn all on a line. If it were merely a stride problem, I'd expect to see strips of the moon below and offset from one another. So... take a look at nv30_miptree_from_handle -- I wonder if it can now receive swizzled textures where it couldn't before. Ok, that does go in the direction I am expecting the problem to be, but I'm afraid I'm going to need a bit more guidance, what exactly am I looking for in that function / which knobs should I try to vary / play with to maybe fix this ? Unfortunately this is playing near (or past) the limits of my knowledge as well. My understanding is that DRI3 passes pixmaps around with dma-buf, aka bo_from_handle. DRI2 uses some other mechanism which is not that (I think it just copies stuff around). Now on nv50+, bo's have tile flags (and memtype and probably other annoyances). The tile flags indicate the specific tiling mechanism used on that bo (i.e. do you do 32x32 tiles? 32x64? etc). Take a look at the nouveau_bo_new() call in nv50_miptree.c -- note how it takes a bo config argument. This bo config can then later be retrieved using some other syscall. However on nv30 there appears to not be any such thing. The nouveau_bo_new call just passes in NULL for creating the bo, which means that there's no way to recover the are you swizzled information after-the-fact. Presumably you should create a nv04 bo config section in the union, That already exists, and indeed gets set by the nouveau_allocate_surface function from src/nv_accel_common.c from the ddx, and just pass the single swizzled bit through. I'm not sure what, if anything, is required on the kernel side for that. I don't think there's any optionality in how the swizzling is done for pre-nv50. 
Note that in the nv30_miptree logic, mt->swizzled implies that mt->uniform_pitch = 0, but the level pitch is set properly (again, see nv30_miptree_create). Hope this sheds some light and doesn't cause you to go in the wrong direction -- please take everything I say with a grain of salt -- I'm often a bit off on some of the details. Thanks this was helpful, I do feel we are getting somewhere, but I do need a bit more help. I've added some debug printf's to nv30_miptree.c, nv30_miptree_create and nv30_miptree_from_handle, where the latter is only used when using dri2 (i.e. in the working case). Doing a diff between a log of starting gnome-shell with dri2 vs dri3 results in this: --- mesa.log.dri2 2015-08-10 14:18:03.182712022 +0200 +++ mesa.log.dri3 2015-08-10 14:18:33.26338 +0200 @@ -1,8 +1,8 @@ nv30_miptree_create 512x32 uniform_pitch 0 usage 0 flags 0 -nv30_miptree_from_handle 1x1 uniform_pitch 1024 usage 0 flags 0
Re: [Nouveau] Odd text behavior on Websites and others
On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote: GeForce GTX 745 is a NVIDIA card in the NV117 (GM107) Family... The update was made using DNF (Yum) in my daily update procedure using the Fedora 22 Update repository. I am not familiar with details I just can report what happens right now... OK, there's no EXA support for maxwell, so you're using glamor. Before kernel 4.1, unless you had extracted your own ctxsw firmware, you didn't have acceleration at all; that was likely the change that triggered the issue. The glamor integration in nouveau is, sadly, broken. But it's unclear whether that's the cause of your issue. You can either add Option "NoAccel" "true" to the device section in your xorg.conf, which will disable 2d acceleration and bring you back to the same state you were in before, or you can add Driver "modesetting" (or uninstall xf86-video-nouveau) which will use the modesetting driver which has a non-broken glamor integration. You may still get the same issues though, as they could just be generic mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one else has been interested in debugging/fixing issues). -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Odd text behavior on Websites and others
No, you probably want something in /etc/X11... a lot of the time it's split up into a bunch of separate files in like /etc/X11/xorg.conf.d or something. You should consult your distro documentation for how to make it happen. On Tue, Aug 11, 2015 at 11:38 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote: Thanks - the only xorg.conf I found is - /usr/share/abrt/conf.d/plugins/xorg.conf Is this the file to be edited? -- Rudolf Künzli rudolf.kun...@gmail.com On Tue, 2015-08-11 at 10:56 -0400, Ilia Mirkin wrote: On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote: GeForce GTX 745 is a NVIDIA card in the NV117 (GM107) Family... The update was made using DNF (Yum) in my daily update procedure using the Fedora 22 Update repository. I am not familiar with details I just can report what happens right now... OK, there's no EXA support for maxwell, so you're using glamor. Before kernel 4.1, unless you had extracted your own ctxsw firmware, you didn't have acceleration at all, that was likely the change that triggered the issue. The glamor integration in nouveau is, sadly, broken. But it's unclear whether that's the cause of your issue. You can either add Option NoAccel true to the device section in your xorg.conf, which will disable 2d acceleration and bring you back to the same state you were in before, or you can add Driver modesetting (or uninstall xf86-video-nouveau) which will use the modesetting driver which has a non-broken glamor integration. You may still get the same issues though, as they could just be generic mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one else has been interested in debugging/fixing issues). -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Odd text behavior on Websites and others
Add a file to /etc/X11/xorg.conf.d, named anything-you-want.conf, which contains Section "Device" Driver "modesetting" EndSection Hopefully that should do it. On Tue, Aug 11, 2015 at 6:03 PM, Rudolf Künzli rudolf.kun...@gmail.com wrote: I don't have a file /etc/X11/xorg.conf but a folder /etc/X11/xorg.conf.d [rudolf@mephisto xorg.conf.d]$ ls -la /etc/X11/xorg.conf.d total 12 drwxr-xr-x. 2 root root 4096 Aug 4 08:25 . drwxr-xr-x. 6 root root 4096 May 27 11:40 .. -rw-r--r--. 1 root root 265 Apr 21 17:06 00-keyboard.conf Then a folder /usr/share/X11/xorg.conf.d with several config files but I don't see which one to be edited... [rudolf@mephisto xorg.conf.d]$ ls -la /usr/share/X11/xorg.conf.d total 32 drwxr-xr-x. 2 root root 4096 Jul 31 18:33 . drwxr-xr-x. 7 root root 4096 May 27 11:40 .. -rw-r--r--. 1 root root 1099 Jul 15 10:20 10-evdev.conf -rw-r--r--. 1 root root 1350 Jul 15 10:20 10-quirks.conf -rw-r--r--. 1 root root 2827 May 1 08:23 50-synaptics.conf -rw-r--r--. 1 root root 115 May 15 14:49 50-vmmouse.conf -rw-r--r--. 1 root root 1385 Mar 20 00:31 50-wacom.conf -rw-r--r--. 1 root root 789 Jul 13 00:54 90-libinput.conf Any other place to look? find didn't help... I guess I'll have to run x config as root to get a xorg.conf which I can edit later... -- Rudolf Künzli rudolf.kun...@gmail.com On Tue, 2015-08-11 at 11:42 -0400, Ilia Mirkin wrote: No, you probably want something in /etc/X11... a lot of the time it's split up into a bunch of separate files in like /etc/X11/xorg.conf.d or something. You should consult your distro documentation for how to make it happen. On Tue, Aug 11, 2015 at 11:38 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote: Thanks - the only xorg.conf I found is - /usr/share/abrt/conf.d/plugins/xorg.conf Is this the file to be edited?
-- Rudolf Künzli rudolf.kun...@gmail.com On Tue, 2015-08-11 at 10:56 -0400, Ilia Mirkin wrote: On Tue, Aug 11, 2015 at 10:47 AM, Rudolf Künzli rudolf.kun...@gmail.com wrote: GeForce GTX 745 is a NVIDIA card in the NV117 (GM107) Family... The update was made using DNF (Yum) in my daily update procedure using the Fedora 22 Update repository. I am not familiar with details I just can report what happens right now... OK, there's no EXA support for maxwell, so you're using glamor. Before kernel 4.1, unless you had extracted your own ctxsw firmware, you didn't have acceleration at all, that was likely the change that triggered the issue. The glamor integration in nouveau is, sadly, broken. But it's unclear whether that's the cause of your issue. You can either add Option NoAccel true to the device section in your xorg.conf, which will disable 2d acceleration and bring you back to the same state you were in before, or you can add Driver modesetting (or uninstall xf86-video-nouveau) which will use the modesetting driver which has a non-broken glamor integration. You may still get the same issues though, as they could just be generic mesa-sucks-on-maxwell issues (I don't have a Maxwell GPU, no one else has been interested in debugging/fixing issues). -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Take shift in crtc positions for ZaphodHeads configs into account.
I don't understand this patch (what are all these masks? how are they used?), and don't want to invest the time required to do so. However Mario is probably the sole serious user of ZaphodHeads, and if it fixes issues for him, probably fixes issues for others who try and give up with ZaphodHeads. Any objections if I just push this out? On Sat, Jun 27, 2015 at 8:33 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: In multi-x-screen ZaphodHeads configurations, there isn't a one-to-one mapping of kernel provided drmmode crtc index to the index of the corresponding xf86Crtc inside the xf86CrtcConfig crtc array anymore, i.e. for kernel provided drmmode->mode_res->crtcs[i], the i'th crtc won't correspond to the xf86Crtc in the i'th slot of the x-screen's xf86CrtcConfig anymore, once ZaphodHeads has only selected a subset of all crtcs of a graphics card for a given x-screen, instead of all crtcs. This breaks the mapping of bit positions in the bit masks returned in kencoder->possible_crtcs and kencoder->possible_clones. A 1 bit in position i of those masks allows use of the kernel's i'th crtc for the given kencoder. The X-Server's dix code checks those bit masks for valid xf86Output -> xf86Crtc assignments, assuming that the i'th slot xf86CrtcConfigPtr config->crtc[i] corresponds to bit i in the xf86Output->possible_crtcs bitmask, and bails if the bitmask doesn't allow the specified assignment of crtc to output. If ZaphodHeads breaks the assumption of bit i <-> crtc slot i this ends in failure. Take this shift of crtc index positions wrt. encoder bitmask bit positions into account by bit-shifting positions accordingly when assigning encoder->possible_crtcs to output->possible_crtcs, so the proper indices match up again for validation by the dix.
This problem wasn't apparent last year when testing the ZaphodHeads support on some Kepler cards, as apparently the encoder-possible_crtcs bitmasks returned for those cards by the kernel just had all 4 lsb bits set for all tested encoders/output, so each of the cards 4 crtcs could go with each output and things worked by chance. The current code breaks, e.g., on 2010 MacBookPro with nv50, where one crtc is hardwired to the internal lvds panel, and one crtc is hardwired to the external DP connector, resulting in a failure where dual-display on single-x-screen works fine, but assigning each output to a separate x-screen via ZaphodHeads fails due to the mismatched encoder-possible_crtcs bitmasks. This patch fixes the problem. Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com --- src/drmmode_display.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index c30cb3a..3679482 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -1214,7 +1214,7 @@ drmmode_zaphod_match(ScrnInfoPtr pScrn, const char *s, char *output_name) } static unsigned int -drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num) +drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int crtcshift) { NVPtr pNv = NVPTR(pScrn); xf86OutputPtr output; @@ -1296,8 +1296,8 @@ drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num) output-subpixel_order = subpixel_conv_table[koutput-subpixel]; output-driver_private = drmmode_output; - output-possible_crtcs = kencoder-possible_crtcs; - output-possible_clones = kencoder-possible_clones; + output-possible_crtcs = kencoder-possible_crtcs crtcshift; + output-possible_clones = kencoder-possible_clones crtcshift; output-interlaceAllowed = true; output-doubleScanAllowed = true; @@ -1421,6 +1421,7 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, int fd, int cpp) NVEntPtr pNVEnt = NVEntPriv(pScrn); int i; unsigned int crtcs_needed = 0; + int crtcshift; 
drmmode = xnfalloc(sizeof *drmmode); drmmode-fd = fd; @@ -1444,8 +1445,9 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, int fd, int cpp) } xf86DrvMsg(pScrn-scrnIndex, X_INFO, Initializing outputs ...\n); + crtcshift = ffs(pNVEnt-assigned_crtcs ^ 0x) - 1; Mario, any objections if I touch this up as ~pNVEnc-assigned_crtcs? XOR with ~0 is not a pattern I've seen a lot. for (i = 0; i drmmode-mode_res-count_connectors; i++) - crtcs_needed += drmmode_output_init(pScrn, drmmode, i); + crtcs_needed += drmmode_output_init(pScrn, drmmode, i, crtcshift); xf86DrvMsg(pScrn-scrnIndex, X_INFO, %d crtcs needed for screen.\n, crtcs_needed); -- 1.9.1 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org
Re: [Nouveau] [REGRESSION] nouveau: Crash in gk104_fifo_intr_runlist()
Alexandre, could you take a look? 0xbad* generally comes from bad mmio reads. On Aug 9, 2015 1:08 PM, Eric Biggers ebigge...@gmail.com wrote: Hi, I am testing Linux v4.2-rc5 and I am sporadically getting crashes shortly after startup in gk104_fifo_intr_runlist(). What I've found is that the 'mask' value read from offset 0x2a00 comes back as '0xbad0da00'. This causes the 'engn' variable to be assigned the value 9, which is invalid; then wake_up() is called on an uninitialized waitqueue which causes the crash. Reverting commit 1addc12648521d (drm/nouveau/fifo/gk104: kick channels when deactivating them) seemed to make the problem go away, although I can't be 100% sure because the problem is sporadic. Attached an example of the kernel log up to the crash. Eric ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Fermi+ shader header docs
And as I've just started looking at GM107 traces to fix up tessellation shader attribute address calculations, I noticed the following unknown bits in CommonWord3 of TCP shaders: PB: 0x0021 GM107_3D.SP[0x2].SELECT = { ENABLE | PROGRAM = TCP } PB: 0x0830 GM107_3D.SP[0x2].START_ID = 0x830 HEADER: 0x04210861 0 = { SPH = VTG | VERSION = 3 | KIND = TCP | GMEM_STORE | SASS_VERS 0x0600 1 = { LMEM_POS_ALLOC = 0 | PATCH_ATTRIBUTES = 6 } 0x0300 2 = { LMEM_NEG_ALLOC = 0 | THREADS_PER_PRIM = 3 } 0x6000 3 = { WARP_CSTACK_SIZE = 0 | 0x6000 } 0xff00 4 = { MIN_OUT_READ_SLOT = 0 | MAX_OUT_READ_SLOT = 0xff } 0xf000 ATTR_EN_0 = 0xf000 0x ATTR_EN_1 = 0 0x ATTR_EN_2 = 0 0x ATTR_EN_3 = 0 0x ATTR_EN_4 = 0 0x ATTR_EN_5 = { 0 } 0x 11 = 0 0x 12 = 0 0xf000 EXPORT_EN_0 = { HPOS = 0xf } 0x EXPORT_EN_1 = 0 0x EXPORT_EN_2 = 0 0x EXPORT_EN_3 = 0 0x EXPORT_EN_4 = 0 0x EXPORT_EN_5 = { CLIP_DISTANCE = 0 | UNK12 = 0 } 0x 19 = 0 Anything that we need to also be setting? -ilia On Mon, Jun 22, 2015 at 9:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote: And an additional question: I have a trace here where a reserved bit from CommonWord0 is set. Is that just random values that aren't cleared by the driver, or does it have some significance? 
Here is the full shader: HEADER: 0x06040461 0 = { SPH = VTG | VERSION = 3 | KIND = VP_B | SASS_VERSION = 2 | LDST_ENABLE | SO_MASK = 0 | 0x200 } 0x 1 = { LMEM_POS_ALLOC = 0 | PATCH_ATTRIBUTES = 0 } 0x 2 = { LMEM_NEG_ALLOC = 0 | THREADS_PER_PRIM = 0 } 0x 3 = { WARP_CSTACK_SIZE = 0 | OUTPUT_PRIM = 0 } 0x 4 = { MAX_OUTPUT_VERTS = 0 | MIN_OUT_READ_SLOT = 0 | MAX_OUT_READ_SLOT = 0 } 0x ATTR_EN_0 = 0 0x ATTR_EN_1 = 0 0x ATTR_EN_2 = 0 0x ATTR_EN_3 = 0 0x ATTR_EN_4 = 0 0x ATTR_EN_5 = { 0 } 0x 11 = 0 0x 12 = 0 0x0001f000 EXPORT_EN_0 = { HPOS = 0xf | 0x1 } 0x EXPORT_EN_1 = 0 0x EXPORT_EN_2 = 0 0x EXPORT_EN_3 = 0 0x EXPORT_EN_4 = 0 0x EXPORT_EN_5 = { CLIP_DISTANCE = 0 | UNK12 = 0 } 0x 19 = 0 CODE: : a01088b0 08bcb810 sched 0x2c 0x22 0x4 0x28 0x4 0x2e 0x2f 0008: 0b1ffc1e 5b601c07 set $p0 0x1 ge u32 0x0 c0[0x3858] 0010: 103c 1200 $p0 bra 0x38 0018: 0a1c0002 64c03c07 mov b32 $r0 c0[0x3850] 0020: 0a9c0006 64c03c07 mov b32 $r1 c0[0x3854] 0028: 001c cc80 ld b32 $r0 cg g[$r0d] 0030: 041c003c 1200 bra 0x40 0038: 7f9c0002 e4c03c00 C mov b32 $r0 0x0 0040: 9c108010 090c8c10 C sched 0x4 0x20 0x4 0x27 0x4 0x23 0x43 0048: 001c2802 e5c0 cvt rn f32 $r0 u32 $r0 0050: 341c0006 64c03c00 mov b32 $r1 c0[0x1a0] 0058: 349c000a 64c03c00 mov b32 $r2 c0[0x1a4] 0060: 351c000e 64c03c00 mov b32 $r3 c0[0x1a8] 0068: 359c0012 64c03c00 mov b32 $r4 c0[0x1ac] 0070: 381ffc06 7f03fc00 st b32 a[0x70] $r1 0x0 0x0 0078: 3a1ffc0a 7f03fc00 st b32 a[0x74] $r2 0x0 0x0 0080: 3c110d0c 0801 sched 0x43 0x43 0x4 0x4f 0x0 0x0 0x0 0088: 3c1ffc0e 7f03fc00 st b32 a[0x78] $r3 0x0 0x0 0090: 3e1ffc12 7f03fc00 st b32 a[0x7c] $r4 0x0 0x0 0098: 401ffc02 7f03fc00 st b32 a[0x80] $r0 0x0 0x0 00a0: 001c003c 1800 exit 00a8: fc1c003c 12007fff C bra 0xa8 00b0: 001c3c02 8580 nop 00b8: 001c3c02 8580 nop On Sat, May 23, 2015 at 5:35 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Thu, May 21, 2015 at 11:32 AM, Ilia Mirkin imir...@alum.mit.edu wrote: On Thu, May 21, 2015 at 10:05 AM, Robert Morell rmor...@nvidia.com wrote: Hi Ilia, On Sat, May 
02, 2015 at 12:34:21PM -0400, Ilia Mirkin wrote: Hi, As I'm looking to add some support to nouveau for features like atomic counters and images, I'm running into some confusion about what the first word of the shader header means. Here is the definition as we have it today: [...] However I know that these are somewhat wrong. I've seen shaders that use gmem accesses (i.e. mov r0, [r0]) that just have the LMEM enable bit set (and they use no lmem). And I've seen additional bits set, esp relating to images, but I haven't spent enough time looking at all the variations to make sense of it yet. For example, I think that Fermi and Kepler+ have different meanings for some of the bits. Those look pretty close :) I was hoping you could just release the docs for the shader headers, or at least the first word of the shader header. We've posted the specification for the full Shader Program Header to our GPU documentation site here: ftp://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html I hope it helps clear things up. Yep, just a few
[Nouveau] [PATCH v2] nvc0: bind a fake tess control program when there isn't one available
Apparently this is necessary in order for tess factors to work in a tess eval program without a tess control program bound. Probably because it uses the fake program's shader header to work out the number of patch constants. Fixes vs-tes-tessinner-tessouter-inputs Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- v1 - v2: improve some of the error handling src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 8 src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 +++ src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 17 + src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c | 7 ++- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 84f8db6..01080d0 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -132,6 +132,9 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) pipe_resource_reference(res, NULL); } util_dynarray_fini(nvc0-global_residents); + + if (nvc0-tcp_empty) + nvc0-base.pipe.delete_tcs_state(nvc0-base.pipe, nvc0-tcp_empty); } static void @@ -326,6 +329,11 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) /* shader builtin library is per-screen, but we need a context for m2mf */ nvc0_program_library_upload(nvc0); + nvc0_program_init_tcp_empty(nvc0); + if (!nvc0-tcp_empty) + goto out_err; + /* set the empty tctl prog on next draw in case one is never set */ + nvc0-dirty |= NVC0_NEW_TCTLPROG; /* add permanently resident buffers to bufctxts */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index f449942..df1a891 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -128,6 +128,8 @@ struct nvc0_context { struct nvc0_program *fragprog; struct nvc0_program *compprog; + struct nvc0_program *tcp_empty; + struct nvc0_constbuf 
constbuf[6][NVC0_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[6]; uint16_t constbuf_valid[6]; @@ -227,6 +229,7 @@ void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); void nvc0_program_library_upload(struct nvc0_context *); uint32_t nvc0_program_symbol_offset(const struct nvc0_program *, uint32_t label); +void nvc0_program_init_tcp_empty(struct nvc0_context *); /* nvc0_query.c */ void nvc0_init_query_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 4941831..e9975ce 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -22,6 +22,8 @@ #include pipe/p_defines.h +#include tgsi/tgsi_ureg.h + #include nvc0/nvc0_context.h #include codegen/nv50_ir_driver.h @@ -803,3 +805,18 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) return prog-code_base + base + syms[i].offset; return prog-code_base; /* no symbols or symbol not found */ } + +void +nvc0_program_init_tcp_empty(struct nvc0_context *nvc0) +{ + struct ureg_program *ureg; + + ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL); + if (!ureg) + return; + + ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1); + ureg_END(ureg); + + nvc0-tcp_empty = ureg_create_shader_and_destroy(ureg, nvc0-base.pipe); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 8aa127a..8f8ac2d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -148,8 +148,13 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); PUSH_DATA (push, tp-num_gprs); } else { - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + tp = nvc0-tcp_empty; + /* not a whole lot we can do to handle this failure */ + if (!nvc0_program_validate(nvc0, tp)) + assert(!unable to validate 
empty tcp); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); PUSH_DATA (push, 0x20); + PUSH_DATA (push, tp-code_base); } nvc0_program_update_context_state(nvc0, tp, 1); } -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [REGRESSION] nouveau: Crash in gk104_fifo_intr_runlist()
I'm guessing that optimus is the operative difference, not the specific chip. Basically something that can be put to sleep via ACPI... On Tue, Aug 11, 2015 at 11:53 PM, Alexandre Courbot gnu...@gmail.com wrote: Sending the revert patch to Dave after receiving his green light for this, and will investigate the issue on my side. I should be able to find a gk107 somewhere... On Wed, Aug 12, 2015 at 12:35 PM, Alexandre Courbot gnu...@gmail.com wrote: Mmm in that case it is probably best to revert that commit for the time being. It was targeting GM20B (and maybe other Maxwells too) so reverting it should not hurt anyone at the moment. I think Ben is on holidays for now, is there anyone else who can send a pull request to Dave Airlie for this? We don't want 4.2 to ship with a crash every other reboot... On Wed, Aug 12, 2015 at 10:01 AM, Eric Biggers ebigge...@gmail.com wrote: Hi, I think I've done about 10 reboots with the commit reverted and I never experienced the crash. But with 4.2.0-rc6 I get the crash on about every other reboot. Probably relevant: the computer on which the crash occurs has two GPUs (one Intel and one Nvidia). The Intel one is actually being used, whereas I presume the Nvidia one is being automatically disabled shortly after boot, perhaps when the crash occurs... Eric On Mon, Aug 10, 2015 at 11:28 PM, Alexandre Courbot gnu...@gmail.com wrote: Indeed, and I am actually surprised to see one here. I will double-check that patch. Eric, would you be able to give an estimate of the repro rate for this issue? More testing with and without the patch would be welcome, it'd be good to know whether it is actually the culprit or not. On Mon, Aug 10, 2015 at 2:28 AM, Ilia Mirkin imir...@alum.mit.edu wrote: Alexandre, could you take a look? 0xbad* generally comes from bad mmio reads. 
On Aug 9, 2015 1:08 PM, Eric Biggers ebigge...@gmail.com wrote: Hi, I am testing Linux v4.2-rc5 and I am sporadically getting crashes shortly after startup in gk104_fifo_intr_runlist(). What I've found is that the 'mask' value read from offset 0x2a00 comes back as '0xbad0da00'. This causes the 'engn' variable to be assigned the value 9, which is invalid; then wake_up() is called on an uninitialized waitqueue which causes the crash. Reverting commit 1addc12648521d (drm/nouveau/fifo/gk104: kick channels when deactivating them) seemed to make the problem go away, although I can't be 100% sure because the problem is sporadic. Attached an example of the kernel log up to the crash. Eric ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.
Here's an example of what happens with DRI3: https://bugs.freedesktop.org/show_bug.cgi?id=91445 I'm not equipped to figure out why. On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: On 07/07/2015 09:51 PM, Ilia Mirkin wrote: Lastly, from some discussions with ajax on IRC, it appears that DRI3 is half-baked at best wrt sync between server and client. I think we should just disable it by default for now, until issues are ironed out. (Rather than what this patch has, which is default-on for Xorg some version.) What are the remaining known trouble spots wrt. sync? It seems to work pretty well at least for single gpu + unredirected fullscreen windows (== kms page flipping can be used for Presents. That's the use case i usually test very obsessively, as it matters very much for my type of applications, but other than that i only lightly test it via regular desktop use, so maybe that's were problems remain? We can disable it by default on exa - intel and amd/radeon drivers also disable by default. However, on gpus = maxwell only glamor accel is supported and glamor on nouveau is either dri3/present or no hw accel at all afaics. Btw. there are also a few patches made by Chris Wilson floating on the mailing list since around january, some are reviewed and tested by myself, but not included in xorg master. Might be good for people to have a look at them and maybe get them into xorg 1.18? On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com wrote: The DRI option with the intel ddx can be used to indicate the following - whether dri is disabled - the dri version - dri1, dri2, dri3 - the dri module name - doo_dri.so bar_dri.so I'm not sure how exactly it's supposed to work/works, and I believe most of that is due to legacy reasons. I'm just saying let's not do the whole thing - just the dri version would be great (as you suggested). 
I can change that to allow selection between 2 and 3 - at least for exa, on glamor the parameter 2 would either need to get ignored or it would completely disable hw acceleration. I went for consistency with the ati ddx because i found the intel variant too confusing. I think it changed multiple times during the last year. thanks, -mario -Emil On 4 July 2015 at 19:28, Ilia Mirkin imir...@alum.mit.edu wrote: Erm, that's nuts. I also don't really understand what they're talking about there... i915g vs i915? Anyways, I just meant the version numbers :) On Sat, Jul 4, 2015 at 2:23 PM, Emil Velikov emil.l.veli...@gmail.com wrote: That would be great, as long as it does only that and does not go into the drivername territory. As the said driver ;-) A driver name to use can be provided instead of simple boolean value, which will be passed to the GL implementation for it to load the appropriate backend. -Emil On 4 July 2015 at 18:17, Ilia Mirkin imir...@alum.mit.edu wrote: IMO it'd be nice to keep this compatible with the intel driver, which has a DRI option, which can take the values 1, 2, 3. Obviously for nouveau, 1 makes no sense as that was dropped quite some time ago. See http://cgit.freedesktop.org/xorg/driver/xf86-video-intel/tree/man/intel.man#n68 On Mon, Jun 29, 2015 at 11:30 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: X-Server versions older than 1.16.3 have bugs in their DRI3/Present implementation which impair nouveau, so it is better to stick to good old DRI2 by default on such servers. E.g., page flipping doesn't work at all under DRI3/Present with older servers, and use of extensions like OML_sync_control, SGI_video_sync or INTEL_swap_events also causes failure of Present. nouveau's glamor accel backend currently doesn't work under DRI2, so continue to use DRI3 whenever it is supported. 
Under the exa accel backend, DRI2 works just fine, so disable DRI3 and choose DRI2 by default when nouveau is built for X-Server 1.16.3, and enable DRI3 if building on later X-Servers which work reasonably well under DRI3/Present. A new boolean xorg.conf Option DRI3 allows to enforce or prevent use of DRI3/Present under EXA acceleration for testing. Also add a bit more output about status of Present and DRI3 to aid debugging. Signed-off-by: Mario Kleiner mario.kleiner...@gmail.com --- man/nouveau.man| 6 ++ src/nouveau_dri2.c | 11 ++- src/nv_const.h | 2 ++ src/nv_driver.c| 17 +++-- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/man/nouveau.man b/man/nouveau.man index 129bb7f..12cfbc0 100644 --- a/man/nouveau.man +++ b/man/nouveau.man @@ -125,6 +125,12 @@ that relies on correct presentation timing behaviour as defined in that specification. .br Default: 1. +.TP +.BI Option \*qDRI3\*q \*q boolean \*q +Enable the DRI3 extension under exa acceleration if supported by server. +A setting of off will only use DRI2 instead. Under glamor
Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles
Indeed, this fixed the original issue on the GK208. Additionally it seems like starting with GK104 the mechanism for indirect offsets for ALD/AST changed and an AL2P instruction must now be used to determine the indirect or physical offset. Once nouveau was adjusted to do this, all MEM_OUT_OF_BOUNDS errors with tessellation shaders are gone. On Thu, Jul 23, 2015 at 2:36 AM, Ilia Mirkin imir...@alum.mit.edu wrote: I think I figured out what was going on. Will re-check on the GK208, but on a GF108 the random blue splotches in Unigine Heaven are gone now. Turns out that with an instruction like /*00d0*/ ALD.128 R0, a[0x70], R0; /* 0x7ecc381ffc02 */ The hardware will internally split it up into roughly ALD R0, a[0x70], R0 ALD R1, a[0x74], R0 ALD R2, a[0x78], R0 ALD R3, a[0x7c], R0 Of course the first one of those overwrites R0, which makes the subsequent loads be full of fail. Adding a hazard in our RA for the indirect argument resolves the issue. -ilia On Tue, May 26, 2015 at 7:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote: One additional observation that I just made is that on GK208, the blob apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx| (i.e. absolute value), or even more paradoxically, shl 2; shr 2; which removes the top *2* bits, rather than just the top 1. However I see no such behaviour on GF108. I'm going to test out tomorrow whether this is the cause of my GK208 woes. On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Hello, I've been debugging a few different tessellation shader issues with nouveau, but let's start small. I see this issue on my GK208 with high frequency, and I *think* I've seen it once or twice on my GF108, but it's exceedingly rare, if it does happen. I don't have a GK10x to test on, unfortunately, but I assume it'll have the same issue as the GK208. 
The issue is this -- a bunch of triangles that should come out of the tessellator end up black. I also see a GPC0/TPC1/MP trap: MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in response to a interrupt and MP trap generated by the hardware, read out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see gf100_gr_trap_mp). I assume some of the tessellation evaluation invocations get killed, but I have no proof of this. I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]] I would imagine that's some floating point number ending up in the register instead of an address, but the fp32 value of it (1.35107421875) does not seem familiar. Ben pointed out that the 0x3facf000 is a channel address, not a value from the shader. Oops. So that theory completely doesn't hold water. Perhaps some buffer isn't big enough? This ends up using 9 output vertices per patch, with 2 vec4's each. I've tried playing with the per-warp stack size to no avail, but I didn't *entirely* know what I was doing either though. Even when all the triangles show up, I still see the error on the GK208, so I'm not sure if they're the same issue or not. Now, here's the fun part -- this is completely non-deterministic. Sometimes everything shows up on the GK208, other times I see holes, in varying locations. I'm fairly sure that the actual shader code is correct... so I'm doing something funny wrong. (And yeah, tons of missed optimization opportunities in this code, but let's not dwell on that.) This is the piglit test: http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test It should be noted that other piglit tests don't exhibit this error, however they also tend to be simpler. One key difference is that they don't change the patch size in TCS. 
I'm including a link to a text file with the tessellation control and evaluation shaders (decoded with nvdisasm which you're hopefully more familiar with), along with the shader headers that we generate. FTR, this is how I feed the raw shader opcode bytes into nvdisasm: perl -ane 'foreach (@F) { print pack I, hex($_) }' tt; nvdisasm -b SM35 tt (for some reason it doesn't want to read from a pipe or even a fd). http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt My suspicion is that we're doing something wrong with the sched codes. We have an elaborate calculator, but... perhaps not elaborate enough? You can see it here: http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574 The reason I think it's an error in sched codes is due to the TRAP memory location that I see -- could well be some stale value in the register and the value from S2R or VILD doesn't make it in there in time before the ALD reads it. If you should like to try this yourself, you can use https://github.com/imirkin/mesa
Re: [Nouveau] Tessellation shaders get MEM_OUT_OF_BOUNDS errors / missing triangles
I think I figured out what was going on. Will re-check on the GK208, but on a GF108 the random blue splotches in Unigine Heaven are gone now. Turns out that with an instruction like /*00d0*/ ALD.128 R0, a[0x70], R0; /* 0x7ecc381ffc02 */ The hardware will internally split it up into roughly ALD R0, a[0x70], R0 ALD R1, a[0x74], R0 ALD R2, a[0x78], R0 ALD R3, a[0x7c], R0 Of course the first one of those overwrites R0, which makes the subsequent loads be full of fail. Adding a hazard in our RA for the indirect argument resolves the issue. -ilia On Tue, May 26, 2015 at 7:34 PM, Ilia Mirkin imir...@alum.mit.edu wrote: One additional observation that I just made is that on GK208, the blob apparently doesn't use the result of S2R Rx, SR_INVOCATION_ID wholesale in TCS. It either passes it through a I2I.S32.S32 Rx, |Rx| (i.e. absolute value), or even more paradoxically, shl 2; shr 2; which removes the top *2* bits, rather than just the top 1. However I see no such behaviour on GF108. I'm going to test out tomorrow whether this is the cause of my GK208 woes. On Fri, May 22, 2015 at 5:10 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, May 18, 2015 at 4:48 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Hello, I've been debugging a few different tessellation shader issues with nouveau, but let's start small. I see this issue on my GK208 with high frequency, and I *think* I've seen it once or twice on my GF108, but it's exceedingly rare, if it does happen. I don't have a GK10x to test on, unfortunately, but I assume it'll have the same issue as the GK208. The issue is this -- a bunch of triangles that should come out of the tessellator end up black. I also see a GPC0/TPC1/MP trap: MEM_OUT_OF_BOUNDS error produced by nouveau -- this is output in response to a interrupt and MP trap generated by the hardware, read out with nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); (see gf100_gr_trap_mp). 
I assume some of the tessellation evaluation invocations get killed, but I have no proof of this. I also see this: TRAP ch 5 [0x003facf000 shader_runner[19044]] I would imagine that's some floating point number ending up in the register instead of an address, but the fp32 value of it (1.35107421875) does not seem familiar. Ben pointed out that the 0x3facf000 is a channel address, not a value from the shader. Oops. So that theory completely doesn't hold water. Perhaps some buffer isn't big enough? This ends up using 9 output vertices per patch, with 2 vec4's each. I've tried playing with the per-warp stack size to no avail, but I didn't *entirely* know what I was doing either though. Even when all the triangles show up, I still see the error on the GK208, so I'm not sure if they're the same issue or not. Now, here's the fun part -- this is completely non-deterministic. Sometimes everything shows up on the GK208, other times I see holes, in varying locations. I'm fairly sure that the actual shader code is correct... so I'm doing something funny wrong. (And yeah, tons of missed optimization opportunities in this code, but let's not dwell on that.) This is the piglit test: http://cgit.freedesktop.org/piglit/tree/tests/spec/arb_tessellation_shader/execution/quads.shader_test It should be noted that other piglit tests don't exhibit this error, however they also tend to be simpler. One key difference is that they don't change the patch size in TCS. I'm including a link to a text file with the tessellation control and evaluation shaders (decoded with nvdisasm which you're hopefully more familiar with), along with the shader headers that we generate. FTR, this is how I feed the raw shader opcode bytes into nvdisasm: perl -ane 'foreach (@F) { print pack I, hex($_) }' tt; nvdisasm -b SM35 tt (for some reason it doesn't want to read from a pipe or even a fd). 
http://people.freedesktop.org/~imirkin/tess_shaders_quads.txt My suspicion is that we're doing something wrong with the sched codes. We have an elaborate calculator, but... perhaps not elaborate enough? You can see it here: http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp#n2574 The reason I think it's an error in sched codes is due to the TRAP memory location that I see -- could well be some stale value in the register and the value from S2R or VILD doesn't make it in there in time before the ALD reads it. If you should like to try this yourself, you can use https://github.com/imirkin/mesa/commits/gl4-integration-2 . This branch is good enough to run Unigine Heaven, but still has a lot of known shortcomings. (Both at the core and the nouveau levels.) Any advice or suggestions for debugging this would be greatly appreciated. And let me know if you'd like me to generate additional info on this. For example I can supply a full command trace that can be piped to demmt, if that's helpful. Thanks in advance
Re: [Nouveau] [PATCH] configure: remove unneeded AC_SUBST statements
Reviewed-by: Ilia Mirkin imir...@alum.mit.edu On Tue, Jul 21, 2015 at 5:51 PM, Emil Velikov emil.l.veli...@gmail.com wrote: The variables are already set/substituted by the PKG_CHECK_MODULES macro. Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- configure.ac | 4 1 file changed, 4 deletions(-) diff --git a/configure.ac b/configure.ac index 03563c1..6048c5a 100644 --- a/configure.ac +++ b/configure.ac @@ -82,8 +82,6 @@ XORG_DRIVER_CHECK_EXT(DRI2, [dri2proto >= 2.6]) # Checks for pkg-config packages PKG_CHECK_MODULES(LIBDRM, [libdrm >= 2.4.60]) PKG_CHECK_MODULES(LIBDRM_NOUVEAU, [libdrm_nouveau >= 2.4.25]) -AC_SUBST(LIBDRM_NOUVEAU_CFLAGS) -AC_SUBST(LIBDRM_NOUVEAU_LIBS) PKG_CHECK_MODULES(XORG, [xorg-server >= 1.8] xproto fontsproto libdrm $REQUIRED_MODULES) PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], @@ -100,8 +98,6 @@ if test x$LIBUDEV = xyes; then AC_DEFINE(HAVE_LIBUDEV, 1, [libudev support]) fi AM_CONDITIONAL(LIBUDEV, [ test x$LIBUDEV = xyes ] ) -AC_SUBST([LIBUDEV_CFLAGS]) -AC_SUBST([LIBUDEV_LIBS]) # Checks for header files. AC_HEADER_STDC -- 2.4.4 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.
On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: On 07/07/2015 09:51 PM, Ilia Mirkin wrote: Lastly, from some discussions with ajax on IRC, it appears that DRI3 is half-baked at best wrt sync between server and client. I think we should just disable it by default for now, until issues are ironed out. (Rather than what this patch has, which is default-on for Xorg some version.) What are the remaining known trouble spots wrt. sync? It seems to work pretty well at least for single gpu + unredirected fullscreen windows (== kms page flipping can be used for Presents. That's the use case i usually test very obsessively, as it matters very much for my type of applications, but other than that i only lightly test it via regular desktop use, so maybe that's were problems remain? Adam is the one who actually understands it... I was just asking questions and the answer was that's broken. A user was using DRI3 with EXA (nouveau DDX), and was seeing render fail in the form of stale things on the screen. The nouveau DDX does a lot of implicit sync stuff... it just emits commands into a pushbuf without kicking it. However the libdrm code has cleverness to kick out any pushbufs if you do a nouveau_bo_wait and that bo has been referenced. HOWEVER if you have a pixmap and you share it using the fd thing, and another process makes a (shared) bo out of it, and then does a nouveau_bo_wait, that in no way will cause the DDX to kick its pushbuf. Apparently there's some sort of sync thing that's supposed to happen, but that's entirely unimplemented for DRI3. Unfortunately I can't provide more details than that, as my knowledge of X internals is quite limited. Some sort of DRI drawable or GLX drawable or ... something. I have no proof that this is the cause of the issue the user was seeing, in fact it's just as likely to be something else. However this seems like a pretty significant issue to me. 
We can disable it by default on exa - intel and amd/radeon drivers also disable by default. However, on gpus = maxwell only glamor accel is supported and glamor on nouveau is either dri3/present or no hw accel at all afaics. You probably saw my patches to just remove glamor from nouveau :) That integration doesn't support DRI2, which in turn means no core contexts (due to lack of GLX_ARB_create_context_profile), and a slew of other issues. Seemed easier to just tell people to go use modesetting, which gets all of these things right(er). Btw. there are also a few patches made by Chris Wilson floating on the mailing list since around january, some are reviewed and tested by myself, but not included in xorg master. Might be good for people to have a look at them and maybe get them into xorg 1.18? I would not oppose the reviewing of Chris's patches ;) However I'm in no position to evaluate them myself. On Sat, Jul 4, 2015 at 3:03 PM, Emil Velikov emil.l.veli...@gmail.com wrote: The DRI option with the intel ddx can be used to indicate the following - whether dri is disabled - the dri version - dri1, dri2, dri3 - the dri module name - doo_dri.so bar_dri.so I'm not sure how exactly it's supposed to work/works, and I believe most of that is due to legacy reasons. I'm just saying let's not do the whole thing - just the dri version would be great (as you suggested). I can change that to allow selection between 2 and 3 - at least for exa, on glamor the parameter 2 would either need to get ignored or it would completely disable hw acceleration. I went for consistency with the ati ddx because i found the intel variant too confusing. I think it changed multiple times during the last year. Bleargh. The ati ddx option name is much newer. Has it seen a release yet? It'd be really nice to get all the DDX's to just agree on something, instead of having different-but-similar options which confuse everyone. 
-ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] Add Option DRI3 to allow to disable DRI3 under EXA.
On Tue, Jul 14, 2015 at 3:00 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: On 07/14/2015 05:26 PM, Ilia Mirkin wrote: On Mon, Jul 13, 2015 at 11:43 PM, Mario Kleiner mario.kleiner...@gmail.com wrote: On 07/07/2015 09:51 PM, Ilia Mirkin wrote: Lastly, from some discussions with ajax on IRC, it appears that DRI3 is half-baked at best wrt sync between server and client. I think we should just disable it by default for now, until issues are ironed out. (Rather than what this patch has, which is default-on for Xorg some version.) What are the remaining known trouble spots wrt. sync? It seems to work pretty well at least for single gpu + unredirected fullscreen windows (== kms page flipping can be used for Presents. That's the use case i usually test very obsessively, as it matters very much for my type of applications, but other than that i only lightly test it via regular desktop use, so maybe that's were problems remain? Adam is the one who actually understands it... I was just asking questions and the answer was that's broken. A user was using DRI3 with EXA (nouveau DDX), and was seeing render fail in the form of stale things on the screen. The nouveau DDX does a lot of implicit sync stuff... it just emits commands into a pushbuf without kicking it. However the libdrm code has cleverness to kick out any pushbufs if you do a nouveau_bo_wait and that bo has been referenced. HOWEVER if you have a pixmap and you share it using the fd thing, and another process makes a (shared) bo out of it, and then does a nouveau_bo_wait, that in no way will cause the DDX to kick its pushbuf. Apparently there's some sort of sync thing that's supposed to Although then i'd expect the other process to hang in nouveau_bo_wait? There probably isn't a specific fdo bug for this? 
No, because it's blissfully unaware of the fact that there's anything to wait on for the bo (since it hasn't been kicked out by the process and submitted to the kernel), and thus decides that the bo is ready to use. happen, but that's entirely unimplemented for DRI3. Unfortunately I can't provide more details than that, as my knowledge of X internals is quite limited. Some sort of DRI drawable or GLX drawable or ... something. I have no proof that this is the cause of the issue the user was seeing, in fact it's just as likely to be something else. However this seems like a pretty significant issue to me. Ok, thanks for the explanation. But would this problem then be limited to exa + nouveau? glamor uses opengl and thereby mesa's and i think mesa gets sync right afaik, at least in the glx/dri3 backend. Didn't look at egl backend though. At least i didn't see any such corruption? I'll change the patch to default to DRI 2 then for exa, and glamor will ignore the DRI parameter anyway and stick to DRI3. TBH I have no idea how glamor works. Presumably it may have similar issues, but perhaps it just sync's left and right so the problems remain unseen. There's a block handler or something like that which tends to flush things (in nouveau as well IIRC). Although that just flushes out dirty pixmaps, not sure what causes something to get onto the dirty pixmap list. That might still end up not flushing pending commands out. We can disable it by default on exa - intel and amd/radeon drivers also disable by default. However, on gpus = maxwell only glamor accel is supported and glamor on nouveau is either dri3/present or no hw accel at all afaics. You probably saw my patches to just remove glamor from nouveau :) That integration doesn't support DRI2, which in turn means no core contexts (due to lack of GLX_ARB_create_context_profile), and a slew of other issues. Seemed easier to just tell people to go use modesetting, which gets all of these things right(er). 
Hm, a total removal would at least make me and my users rather unhappy atm., as without glamor no hw accel at all on = maxwell. modesetting and nouveau are not on par feature-wise atm. E.g., modesetting as of the next xorg 1.18 server only provides pageflipping via dri3/present and without pageflipping it is game over for many of my use cases on nouveau-kms. Also modesetting currently completely lacks ZaphodHeads support. Without ZaphodHeads i can't have a page-flipped fullscreen window on one subset of outputs and a regular desktop on another subset, something that is needed for neuroscience/medical/vr applications. So i'd rather like to preserve the choice of glamor. What makes glamor + dri2 difficult to support in nouveau vs. other drivers? Lack of having been done, and a lack of desire by all parties involved to do it. As-is the glamor integration is, unfortunately, quite broken. I have an EXA impl for maxwell in the works, although I haven't made progress on it in months. It's missing some sort of nouveau_scratch_data() style call to make a temp bo to put coordinates in so that we can do draws on the 3d engine. I
[Nouveau] [PATCH] avoid build fail without COMPOSITE
--- src/nouveau_dri2.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index f22e319..4398559 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -142,6 +142,7 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio NVPtr pNv = NVPTR(xf86ScreenToScrn(pScreen)); RegionPtr pCopyClip; GCPtr pGC; + PixmapPtr pPix; DrawablePtr src_draw, dst_draw; Bool translate = FALSE; int off_x = 0, off_y = 0; @@ -170,9 +171,13 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio } if (translate && pDraw->type == DRAWABLE_WINDOW) { - PixmapPtr pPix = get_drawable_pixmap(pDraw); - off_x = pDraw->x - pPix->screen_x; - off_y = pDraw->y - pPix->screen_y; + off_x = pDraw->x; + off_y = pDraw->y; +#ifdef COMPOSITE + pPix = get_drawable_pixmap(pDraw); + off_x -= pPix->screen_x; + off_y -= pPix->screen_y; +#endif } pGC = GetScratchGC(pDraw->depth, pScreen); @@ -194,8 +199,8 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio if (extents->x1 == 0 && extents->y1 == 0 && extents->x2 == pDraw->width && extents->y2 == pDraw->height) { - PixmapPtr fpix = get_drawable_pixmap(dst_draw); - struct nouveau_bo *bo = nouveau_pixmap_bo(fpix); + pPix = get_drawable_pixmap(dst_draw); + struct nouveau_bo *bo = nouveau_pixmap_bo(pPix); if (bo) nouveau_bo_wait(bo, NOUVEAU_BO_RD, pNv->client); } -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] avoid build fail without COMPOSITE
Well, I don't pretend to know anything about X, but this is the commit that added the code in question: commit 297fd0d0755bda698be1d0b30cc60a41d7673c0b Author: Dave Airlie airl...@redhat.com Date: Tue Oct 16 16:15:16 2012 +1000 nouveau/dri2: fix pixmap/window offset calcs. This should fix prime rendering under kwin, and not break it under the others. Signed-off-by: Dave Airlie airl...@redhat.com diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index 71cff26..7bd0b3a 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -165,9 +165,9 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio translate = TRUE; if (translate pDraw-type == DRAWABLE_WINDOW) { - WindowPtr pWin = (WindowPtr)pDraw; - off_x = pWin-origin.x; - off_y = pWin-origin.y; + PixmapPtr pPix = get_drawable_pixmap(pDraw); + off_x = pDraw-x - pPix-screen_x; + off_y = pDraw-y - pPix-screen_y; } pGC = GetScratchGC(pDraw-depth, pScreen); Now I sort of assume that pDraw-x == pWin-origin.x. But... who knows. 
-ilia On Tue, Jul 14, 2015 at 5:46 PM, Emil Velikov emil.l.veli...@gmail.com wrote: On 14 July 2015 at 22:17, Ilia Mirkin imir...@alum.mit.edu wrote: --- src/nouveau_dri2.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index f22e319..4398559 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -142,6 +142,7 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio NVPtr pNv = NVPTR(xf86ScreenToScrn(pScreen)); RegionPtr pCopyClip; GCPtr pGC; + PixmapPtr pPix; DrawablePtr src_draw, dst_draw; Bool translate = FALSE; int off_x = 0, off_y = 0; @@ -170,9 +171,13 @@ nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw, RegionPtr pRegio } if (translate pDraw-type == DRAWABLE_WINDOW) { - PixmapPtr pPix = get_drawable_pixmap(pDraw); - off_x = pDraw-x - pPix-screen_x; - off_y = pDraw-y - pPix-screen_y; + off_x = pDraw-x; + off_y = pDraw-y; +#ifdef COMPOSITE + pPix = get_drawable_pixmap(pDraw); + off_x -= pPix-screen_x; + off_y -= pPix-screen_y; +#endif If I understand things correctly both ati and the intel ddx seems set the offsets to zero when composite is missing. I doubt that many people build xserver without it though :-) -Emil ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params
Any one which, after using a geometry shader, enables an extra clip distance. i.e. none. On Mon, Jul 13, 2015 at 4:16 AM, Samuel Pitoiset samuel.pitoi...@gmail.com wrote: What piglit test does this fix? On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: mesa-sta...@lists.freedesktop.org --- Even though in practice a geometry program will never be using UCP's, we still were revalidating (aka recompiling) the program when more clip planes became enabled (which also are used for regular clip distances). This seems like it should have led to massive fail, but I guess you don't change the number of clip planes when using geometry shaders. But I'm going to put this through a full piglit run just in case there's something I'm missing. src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 785e52e..11f2b10 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0, nvc0_vertprog_validate(nvc0); else if (likely(vp == nvc0-gmtyprog)) - nvc0_vertprog_validate(nvc0); + nvc0_gmtyprog_validate(nvc0); else nvc0_tevlprog_validate(nvc0); } -- 2.3.6 ___ mesa-dev mailing list mesa-...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Best regards, Samuel Pitoiset. ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params
This was, btw, introduced in commit 3a8ae6ac243b (nvc0: adapt to new clip state). Back then there was no real geometry support yet. On Mon, Jul 13, 2015 at 2:05 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Any one which, after using a geometry shader, enables an extra clip distance. i.e. none. On Mon, Jul 13, 2015 at 4:16 AM, Samuel Pitoiset samuel.pitoi...@gmail.com wrote: What piglit test does this fix? On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: mesa-sta...@lists.freedesktop.org --- Even though in practice a geometry program will never be using UCP's, we still were revalidating (aka recompiling) the program when more clip planes became enabled (which also are used for regular clip distances). This seems like it should have led to massive fail, but I guess you don't change the number of clip planes when using geometry shaders. But I'm going to put this through a full piglit run just in case there's something I'm missing. src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 785e52e..11f2b10 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0, nvc0_vertprog_validate(nvc0); else if (likely(vp == nvc0-gmtyprog)) - nvc0_vertprog_validate(nvc0); + nvc0_gmtyprog_validate(nvc0); else nvc0_tevlprog_validate(nvc0); } -- 2.3.6 ___ mesa-dev mailing list mesa-...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Best regards, Samuel Pitoiset. ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] [PATCH] nv50: avoid using inline vertex data submit when gl_VertexID is used
Edge flag stuff is annoying. Pretty sure only blender uses it. shade model = flat should get fixed on nv50 before edge flags, since blender uses that too, and it produces much worse visual artifacts. I'm having second thoughts about this patch. I think I'm going to go back to my previous approach of just calling nv50_vertex_arrays_validate when vbo_fifo && vertexid. I suspect that vertexid usage with small draws from client buffers is next to inexistent, no need to re-emit this stuff so often. On Mon, Aug 24, 2015 at 4:07 PM, Samuel Pitoiset samuel.pitoi...@gmail.com wrote: Reviewed-by: Samuel Pitoiset samuel.pitoi...@gmail.com This fix is simpler than I expected. What about the edge flag stuff now? :) On 08/24/2015 05:51 PM, Ilia Mirkin wrote: The hardware only generates vertexid when vertices come from a VBO. This fixes: vertexid-drawelements vertexid-drawarrays Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: 11.0 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_program.c| 1 + src/gallium/drivers/nouveau/nv50/nv50_program.h| 1 + src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 ++- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 8 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 02dc367..eff4477 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + prog->vp.vertexid = 1; continue; default: break; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 5d3ff56..f4e8e94 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ 
b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,6 +76,7 @@ struct nv50_program { ubyte psiz;/* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; + ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; } vp; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b304a17..66dcf43 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,7 +503,8 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, -{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, +{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | + NV50_NEW_VERTPROG }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 600b973..fb4305f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -301,6 +301,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); + /* A vertexid is not generated for inline data uploads. Have to use a +* VBO. This check must come after the vertprog has been validated, +* otherwise vertexid may be unset. +*/ + assert(nv50->vertprog->translated); + if (nv50->vertprog->vp.vertexid) + nv50->vbo_push_hint = 0; + if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Documentation request for MP warp error 0x10
On Fri, Oct 2, 2015 at 6:14 PM, Robert Morell <rmor...@nvidia.com> wrote: > Hi Ilia, > > On Fri, Oct 02, 2015 at 06:05:21PM -0400, Ilia Mirkin wrote: >> Hi Robert, >> >> Thanks for the quick response! That goes in line with my observations >> which is that these things happen when using an ATOM/RED instruction. >> I've checked and rechecked that I'm generating ops with identical bits >> as what the proprietary driver does, however (and nvdisasm prints >> identical output). Could you advise what the proper way of indicating >> that the memory is "global" to the op? I'm sure I'm just missing >> something simple. If you show me what to look for in SM35 I can >> probably find it on my own for SM20/SM30/SM50. > > Unfortunately this isn't something I know a lot about, so I'm going to > have do some research and get back to you, hopefully within a few days. Hi Robert, Were you able to find any further information out about this? Happy to provide with any traces or additional details as to what I'm doing (and which is failing). Thanks, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] pci: enable c800 magic for Lenovo Y510P
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70354#c75 Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- Unclear if we want this. Someone with the same vendor/subvendor pci ids didn't have any issues with nouveau at all: https://bugs.launchpad.net/ubuntu/+source/compiz/+bug/1327624 [they had other issues though] Not sure if this will do more harm than good. drm/nouveau/nvkm/engine/device/pci.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drm/nouveau/nvkm/engine/device/pci.c b/drm/nouveau/nvkm/engine/device/pci.c index e8eb14e..c5fc909 100644 --- a/drm/nouveau/nvkm/engine/device/pci.c +++ b/drm/nouveau/nvkm/engine/device/pci.c @@ -259,6 +259,12 @@ nvkm_device_pci_10de_0df4[] = { }; static const struct nvkm_device_pci_vendor +nvkm_device_pci_10de_0fcd[] = { + { 0x17aa, 0x3801, NULL, { .War00C800_0 = true } }, /* Lenovo Y510P */ + {} +}; + +static const struct nvkm_device_pci_vendor nvkm_device_pci_10de_0fd2[] = { { 0x1028, 0x0595, "GeForce GT 640M LE" }, { 0x1028, 0x05b2, "GeForce GT 640M LE" }, @@ -1349,7 +1355,7 @@ nvkm_device_pci_10de[] = { { 0x0fc6, "GeForce GTX 650" }, { 0x0fc8, "GeForce GT 740" }, { 0x0fc9, "GeForce GT 730" }, - { 0x0fcd, "GeForce GT 755M" }, + { 0x0fcd, "GeForce GT 755M", nvkm_device_pci_10de_0fcd }, { 0x0fce, "GeForce GT 640M LE" }, { 0x0fd1, "GeForce GT 650M" }, { 0x0fd2, "GeForce GT 640M", nvkm_device_pci_10de_0fd2 }, -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nvc0: respect edgeflag attribute width
The edgeflag comes in as ubyte with glEdgeFlagPointer but as float with plain immediate glEdgeFlag. Avoid reading bytes that weren't meant for the edgeflag in the pointer case. Fixes intermittent failures with gl-2.0-edgeflag piglit (and valgrind complaints about reading uninitialized memory). Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 8b23a48..efadeeb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -27,6 +27,7 @@ struct push_context { struct { bool enabled; bool value; + uint8_t width; unsigned stride; const uint8_t *data; } edgeflag; @@ -100,6 +101,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, struct nv04_resource *buf = nv04_resource(vb->buffer); ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.width = util_format_get_blocksize(ve->src_format); if (buf) { unsigned offset = vb->buffer_offset + ve->src_offset; ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, @@ -139,8 +141,9 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) static inline bool ef_value(const struct push_context *ctx, uint32_t index) { - float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; - return *pf ? true : false; + static const uint64_t zero = 0; + uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return !!memcmp(pf, &zero, ctx->edgeflag.width); } static inline bool -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] drm/nouveau: Fix pre-nv50 pageflip events (v2)
On Mon, Nov 9, 2015 at 7:57 AM, Mario Kleiner <mario.kleiner...@gmail.com> wrote: > From: Daniel Vetter <daniel.vet...@ffwll.ch> > > Apparently pre-nv50 pageflip events happen before the actual vblank > period. Therefore that functionality got semi-disabled in > > commit af4870e406126b7ac0ae7c7ce5751f25ebe60f28 > Author: Mario Kleiner <mario.kleiner...@gmail.com> > Date: Tue May 13 00:42:08 2014 +0200 > > drm/nouveau/kms/nv04-nv40: fix pageflip events via special case. > > Unfortunately that hack got uprooted in > > commit cc1ef118fc099295ae6aabbacc8af94d8d8885eb > Author: Thierry Reding <tred...@nvidia.com> > Date: Wed Aug 12 17:00:31 2015 +0200 > > drm/irq: Make pipe unsigned and name consistent > > Triggering a warning when trying to sample the vblank timestamp for a > non-existing pipe. There's a few ways to fix this: > > - Open-code the old behaviour, which just enshrines this slight > breakage of the userspace ABI. > > - Revert Mario's commit and again inflict broken timestamps, again not > pretty. > > - Fix this for real by delaying the pageflip TS until the next vblank > interrupt, thereby making it accurate. > > This patch implements the third option. Since having a page flip > interrupt that happens when the pageflip gets armed and not when it > completes in the next vblank seems to be fairly common (older i915 hw > works very similarly) create a new helper to arm vblank events for > such drivers. > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=106431 > Cc: Thierry Reding <tred...@nvidia.com> > Cc: Mario Kleiner <mario.kleiner...@gmail.com> > Cc: Ben Skeggs <bske...@redhat.com> > Cc: Ilia Mirkin <imir...@alum.mit.edu> > > v2 (mario): Integrate my own review comments into Daniels patch. >- Fix function prototypes in drmP.h >- Add missing vblank_put() for pageflip completion without > pageflip event. >- Initialize sequence number for queued pageflip event to avoid > trouble in drm_handle_vblank_events(). >- Remove dead code and spelling fix. 
> > Signed-off-by: Daniel Vetter <daniel.vet...@intel.com> > Reviewed-by: Mario Kleiner <mario.kleiner...@gmail.com> Without commenting on the actual patch, a few points of procedure: (a) If you're sending the patch, you're supposed to add your Signed-off-by. So you'd keep Daniel's and add yours. (b) Since this is triggering warns for real people in real situations, tack on a Cc: sta...@vger.kernel.org # v4.3 Cheers, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] llvm TGSI backend (WIP) questions
On Fri, Nov 13, 2015 at 3:42 PM, Emil Velikov <emil.l.veli...@gmail.com> wrote: > On 13 November 2015 at 14:38, Ilia Mirkin <imir...@alum.mit.edu> wrote: >> On Fri, Nov 13, 2015 at 9:25 AM, Emil Velikov <emil.l.veli...@gmail.com> >> wrote: >>> Hello Hans, >>> >>> Not to muddy the waters or anything, have you thought about the NIR >>> integration that Rob was thinking about ? >>> I'm pretty sure he'll be happy to have extra people helping him out. >> >> How would that in any way plug into llvm or nouveau? There's no OpenCL >> C -> NIR, and there's no NIR -> nv50 IR... >> > I thought that you've been (remotely) exploring the latter > possibility. Isn't that the case ? Not to my knowledge. I did look at doing SPIR -> nv50 ir (not to be confused with SPIR-V), but that was ~1.5y ago. I got stuck in control flow and llvm ir frustration. The fact that I had to go out-of-ssa didn't help. At this point I don't see any upside to using NIR. -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] help with push
See libdrm's pushbuf.c -- iirc push->cur points to a GART-mapped bo. http://cgit.freedesktop.org/mesa/drm/tree/nouveau/pushbuf.c#n682 nouveau_pushbuf_data(push, NULL, 0, 0); nouveau_bo_ref(bo, &nvpb->bo); nouveau_bo_ref(NULL, &bo); nvpb->bgn = nvpb->bo->map; nvpb->ptr = nvpb->bgn; push->cur = nvpb->bgn; push->end = push->cur + (nvpb->bo->size / 4); push->end -= 2 + push->rsvd_kick; /* space for suffix */ Not sure what problem you're trying to solve. -ilia On Mon, Nov 2, 2015 at 12:31 PM, Daniel Melo Jorge da Cunha <dmjcu...@gmail.com> wrote: > Hi, sorry if I misunderstood everything... > > In the file src/gallium/drivers/nouveau/nv30/nv30_screen.c there is loads of > PUSH_DATA which is basically *push->curr = data; > > I'm thinking that somehow push->curr is the bo->map = drm_mmap(...) > that is called in nouveau_bo_map. But I cannot see how they are linked... > Because when nouveau_bo_map calls nouveau_bo_wait > push = cli_push_get(client, bo) returns NULL... > > Is push->curr the region of memory that we send data to the card? > If so, how is it mapped? > Has bo->map something to do with it? If so, how are they linked? > > ___ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau > ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] help with push
E are you sure? nv30_screen_create starts with a bunch of stuff init'ing objects, and then does: BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1); PUSH_DATA (push, screen->eng3d->handle); And as you can see in nv30_winsys.h: static inline void BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size) { PUSH_SPACE(push, size + 1); PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd); } and PUSH_SPACE in turn calls nouveau_pushbuf_space. -ilia On Mon, Nov 2, 2015 at 1:36 PM, Daniel Melo Jorge da Cunha <dmjcu...@gmail.com> wrote: > But at the time the mesa3d file > src/gallium/drivers/nouveau/nv30/nv30_screen.c > is called and when the various PUSH_DATA begin to be called there is not yet > a call to nouveau_pushbuf_space. So it would generate a seg fault in > push->curr. Again, sorry for the confusion and thanks for the reply. > Awaiting > for an answer if possible. Thanks in advance. > > 2015-11-02 14:44 GMT-03:00 Ilia Mirkin <imir...@alum.mit.edu>: >> >> See libdrm's pushbuf.c -- iirc push->cur points to a GART-mapped bo. >> >> http://cgit.freedesktop.org/mesa/drm/tree/nouveau/pushbuf.c#n682 >> >> nouveau_pushbuf_data(push, NULL, 0, 0); >> nouveau_bo_ref(bo, &nvpb->bo); >> nouveau_bo_ref(NULL, &bo); >> >> nvpb->bgn = nvpb->bo->map; >> nvpb->ptr = nvpb->bgn; >> push->cur = nvpb->bgn; >> push->end = push->cur + (nvpb->bo->size / 4); >> push->end -= 2 + push->rsvd_kick; /* space for suffix */ >> >> Not sure what problem you're trying to solve. >> >> -ilia >> >> On Mon, Nov 2, 2015 at 12:31 PM, Daniel Melo Jorge da Cunha >> <dmjcu...@gmail.com> wrote: >> > Hi, sorry if I misunderstood everything... >> > >> > In the file src/gallium/drivers/nouveau/nv30/nv30_screen.c there is >> > loads of >> > PUSH_DATA which is basically *push->curr = data; >> > >> > I'm thinking that somehow push->curr is the bo->map = drm_mmap(...) >> > that is called in nouveau_bo_map. But I cannot see how they are >> > linked... 
>> > Because when nouveau_bo_map calls nouveau_bo_wait >> > push = cli_push_get(client, bo) returns NULL... >> > >> > Is push->curr the region of memory that we send data to the card? >> > If so, how is it mapped? >> > Has bo->map something to do with it? If so, how are they linked? >> > >> > ___ >> > Nouveau mailing list >> > Nouveau@lists.freedesktop.org >> > http://lists.freedesktop.org/mailman/listinfo/nouveau >> > > > ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] disp: activate dual link TMDS links only when possible
From: Hauke Mehrtens <ha...@hauke-m.de> Without this patch a pixel clock rate above 165 MHz on a TMDS link is assumed to be dual link. This is true for DVI, but not for HDMI. HDMI supports no dual link, but it supports pixel clock rates above 165 MHz. Only activate Dual Link mode when it is actual possible. Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nv50_display.c | 8 drm/nouveau/nvkm/engine/disp/gf119.c | 2 +- drm/nouveau/nvkm/engine/disp/nv50.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c index c053c50..93bcfdf 100644 --- a/drm/nouveau/nv50_display.c +++ b/drm/nouveau/nv50_display.c @@ -1961,10 +1961,10 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode, switch (nv_encoder->dcb->type) { case DCB_OUTPUT_TMDS: if (nv_encoder->dcb->sorconf.link & 1) { - if (mode->clock < 165000) - proto = 0x1; - else - proto = 0x5; + proto = 0x1; + if (mode->clock >= 165000 && + nv_encoder->dcb->duallink_possible) + proto |= 0x4; } else { proto = 0x2; } diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c b/drm/nouveau/nvkm/engine/disp/gf119.c index 186fd3a..8691b68 100644 --- a/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drm/nouveau/nvkm/engine/disp/gf119.c @@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 0x0f00) >> 8; - if (pclk >= 165000) + if (pclk >= 165000 && outp->info.duallink_possible) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c b/drm/nouveau/nvkm/engine/disp/nv50.c index 32e73a9..ceecd0e 100644 --- a/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drm/nouveau/nvkm/engine/disp/nv50.c @@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 
0x0f00) >> 8; - if (pclk >= 165000) + if (pclk >= 165000 && outp->info.duallink_possible) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler
Some Fermi's apparently also allow 297MHz clocks, so create a parameter which allows end-users to set it themselves until we have a reliable way to determine the board's maximum pixel clocks. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nouveau_connector.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c index 4c8f6ef..f5806eb 100644 --- a/drm/nouveau/nouveau_connector.c +++ b/drm/nouveau/nouveau_connector.c @@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: enabled)"); int nouveau_duallink = 1; module_param_named(duallink, nouveau_duallink, int, 0400); +MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)"); +int nouveau_hdmimhz = 0; +module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400); + struct nouveau_encoder * find_encoder(struct drm_connector *connector, int type) { @@ -815,6 +819,17 @@ get_tmds_link_bandwidth(struct drm_connector *connector) struct nouveau_drm *drm = nouveau_drm(connector->dev); struct dcb_output *dcb = nv_connector->detected_encoder->dcb; + if (drm_detect_hdmi_monitor(nv_connector->edid)) { + if (nouveau_hdmimhz > 0) + return nouveau_hdmimhz * 1000; + /* Note: these limits are conservative, some Fermi's +* can do 297 MHz. Unclear how this can be determined. +*/ + if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER) + return 297000; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI) + return 225000; + } if (dcb->location != DCB_LOC_ON_CHIP || drm->device.info.chipset >= 0x46) return 165000; -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nvkm: add/remove 0's to make 7 (or 9)-nibble constants use 8 nibbles
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nvkm/engine/gr/ctxgk20a.c | 2 +- drm/nouveau/nvkm/subdev/fb/ramgk104.c | 8 drm/nouveau/nvkm/subdev/therm/nv40.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index ddaa16a..ad0a6cf 100644 --- a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -55,7 +55,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gk104_grctx_generate_rop_active_fbps(gr); - nvkm_mask(device, 0x5044b0, 0x800, 0x800); + nvkm_mask(device, 0x5044b0, 0x0800, 0x0800); gf100_gr_wait_idle(gr); diff --git a/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drm/nouveau/nvkm/subdev/fb/ramgk104.c index 0d20563..2614365 100644 --- a/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -216,11 +216,11 @@ r1373f4_fini(struct gk104_ramfuc *fuc) ram_wr32(fuc, 0x1373ec, tmp | (v1 << 16)); ram_mask(fuc, 0x1373f0, (~ram->mode & 3), 0x); if (ram->mode == 2) { - ram_mask(fuc, 0x1373f4, 0x0003, 0x2); - ram_mask(fuc, 0x1373f4, 0x1100, 0x0); + ram_mask(fuc, 0x1373f4, 0x0003, 0x0002); + ram_mask(fuc, 0x1373f4, 0x1100, 0x); } else { - ram_mask(fuc, 0x1373f4, 0x0003, 0x1); - ram_mask(fuc, 0x1373f4, 0x0001, 0x0); + ram_mask(fuc, 0x1373f4, 0x0003, 0x0001); + ram_mask(fuc, 0x1373f4, 0x0001, 0x); } ram_mask(fuc, 0x10f800, 0x0030, (v0 ^ v1) << 4); } diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c b/drm/nouveau/nvkm/subdev/therm/nv40.c index 6326fdc..2c92ffb 100644 --- a/drm/nouveau/nvkm/subdev/therm/nv40.c +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c @@ -107,7 +107,7 @@ nv40_fan_pwm_ctrl(struct nvkm_therm *therm, int line, bool enable) { struct nvkm_subdev *subdev = >subdev; struct nvkm_device *device = subdev->device; - u32 mask = enable ? 0x8000 : 0x000; + u32 mask = enable ? 
0x8000 : 0x; if (line == 2) nvkm_mask(device, 0x0010f0, 0x8000, mask); else if (line == 9) nvkm_mask(device, 0x0015f4, 0x8000, mask); else { -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] HDMI pixel clock limits
In the meanwhile we're pushing out a change that just blanket allows 225MHz on Fermi and 297MHz on Kepler, with a kernel option override available. At least one GF106 user claims to have working 297MHz with proprietary drivers (and with nouveau in presence of the patches): https://bugs.freedesktop.org/show_bug.cgi?id=91236 Having an accurate way to auto-detect this would be ideal though, as higher bandwidth monitors are becoming more ubiquitous. -ilia On Mon, Oct 26, 2015 at 1:35 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > Hello, > > Various HDMI versions enable higher and higher pixel clocks. However > individual GPUs are not required to support the maximum pixel clock > supported by the spec in order to be compliant. It appears that some > GPUs max out at 225MHz while others at 297MHz (while others still, I > assume, are limited to 165MHz, esp among the older ones). > > We've been unable to find this in the VBIOS (I had a thought that it > was in the table pointed to by the 'T' table, but we have a > counterexample to that). Could you suggest a way to find this > information either from the VBIOS or based on the GPU? Are there > differences between regular HDMI and DP -> HDMI (passive)? > > This is becoming more and more relevant as 2560x1440/3840x2160 > displays are becoming more common, while (dual-link) DVI-D is on its way > out. > > Thanks, > > -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH mesa 0/5] nouveau: codegen: Make use of double immediates
Hi Hans, All pushed. I made a few additional fixes and improvements to fp64 immediate handling along the way, but all your commits were fine as-is. (Except that they enabled fp64 immediates on nv50 implicitly which is wrong -- there are no immediate-taking variants on nv50, so I fixed that glitch. But only the G200 can do fp64 in the first place, and nouveau doesn't actually expose it. Corner case of a corner case :) ) Thanks for taking care of this... it was a small bit of fp64 which I always felt bad about not having finished up. (But not bad enough to actually finish it myself.) Cheers, -ilia On Thu, Nov 5, 2015 at 8:32 AM, Hans de Goede wrote: > Hi All, > > This series implements using double immediates in the nouveau codegen code. > > This turns the following (nvc0) code: > 1: mov u32 $r2 0x (8) > 2: mov u32 $r3 0x3fe0 (8) > 3: add f64 $r0d $r0d $r2d (8) > > Into: > 1: add f64 $r0d $r0d 0.50 (8) > > This has been tested with the 2 double shader tests which I just send to > the piglet list. On a gk208 (gk110 / SM35) card, and by checking the output > of nouveau_compiler with both nvdisasm and envydis on gf100 / gk104 / gm107. > > Regards, > > Hans ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Documentation request for MP warp error 0x10
On Fri, Nov 6, 2015 at 4:19 PM, Robert Morell <rmor...@nvidia.com> wrote: > On Fri, Nov 06, 2015 at 04:15:29PM -0500, Ilia Mirkin wrote: >> In order for ATOM.*/RED.* to work, the addresses in question must >> *NOT* be inside of the 16MB local/shared windows. So if I'm getting >> that error, the address must be inside. > > Yes, that's my understanding. > >> If so, this may be a reasonable explanation for what I'm seeing -- > > Cool, I'm happy it helps. Looks like we were setting LOCAL_BASE (0x077c) to 0, which was effectively shadowing the low 16M of g[] space, which is where our buffers were ending up too. Setting it to some high-up far-off land makes everything work! Obviously I'll need some cleverer way to deal with this, but looks like it's all exactly as you described. Documentation does wonders :) Looks like I should be able to make progress on my atomics/ssbo work now. Thanks again, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Nouveau for FreeBSD
On Wed, Nov 4, 2015 at 3:38 AM, C Bergström wrote: > To bring this conversation back on track - where would someone start > *exactly* to port this to another OS? What kernel dependencies are > there? drivers/gpu/drm/nouveau/{nvkm,nvif,usif} can be dropped in wholesale with just a small handful of shims and helper functions. This is the code that presents the GPU's internals in a fairly generation-agnostic way, and also provides some high-level functionality. drivers/gpu/drm/nouveau/* needs to be ported to the other OS's infrastructure. If the OS also has a port of linux's DRM and TTM infrastructures, a lot of that will be able to be taken wholesale. This is the code that interacts with the core above. Decodes ioctls, manages memory, performs kernel-side modesetting, etc. Cheers, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 1/2] disp: activate dual link TMDS links only when possible
On Tue, Nov 3, 2015 at 7:02 PM, Ben Skeggs <skeg...@gmail.com> wrote: > On 11/04/2015 08:41 AM, Ilia Mirkin wrote: >> From: Hauke Mehrtens <ha...@hauke-m.de> >> >> Without this patch a pixel clock rate above 165 MHz on a TMDS link is >> assumed to be dual link. This is true for DVI, but not for HDMI. HDMI >> supports no dual link, but it supports pixel clock rates above 165 MHz. >> Only activate Dual Link mode when it is actual possible. >> >> Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de> >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> --- >> drm/nouveau/nv50_display.c | 8 >> drm/nouveau/nvkm/engine/disp/gf119.c | 2 +- >> drm/nouveau/nvkm/engine/disp/nv50.c | 2 +- >> 3 files changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c >> index c053c50..93bcfdf 100644 >> --- a/drm/nouveau/nv50_display.c >> +++ b/drm/nouveau/nv50_display.c >> @@ -1961,10 +1961,10 @@ nv50_sor_mode_set(struct drm_encoder *encoder, >> struct drm_display_mode *umode, >> switch (nv_encoder->dcb->type) { >> case DCB_OUTPUT_TMDS: >> if (nv_encoder->dcb->sorconf.link & 1) { >> - if (mode->clock < 165000) >> - proto = 0x1; >> - else >> - proto = 0x5; >> + proto = 0x1; >> + if (mode->clock >= 165000 && >> + nv_encoder->dcb->duallink_possible) >> + proto |= 0x4; > This is a somewhat flaky condition, given that one could plug a > single-link HDMI monitor into a duallink-capable TMDS connector. > > Still, it's an improvement :) Yeah, FWIW I thought of that (for the second patch too). All this stuff is pretty fragile. But... what are you gonna do. Is there some other way of telling whether we're on HDMI or DVI? 
> >> } else { >> proto = 0x2; >> } >> diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c >> b/drm/nouveau/nvkm/engine/disp/gf119.c >> index 186fd3a..8691b68 100644 >> --- a/drm/nouveau/nvkm/engine/disp/gf119.c >> +++ b/drm/nouveau/nvkm/engine/disp/gf119.c >> @@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, >> u32 pclk, u32 *conf) >> switch (outp->info.type) { >> case DCB_OUTPUT_TMDS: >> *conf = (ctrl & 0x0f00) >> 8; >> - if (pclk >= 165000) >> + if (pclk >= 165000 && outp->info.duallink_possible) >> *conf |= 0x0100; > I think it might be more robust to key this off the SOR protocol, rather > than duplicating the condition above. You mean disp->sor.lvdsconf? What do I do with that? Or did you have something else in mind? > >> break; >> case DCB_OUTPUT_LVDS: >> diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c >> b/drm/nouveau/nvkm/engine/disp/nv50.c >> index 32e73a9..ceecd0e 100644 >> --- a/drm/nouveau/nvkm/engine/disp/nv50.c >> +++ b/drm/nouveau/nvkm/engine/disp/nv50.c >> @@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, >> u32 pclk, u32 *conf) >> switch (outp->info.type) { >> case DCB_OUTPUT_TMDS: >> *conf = (ctrl & 0x0f00) >> 8; >> - if (pclk >= 165000) >> + if (pclk >= 165000 && outp->info.duallink_possible) >> *conf |= 0x0100; > Same here. > >> break; >> case DCB_OUTPUT_LVDS: >> > ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] kms: no need to check for empty edid before drm_detect_hdmi_monitor
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nv50_display.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c index bdaba91..d9cba87 100644 --- a/drm/nouveau/nv50_display.c +++ b/drm/nouveau/nv50_display.c @@ -773,7 +773,6 @@ nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update) */ if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON || (nv_connector->underscan == UNDERSCAN_AUTO && - nv_connector->edid && drm_detect_hdmi_monitor(nv_connector->edid { u32 bX = nv_connector->underscan_hborder; u32 bY = nv_connector->underscan_vborder; -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 1/2] disp: activate dual link TMDS links only when possible
From: Hauke Mehrtens <ha...@hauke-m.de> Without this patch a pixel clock rate above 165 MHz on a TMDS link is assumed to be dual link. This is true for DVI, but not for HDMI. HDMI supports no dual link, but it supports pixel clock rates above 165 MHz. Only activate Dual Link mode when it is actually possible and requested. Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de> [imirkin: check for hdmi monitor for computing proto, use sor ctrl to enable extra config bit] Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nv50_display.c | 18 ++ drm/nouveau/nvkm/engine/disp/gf119.c | 2 +- drm/nouveau/nvkm/engine/disp/nv50.c | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c index c053c50..57781fd 100644 --- a/drm/nouveau/nv50_display.c +++ b/drm/nouveau/nv50_display.c @@ -1961,10 +1961,20 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode, switch (nv_encoder->dcb->type) { case DCB_OUTPUT_TMDS: if (nv_encoder->dcb->sorconf.link & 1) { - if (mode->clock < 165000) - proto = 0x1; - else - proto = 0x5; + proto = 0x1; + /* Only enable dual-link if: +* - DCB says we can +* - Need to (i.e. rate > 165MHz) +* - Not an HDMI monitor, since there's no dual-link +*on HDMI. Of course in order to determine that, +*we need the EDID. So if no EDID, just let it +*slide. 
+*/ + if (mode->clock >= 165000 && + nv_encoder->dcb->duallink_possible && + (!nv_connector->edid || +!drm_detect_hdmi_monitor(nv_connector->edid))) + proto |= 0x4; } else { proto = 0x2; } diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c b/drm/nouveau/nvkm/engine/disp/gf119.c index 186fd3a..f031466 100644 --- a/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drm/nouveau/nvkm/engine/disp/gf119.c @@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 0x0f00) >> 8; - if (pclk >= 165000) + if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c b/drm/nouveau/nvkm/engine/disp/nv50.c index 32e73a9..4226d21 100644 --- a/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drm/nouveau/nvkm/engine/disp/nv50.c @@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 0x0f00) >> 8; - if (pclk >= 165000) + if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler
Some Fermi's apparently also allow 297MHz clocks, so create a parameter which allows end-users to set it themselves until we have a reliable way to determine the board's maximum pixel clocks. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nouveau_connector.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c index 4c8f6ef..57bea79 100644 --- a/drm/nouveau/nouveau_connector.c +++ b/drm/nouveau/nouveau_connector.c @@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: enabled)"); int nouveau_duallink = 1; module_param_named(duallink, nouveau_duallink, int, 0400); +MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)"); +int nouveau_hdmimhz = 0; +module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400); + struct nouveau_encoder * find_encoder(struct drm_connector *connector, int type) { @@ -809,12 +813,23 @@ nouveau_connector_get_modes(struct drm_connector *connector) } static unsigned -get_tmds_link_bandwidth(struct drm_connector *connector) +get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi) { struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_drm *drm = nouveau_drm(connector->dev); struct dcb_output *dcb = nv_connector->detected_encoder->dcb; + if (hdmi) { + if (nouveau_hdmimhz > 0) + return nouveau_hdmimhz * 1000; + /* Note: these limits are conservative, some Fermi's +* can do 297 MHz. Unclear how this can be determined. 
+*/ + if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER) + return 297000; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI) + return 225000; + } if (dcb->location != DCB_LOC_ON_CHIP || drm->device.info.chipset >= 0x46) return 165000; @@ -835,6 +850,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector, struct drm_encoder *encoder = to_drm_encoder(nv_encoder); unsigned min_clock = 25000, max_clock = min_clock; unsigned clock = mode->clock; + bool hdmi; switch (nv_encoder->dcb->type) { case DCB_OUTPUT_LVDS: @@ -847,8 +863,11 @@ nouveau_connector_mode_valid(struct drm_connector *connector, max_clock = 40; break; case DCB_OUTPUT_TMDS: - max_clock = get_tmds_link_bandwidth(connector); - if (nouveau_duallink && nv_encoder->dcb->duallink_possible) + hdmi = !nv_connector->edid || + drm_detect_hdmi_monitor(nv_connector->edid); + max_clock = get_tmds_link_bandwidth(connector, hdmi); + if (!hdmi && nouveau_duallink && + nv_encoder->dcb->duallink_possible) max_clock *= 2; break; case DCB_OUTPUT_ANALOG: -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v3 2/2] connector: allow 225/297MHz pixel clocks for HDMI on Fermi/Kepler
Some Fermi's apparently also allow 297MHz clocks, so create a parameter which allows end-users to set it themselves until we have a reliable way to determine the board's maximum pixel clocks. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nouveau_connector.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c index 4c8f6ef..8dd384b 100644 --- a/drm/nouveau/nouveau_connector.c +++ b/drm/nouveau/nouveau_connector.c @@ -56,6 +56,10 @@ MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: enabled)"); int nouveau_duallink = 1; module_param_named(duallink, nouveau_duallink, int, 0400); +MODULE_PARM_DESC(hdmimhz, "Force a maximum HDMI pixel clock (in MHz)"); +int nouveau_hdmimhz = 0; +module_param_named(hdmimhz, nouveau_hdmimhz, int, 0400); + struct nouveau_encoder * find_encoder(struct drm_connector *connector, int type) { @@ -809,12 +813,23 @@ nouveau_connector_get_modes(struct drm_connector *connector) } static unsigned -get_tmds_link_bandwidth(struct drm_connector *connector) +get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi) { struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_drm *drm = nouveau_drm(connector->dev); struct dcb_output *dcb = nv_connector->detected_encoder->dcb; + if (hdmi) { + if (nouveau_hdmimhz > 0) + return nouveau_hdmimhz * 1000; + /* Note: these limits are conservative, some Fermi's +* can do 297 MHz. Unclear how this can be determined. 
+*/ + if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER) + return 297000; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI) + return 225000; + } if (dcb->location != DCB_LOC_ON_CHIP || drm->device.info.chipset >= 0x46) return 165000; @@ -835,6 +850,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector, struct drm_encoder *encoder = to_drm_encoder(nv_encoder); unsigned min_clock = 25000, max_clock = min_clock; unsigned clock = mode->clock; + bool hdmi; switch (nv_encoder->dcb->type) { case DCB_OUTPUT_LVDS: @@ -847,8 +863,10 @@ nouveau_connector_mode_valid(struct drm_connector *connector, max_clock = 40; break; case DCB_OUTPUT_TMDS: - max_clock = get_tmds_link_bandwidth(connector); - if (nouveau_duallink && nv_encoder->dcb->duallink_possible) + hdmi = drm_detect_hdmi_monitor(nv_connector->edid); + max_clock = get_tmds_link_bandwidth(connector, hdmi); + if (!hdmi && nouveau_duallink && + nv_encoder->dcb->duallink_possible) max_clock *= 2; break; case DCB_OUTPUT_ANALOG: -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v3 1/2] disp: activate dual link TMDS links only when possible
From: Hauke Mehrtens <ha...@hauke-m.de> Without this patch a pixel clock rate above 165 MHz on a TMDS link is assumed to be dual link. This is true for DVI, but not for HDMI. HDMI supports no dual link, but it supports pixel clock rates above 165 MHz. Only activate Dual Link mode when it is actually possible and requested. Signed-off-by: Hauke Mehrtens <ha...@hauke-m.de> [imirkin: check for hdmi monitor for computing proto, use sor ctrl to enable extra config bit] Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nv50_display.c | 15 +++ drm/nouveau/nvkm/engine/disp/gf119.c | 2 +- drm/nouveau/nvkm/engine/disp/nv50.c | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c index c053c50..bdaba91 100644 --- a/drm/nouveau/nv50_display.c +++ b/drm/nouveau/nv50_display.c @@ -1961,10 +1961,17 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode, switch (nv_encoder->dcb->type) { case DCB_OUTPUT_TMDS: if (nv_encoder->dcb->sorconf.link & 1) { - if (mode->clock < 165000) - proto = 0x1; - else - proto = 0x5; + proto = 0x1; + /* Only enable dual-link if: +* - Need to (i.e. rate > 165MHz) +* - DCB says we can +* - Not an HDMI monitor, since there's no dual-link +*on HDMI. 
+*/ + if (mode->clock >= 165000 && + nv_encoder->dcb->duallink_possible && + !drm_detect_hdmi_monitor(nv_connector->edid)) + proto |= 0x4; } else { proto = 0x2; } diff --git a/drm/nouveau/nvkm/engine/disp/gf119.c b/drm/nouveau/nvkm/engine/disp/gf119.c index 186fd3a..f031466 100644 --- a/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drm/nouveau/nvkm/engine/disp/gf119.c @@ -158,7 +158,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 0x0f00) >> 8; - if (pclk >= 165000) + if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: diff --git a/drm/nouveau/nvkm/engine/disp/nv50.c b/drm/nouveau/nvkm/engine/disp/nv50.c index 32e73a9..4226d21 100644 --- a/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drm/nouveau/nvkm/engine/disp/nv50.c @@ -391,7 +391,7 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) switch (outp->info.type) { case DCB_OUTPUT_TMDS: *conf = (ctrl & 0x0f00) >> 8; - if (pclk >= 165000) + if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Nouveau for FreeBSD
Nouveau kernel module has a largely os-agnostic "core" component (called nvkm/nvif now) which encompasses the actual operation of the GPU. The drm wrapper around it provides the relevant interfaces for KMS/ioctls/etc. Any port would want the ioctl bits as well, since that's what the userspace mesa/ddx components rely on. That said, I'm not aware of any serious effort to port nouveau to any other OS. -ilia On Wed, Nov 4, 2015 at 2:08 AM,wrote: > Is anyone actually and or actively working on this? > Github.com/pathscale/pscnv is totally bitrot but waaay more portable base. > Nouveau made hard Linux assumptions that will be difficult to overcome > afaik. > > > > *From: *Curtis Hamilton > *Sent: *Wednesday, November 4, 2015 08:06 > *To: *nouveau@lists.freedesktop.org > *Subject: *[Nouveau] Nouveau for FreeBSD > > Any progress on the FreeBSD front? > > > ___ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau > > ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] Documentation request for MP warp error 0x10
On Fri, Nov 6, 2015 at 3:59 PM, Robert Morell <rmor...@nvidia.com> wrote: > On Fri, Oct 02, 2015 at 06:05:21PM -0400, Ilia Mirkin wrote: >> Could you advise what the proper way of indicating >> that the memory is "global" to the op? I'm sure I'm just missing >> something simple. If you show me what to look for in SM35 I can >> probably find it on my own for SM20/SM30/SM50. > > Sorry again for the delay. Here's what I've been able to find out about > the generic thread address space (used by the SMs) and what types of > memory it contains. Hopefully this clears things up. > > > Local memory is a per-thread space. > Shared memory is a per-CTA space (compute shaders only). > > LDL and STL instructions access local memory with a zero offset. > LDS, LSDLK, STS, and STSCUL instructions access shared memory with a zero > offset. > > LD, ST, RED, ATOM, and CCTL.D instructions access the generic thread address > space, which is layered on top of the channel's virtual address space. > > In the generic thread address space, there are 16MB windows into local and > shared memory; everything not in a Local or Shared address window accesses > global virtual memory. > > The local window offset within the generic thread address space is determined > by the SetShaderLocalMemoryWindow class method (offset 0x77c in classes *97 > and > *c0). > > The shared window offset within the generic thread address space is determined > by the SetShaderSharedMemoryWindow class method (offset 0x214 in classes *c0). > > For both methods, the offset is in bytes, but the window must be aligned to a > 16MB boundary (so the lower 24 bits of the data must be zero). The upper 32 > bits of the windows are hard-coded to 0 (so they must be placed within the > lower 4GB of address space). > > Generally, it is expected that software will reserve ranges in the global > virtual address space where these windows will be placed. (Otherwise anything > mapped there will be inaccessible to shaders.) 
> > For graphics shaders, the shared address space logic does not exist, so there > is no need to reserve virtual memory for it. Hi Robert, thanks so much for getting back to me. I believe I've understood what you've said, but please confirm: In order for ATOM.*/RED.* to work, the addresses in question must *NOT* be inside of the 16MB local/shared windows. So if I'm getting that error, the address must be inside. If so, this may be a reasonable explanation for what I'm seeing -- while I knew about the local/shared windows, I didn't realize that the windows were 16MB-sized. Thanks again, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] pci: enable c800 magic for Medion Erazer X7827
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91557 Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nvkm/engine/device/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drm/nouveau/nvkm/engine/device/pci.c b/drm/nouveau/nvkm/engine/device/pci.c index e8eb14e..20318f4 100644 --- a/drm/nouveau/nvkm/engine/device/pci.c +++ b/drm/nouveau/nvkm/engine/device/pci.c @@ -678,6 +678,7 @@ nvkm_device_pci_10de_1189[] = { static const struct nvkm_device_pci_vendor nvkm_device_pci_10de_1199[] = { { 0x1458, 0xd001, "GeForce GTX 760" }, + { 0x1462, 0x1106, "GeForce GTX 780M", { .War00C800_0 = true } }, /* Medion Erazer X7827 */ {} }; -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] gr: document mp error 0x10
NVIDIA provided the documentation for mp error 0x10, INVALID_ADDR_SPACE, which apparently happens when trying to use an atomic operation on local or shared memory (instead of global memory). Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nvkm/engine/gr/gf100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c b/drm/nouveau/nvkm/engine/gr/gf100.c index f1358a5..dda7a7d 100644 --- a/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drm/nouveau/nvkm/engine/gr/gf100.c @@ -882,6 +882,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = { { 0x0d, "GPR_OUT_OF_BOUNDS" }, { 0x0e, "MEM_OUT_OF_BOUNDS" }, { 0x0f, "UNALIGNED_MEM_ACCESS" }, + { 0x10, "INVALID_ADDR_SPACE" }, { 0x11, "INVALID_PARAM" }, {} }; -- 2.4.9 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 2/2] gr: add FERMI_COMPUTE_B class to GF110+
GF110+ supports both the A and B compute classes, make sure to accept both. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nvkm/engine/gr/gf110.c | 1 + drm/nouveau/nvkm/engine/gr/gf117.c | 1 + drm/nouveau/nvkm/engine/gr/gf119.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drm/nouveau/nvkm/engine/gr/gf110.c b/drm/nouveau/nvkm/engine/gr/gf110.c index d131874..d081ee4 100644 --- a/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drm/nouveau/nvkm/engine/gr/gf110.c @@ -98,6 +98,7 @@ gf110_gr = { { -1, -1, FERMI_B, _fermi }, { -1, -1, FERMI_C, _fermi }, { -1, -1, FERMI_COMPUTE_A }, + { -1, -1, FERMI_COMPUTE_B }, {} } }; diff --git a/drm/nouveau/nvkm/engine/gr/gf117.c b/drm/nouveau/nvkm/engine/gr/gf117.c index 28483d8..d8e8af4 100644 --- a/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drm/nouveau/nvkm/engine/gr/gf117.c @@ -135,6 +135,7 @@ gf117_gr = { { -1, -1, FERMI_B, _fermi }, { -1, -1, FERMI_C, _fermi }, { -1, -1, FERMI_COMPUTE_A }, + { -1, -1, FERMI_COMPUTE_B }, {} } }; diff --git a/drm/nouveau/nvkm/engine/gr/gf119.c b/drm/nouveau/nvkm/engine/gr/gf119.c index 9811a72..01faf9a 100644 --- a/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drm/nouveau/nvkm/engine/gr/gf119.c @@ -189,6 +189,7 @@ gf119_gr = { { -1, -1, FERMI_B, _fermi }, { -1, -1, FERMI_C, _fermi }, { -1, -1, FERMI_COMPUTE_A }, + { -1, -1, FERMI_COMPUTE_B }, {} } }; -- 2.4.9 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] gem: return only valid domain when there's only one
On nv50+, we restrict the valid domains to just the one where the buffer was originally created. However after the buffer is evicted to system memory, we might move it back to a different domain that was not originally valid. When sharing the buffer and retrieving its GEM_INFO data, we still want the domain that will be valid for this buffer in a pushbuf, not the one where it currently happens to be. This resolves fdo#92504 and several others. These are due to suspend evicting all buffers, making it more likely that they temporarily end up in the wrong place. Cc: sta...@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92504 Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nouveau_gem.c | 5 +++-- lib/include/nvif/os.h | 6 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c index ce74ab1..a108cc3 100644 --- a/drm/nouveau/nouveau_gem.c +++ b/drm/nouveau/nouveau_gem.c @@ -229,11 +229,12 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nvkm_vma *vma; - if (nvbo->bo.mem.mem_type == TTM_PL_TT) + if (is_power_of_2(nvbo->valid_domains)) + rep->domain = nvbo->valid_domains; + else if (nvbo->bo.mem.mem_type == TTM_PL_TT) rep->domain = NOUVEAU_GEM_DOMAIN_GART; else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; - rep->offset = nvbo->bo.offset; if (cli->vm) { vma = nouveau_bo_vma_find(nvbo, cli->vm); diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h index 552ecf7..2df3048 100644 --- a/lib/include/nvif/os.h +++ b/lib/include/nvif/os.h @@ -135,6 +135,12 @@ typedef dma_addr_t resource_size_t; #define IS_ENABLED(x) IS_ENABLED_##x +static inline bool +is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + static inline int order_base_2(u64 base) { -- 2.4.10 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nvc0: do upload-time fixups for interpolation parameters
WIP: only support SM35, need to add SM20 and SM50 support Unfortunately flatshading is an all-or-nothing proposition on nvc0, while GL 3.0 calls for the ability to selectively specify explicit interpolation parameters on gl_Color/gl_SecondaryColor which would override the flatshading setting. This allows us to fix up the interpolation settings after shader generation based on rasterizer settings. While we're at it, we can add support for dynamically forcing all (non-flat) shader inputs to be interpolated per-sample, which allows st/mesa to not generate variants for these. Fixes the remaining failing glsl-1.30/execution/interpolation piglits. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- Incomplete as per above. Wanted to get it out there in case there was any feedback. This will only work on GK110/GK208 as-is. .../drivers/nouveau/codegen/nv50_ir_driver.h | 5 +++ .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++-- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +--- .../drivers/nouveau/codegen/nv50_ir_target.cpp | 39 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 21 src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 23 - src/gallium/drivers/nouveau/nvc0/nvc0_program.h| 6 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 16 - src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 3 -- .../drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h | 2 +- 12 files changed, 147 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 14acb60..2f5654f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -99,6 +99,7 @@ struct nv50_ir_prog_info uint8_t sourceRep; /* NV50_PROGRAM_IR */ const void *source; void *relocData; + void *interpData; struct nv50_ir_prog_symbol *syms; uint16_t 
numSyms; } bin; @@ -198,6 +199,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, uint32_t libPos, uint32_t dataPos); +extern void +nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_per_sample, bool flatshade); + /* obtain code that will be shared among programs */ extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 8f15429..d712c9c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 19); + code[loc + 1] |= (ipa & 0x3) << 21; + code[loc + 1] |= (ipa & 0xc) << (19 - 2); + code[loc + 0] &= ~(0xff << 23); + code[loc + 0] |= reg << 23; +} + void CodeEmitterGK110::emitINTERP(const Instruction *i) { @@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->saturate) code[1] |= 1 << 18; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0xff << 23; + addInterp(i->ipa, 0xff, interpApply); + } srcId(i->src(0).getIndirect(0), 10); emitInterpMode(i); diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 21099d5..0489ef8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1057,7 +1057,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default:
Re: [Nouveau] 4K ~ 3840x2160 resolution and frame buffer support
HDMI with nouveau is limited to 165mhz unfortunately. This is incorrect (the hardware can do more), but nevertheless, that's the present limit. If you can connect the screen over dual-link DVI or DP, you should be able to get the full resolution. On Thu, Oct 8, 2015 at 5:18 PM, James Lehman <ja...@akrobiz.com> wrote: > Thank you for your quick reply. > > I have a Gigabyte GT-730 2GB card hooked directly via a high quality HDMI > cable to a 65 inch Samsung curved screen LCD 4K monitor. > > I'm not sure what I am looking for in dmesg and xorg log. > > Please advise. > > I have built the 3.19.8 kernel and enabled frame buffer support. > > I can see 7 out of the 8 penguins when I boot. > > My ultimate goal is to be able to boot to a 3840x2160 32 bit (frame buffer) > console. > > Then go into Xfce, or not, as needed. > > I like to write frame buffer apps that run in the console. > > Thank you. > > > > On 10/08/2015 04:44 PM, Ilia Mirkin wrote: >> >> If you only get 1920x1080 in X, chances are that nouveau doesn't >> believe it can do 3840x2160 for one reason or another. There are a >> number of reasons why this might be the case, please provide dmesg, >> xorg log, and information on how the monitor is connected (including >> any A->B type adapters). >> >>-ilia >> >> On Thu, Oct 8, 2015 at 4:21 PM, James Lehman <ja...@akrobiz.com> wrote: >>> >>> Hello. I hope this is in the right place. >>> >>> I just built a new machine and installed Xubuntu and I'm still figuring >>> things out. >>> >>> I'm interested in working with The Linux Frame Buffer. >>> >>> Many years ago, I started a project called ezfb and I would like to >>> continue >>> developing it on a machine capable of 4K. >>> >>> Is it possible to have a frame buffer that does 3840x2160 ? >>> >>> With nouveau installed as my video driver, I can get to a frame buffer >>> console of 1920x1080, but I am limited to that resolution in Xfce as >>> well. 
>>> >>> With a proprietary nVidia driver installed I can get to 4K in Xfce, but >>> my >>> frame buffer is 640x480, 4 bit color. When I try fbset to change >>> anything, >>> it doesn't work. >>> >>> I have not even bothered to try my code so see of any of it can actually >>> work in the frame buffers I get. >>> >>> Still scratching my head >>> >>> Thank you for your time. >>> >>> James. >>> >>> >>> ___ >>> Nouveau mailing list >>> Nouveau@lists.freedesktop.org >>> http://lists.freedesktop.org/mailman/listinfo/nouveau > > > ___ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] 4K ~ 3840x2160 resolution and frame buffer support
If you only get 1920x1080 in X, chances are that nouveau doesn't believe it can do 3840x2160 for one reason or another. There are a number of reasons why this might be the case, please provide dmesg, xorg log, and information on how the monitor is connected (including any A->B type adapters). -ilia On Thu, Oct 8, 2015 at 4:21 PM, James Lehman <ja...@akrobiz.com> wrote: > Hello. I hope this is in the right place. > > I just built a new machine and installed Xubuntu and I'm still figuring > things out. > > I'm interested in working with The Linux Frame Buffer. > > Many years ago, I started a project called ezfb and I would like to continue > developing it on a machine capable of 4K. > > Is it possible to have a frame buffer that does 3840x2160 ? > > With nouveau installed as my video driver, I can get to a frame buffer > console of 1920x1080, but I am limited to that resolution in Xfce as well. > > With a proprietary nVidia driver installed I can get to 4K in Xfce, but my > frame buffer is 640x480, 4 bit color. When I try fbset to change anything, > it doesn't work. > > I have not even bothered to try my code so see of any of it can actually > work in the frame buffers I get. > > Still scratching my head > > Thank you for your time. > > James. > > > ___ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > This patch looks fine except that it should be a bit more normalized. I > mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for > PUSH_SPACE calls, sometimes you add it sometimes not. Meh. We need to get our error checking situation straight, but this isn't the patch to do it in. > > Did you run a full piglit test this time ? :) Nope, but I ran a full piglit before this patch. Almost took down my box. Probably won't be running it again for this patch. > > See my comment below. > > > On 10/10/2015 11:09 AM, Ilia Mirkin wrote: >> >> We still have to push everything out, might as well kick earlier and >> flip pushbufs when we know we'll need it. This resolves some issues with >> the new policy of making sure that we always leave a bit of room at the >> end for fences. >> >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> Cc: mesa-sta...@lists.freedesktop.org >> --- >> src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 9 ++--- >> src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16 >> +++- >> src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20 >> +--- >> 3 files changed, 10 insertions(+), 35 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c >> b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c >> index fdde11f..941555f 100644 >> --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c >> +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c >> @@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50) >> PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); >> } >> while (words) { >> - unsigned nr; >> - >> - if (!PUSH_SPACE(push, 16)) >> - break; >> - nr = PUSH_AVAIL(push); >> - assert(nr >= 16); >> - nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); >> + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); >> + PUSH_SPACE(push, nr + 3); > > > This PUSH_SPACE call doesn't seem to be needed for 
me because > NV50_PUSH_EXPLICIT_SPACE_CHECKING is not set and the following BEGIN_XXX > calls will allocate space. I want to ensure that both of the below commands are in the same batch. Not sure if it's necessary, but... don't want to find out. They were in the same batch before. And this batch stuff is what was causing the M2MF errors I was seeing earlier. > > >> BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); >> PUSH_DATA (push, (start << 8) | b); >> BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); >> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c >> b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c >> index be51407..9a3fd1e 100644 >> --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c >> +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c >> @@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, >> PUSH_DATA (push, 0); >>while (count) { >> - unsigned nr; >> - >> - if (!PUSH_SPACE(push, 16)) >> - break; >> - nr = PUSH_AVAIL(push); >> - assert(nr >= 16); >> - nr = MIN2(count, nr - 1); >> - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); >> + unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); >> BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); >> PUSH_DATAp(push, src, nr); >> @@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv, >> nouveau_pushbuf_validate(push); >>while (words) { >> - unsigned nr; >> - >> - nr = PUSH_AVAIL(push); >> - nr = MIN2(nr - 7, words); >> - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); >> + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); >> + PUSH_SPACE(push, nr + 7); >> BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); >> PUSH_DATAh(push, bo->offset + base); >> PUSH_DATA (push, bo->offset + base); >> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c >> b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c >> index aaec60a..d459dd6 100644 >> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c >> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c >> @@ -188,14 +188,10 @@ 
nvc0_m2mf_push_linear(struct nouveau_context *nv, >> nouve
Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 10/10/2015 09:42 PM, Ilia Mirkin wrote: >> >> On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset >> <samuel.pitoi...@gmail.com> wrote: >>> >>> This patch looks fine except that it should be a bit more normalized. I >>> mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for >>> PUSH_SPACE calls, sometimes you add it sometimes not. >> >> Meh. We need to get our error checking situation straight, but this >> isn't the patch to do it in. > > > Yeah, but this needs to be clarified. What does? ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] [PATCH] nouveau: avoid emitting new fences unnecessarily
On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Does this fix those texelFetch piglit tests ? Or is it the second patch ? This patch "fixes" the initial texelFetch piglit failures. However it creates some fresh texelFetch piglit failures -- that test is interesting because it does a lot of draws with minimal state changes between them. Those ones are fixed by the second patch. But really these are all different problems, which interact with each other in frustrating ways. > > Anyway, this patch is : > > Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > > > On 10/10/2015 08:12 AM, Ilia Mirkin wrote: >> >> Right now we emit on every kick, but this is only necessary if something >> will ever be able to observe that the fence completed. If there are no >> refs, leave the fence alone and emit it another day. >> >> This also happens to work around an issue for the kick handler -- a kick >> can be a result of e.g. nouveau_bo_wait or explicit kick, or it can be >> due to lack of space in the pushbuf. We want the emit to happen in the >> current batch, so we want there to always be enough space. However an >> explicit kick could take the reserved space for the implicitly-triggered >> kick's fence emission if it happened right after. With the new mechanism, >> hopefully there's no way to cause two fences to be emitted into the same >> reserved space. 
>> >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> Cc: mesa-sta...@lists.freedesktop.org >> Fixes: 47d11990b (nouveau: make sure there's always room to emit a fence) >> --- >> src/gallium/drivers/nouveau/nouveau_fence.c | 12 +--- >> 1 file changed, 9 insertions(+), 3 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c >> b/src/gallium/drivers/nouveau/nouveau_fence.c >> index ee4e08d..18b1592 100644 >> --- a/src/gallium/drivers/nouveau/nouveau_fence.c >> +++ b/src/gallium/drivers/nouveau/nouveau_fence.c >> @@ -190,8 +190,10 @@ nouveau_fence_wait(struct nouveau_fence *fence) >> /* wtf, someone is waiting on a fence in flush_notify handler? */ >> assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING); >> - if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) >> + if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) { >> + PUSH_SPACE(screen->pushbuf, 8); >> nouveau_fence_emit(fence); >> + } >>if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED) >> if (nouveau_pushbuf_kick(screen->pushbuf, >> screen->pushbuf->channel)) >> @@ -224,8 +226,12 @@ nouveau_fence_wait(struct nouveau_fence *fence) >> void >> nouveau_fence_next(struct nouveau_screen *screen) >> { >> - if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) >> - nouveau_fence_emit(screen->fence.current); >> + if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) { >> + if (screen->fence.current->ref > 1) >> + nouveau_fence_emit(screen->fence.current); >> + else >> + return; >> + } >>nouveau_fence_ref(NULL, &screen->fence.current); >> > > ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
On Sat, Oct 10, 2015 at 4:21 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 10/10/2015 09:58 PM, Ilia Mirkin wrote: >> >> On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset >> <samuel.pitoi...@gmail.com> wrote: >>> >>> >>> On 10/10/2015 09:42 PM, Ilia Mirkin wrote: >>>> >>>> On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset >>>> <samuel.pitoi...@gmail.com> wrote: >>>>> >>>>> This patch looks fine except that it should be a bit more normalized. I >>>>> mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same >>>>> for >>>>> PUSH_SPACE calls, sometimes you add it sometimes not. >>>> >>>> Meh. We need to get our error checking situation straight, but this >>>> isn't the patch to do it in. >>> >>> >>> Yeah, but this needs to be clarified. >> >> What does? > > > I mean, we should either use PUSH_SPACE everywhere or not at all, and always > breaks (or not) when PUSH_SPACE fails. > That's really a minor issue. It's actually a major issue. Error-handling is practically non-existent. There are a couple of spots here and there, but it doesn't really scale up. I guess I (semi-)accidentally removed a couple of spots that error checked, but, again, meh. Doing this for real will require some careful thought. -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nouveau: make sure there's always room to emit a fence
I started seeing a lot of situations on nv30 where fence emission wouldn't fit into the previous buffer (causing assertions). This ensures that whenever checking for space, we always leave a bit of extra room for the fence emission commands. Adjusts the nv30 and nvc0 fence emission logic to bypass the space checking as well. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nouveau_winsys.h | 2 ++ src/gallium/drivers/nouveau/nv30/nv30_screen.c | 4 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 389a229..a44fd3e 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push) static inline bool PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size) { + /* Provide a buffer so that fences always have room to be emitted */ + size += 8; if (PUSH_AVAIL(push) < size) return nouveau_pushbuf_space(push, size, 0, 0) == 0; return true; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 39267b3..335c163 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -347,7 +347,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence) *sequence = ++screen->base.fence.sequence; - BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2); + assert(PUSH_AVAIL(push) >= 3); + PUSH_DATA (push, NV30_3D_FENCE_OFFSET | + (2 /* size */ << 18) | (7 /* subchan */ << 13)); PUSH_DATA (push, 0); PUSH_DATA (push, *sequence); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6012ff6..812b246 100644 --- 
a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -388,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) /* we need to do it after possible flush in MARK_RING */ *sequence = ++screen->base.fence.sequence; + assert(PUSH_AVAIL(push) >= 5); PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); PUSH_DATAh(push, screen->fence.bo->offset); PUSH_DATA (push, screen->fence.bo->offset); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 32da76c..afd91e6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -537,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) /* we need to do it after possible flush in MARK_RING */ *sequence = ++screen->base.fence.sequence; - BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + assert(PUSH_AVAIL(push) >= 5); + PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4)); PUSH_DATAh(push, screen->fence.bo->offset); PUSH_DATA (push, screen->fence.bo->offset); PUSH_DATA (push, *sequence); -- 2.4.9 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
We still have to push everything out, might as well kick earlier and flip pushbufs when we know we'll need it. This resolves some issues with the new policy of making sure that we always leave a bit of room at the end for fences. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 9 ++--- src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16 +++- src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20 +--- 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index fdde11f..941555f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50) PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); } while (words) { - unsigned nr; - - if (!PUSH_SPACE(push, 16)) - break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); + PUSH_SPACE(push, nr + 3); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (start << 8) | b); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index be51407..9a3fd1e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, PUSH_DATA (push, 0); while (count) { - unsigned nr; - - if (!PUSH_SPACE(push, 16)) - break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(count, nr - 1); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); PUSH_DATAp(push, src, nr); @@ -395,12 +388,9 @@ 
nv50_cb_push(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (words) { - unsigned nr; - - nr = PUSH_AVAIL(push); - nr = MIN2(nr - 7, words); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); + PUSH_SPACE(push, nr + 7); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c index aaec60a..d459dd6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (count) { - unsigned nr; + unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); - if (!PUSH_SPACE(push, 16)) + if (!PUSH_SPACE(push, nr + 9)) break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(count, nr - 9); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); PUSH_DATAh(push, dst->offset + offset); @@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (count) { - unsigned nr; + unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1)); - if (!PUSH_SPACE(push, 16)) + if (!PUSH_SPACE(push, nr + 10)) break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(count, nr - 8); - nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, dst->offset + offset); @@ -571,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv, PUSH_DATA (push, bo->offset + base); while (words) { - unsigned nr = PUSH_AVAIL(push); - nr = MIN2(nr, words); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1); PUSH_SPACE(push, nr + 2); PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); -- 2.4.9 ___ Nouveau 
mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nouveau: avoid emitting new fences unnecessarily
Right now we emit on every kick, but this is only necessary if something will ever be able to observe that the fence completed. If there are no refs, leave the fence alone and emit it another day. This also happens to work around an issue for the kick handler -- a kick can be a result of e.g. nouveau_bo_wait or explicit kick, or it can be due to lack of space in the pushbuf. We want the emit to happen in the current batch, so we want there to always be enough space. However an explicit kick could take the reserved space for the implicitly-triggered kick's fence emission if it happened right after. With the new mechanism, hopefully there's no way to cause two fences to be emitted into the same reserved space. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org Fixes: 47d11990b (nouveau: make sure there's always room to emit a fence) --- src/gallium/drivers/nouveau/nouveau_fence.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c index ee4e08d..18b1592 100644 --- a/src/gallium/drivers/nouveau/nouveau_fence.c +++ b/src/gallium/drivers/nouveau/nouveau_fence.c @@ -190,8 +190,10 @@ nouveau_fence_wait(struct nouveau_fence *fence) /* wtf, someone is waiting on a fence in flush_notify handler? 
*/ assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING); - if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) + if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) { + PUSH_SPACE(screen->pushbuf, 8); nouveau_fence_emit(fence); + } if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED) if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel)) @@ -224,8 +226,12 @@ nouveau_fence_wait(struct nouveau_fence *fence) void nouveau_fence_next(struct nouveau_screen *screen) { - if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) - nouveau_fence_emit(screen->fence.current); + if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) { + if (screen->fence.current->ref > 1) + nouveau_fence_emit(screen->fence.current); + else + return; + } nouveau_fence_ref(NULL, &screen->fence.current); -- 2.4.9 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] RFC: drop glamor from nouveau ddx
On Tue, Jul 7, 2015 at 5:05 PM, Ben Skeggs skeg...@gmail.com wrote: On 8 July 2015 at 06:06, Ilia Mirkin imir...@alum.mit.edu wrote: Ben, Looks like the reality is that glamor is just not hooked up properly in the nouveau DDX. Mainly it's missing DRI2, which in turn means no core GL contexts, and probably lots of other issues. While this could probably be fixed somehow, I doubt there's any advantage to using the nouveau DDX over something like modesetting nowadays. How would you feel about dropping glamor support from the nouveau ddx and failing to load for GPUs that don't have EXA support (unless AccelMode = none is forced for them). That way it'll fall back to loading modesetting which should be properly set up for DRI2 and so on. I have no objections to this. In fact, in Fedora at least (I floated the idea in #nouveau a while back too), in the near future I plan on having the DDX fail to load on all GPUs where modesetting+glamor can be used (unless overridden by a config option). IMHO that's a little strong (I assume you mean nv50+ here?). In fact I'm planning to complete my Maxwell EXA impl. The current reality is that modesetting+glamor doesn't render correctly at least on maxwell, but possibly others as well. The EXA paths are very well tested and are stable. I think not relying on mesa in the DDX is a nice advantage too. -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 1/2] nouveau/compiler: fix trivial compiler warnings
Compiler is wrong. On Wed, Jul 8, 2015 at 2:27 PM, Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> wrote: nouveau_compiler.c: In function ‘main’: nouveau_compiler.c:216:27: warning: ‘code’ may be used uninitialized in this function [-Wmaybe-uninitialized] printf("%08x ", code[i / 4]); ^ nouveau_compiler.c:215:4: warning: ‘size’ may be used uninitialized in this function [-Wmaybe-uninitialized] for (i = 0; i < size; i += 4) { Signed-off-by: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> --- src/gallium/drivers/nouveau/nouveau_compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c index 8660498..ca128b5 100644 --- a/src/gallium/drivers/nouveau/nouveau_compiler.c +++ b/src/gallium/drivers/nouveau/nouveau_compiler.c @@ -144,7 +144,7 @@ main(int argc, char *argv[]) const char *filename = NULL; FILE *f; char text[65536] = {0}; - unsigned size, *code; + unsigned size = 0, *code = NULL; for (i = 1; i < argc; i++) { if (!strcmp(argv[i], "-a")) -- 2.4.5 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] CUDA fixed VA allocations and sparse mappings
On Mon, Jul 6, 2015 at 8:42 PM, Andrew Chew ac...@nvidia.com wrote: Hello, I am currently looking into ways to support fixed virtual address allocations and sparse mappings in nouveau, as a step towards supporting CUDA. CUDA requires that the GPU virtual address for a given buffer match the CPU virtual address. Therefore, when mapping a CUDA buffer, we have to have a way of specifying a particular virtual address to map to (we would ask that the CPU virtual address be used). Currently, as I understand it, the allocator implemented in nvkm/core/mm.c, used to provision virtual addresses, doesn't allow for this (but it's very easy to modify the allocator slightly to allow for this, which I have done locally in my experiments). In addition, the CUDA use case typically involves allocating a big chunk of address space ahead of time as a way to reserve that chunk for future CUDA use. It then maps individual buffers into that address space as needed. Currently, the virtual address allocation is done during buffer mapping, so in order to support these sparse mappings, it seems to me that the virtual address allocation and buffer mapping need to be decoupled into separate operations. My current strawman proposal for supporting this is to introduce two new ioctls DRM_IOCTL_NOUVEAU_AS_ALLOC and DRM_IOCTL_NOUVEAU_AS_FREE, that look roughly like this: #define NOUVEAU_AS_ALLOC_FLAGS_FIXED_OFFSET 0x1 struct drm_nouveau_as_alloc { uint64_t pages; /* in, pages */ uint32_t page_size; /* in, bytes */ uint32_t flags; /* in */ uint64_t offset;/* in/out, byte address */ }; struct drm_nouveau_as_free { uint64_t offset;/* in, byte address */ }; These ioctls just call into the allocator to allocate a range of addresses, resulting in a struct nvkm_vma that tracks that allocation (or releases the struct nvkm_vma back into the virtual address pool in the case of the free ioctl). If NOUVEAU_AS_ALLOC_FLAGS_FIXED_OFFSET is set, offset specifies the requested virtual address. 
Otherwise, an arbitrary address will be allocated. Well, this can't just be an address space. You still need bo's, if this is to work with nouveau -- it has to know when to swap things in and out, when they're used, etc. (and/or move between VRAM and GART and system/swap). I suspect that your target here are the GK20A and GM20B chips which don't have dedicated VRAM, but the ioctl's need to work for everything. Would it be sufficient to extend NOUVEAU_GEM_NEW or create a NOUVEAU_GEM_NEW_FIXED or something? IOW, why do have to separate the concept of a GEM object and a VM allocation? In addition to this, a way to map/unmap buffers is needed. Ordinarily, one would just use DRM_IOCTL_PRIME_FD_TO_HANDLE to import and map a dmabuf into gem. However, this ioctl will try to grab the virtual address range for this buffer, which will fail in the CUDA case since the virtual address range has been reserved ahead of time. So we perhaps introduce a set of ioctls to map/unmap buffers on top of an already existing virtual address allocation. My suggestion above is an alternative to this, right? I think dmabufs tend to be used for sharing between devices. I suspect there's more going on here that I don't understand though -- I assume the CUDA use-case is similar to the HSA use-case -- being able to build up data structures that point to one another on the CPU and then process them on the GPU? Can you detail a specific use-case perhaps, including the interactions with the GPU and its address space? Jérôme, I believe you were doing the HSA kernel implementation. Perhaps you'd have some feedback on this proposal? Cheers, -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [RFC PATCH 00/11] Implement ARB_cull_distance
On Wed, Jul 8, 2015 at 4:04 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: On 25.05.2015 17:07, Ilia Mirkin wrote: On Mon, May 25, 2015 at 9:40 AM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: On 25.05.2015 07:17, Dave Airlie wrote: On 25 May 2015 at 08:11, Marek Olšák mar...@gmail.com wrote: It's the same on Radeon. There are 2x ClipOrCullDistance output vectors and a mask saying it should clip or cull or do nothing. Marek My thinking was gallium should have a single semantic and a mask in the shader definition maybe. though it doesn't solve the does nvidia do the right thing with cull[0] and clip[0], and what is the right thing. Dave. I'm still convinced that both clip[0] and cull[0] should be possible. Plus i have written a shader_test for this a while ago which you pushed to piglit (fs-cull-and-clip-distance-different.shader_test). If i remember right nvidia passed that test just fine. My take (and note that I last read the extension many months ago) is that you're supposed to figure out the max gl_ClipDistance[] written, and then write all your cull distances above that. So if you, e.g., have something like gl_ClipDistance[5] = 1; gl_CullDistance[0] = 1; Then it would decide that there are 6 clip distances (or if there's an explicit out float gl_ClipDistance[n], then use that), and 1 cull distance. In the TGSI, I'm thinking this might look approximately like PROPERTY CULL_MASK (16) DCL OUT[0], CLIPDIST[0] DCL OUT[1], CLIPDIST[1] MOV OUT[1].y, 1 (clip distance[5]) MOV OUT[1].z, 1 (cull distance[0]) Then basically you'd have (rast-clip_enable shader-actual_clip_writes_mask) | cull_mask = the enabled distances cull_mask = cull mask This would work *very* well for nouveau, not sure how suitable it is for other hardware. Cheers, -ilia I wonder where this step should be implemented after all. 
It was brought up that llvmpipe already supports cull_distance (it does!), so maybe we should implement this in the drivers to evade llvmpipe breakage. Any suggestions appreciated :) Tobias I believe that the later feedback from Brian was that my approach was a bad one and we should use CULLDIST instead, which also reflects how GL has it. However it's important to specify *somewhere* how many clip distances are used since it all gets lowered into the 2x vec4. It might be annoying to derive it from writes to CLIPDIST[0/1].xyzw dest masks. Although nouveau might already do that anyways... -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 1/2] nouveau/compiler: fix trivial compiler warnings
I don't mind telling people that the compiler is wrong :) On Wed, Jul 8, 2015 at 3:53 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: On 08.07.2015 21:42, Emil Velikov wrote: On 8 July 2015 at 20:34, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: Mh i'm not aware of me ever changed the nouveau_compiler. But i'm happy to see this made you laugh, so it has something positive at least... :/ Story time: This particular compiler warning has been brought up (incl here) four or five times. Each time, Ilia feels reluctant about the fix as the (gcc) compiler gets it wrong. Personally I do not see a problem with explicitly initialising the variable at this instance, yet I'm curious for how long Ilia will say no to this (type of) patch(es) :-P No offence, I just find it funny. Emil Oh i did even answer in a thread for a patch from Martin where he propose the same change (even with the same prefix :D). Ilia maybe you should take this after all, as it seems you are haunted by this :P ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 2/2] nv50/ir: fix a compiler warning with debug-only code
I suspect the issue is actually that u_debug.h isn't included. It defines assert to be debug_assert, which in turn is #define debug_assert(expr) (void)(0 (expr)) which should cause the relevant var to be seen as used. On Wed, Jul 8, 2015 at 3:40 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: On 08.07.2015 21:34, Emil Velikov wrote: On 8 July 2015 at 19:27, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: codegen/nv50_ir_emit_nv50.cpp: In member function ‘void nv50_ir::CodeEmitterNV50::emitLOAD(const nv50_ir::Instruction*)’: codegen/nv50_ir_emit_nv50.cpp:620:12: warning: unused variable ‘offset’ [-Wunused-variable] int32_t offset = i-getSrc(0)-reg.data.offset; Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 67ea6df..86b16f2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -616,8 +616,11 @@ CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) void CodeEmitterNV50::emitLOAD(const Instruction *i) { - DataFile sf = i-src(0).getFile(); +#ifdef DEBUG int32_t offset = i-getSrc(0)-reg.data.offset; +#endif + assert is (normally) guarded by NDEBUG. Mesa/gallium has an in-house replacement, which (not 100% sure) should be fine as well. -Emil As far as i can see it in u_debug.h assert (debug_assert) is guarded by DEBUG as the above change... ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] remove glamor support
If you want glamor, just use modesetting instead. --- configure.ac | 11 --- man/nouveau.man | 3 +- src/Makefile.am | 2 - src/drmmode_display.c | 7 -- src/nouveau_glamor.c | 253 -- src/nouveau_glamor.h | 33 --- src/nouveau_present.c | 29 +- src/nouveau_wfb.c | 8 +- src/nouveau_xv.c | 4 - src/nv_driver.c | 16 src/nv_type.h | 1 - 11 files changed, 3 insertions(+), 364 deletions(-) delete mode 100644 src/nouveau_glamor.c delete mode 100644 src/nouveau_glamor.h diff --git a/configure.ac b/configure.ac index 03563c1..9c77f94 100644 --- a/configure.ac +++ b/configure.ac @@ -140,17 +140,6 @@ if test x$have_list_h = xyes; then #include list.h]) fi -AC_CHECK_HEADERS([glamor.h],[found_glamor_header=yes],[found_glamor_header=no], -[#include xorg-server.h]) -AC_MSG_CHECKING([whether to include GLAMOR support]) -if test x$found_glamor_header = xyes pkg-config --exists xorg-server = 1.15.99.901 -then - AC_DEFINE(HAVE_GLAMOR, 1, [Build support for glamor acceleration]) - AC_MSG_RESULT([yes]) -else - AC_MSG_RESULT([no]) -fi - AC_CONFIG_FILES([ Makefile src/Makefile diff --git a/man/nouveau.man b/man/nouveau.man index 129bb7f..c39c113 100644 --- a/man/nouveau.man +++ b/man/nouveau.man @@ -81,8 +81,7 @@ are supported: Enable or disable the HW cursor. Default: on. .TP .BI Option \*qAccelMethod\*q \*q string \*q -Specify the acceleration method. One of \*qnone\*q, \*qexa\*q, or -\*qglamor\*q. Default: exa, except for GMxxx which default to glamor. +Specify the acceleration method. One of \*qnone\*q, or \*qexa\*q. Default: exa. .TP .BI Option \*qNoAccel\*q \*q boolean \*q Disable or enable acceleration. Default: acceleration is enabled. 
diff --git a/src/Makefile.am b/src/Makefile.am index 9d39a00..1e04ddf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,7 +35,6 @@ nouveau_drv_la_SOURCES = \ nouveau_copy90b5.c \ nouveau_copya0b5.c \ nouveau_exa.c nouveau_xv.c nouveau_dri2.c \ -nouveau_glamor.c \ nouveau_present.c \ nouveau_sync.c \ nouveau_wfb.c \ @@ -123,7 +122,6 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \ shader/Makefile \ nouveau_local.h \ nouveau_copy.h \ -nouveau_glamor.h \ nouveau_present.h \ nouveau_sync.h \ nv_const.h \ diff --git a/src/drmmode_display.c b/src/drmmode_display.c index cd13820..6495961 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -42,8 +42,6 @@ #include libudev.h #endif -#include nouveau_glamor.h - static Bool drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height); typedef struct { int fd; @@ -107,8 +105,6 @@ static inline struct nouveau_pixmap * drmmode_pixmap(PixmapPtr ppix) { NVPtr pNv = NVPTR(xf86ScreenToScrn(ppix-drawable.pScreen)); - if (pNv-AccelMethod == GLAMOR) - return nouveau_glamor_pixmap_get(ppix); return nouveau_pixmap(ppix); } @@ -1393,9 +1389,6 @@ drmmode_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) crtc-rotation, crtc-x, crtc-y); } - if (pNv-AccelMethod == GLAMOR) - nouveau_glamor_create_screen_resources(scrn-pScreen); - if (old_fb_id) drmModeRmFB(drmmode-fd, old_fb_id); nouveau_bo_ref(NULL, old_bo); diff --git a/src/nouveau_glamor.c b/src/nouveau_glamor.c deleted file mode 100644 index b8bca17..000 --- a/src/nouveau_glamor.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright 2014 Red Hat Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the Software), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs
[Nouveau] [PATCH 2/2] remove maxwell support for now
There is no EXA acceleration, so users are better off with modesetting. --- src/nv_driver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nv_driver.c b/src/nv_driver.c index a5ffbce..16a9029 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -389,7 +389,6 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev) case 0xe0: case 0xf0: case 0x100: - case 0x110: break; default: xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02x\n", chipset); -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] RFC: drop glamor from nouveau ddx
On Tue, Jul 7, 2015 at 5:16 PM, Ben Skeggs skeg...@gmail.com wrote: On 8 July 2015 at 07:09, Ilia Mirkin imir...@alum.mit.edu wrote: On Tue, Jul 7, 2015 at 5:05 PM, Ben Skeggs skeg...@gmail.com wrote: On 8 July 2015 at 06:06, Ilia Mirkin imir...@alum.mit.edu wrote: Ben, Looks like the reality is that glamor is just not hooked up properly in the nouveau DDX. Mainly it's missing DRI2, which in turn means no core GL contexts, and probably lots of other issues. While this could probably be fixed somehow, I doubt there's any advantage to using the nouveau DDX over something like modesetting nowadays. How would you feel about dropping glamor support from the nouveau ddx and failing to load for GPUs that don't have EXA support (unless AccelMode = none is forced for them). That way it'll fall back to loading modesetting which should be properly set up for DRI2 and so on. I have no objections to this. In fact, in Fedora at least (I floated the idea in #nouveau a while back too), in the near future I plan on having the DDX fail to load on all GPUs where modesetting+glamor can be used (unless overridden by a config option). IMHO that's a little strong (I assume you mean nv50+ here?). In fact I'm planning to complete my Maxwell EXA impl. The current reality is that modesetting+glamor doesn't render correctly at least on maxwell, but possibly others as well. The EXA paths are very well tested and are stable. I think not relying on mesa in the DDX is a nice advantage too. The reality is that this is what people will be using when the big switch to wayland by default happens, and the idea is to limit the number of codepaths we have to care about and maintain. Yeah, in 100 years when it actually works :p I'm aware that there are a few rendering issues left in glamor (I'm not sure if it's glamor, or our 3D driver having some bugs), but we should probably just fix those :) I'm all for fixing bugs in the 3D drivers. But until that happens, no reason that users should suffer. 
[I also definitely know some people specially don't have mesa installed so that they get 2d accel but no 3d accel since nouveau can be so hang-y.] I believe Dave plans on doing something similar in Intel/Radeon too. I think those kernel drivers tend to be a lot more stable. The nouveau kernel driver hangs left and right though under any actual 3d load. And yeah, the 2d accel is all 3d anyways, but it's a lot less code and it's been way more tested. -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] CUDA fixed VA allocations and sparse mappings
On Tue, Jul 7, 2015 at 8:07 PM, C Bergström cbergst...@pathscale.com wrote: On Wed, Jul 8, 2015 at 6:58 AM, Ben Skeggs skeg...@gmail.com wrote: On 8 July 2015 at 09:53, C Bergström cbergst...@pathscale.com wrote: regarding Fixed address allocations weren't going to be part of that, but I see that it makes sense for a variety of use cases. One question I have here is how this is intended to work where the RM needs to make some of these allocations itself (for graphics context mapping, etc), how should potential conflicts with user mappings be handled? As an initial implemetation you can probably assume that the GPU offloading is in exclusive mode. Basically that the CUDA or OpenACC code has full ownership of the card. The Tesla cards don't even have a video out on them. To complicate this even more - some offloading code has very long running kernels and even worse - may critically depend on using the full available GPU ram. (Large matrix sizes and soon big Fortran arrays or complex data types) This doesn't change that, to setup the graphics engine, the driver needs to map various system-use data structures into the channel's address space *somewhere* :) I'm not sure I follow exactly what you mean, but I think the answer is - don't setup the graphics engine if you're in compute mode. Doing that, iiuc, will at least provide a start to support for compute. Anyone who argues that graphics+compute is critical to have working at the same time is probably a 1%. On NVIDIA GPUs, compute _is_ part of the graphics engine... aka PGRAPH. ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH] nv50, nvc0: enable at least one color RT if alphatest is enabled
Fixes the following piglits: fbo-alphatest-nocolor fbo-alphatest-nocolor-ff Signed-off-by: Ilia Mirkin imir...@alum.mit.edu Cc: mesa-sta...@lists.freedesktop.org --- The nv50 bits need testing, only have a GK208 on-hand. Will be sure to test before pushing. src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 18 ++ src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 18 ++ 2 files changed, 36 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 116bf4b..ead4b29 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50, nv50_fp_linkage_validate(nv50); } +/* alpha test is disabled if there are no color RTs, so make sure we have at + * least one if alpha test is enabled. Note that this must run after + * nvc0_validate_fb, otherwise that will override the RT count setting. 
+ */ +static void +nv50_validate_derived_2(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50-base.pushbuf; + + if (nv50-zsa nv50-zsa-pipe.alpha.enabled + nv50-framebuffer.nr_cbufs == 0) { + nv50_fb_set_null_rt(push, 0); + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 4) | 1); + } +} + static void nv50_validate_clip(struct nv50_context *nv50) { @@ -456,6 +473,7 @@ static struct state_validate { { nv50_gp_linkage_validate,NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, { nv50_validate_derived_rs,NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, +{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER }, { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index c52399a..785e52e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -535,6 +535,23 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0) } } +/* alpha test is disabled if there are no color RTs, so make sure we have at + * least one if alpha test is enabled. Note that this must run after + * nvc0_validate_fb, otherwise that will override the RT count setting. 
+ */ +static void +nvc0_validate_derived_2(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0-base.pushbuf; + + if (nvc0-zsa nvc0-zsa-pipe.alpha.enabled + nvc0-framebuffer.nr_cbufs == 0) { + nvc0_fb_set_null_rt(push, 0); + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 4) | 1); + } +} + static void nvc0_switch_pipe_context(struct nvc0_context *ctx_to) { @@ -597,6 +614,7 @@ static struct state_validate { { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | NVC0_NEW_RASTERIZER }, +{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | NVC0_NEW_VERTPROG | NVC0_NEW_TEVLPROG | -- 2.3.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH v2] pmu: fix queued messages while getting no IRQ
On Sat, Nov 14, 2015 at 1:44 PM, Karol Herbst wrote: > I encountered while stresstesting the reclocking code, that rarely (1 out of > 20.000+ requests) we don't get any IRQ in nvkm_pmu_intr. > > This means we have a queued message on the pmu, but nouveau doesn't read it > and > waits infinitely in nvkm_pmu_send: > if (reply) { > wait_event(pmu->recv.wait, (pmu->recv.process == 0)); > > therefore let us use wait_event_timeout with a 1s timeout frame and just check > whether there is a message queued and handle it if there is one. > > Return -ETIMEDOUT whenever we timed out and there is no message queued or when > we hit another timeout while trying to read the message without getting any > IRQ > > The benefit of not using wait_event is, that we don't have a kworker waiting > on an event, which makes it easier to reload the module at runtime, which > helps > me developing on nouveau on my laptop a lot, because I don't need to reboot > anymore > > Nevertheless, we shouldn't use wait_event here, because we can't guarantee > any > answer at all, can we? 
> > v2: moved it into a new function > > Signed-off-by: Karol Herbst > --- > drm/nouveau/nvkm/subdev/pmu/base.c | 43 > -- > 1 file changed, 37 insertions(+), 6 deletions(-) > > diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c > b/drm/nouveau/nvkm/subdev/pmu/base.c > index 6b2007f..fafbe2a 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/base.c > +++ b/drm/nouveau/nvkm/subdev/pmu/base.c > @@ -43,6 +43,41 @@ nvkm_pmu_handle_reclk_request(struct work_struct *work) > nvkm_clk_pmu_reclk_request(clk, pmu->intr.data[0]); > } > > +static int > +wait_for_pmu_reply(struct nvkm_pmu *pmu, u32 reply[2]) > +{ > + struct nvkm_subdev *subdev = >subdev; > + struct nvkm_device *device = subdev->device; > + unsigned long jiffies = msecs_to_jiffies(1000); > + > + if (!wait_event_timeout(pmu->recv.wait, (pmu->recv.process == 0), > jiffies)) { > + u32 addr = nvkm_rd32(device, 0x10a4cc); > + nvkm_error(subdev, "wait on reply timed out\n"); > + > + if (addr != nvkm_rd32(device, 0x10a4c8)) { > + nvkm_error(subdev, "found queued message without > getting an interrupt\n"); > + schedule_work(>recv.work); > + > + if (!wait_event_timeout(pmu->recv.wait, > (pmu->recv.process == 0), jiffies)) { > + nvkm_error(subdev, "failed to repair PMU > state\n"); > + goto reply_error; > + } > + } else Not sure whether kernel style dictates this, but I really hate these "hanging" else's... both sides should have brackets if either one does. 
> + goto reply_error; > + } > + > + reply[0] = pmu->recv.data[0]; > + reply[1] = pmu->recv.data[1]; > + mutex_unlock(&subdev->mutex); > + return 0; > + > +reply_error: > + reply[0] = 0; > + reply[1] = 0; > + mutex_unlock(&subdev->mutex); > + return -ETIMEDOUT; > +} > + > int > nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2], > u32 process, u32 message, u32 data0, u32 data1) > @@ -88,12 +123,8 @@ nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2], > nvkm_wr32(device, 0x10a580, 0x00000000); > > /* wait for reply, if requested */ > - if (reply) { > - wait_event(pmu->recv.wait, (pmu->recv.process == 0)); > - reply[0] = pmu->recv.data[0]; > - reply[1] = pmu->recv.data[1]; > - mutex_unlock(&subdev->mutex); > - } > + if (reply) > + return wait_for_pmu_reply(pmu, reply); Having one function lock and another unlock is a disaster waiting to happen. Perhaps make wait_for_pmu_reply not handle the unlock and instead do int ret = 0; if (reply) ret = wait_for_pmu_reply() return ret; Additionally leaving the reply[] filling in this function would allow you to avoid annoying error handling and goto's in the other function. > > return 0; > } > -- > 2.6.3 > > ___ > Nouveau mailing list > Nouveau@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] llvm TGSI backend (WIP) questions
On Fri, Nov 13, 2015 at 9:25 AM, Emil Velikov wrote: > Hello Hans, > > Not to muddy the waters or anything, have you thought about the NIR > integration that Rob was thinking about ? > I'm pretty sure he'll be happy to have extra people helping him out. How would that in any way plug into llvm or nouveau? There's no OpenCL C -> NIR, and there's no NIR -> nv50 IR... -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [Mesa-dev] gallium state tracker calls calloc for 0 sizes arrays ?
On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher alexdeuc...@gmail.com wrote: On Thu, Aug 27, 2015 at 1:55 PM, Hans de Goede hdego...@redhat.com wrote: Hi, On 27-08-15 15:46, Marek Olšák wrote: On Thu, Aug 27, 2015 at 3:09 PM, Hans de Goede hdego...@redhat.com wrote: Hi All, While debugging: https://bugzilla.redhat.com/show_bug.cgi?id=1008089 I made a apitrace recording of the a single slide transition animation, and since I suspected memory corruption replayed it using ElectrFence + glretrace, this finds a 0 sized array allocation at src/mesa/state_tracker/st_glsl_to_tgsi.cpp: 5565: if (proginfo-Parameters) { t-constants = (struct ureg_src *) calloc(proginfo-Parameters-NumParameters, sizeof(t-constants[0])); And if I protect the code against that one, another one at 5618: t-immediates = (struct ureg_src *) calloc(program-num_immediates, sizeof(struct ureg_src)); With the regular glibc malloc these both succeed as it actually returns a valid memory address (posix says it may also return NULL) I believe that the fragment program in question comes from: src/mesa/main/state.c update_program() and then from the else if (ctx-FragmentProgram._MaintainTexEnvProgram) { /* Use fragment program generated from fixed-function state */ } block. Interestingly enough if I allow malloc(0) to proceed from ElectricFence, then the glretrace runs fine, and even renders correctly, where as running the same gl command stream from libreoffice impress leads to missrendering on nv3c. So 2 questions: 1) Is it normal / expected for st_translate_program() to get called with an empty but not NULL proginfo-Parameters resp. num_immediates == 0 ? If not where would I begin to look for finding the culprit of this ? Yes, it's normal. OK, thanks for the clear answer on this. 2) Since the glretrace does work outside of libreoffice impress, I think it may have something to do with the visual chosen by libreoffice impress, is there an easy way to find out what visual lo is choosing? 
No, it's not because of the visual. It seems to me that libreoffice changed the behavior of malloc and calloc. I'm pretty sure that this is not libreoffice changing malloc / calloc, it links normally to libc, and the same slide transition works fine with an nv84 card which also has a gallium based mesa driver. I really believe this is due to libreoffice doing something opengl related differently then glretrace, be it the visual or something else back buffer related ... Does libreoffice use llvm? I have vague recollections of there being issues with llvm and libreoffice in the past because radeonsi uses llvm as well. FWIW the nv30 gallium driver will only use llvm as part of 'draw' when falling back to the swtnl path. This should be extremely rare. But easy enough to build mesa with --disable-gallium-llvm to double-check (or what was the env var? DRAW_USE_LLVM=0 or something along those lines). -ilia ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] Loading vbios on OF
Hey Ben, So with the following totally-hack-patch below, I get OF to load (but I have to force it, checksum fails). Of note is the following: -r--r--r-- 1 root root 2403 Aug 28 09:31 /proc/device-tree/pci@0,f000/NVDA,Parent@10/NVDA,BMP I'm not sure why you require the vbios fetches to be multiples of 4 bytes, but that messes things up here. Also I'm not sure where to get this bios size from in the first place, perhaps we should just add a -size() callback? Don't all the backends (except pramin) know how much vbios they have? diff --git a/drm/nouveau/nvkm/subdev/bios/image.c b/drm/nouveau/nvkm/subdev/bios /image.c index 74b14cf..ce0b549 100644 --- a/drm/nouveau/nvkm/subdev/bios/image.c +++ b/drm/nouveau/nvkm/subdev/bios/image.c @@ -47,11 +47,17 @@ nvbios_imagen(struct nvkm_bios *bios, struct nvbios_image *i mage) return false; } - if (!(data = nvbios_pcirTp(bios, image-base, ver, hdr, pcir))) - return false; - image-size = pcir.image_size; - image-type = pcir.image_type; - image-last = pcir.last; + if (!(data = nvbios_pcirTp(bios, image-base, ver, hdr, pcir))) { + nvkm_warn(subdev, PCIR section missing\n); + image-size = 2403; + image-type = 0; + image-last = true; +return true; + } else { + image-size = pcir.image_size; + image-type = pcir.image_type; + image-last = pcir.last; + } if (image-type != 0x70) { if (!(data = nvbios_npdeTp(bios, image-base, npde))) diff --git a/drm/nouveau/nvkm/subdev/bios/shadow.c b/drm/nouveau/nvkm/subdev/bios/shadow.c index 792f017..b7a2249 100644 --- a/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -45,7 +45,7 @@ shadow_fetch(struct nvkm_bios *bios, struct shadow *mthd, u32 upto) u32 read = mthd-func-read(data, start, limit - start, bios); bios-size = start + read; } - return bios-size = limit; + return bios-size = upto; } static int @@ -55,7 +55,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) struct nvbios_image image; int score = 1; - if (!shadow_fetch(bios, 
mthd, offset + 0x1000)) { + if (!shadow_fetch(bios, mthd, offset + 0x400)) { nvkm_debug(subdev, %08x: header fetch failed\n, offset); return 0; } diff --git a/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drm/nouveau/nvkm/subdev/bios/shadowof.c index 29a37f0..066bc1f 100644 --- a/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -22,6 +22,7 @@ */ #include priv.h +#include core/pci.h #if defined(__powerpc__) struct priv { @@ -33,7 +34,9 @@ static u32 of_read(void *data, u32 offset, u32 length, struct nvkm_bios *bios) { struct priv *priv = data; - if (offset + length = priv-size) { +printk(KERN_ERR offset: %d, length: %d, size: %d\n, offset, length, priv-size); + if (offset = priv-size) { + length = min_t(u32, length, priv-size - offset); memcpy_fromio(bios-data + offset, priv-data + offset, length); return length; } ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] nv3x libreoffice impress opengl animations not working
On Fri, Aug 28, 2015 at 4:54 AM, Hans de Goede hdego...@redhat.com wrote: Hi, On 27-08-15 20:19, Ilia Mirkin wrote: On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher alexdeuc...@gmail.com wrote: snip 2) Since the glretrace does work outside of libreoffice impress, I think it may have something to do with the visual chosen by libreoffice impress, is there an easy way to find out what visual lo is choosing? No, it's not because of the visual. It seems to me that libreoffice changed the behavior of malloc and calloc. I'm pretty sure that this is not libreoffice changing malloc / calloc, it links normally to libc, and the same slide transition works fine with an nv84 card which also has a gallium based mesa driver. I really believe this is due to libreoffice doing something opengl related differently then glretrace, be it the visual or something else back buffer related ... Does libreoffice use llvm? I have vague recollections of there being issues with llvm and libreoffice in the past because radeonsi uses llvm as well. FWIW the nv30 gallium driver will only use llvm as part of 'draw' when falling back to the swtnl path. This should be extremely rare. But easy enough to build mesa with --disable-gallium-llvm to double-check (or what was the env var? DRAW_USE_LLVM=0 or something along those lines). I've tried building with --disable-gallium-llvm, this does not help, this is not really surprising since on Fedora both libreoffice and mesa use the system llvm, so there should be no problems with them expecting different llvm versions. 
I've done some further debugging adding some debug printf-s to the texture creation paths for nv3x, this bit is interesting, glretrace does: nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0 nv30_miptree_create 1350x863 uniform_pitch 5440 usage 0 flags 0 bind 1 target 2 So it gets a texture from a handle, which I believe is the child-window in which the animation will be shown, and then create another texture with the same dimensions to serve as back buffer I presume. ooimpress however does this: nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0 nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind a target 2 nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind 1 target 2 Notice how it is creating 2 (back?) buffers and they are twice the size of the sheet area of impress to which the animation gets rendered. bind a = rt/sampler view, bind 1 = depth/stencil. However nv3x doesn't do NPOT textures... so those sizes are a bit odd. Perhaps there's some logic that attempts to round-up-to-nearest-POT size, but instead multiplies width by 2? I believe this is a clue to the root cause of the problem, but after this I'm sorta stuck. Anyone got any hints on how to debug this further / where to look ? Thanks Regards, Hans ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] nv3x libreoffice impress opengl animations not working
On Mon, Aug 31, 2015 at 8:58 AM, Hans de Goede <hdego...@redhat.com> wrote: > Hi, > > > On 28-08-15 11:02, Ilia Mirkin wrote: >> >> On Fri, Aug 28, 2015 at 4:54 AM, Hans de Goede <hdego...@redhat.com> >> wrote: >>> >>> Hi, >>> >>> On 27-08-15 20:19, Ilia Mirkin wrote: >>>> >>>> >>>> On Thu, Aug 27, 2015 at 1:59 PM, Alex Deucher <alexdeuc...@gmail.com> >>>> wrote: >>> >>> >>> >>> >>> >>>>>>>> 2) Since the glretrace does work outside of libreoffice impress, I >>>>>>>> think >>>>>>>> it may have something to do with the visual chosen by libreoffice >>>>>>>> impress, >>>>>>>> is there an easy way to find out what visual lo is choosing? >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> No, it's not because of the visual. It seems to me that libreoffice >>>>>>> changed the behavior of malloc and calloc. >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> I'm pretty sure that this is not libreoffice changing malloc / calloc, >>>>>> it links normally to libc, and the same slide transition works fine >>>>>> with an nv84 card which also has a gallium based mesa driver. >>>>>> >>>>>> I really believe this is due to libreoffice doing something opengl >>>>>> related differently then glretrace, be it the visual or something else >>>>>> back buffer related ... >>>>>> >>>>> >>>>> Does libreoffice use llvm? I have vague recollections of there being >>>>> issues with llvm and libreoffice in the past because radeonsi uses >>>>> llvm as well. >>>> >>>> >>>> >>>> FWIW the nv30 gallium driver will only use llvm as part of 'draw' when >>>> falling back to the swtnl path. This should be extremely rare. But >>>> easy enough to build mesa with --disable-gallium-llvm to double-check >>>> (or what was the env var? DRAW_USE_LLVM=0 or something along those >>>> lines). 
>>> >>> >>> >>> I've tried building with --disable-gallium-llvm, this does not help, >>> this is not really surprising since on Fedora both libreoffice and >>> mesa use the system llvm, so there should be no problems with them >>> expecting different llvm versions. >>> >>> I've done some further debugging adding some debug printf-s to the >>> texture creation paths for nv3x, this bit is interesting, glretrace >>> does: >>> >>> nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0 >>> nv30_miptree_create 1350x863 uniform_pitch 5440 usage 0 flags 0 bind 1 >>> target 2 >>> >>> So it gets a texture from a handle, which I believe is the child-window >>> in which the animation will be shown, and then create another texture >>> with the same dimensions to serve as back buffer I presume. >>> >>> ooimpress however does this: >>> >>> nv30_miptree_from_handle 1350x863 uniform_pitch 6144 usage 0 flags 0 >>> nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind a >>> target 2 >>> nv30_miptree_create 2700x1726 uniform_pitch 10816 usage 0 flags 0 bind 1 >>> target 2 >>> >>> Notice how it is creating 2 (back?) buffers and they are twice the size >>> of >>> the "sheet" area of impress to which the animation gets rendered. >> >> >> bind a = rt/sampler view, bind 1 = depth/stencil. However nv3x doesn't >> do NPOT textures... so those sizes are a bit odd. Perhaps there's some >> logic that attempts to round-up-to-nearest-POT size, but instead >> multiplies width by 2? > > > Ok, some debugging / poking at thing further I now know where the multiply > by 2 comes from, the pipe_resource *tmpl passed into nv30_miptree_create > has templ->nr_samples = 4, and nv30_miptree_create has: > >switch (tmpl->nr_samples) { >case 4: > mt->ms_mode = 0x4000; > mt->ms_x = 1; > mt->ms_y = 1; > break; >case 2: > mt->ms_mode = 0x3000; > mt->ms_x = 1; > mt->ms_y = 0; > break; >default: > mt->ms_mode = 0x; > mt->ms_x = 0; >
[Nouveau] [PATCH] gr/nv04: fix big endian setting on gr context
Broken since "gr: convert user classes to new-style nvkm_object" Tested on a PPC64 G5 + NV34 Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- drm/nouveau/nvkm/engine/gr/nv04.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/nvkm/engine/gr/nv04.c b/drm/nouveau/nvkm/engine/gr/nv04.c index 426ba00..85c5b7f 100644 --- a/drm/nouveau/nvkm/engine/gr/nv04.c +++ b/drm/nouveau/nvkm/engine/gr/nv04.c @@ -1048,11 +1048,11 @@ nv04_gr_object_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, if (ret == 0) { nvkm_kmap(*pgpuobj); nvkm_wo32(*pgpuobj, 0x00, object->oclass); - nvkm_wo32(*pgpuobj, 0x04, 0x00000000); - nvkm_wo32(*pgpuobj, 0x08, 0x00000000); #ifdef __BIG_ENDIAN - nvkm_mo32(*pgpuobj, 0x08, 0x00080000, 0x00080000); + nvkm_mo32(*pgpuobj, 0x00, 0x00080000, 0x00080000); #endif + nvkm_wo32(*pgpuobj, 0x04, 0x00000000); + nvkm_wo32(*pgpuobj, 0x08, 0x00000000); nvkm_wo32(*pgpuobj, 0x0c, 0x00000000); nvkm_done(*pgpuobj); } -- 2.4.6 ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau