[Mesa-dev] [PATCH 3/3] st/va: flush the context before calling flush_frontbuffer(v2)
so that the texture is rendered to back buffer before calling flush_frontbuffer and can be copied to a different buffer in the function v2: change comment style Signed-off-by: Nayan Deshmukh Reviewed-by: Michel Dänzer --- src/gallium/state_trackers/va/surface.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 00df69d..115db43 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -321,10 +321,14 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s return status; } + /* flush before calling flush_frontbuffer so that rendering is flushed +* to back buffer so the texture can be copied in flush_frontbuffer +*/ + drv->pipe->flush(drv->pipe, NULL, 0); + screen->flush_frontbuffer(screen, tex, 0, 0, vscreen->get_private(vscreen), NULL); - drv->pipe->flush(drv->pipe, NULL, 0); pipe_resource_reference(, NULL); pipe_surface_reference(_draw, NULL); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] st/vdpau: flush the context before calling flush_frontbuffer
so that the texture is rendered to back buffer before calling flush_frontbuffer and can be copied to a different buffer in the function Signed-off-by: Nayan Deshmukh Reviewed-by: Michel Dänzer --- src/gallium/state_trackers/vdpau/presentation.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index 2862eaf..f35d73a 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -271,11 +271,14 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, } vscreen->set_next_timestamp(vscreen, earliest_presentation_time); - pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, - vscreen->get_private(vscreen), NULL); + // flush before calling flush_frontbuffer so that rendering is flushed + // to back buffer so the texture can be copied in flush_frontbuffer pipe->screen->fence_reference(pipe->screen, >fence, NULL); pipe->flush(pipe, >fence, 0); + pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); + pq->last_surf = surf; if (dump_window == -1) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] vl/dri3: handle the case of different GPU(v4.2)
In case of prime when rendering is done on GPU other then the server GPU, use a seprate linear buffer for each back buffer which will be displayed using present extension. v2: Use a seprate linear buffer for each back buffer (Michel) v3: Change variable names and fix coding style (Leo and Emil) v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when a seprate linear buffer is used (Michel) v4.1: remove empty line v4.2: destroy the context and handle the case when create_context fails (Emil) Signed-off-by: Nayan DeshmukhReviewed-by: Leo Liu Acked-by: Michel Dänzer --- src/gallium/auxiliary/vl/vl_winsys_dri3.c | 66 +-- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c index 3d596a6..191a64b 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -49,6 +49,7 @@ struct vl_dri3_buffer { struct pipe_resource *texture; + struct pipe_resource *linear_texture; uint32_t pixmap; uint32_t sync_fence; @@ -69,6 +70,8 @@ struct vl_dri3_screen xcb_present_event_t eid; xcb_special_event_t *special_event; + struct pipe_context *pipe; + struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM]; int cur_back; @@ -82,6 +85,7 @@ struct vl_dri3_screen int64_t last_ust, ns_frame, last_msc, next_msc; bool flushed; + bool is_different_gpu; }; static void @@ -102,6 +106,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn, xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence); xshmfence_unmap_shm(buffer->shm_fence); pipe_resource_reference(>texture, NULL); + if (buffer->linear_texture) + pipe_resource_reference(>linear_texture, NULL); FREE(buffer); } @@ -209,7 +215,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) xcb_sync_fence_t sync_fence; struct xshmfence *shm_fence; int buffer_fd, fence_fd; - struct pipe_resource templ; + struct pipe_resource templ, *pixmap_buffer_texture; struct winsys_handle whandle; unsigned usage; @@ -226,8 
+232,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) goto close_fd; memset(, 0, sizeof(templ)); - templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | -PIPE_BIND_SCANOUT | PIPE_BIND_SHARED; + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; templ.format = PIPE_FORMAT_B8G8R8X8_UNORM; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) templ.height0 = scrn->height; templ.depth0 = 1; templ.array_size = 1; - buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, - ); - if (!buffer->texture) - goto unmap_shm; + if (scrn->is_different_gpu) { + buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, +); + if (!buffer->texture) + goto unmap_shm; + + templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | +PIPE_BIND_LINEAR; + buffer->linear_texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, + ); + pixmap_buffer_texture = buffer->linear_texture; + + if (!buffer->linear_texture) + goto no_linear_texture; + } else { + templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED; + buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, +); + if (!buffer->texture) + goto unmap_shm; + pixmap_buffer_texture = buffer->texture; + } memset(, 0, sizeof(whandle)); whandle.type= DRM_API_HANDLE_TYPE_FD; usage = PIPE_HANDLE_USAGE_EXPLICIT_FLUSH | PIPE_HANDLE_USAGE_READ; scrn->base.pscreen->resource_get_handle(scrn->base.pscreen, NULL, - buffer->texture, , + pixmap_buffer_texture, , usage); buffer_fd = whandle.handle; buffer->pitch = whandle.stride; @@ -271,6 +294,8 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) return buffer; +no_linear_texture: + pipe_resource_reference(>texture, NULL); unmap_shm: xshmfence_unmap_shm(shm_fence); close_fd: @@ -474,6 +499,7 @@ vl_dri3_flush_frontbuffer(struct pipe_screen *screen, struct vl_dri3_screen *scrn = (struct vl_dri3_screen *)context_private; uint32_t options = 
XCB_PRESENT_OPTION_NONE; struct vl_dri3_buffer *back; + struct pipe_box src_box; back = scrn->back_buffers[scrn->cur_back]; if (!back) @@ -485,6 +511,16 @@ vl_dri3_flush_frontbuffer(struct
Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)
Hi Leo, On Tue, Sep 20, 2016 at 1:56 AM, Leo Liuwrote: > > > On 09/17/2016 07:33 AM, Nayan Deshmukh wrote: > > Hi Leo, > > Could you push the patches? I don't have the push access. > > > > Can you rebase all your reviewed patches, and add RB to it, and then you > can send them to me ? > > Surely. :) I will also include changes suggested by emil. > Sorry for too busy to do this for you. > > Regards, > Leo > > > > Regards, > Nayan. > > On Fri, Sep 16, 2016 at 7:44 PM, Leo Liu wrote: > >> This Patch is Reviewed-by: Leo Liu >> >> >> On 09/16/2016 08:51 AM, Nayan Deshmukh wrote: >> >>> In case of prime when rendering is done on GPU other then the >>> server GPU, use a seprate linear buffer for each back buffer >>> which will be displayed using present extension. >>> >>> v2: Use a seprate linear buffer for each back buffer (Michel) >>> v3: Change variable names and fix coding style (Leo and Emil) >>> v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when >>> a seprate linear buffer is used (Michel) >>> v4.1: remove empty line >>> >>> Signed-off-by: Nayan Deshmukh < >>> nayan26deshm...@gmail.com> >>> --- >>> src/gallium/auxiliary/vl/vl_winsys_dri3.c | 61 >>> --- >>> 1 file changed, 48 insertions(+), 13 deletions(-) >>> >>> diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c >>> b/src/gallium/auxiliary/vl/vl_winsys_dri3.c >>> index 3d596a6..e0aaad8 100644 >>> --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c >>> +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c >>> @@ -49,6 +49,7 @@ >>> struct vl_dri3_buffer >>> { >>> struct pipe_resource *texture; >>> + struct pipe_resource *linear_texture; >>>uint32_t pixmap; >>> uint32_t sync_fence; >>> @@ -69,6 +70,8 @@ struct vl_dri3_screen >>> xcb_present_event_t eid; >>> xcb_special_event_t *special_event; >>> + struct pipe_context *pipe; >>> + >>> struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM]; >>> int cur_back; >>> @@ -82,6 +85,7 @@ struct vl_dri3_screen >>> int64_t last_ust, ns_frame, last_msc, next_msc; >>>bool 
flushed; >>> + bool is_different_gpu; >>> }; >>> static void >>> @@ -102,6 +106,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn, >>> xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence); >>> xshmfence_unmap_shm(buffer->shm_fence); >>> pipe_resource_reference(>texture, NULL); >>> + if (buffer->linear_texture) >>> + pipe_resource_reference(>linear_texture, NULL); >>> FREE(buffer); >>> } >>> @@ -209,7 +215,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) >>> xcb_sync_fence_t sync_fence; >>> struct xshmfence *shm_fence; >>> int buffer_fd, fence_fd; >>> - struct pipe_resource templ; >>> + struct pipe_resource templ, *pixmap_buffer_texture; >>> struct winsys_handle whandle; >>> unsigned usage; >>> @@ -226,8 +232,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) >>> goto close_fd; >>>memset(, 0, sizeof(templ)); >>> - templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | >>> -PIPE_BIND_SCANOUT | PIPE_BIND_SHARED; >>> + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; >>> templ.format = PIPE_FORMAT_B8G8R8X8_UNORM; >>> templ.target = PIPE_TEXTURE_2D; >>> templ.last_level = 0; >>> @@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) >>> templ.height0 = scrn->height; >>> templ.depth0 = 1; >>> templ.array_size = 1; >>> - buffer->texture = scrn->base.pscreen->resource_c >>> reate(scrn->base.pscreen, >>> - ); >>> - if (!buffer->texture) >>> - goto unmap_shm; >>> + if (scrn->is_different_gpu) { >>> + buffer->texture = scrn->base.pscreen->resource_c >>> reate(scrn->base.pscreen, >>> +); >>> + if (!buffer->texture) >>> + goto unmap_shm; >>> + >>> + templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | >>> +PIPE_BIND_LINEAR; >>> + buffer->linear_texture = scrn->base.pscreen->resource_c >>> reate(scrn->base.pscreen, >>> + >>> ); >>> + pixmap_buffer_texture = buffer->linear_texture; >>> + >>> + if (!buffer->linear_texture) >>> + goto no_linear_texture; >>> + } else { >>> + templ.bind |= PIPE_BIND_SCANOUT | 
PIPE_BIND_SHARED; >>> + buffer->texture = scrn->base.pscreen->resource_c >>> reate(scrn->base.pscreen, >>> +); >>> + if (!buffer->texture) >>> + goto unmap_shm; >>> + pixmap_buffer_texture = buffer->texture; >>> + } >>> memset(, 0, sizeof(whandle)); >>> whandle.type= DRM_API_HANDLE_TYPE_FD; >>> usage = PIPE_HANDLE_USAGE_EXPLICIT_FLUSH | PIPE_HANDLE_USAGE_READ; >>>
[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more
https://bugs.freedesktop.org/show_bug.cgi?id=97863 Tapani Pälli changed: What |Removed |Added CC| |lem...@gmail.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] st/va: flush the context before calling flush_frontbuffer(v2)
On 16/09/16 07:33 PM, Nayan Deshmukh wrote: > so that the texture is rendered to back buffer before calling > flush_frontbuffer and can be copied to a different buffer in > the function > > v2: change comment style > > Signed-off-by: Nayan Deshmukh> --- > src/gallium/state_trackers/va/surface.c | 6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/va/surface.c > b/src/gallium/state_trackers/va/surface.c > index 00df69d..115db43 100644 > --- a/src/gallium/state_trackers/va/surface.c > +++ b/src/gallium/state_trackers/va/surface.c > @@ -321,10 +321,14 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID > surface_id, void* draw, short s >return status; > } > > + /* flush before calling flush_frontbuffer so that rendering is flushed > +* to back buffer so the texture can be copied in flush_frontbuffer > +*/ > + drv->pipe->flush(drv->pipe, NULL, 0); > + > screen->flush_frontbuffer(screen, tex, 0, 0, > vscreen->get_private(vscreen), NULL); > > - drv->pipe->flush(drv->pipe, NULL, 0); > > pipe_resource_reference(, NULL); > pipe_surface_reference(_draw, NULL); > This patch and patch 3 are Reviewed-by: Michel Dänzer -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 02/10] nir: Add a loop analysis pass
On Fri, 2016-09-16 at 15:25 -0700, Jason Ekstrand wrote: > > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceriwrote: > > > > From: Thomas Helland > > > > > > This pass detects induction variables and calculates the > > > > trip count of loops to be used for loop unrolling. > > > > > > > > I've removed support for float induction values for now, for the > > > > simple reason that they don't appear in my shader-db collection, > > > > and so I don't see it as common enough that we want to pollute the > > > > pass with this in the initial version. > > > > > > > > V2: Rebase, adapt to removal of function overloads > > > > > > > > V3: (Timothy Arceri) > > > > > > - don't try to find trip count if loop terminator conditional is a phi > > > > - fix trip count for do-while loops > > > > - replace conditional type != alu assert with return > > > > - disable unrolling of loops with continues > > > > > > - multiple fixes to memory allocation, stop leaking and don't destroy > > > > structs we want to use for unrolling. > > > > > > - fix iteration count bugs when induction var not on RHS of condition > > > > - add FIXME for && conditions > > > > - calculate trip count for unsigned induction/limit vars > > > > > > > > V4: > > > > - count instructions in a loop > > > > > > - set the limiting_terminator even if we can't find the trip count for > > > > > > all terminators. This is needed for complex unrolling where we handle > > > > 2 terminators and the trip count is unknown for one of them. > > > > - restruct structs so we don't keep information not required after > > > > analysis and remove dead fields. 
> > > > > > - force unrolling in some cases as per the rules in the GLSL IR pass > > > > --- > > > > src/compiler/Makefile.sources | 2 + > > > > src/compiler/nir/nir.h | 36 +- > > > > > > src/compiler/nir/nir_loop_analyze.c | 1012 +++ > > > > src/compiler/nir/nir_metadata.c | 8 +- > > > > 4 files changed, 1056 insertions(+), 2 deletions(-) > > > > create mode 100644 src/compiler/nir/nir_loop_analyze.c > > > > > > > > > > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources > > > > index f5b4f9c..7ed26a9 100644 > > > > --- a/src/compiler/Makefile.sources > > > > +++ b/src/compiler/Makefile.sources > > > > @@ -190,6 +190,8 @@ NIR_FILES = \ > > > > nir/nir_intrinsics.c \ > > > > nir/nir_intrinsics.h \ > > > > nir/nir_liveness.c \ > > > > + nir/nir_loop_analyze.c \ > > > > + nir/nir_loop_analyze.h \ > > > > nir/nir_lower_alu_to_scalar.c \ > > > > nir/nir_lower_atomics.c \ > > > > nir/nir_lower_bitmap.c \ > > > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > > > > index ff7c422..49e8cd8 100644 > > > > --- a/src/compiler/nir/nir.h > > > > +++ b/src/compiler/nir/nir.h > > > > @@ -1549,9 +1549,36 @@ nir_if_last_else_node(nir_if *if_stmt) > > > > } > > > > > > > > typedef struct { > > > > + nir_if *nif; > > > > + > > > > + nir_instr *conditional_instr; > > > > + > > > > + struct list_head loop_terminator_link; > > > > +} nir_loop_terminator; > > > > + > > > > +typedef struct { > > > > + /* Number of instructions in the loop */ > > > > + unsigned num_instructions; > > > > + > > > > + /* How many times the loop is run (if known) */ > > > > + unsigned trip_count; > > > > + bool is_trip_count_known; > > We could use 0 or -1 to indicate "I don't know trip count" instead of an extra boolean. Not sure that it matters much. > > > + > > > > + /* Unroll the loop regardless of its size */ > > > > + bool force_unroll; > > It seems a bit odd to have this decide to force-unroll. This is an analysis pass, not a "make decisions" pass. 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >
Re: [Mesa-dev] [PATCH 02/10] nir: Add a loop analysis pass
I sent this reply on Saturday however is seems something went wrong and it didn't make it out so here it is again. On Fri, 2016-09-16 at 15:25 -0700, Jason Ekstrand wrote: > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceriabora.com> wrote: > > From: Thomas Helland > > > > This pass detects induction variables and calculates the > > trip count of loops to be used for loop unrolling. > > > > I've removed support for float induction values for now, for the > > simple reason that they don't appear in my shader-db collection, > > and so I don't see it as common enough that we want to pollute the > > pass with this in the initial version. > > > > V2: Rebase, adapt to removal of function overloads > > > > V3: (Timothy Arceri) > > - don't try to find trip count if loop terminator conditional is a > > phi > > - fix trip count for do-while loops > > - replace conditional type != alu assert with return > > - disable unrolling of loops with continues > > - multiple fixes to memory allocation, stop leaking and don't > > destroy > > structs we want to use for unrolling. > > - fix iteration count bugs when induction var not on RHS of > > condition > > - add FIXME for && conditions > > - calculate trip count for unsigned induction/limit vars > > > > V4: > > - count instructions in a loop > > - set the limiting_terminator even if we can't find the trip count > > for > > all terminators. This is needed for complex unrolling where we > > handle > > 2 terminators and the trip count is unknown for one of them. > > - restruct structs so we don't keep information not required after > > analysis and remove dead fields. 
> > - force unrolling in some cases as per the rules in the GLSL IR > > pass > > --- > > src/compiler/Makefile.sources | 2 + > > src/compiler/nir/nir.h | 36 +- > > src/compiler/nir/nir_loop_analyze.c | 1012 > > +++ > > src/compiler/nir/nir_metadata.c | 8 +- > > 4 files changed, 1056 insertions(+), 2 deletions(-) > > create mode 100644 src/compiler/nir/nir_loop_analyze.c > > > > diff --git a/src/compiler/Makefile.sources > > b/src/compiler/Makefile.sources > > index f5b4f9c..7ed26a9 100644 > > --- a/src/compiler/Makefile.sources > > +++ b/src/compiler/Makefile.sources > > @@ -190,6 +190,8 @@ NIR_FILES = \ > > nir/nir_intrinsics.c \ > > nir/nir_intrinsics.h \ > > nir/nir_liveness.c \ > > + nir/nir_loop_analyze.c \ > > + nir/nir_loop_analyze.h \ > > nir/nir_lower_alu_to_scalar.c \ > > nir/nir_lower_atomics.c \ > > nir/nir_lower_bitmap.c \ > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > > index ff7c422..49e8cd8 100644 > > --- a/src/compiler/nir/nir.h > > +++ b/src/compiler/nir/nir.h > > @@ -1549,9 +1549,36 @@ nir_if_last_else_node(nir_if *if_stmt) > > } > > > > typedef struct { > > + nir_if *nif; > > + > > + nir_instr *conditional_instr; > > + > > + struct list_head loop_terminator_link; > > +} nir_loop_terminator; > > + > > +typedef struct { > > + /* Number of instructions in the loop */ > > + unsigned num_instructions; > > + > > + /* How many times the loop is run (if known) */ > > + unsigned trip_count; > > + bool is_trip_count_known; > > We could use 0 or -1 to indicate "I don't know trip count" instead of > an extra boolean. Not sure that it matters much. > > > + > > + /* Unroll the loop regardless of its size */ > > + bool force_unroll; > > It seems a bit odd to have this decide to force-unroll. This is an > analysis pass, not a "make decisions" pass. > > > + > > + nir_loop_terminator *limiting_terminator; > > + > > + /* A list of loop_terminators terminating this loop. 
*/ > > + struct list_head loop_terminator_list; > > +} nir_loop_info; > > + > > +typedef struct { > > nir_cf_node cf_node; > > > > struct exec_list body; /** < list of nir_cf_node */ > > + > > + nir_loop_info *info; > > } nir_loop; > > > > static inline nir_cf_node * > > @@ -1576,6 +1603,7 @@ typedef enum { > > nir_metadata_dominance = 0x2, > > nir_metadata_live_ssa_defs = 0x4, > > nir_metadata_not_properly_reset = 0x8, > > + nir_metadata_loop_analysis = 0x16, > > } nir_metadata; > > > > typedef struct { > > @@ -1758,6 +1786,8 @@ typedef struct nir_shader_compiler_options { > > * information must be inferred from the list of input > > nir_variables. > > */ > > bool use_interpolated_input_intrinsics; > > + > > + unsigned max_unroll_iterations; > > } nir_shader_compiler_options; > > > > typedef struct nir_shader_info { > > @@ -1962,7 +1992,7 @@ nir_loop *nir_loop_create(nir_shader > > *shader); > > nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); > > > > /** requests that the given pieces of metadata be generated */ > > -void nir_metadata_require(nir_function_impl *impl, nir_metadata > > required); > > +void
Re: [Mesa-dev] [PATCH v5] clover: Introduce CLOVER_EXTRA_{COMPILER, LINKER}_OPTIONS
Vedran Miletićwrites: > The options specified in the CLOVER_EXTRA_COMPILER_OPTIONS shell > variable are appended to the compiler options specified by the OpenCL > program, if any. > Analogously, the options specified in the CLOVER_EXTRA_LINKER_OPTIONS > variable are appended to the linker options and the options spoecified > in the CLOVER_EXTRA_COMPILER_OPTIONS variable. > > v2: > * rename to CLOVER_EXTRA_COMPILER_OPTIONS > * use debug_get_option > * append to linker options as well > > v3: code cleanups > > v4: separate CLOVER_EXTRA_LINKER_OPTIONS options > > v5: > * fix documentation typo > *use CLOVER_EXTRA_COMPILER_OPTIONS in link stage > > Signed-off-by: Vedran Miletić > Reviewed-by[v1]: Edward O'Callaghan > --- > docs/envvars.html | 13 + > src/gallium/state_trackers/clover/llvm/invocation.cpp | 11 --- > 2 files changed, 21 insertions(+), 3 deletions(-) > > diff --git a/docs/envvars.html b/docs/envvars.html > index cf57ca5..252b783 100644 > --- a/docs/envvars.html > +++ b/docs/envvars.html > @@ -235,6 +235,19 @@ Setting to "tgsi", for example, will print all the TGSI > shaders. > See src/mesa/state_tracker/st_debug.c for other options. > > > +Clover state tracker environment variables > + > + > +CLOVER_EXTRA_COMPILER_OPTIONS - allows specifying additional compiler > +options. Specified options are appended after the options set by the > OpenCL > +program in clBuildProgram and/or clCompileProgram. > +CLOVER_EXTRA_LINKER_OPTIONS - allows specifying additional linker > +options. Specified options are appended after the options set by the > OpenCL > +linker in clBuildProgram and/or clLinkProgram and options set in the > +CLOVER_EXTRA_COMPILER_OPTIONS variable. 
> + > + > + > Softpipe driver environment variables > > SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment > shaders > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp > b/src/gallium/state_trackers/clover/llvm/invocation.cpp > index b5e8b52..68b9d2e 100644 > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp > @@ -199,11 +199,13 @@ clover::llvm::compile_program(const std::string , >const std::string , >const std::string , >std::string _log) { > + const std::string all_opts = opts + " " + > + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", > ""); > if (has_flag(debug::clc)) > - debug::log(".cl", "// Options: " + opts + '\n' + source); > + debug::log(".cl", "// Options: " + all_opts + '\n' + source); > > auto ctx = create_context(r_log); > - auto c = create_compiler_instance(target, tokenize(opts + " input.cl"), > + auto c = create_compiler_instance(target, tokenize(all_opts + " > input.cl"), > r_log); > auto mod = compile(*ctx, *c, "input.cl", source, headers, target, opts, >r_log); > @@ -266,7 +268,10 @@ module > clover::llvm::link_program(const std::vector , > enum pipe_shader_ir ir, const std::string , > const std::string , std::string _log) { > - std::vector options = tokenize(opts + " input.cl"); > + const std::string all_opts = opts + " " + > + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", > "") + > + debug_get_option("CLOVER_EXTRA_LINKER_OPTIONS", ""); Why do you concatenate the two? I thought the reason Jan suggested splitting this was that a bunch of compiler options are invalid as linker options or the other way around? 
> + std::vector options = tokenize(all_opts + " input.cl"); > const bool create_library = count("-create-library", options); > erase_if(equals("-create-library"), options); > > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2
2016-09-19 18:40 GMT-04:00 Marek Olšák: > Do you mean the PC is frozen for 2 minutes or just occasional hiccups > for 2 minutes? Occasional hiccups for 2 minutes. The game works well, I wander around and, from time to time, it freezes for a random duration between 2 seconds and 2 minutes (on average I'd say it's 30 seconds) and then the game continues like nothing happened (the sound still works during the freeze though). I played yesterday for instance and I had 10 minutes without any freeze and then 2 freezes in less than a minute. It's really random, but it seems to be related to the new places I visit, once I had one or more freezes in an area, I won't have a freeze in this area anymore. That's why I think it could be linked to shader compilation. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2
On Mon, Sep 19, 2016 at 7:51 PM, Romain Failliot wrote: > 2016-09-15 16:27 GMT-04:00 Marek Olšák : >> Update your gcc I guess? Sorry, I don't know much about LLVM build >> requirements. It works with gcc 5.4.0. > > I'm using a pretty recent gcc, and the 64-bit compilation works like a > charm, it's only the 32-bit compilation that has trouble. > Anyway, I've been advised to use a pre-build rep: > https://copr.fedorainfracloud.org/coprs/mystro256/polaris-gfx/ > > And it works pretty well now! I do have a bug with the game > (i.e. Alien: Isolation) though: my PC hangs for up to 2 minutes. From > my experience, it seems to be because of the shaders compilation, but > I'm not 100% sure. Is it a known bug? Is it a problem from the game or > from the drivers? Do you mean the PC is frozen for 2 minutes or just occasional hiccups for 2 minutes? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] mesa: Implement ARB_shader_viewport_layer_array for i965
This extension is a combination of AMD_vertex_shader_viewport_index and AMD_vertex_shader_layer, making it rather trivial to implement. For gallium I *think* this needs a new cap because of the addition of support in tessellation evaluation shaders, and since I don't have any hardware to test it on, I've left that for someone else to wire up. Signed-off-by: Dylan BakerReviewed-by: Ilia Mirkin Reviewed-by: Kenneth Graunke --- v2: - changed messages to gen6+ instead of gen8+. - remove GLL from EXT list. docs/features.txt| 2 +- docs/relnotes/12.1.0.html| 1 + src/compiler/glsl/builtin_variables.cpp | 14 -- src/compiler/glsl/glsl_parser_extras.cpp | 1 + src/compiler/glsl/glsl_parser_extras.h | 2 ++ src/mesa/drivers/dri/i965/intel_extensions.c | 1 + src/mesa/main/extensions_table.h | 1 + src/mesa/main/mtypes.h | 1 + 8 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index d6c3240..ac93ec6 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -296,7 +296,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi) GL_ARB_shader_group_vote DONE (nvc0) GL_ARB_shader_stencil_export DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr) - GL_ARB_shader_viewport_layer_arraynot started + GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+) GL_ARB_sparse_buffer not started GL_ARB_sparse_texture not started GL_ARB_sparse_texture2not started diff --git a/docs/relnotes/12.1.0.html b/docs/relnotes/12.1.0.html index bb20e4f..65b8e4c 100644 --- a/docs/relnotes/12.1.0.html +++ b/docs/relnotes/12.1.0.html @@ -52,6 +52,7 @@ Note: some of the new features are only available with certain drivers. 
GL_ARB_indirect_parameters on radeonsi GL_ARB_shader_draw_parameters on radeonsi GL_ARB_shader_group_vote on nvc0 +GL_ARB_shader_viewport_layer_array on i965/gen6+ GL_ARB_stencil_texturing on i965/hsw GL_ARB_texture_stencil8 on i965/hsw GL_EXT_window_rectangles on nv50, nvc0 diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index 90278d6..8d6413e 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -1000,11 +1000,13 @@ builtin_variable_generator::generate_vs_special_vars() add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB"); add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB"); } - if (state->AMD_vertex_shader_layer_enable) { + if (state->AMD_vertex_shader_layer_enable || + state->ARB_shader_viewport_layer_array_enable) { var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); var->data.interpolation = INTERP_MODE_FLAT; } - if (state->AMD_vertex_shader_viewport_index_enable) { + if (state->AMD_vertex_shader_viewport_index_enable || + state->ARB_shader_viewport_layer_array_enable) { var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); var->data.interpolation = INTERP_MODE_FLAT; } @@ -1066,6 +1068,8 @@ builtin_variable_generator::generate_tcs_special_vars() void builtin_variable_generator::generate_tes_special_vars() { + ir_variable *var; + add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord"); @@ -1073,6 +1077,12 @@ builtin_variable_generator::generate_tes_special_vars() "gl_TessLevelOuter"); add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2), "gl_TessLevelInner"); + if (state->ARB_shader_viewport_layer_array_enable) { + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_MODE_FLAT; + var = 
add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_MODE_FLAT; + } } diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 436ddd0..a21ce50 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -608,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_shader_subroutine), EXT(ARB_shader_texture_image_samples), EXT(ARB_shader_texture_lod), + EXT(ARB_shader_viewport_layer_array), EXT(ARB_shading_language_420pack), EXT(ARB_shading_language_packing),
Re: [Mesa-dev] [PATCH v3] clover: Pass unquoted compiler arguments to Clang
Vedran Miletić writes: > OpenCL apps can quote arguments they pass to the OpenCL compiler, most > commonly include paths containing spaces. > > If the Clang OpenCL compiler was called via a shell, the shell would > split the arguments with respect to quotes and then remove quotes > before passing the arguments to the compiler. Since we call Clang as a > library, we have to split the argument with respect to quotes and then > remove quotes before passing the arguments. > > v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS > Why did you remove the error checking? Would it make sense to throw invalid_build_options_error instead? (which kind of replaced error(CL_INVALID_COMPILER_OPTIONS) after the recent clLinkProgram rework). > v3: simplify parsing logic, use more C++11 > --- > src/gallium/state_trackers/clover/llvm/util.hpp | 33 > ++--- > 1 file changed, 29 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp > b/src/gallium/state_trackers/clover/llvm/util.hpp > index 8db6f20..c770dd8 100644 > --- a/src/gallium/state_trackers/clover/llvm/util.hpp > +++ b/src/gallium/state_trackers/clover/llvm/util.hpp > @@ -42,11 +42,36 @@ namespace clover { >inline std::vector >tokenize(const std::string &s) { > std::vector ss; > - std::istringstream iss(s); > - std::string t; > + std::ostringstream oss; > > - while (getline(iss, t, ' ')) > -ss.push_back(t); > + // OpenCL programs can pass a single or double quoted argument, most > + // frequently include path. This is useful so that the path > containing > + // spaces is treated as a single argument, but we should anyhow > unquote > + // quoted arguments before passing them to the compiler. > + // We do not want to avoid using std::string::replace here, as > include > + // path can contain quotes in file names.
The last sentence in the comment doesn't make much sense to me -- I don't see how std::string::replace could be useful for this, nor why we "don't want to avoid using" it. Maybe just drop the last two lines? > + bool escape_next = false; > + bool in_quote_double = false; > + bool in_quote_single = false; > + for (auto c : s) { > +if (escape_next) { > + oss.put(c); > + escape_next = false; > +} else if (c == '\\') { > + escape_next = true; > +} else if (c == '"' && !in_quote_single) { > + in_quote_double = !in_quote_double; > +} else if (c == '\'' && !in_quote_double) { > + in_quote_single = !in_quote_single; > +} else if (c != ' ' || in_quote_single || in_quote_double) { > + oss.put(c); > +} else if (oss.tellp() > 0) { > + ss.emplace_back(oss.str()); > + oss.str(""); > +} > + } > + if (oss.tellp() > 0) > +ss.emplace_back(oss.str()); > Other than the two minor comments above, the code looks reasonable to me. > return ss; >} > -- > 2.7.4 signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] nv50/ir: optimize IMAD to SHLADD in presence of power of 2
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset wrote: > If and only if src1 is a power of 2, we can replace IMAD by SHLADD. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 13 + > 1 file changed, 13 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 74a5a85..336f407 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -915,6 +915,7 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue > &imm2) > void > ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) > { > + const Target *target = prog->getTarget(); > const int t = !s; > const operation op = i->op; > Instruction *newi = i; > @@ -1016,6 +1017,18 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue > &imm0, int s) > i->src(1).mod = i->src(2).mod; > i->setSrc(2, NULL); > i->op = OP_ADD; > + } else > + if (s == 1 && imm0.isPow2() && target->isOpSupported(i->op, i->dType)) > { > + int32_t v; > + switch (i->dType) { > + case TYPE_S32: v = util_last_bit_signed(imm0.reg.data.s32) - 1; > break; > + case TYPE_U32: v = util_last_bit(imm0.reg.data.u32) - 1; break; Huh? Can the shift be a negative value? I think the shift immediate is always positive. > + default: > +return; > + } > + bld.setPosition(i, false); > + i->op = OP_SHLADD; > + i->setSrc(1, bld.mkImm(v)); >} >break; > case OP_ADD: > -- > 2.10.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD
On 09/20/2016 12:16 AM, Ilia Mirkin wrote: On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoisetwrote: This instruction is available since SM20 (Fermi) and allow to do (a << b) + c in one shot. In some situations, IMAD should be replaced by SHLADD when b is a power of 2, and ADD+SHL should be replaced by SHLADD as well. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 7 +-- 6 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d6011d9..bedbdcc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -57,6 +57,7 @@ enum operation OP_MAD, OP_FMA, OP_SAD, // abs(src0 - src1) + src2 + OP_SHLADD, OP_ABS, OP_NEG, OP_NOT, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 22f2f5d..dbd0f7d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = "mad", "fma", "sad", + "shladd", "abs", "neg", "not", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 7d7b315..273ec34 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, // NOP, PHI 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 1, 1, 2,// MOV, LOAD, STORE - 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, 
DIV, MOD, MAD, FMA, SAD + 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD 1, 1, 1,// ABS, NEG, NOT 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1,// MAX, MIN, SAT @@ -70,10 +70,10 @@ const OpClass Target::operationClass[] = OPCLASS_MOVE, OPCLASS_LOAD, OPCLASS_STORE, - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp index 6b8f767..cf8a08f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const case OP_DIV: case OP_MOD: return false; + case OP_SHLADD: + if (isFloatType(ty)) + return false; + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index b37ea73..5ab95fc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: + case OP_SHLADD: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index f75e395..d8fa285 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -105,6 +105,7 @@ static const struct opProperties _initProps[] = { OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint + { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 }, { OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, { OP_ABS,0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, { OP_NEG,0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, @@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR,
Re: [Mesa-dev] [PATCH 2/6] nvc0/ir: add emission for SHLADD
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoisetwrote: > Unfortunately, we can't use the emit helpers for GF100/GK110 > because src1 and src2 are swapped. > > Signed-off-by: Samuel Pitoiset > --- > .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 53 > ++ > .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 32 + > .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 44 ++ > 3 files changed, 129 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > index 61c450b..2c4e3a7 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > @@ -96,6 +96,7 @@ private: > void emitDMUL(const Instruction *); > void emitIMAD(const Instruction *); > void emitISAD(const Instruction *); > + void emitSHLADD(const Instruction *); > void emitFMAD(const Instruction *); > void emitDMAD(const Instruction *); > void emitMADSP(const Instruction *i); > @@ -757,6 +758,55 @@ CodeEmitterGK110::emitISAD(const Instruction *i) > } > > void > +CodeEmitterGK110::emitSHLADD(const Instruction *i) > +{ > + uint8_t addOp = > + (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ > i->src(1).mod.neg()); > + const ImmediateValue *imm = i->src(1).get()->asImm(); > + assert(imm); > + > + if (i->src(2).getFile() == FILE_IMMEDIATE) { > + code[0] = 0x1; > + code[1] = 0xc0c << 20; > + } else { > + code[0] = 0x2; > + code[1] = 0x20c << 20; > + } > + code[1] |= addOp << 19; > + > + emitPredicate(i); > + > + defId(i->def(0), 2); > + srcId(i->src(0), 10); > + > + if (i->flagsDef >= 0) > + code[1] |= 1 << 18; > + > + assert(!(imm->reg.data.u32 & 0xffe0)); > + code[1] |= imm->reg.data.u32 << 10; > + > + switch (i->src(2).getFile()) { > + case FILE_GPR: > + assert(code[0] & 0x2); > + code[1] |= 0xc << 28; > + srcId(i->src(2), 23); > + break; > + case FILE_MEMORY_CONST: > + assert(code[0] & 0x2); > + code[1] |= 0x4 << 28; > + 
setCAddress14(i->src(2)); > + break; > + case FILE_IMMEDIATE: > + assert(code[0] & 0x1); > + setShortImmediate(i, 2); > + break; > + default: > + assert(!"bad src2 file"); > + break; > + } > +} > + > +void > CodeEmitterGK110::emitNOT(const Instruction *i) > { > code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src > @@ -2403,6 +2453,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) > case OP_SAD: >emitISAD(insn); >break; > + case OP_SHLADD: > + emitSHLADD(insn); > + break; > case OP_NOT: >emitNOT(insn); >break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > index cfde66c..973a105 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > @@ -152,6 +152,7 @@ private: > void emitIADD(); > void emitIMUL(); > void emitIMAD(); > + void emitSHLADD(); > void emitIMNMX(); > void emitICMP(); > void emitISET(); > @@ -1813,6 +1814,34 @@ CodeEmitterGM107::emitIMAD() > } > > void > +CodeEmitterGM107::emitSHLADD() The convention in the GM107 emitter is to use the nvdisasm names here. So ISCADD. 
> +{ > + switch (insn->src(2).getFile()) { > + case FILE_GPR: > + emitInsn(0x5c18); > + emitGPR (0x14, insn->src(2)); > + break; > + case FILE_MEMORY_CONST: > + emitInsn(0x4c18); > + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); > + break; > + case FILE_IMMEDIATE: > + emitInsn(0x3818); > + emitIMMD(0x14, 19, insn->src(2)); > + break; > + default: > + assert(!"bad src1 file"); > + break; > + } > + emitNEG (0x31, insn->src(0)); > + emitNEG (0x30, insn->src(2)); > + emitCC (0x2f); > + emitIMMD(0x27, 5, insn->src(1)); > + emitGPR (0x08, insn->src(0)); > + emitGPR (0x00, insn->def(0)); > +} > + > +void > CodeEmitterGM107::emitIMNMX() > { > switch (insn->src(1).getFile()) { > @@ -3098,6 +3127,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) > emitIMAD(); >} >break; > + case OP_SHLADD: > + emitSHLADD(); > + break; > case OP_MIN: > case OP_MAX: >if (isFloatType(insn->dType)) { > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > index d8ca6ab..c874b86 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > @@ -101,6 +101,7 @@ private: > void emitDMUL(const Instruction *); > void emitIMAD(const Instruction *); >
Re: [Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoisetwrote: > This instruction is available since SM20 (Fermi) and allow to do > (a << b) + c in one shot. In some situations, IMAD should be > replaced by SHLADD when b is a power of 2, and ADD+SHL should be > replaced by SHLADD as well. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- > src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 7 +-- > 6 files changed, 15 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index d6011d9..bedbdcc 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -57,6 +57,7 @@ enum operation > OP_MAD, > OP_FMA, > OP_SAD, // abs(src0 - src1) + src2 > + OP_SHLADD, > OP_ABS, > OP_NEG, > OP_NOT, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index 22f2f5d..dbd0f7d 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = > "mad", > "fma", > "sad", > + "shladd", > "abs", > "neg", > "not", > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > index 7d7b315..273ec34 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] = > 0, 0, // NOP, PHI > 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT > 1, 1, 2,// MOV, 
LOAD, STORE > - 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD > + 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, > SHLADD > 1, 1, 1,// ABS, NEG, NOT > 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR > 2, 2, 1,// MAX, MIN, SAT > @@ -70,10 +70,10 @@ const OpClass Target::operationClass[] = > OPCLASS_MOVE, > OPCLASS_LOAD, > OPCLASS_STORE, > - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD > + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD > OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, > OPCLASS_ARITH, OPCLASS_ARITH, > - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, > + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, > // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR > OPCLASS_CONVERT, OPCLASS_CONVERT, > OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > index 6b8f767..cf8a08f 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > @@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const > case OP_DIV: > case OP_MOD: >return false; > + case OP_SHLADD: > + if (isFloatType(ty)) > + return false; > + break; > default: >break; > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index b37ea73..5ab95fc 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const > case OP_EXTBF: > case OP_EXIT: // want exit modifier instead (on NOP if required) > case OP_MEMBAR: > + case OP_SHLADD: >return false; > case OP_SAD: >return ty == TYPE_S32; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index f75e395..d8fa285 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -105,6 +105,7 @@ static const struct opProperties _initProps[] = > { OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, > { OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, > { OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] > constraint > + { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 }, > { OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, > { OP_ABS,0x0,
Re: [Mesa-dev] [PATCH] mesa: Implement ARB_shader_viewport_layer_array for i965
On Friday, September 9, 2016 4:14:55 PM PDT Dylan Baker wrote: > This extension is a combination of AMD_vertex_shader_viewport_index and > AMD_vertex_shader_layer, making it rather trivial to implement. > > For gallium I *think* this needs a new cap because of the addition of > support in tessellation evaluation shaders, and since I don't have any > hardware to test it on, I've left that for someone else to wire up. > > Since this requires GL 4.1, this is only available on gen8+. You've actually enabled this on Gen6+, by virtue of: > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index 0f28546..6573bc2 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -330,6 +330,7 @@ intelInitExtensions(struct gl_context *ctx) > */ >if (ctx->API == API_OPENGL_CORE) { > ctx->Extensions.ARB_shader_subroutine = true; > + ctx->Extensions.ARB_shader_viewport_layer_array = true; > ctx->Extensions.ARB_viewport_array = true; > ctx->Extensions.AMD_vertex_shader_viewport_index = true; >} ^^^ this is in a Gen6+ and core only block. I agree with Ilia that this is the right thing to do - it makes sense to expose it where AMD_vertex_shader_viewport_index is already exposed. I'd just drop that sentence from your commit message. 
> diff --git a/src/mesa/main/extensions_table.h > b/src/mesa/main/extensions_table.h > index 75cdcb8..38636b4 100644 > --- a/src/mesa/main/extensions_table.h > +++ b/src/mesa/main/extensions_table.h > @@ -115,6 +115,7 @@ EXT(ARB_shader_storage_buffer_object, > ARB_shader_storage_buffer_object > EXT(ARB_shader_subroutine , ARB_shader_subroutine > , x , GLC, x , x , 2010) > EXT(ARB_shader_texture_image_samples, > ARB_shader_texture_image_samples , GLL, GLC, x , x , 2014) > EXT(ARB_shader_texture_lod , ARB_shader_texture_lod > , GLL, GLC, x , x , 2009) > +EXT(ARB_shader_viewport_layer_array , > ARB_shader_viewport_layer_array, GLL, GLC, x , x , 2015) > EXT(ARB_shading_language_100, dummy_true > , GLL, x , x , x , 2003) > EXT(ARB_shading_language_420pack, ARB_shading_language_420pack > , GLL, GLC, x , x , 2011) > EXT(ARB_shading_language_packing, ARB_shading_language_packing > , GLL, GLC, x , x , 2011) As Ilia mentioned, please drop "GLL", changing it to " x ". Otherwise, this is: Reviewed-by: Kenneth Graunkesignature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] nvc0/ir: add emission for SHLADD
Unfortunately, we can't use the emit helpers for GF100/GK110 because src1 and src2 are swapped. Signed-off-by: Samuel Pitoiset--- .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 53 ++ .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 32 + .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 44 ++ 3 files changed, 129 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 61c450b..2c4e3a7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -96,6 +96,7 @@ private: void emitDMUL(const Instruction *); void emitIMAD(const Instruction *); void emitISAD(const Instruction *); + void emitSHLADD(const Instruction *); void emitFMAD(const Instruction *); void emitDMAD(const Instruction *); void emitMADSP(const Instruction *i); @@ -757,6 +758,55 @@ CodeEmitterGK110::emitISAD(const Instruction *i) } void +CodeEmitterGK110::emitSHLADD(const Instruction *i) +{ + uint8_t addOp = + (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + const ImmediateValue *imm = i->src(1).get()->asImm(); + assert(imm); + + if (i->src(2).getFile() == FILE_IMMEDIATE) { + code[0] = 0x1; + code[1] = 0xc0c << 20; + } else { + code[0] = 0x2; + code[1] = 0x20c << 20; + } + code[1] |= addOp << 19; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0), 10); + + if (i->flagsDef >= 0) + code[1] |= 1 << 18; + + assert(!(imm->reg.data.u32 & 0xffe0)); + code[1] |= imm->reg.data.u32 << 10; + + switch (i->src(2).getFile()) { + case FILE_GPR: + assert(code[0] & 0x2); + code[1] |= 0xc << 28; + srcId(i->src(2), 23); + break; + case FILE_MEMORY_CONST: + assert(code[0] & 0x2); + code[1] |= 0x4 << 28; + setCAddress14(i->src(2)); + break; + case FILE_IMMEDIATE: + assert(code[0] & 0x1); + setShortImmediate(i, 2); + break; + default: + assert(!"bad src2 file"); + break; + } +} + +void 
CodeEmitterGK110::emitNOT(const Instruction *i) { code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src @@ -2403,6 +2453,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_SAD: emitISAD(insn); break; + case OP_SHLADD: + emitSHLADD(insn); + break; case OP_NOT: emitNOT(insn); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index cfde66c..973a105 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -152,6 +152,7 @@ private: void emitIADD(); void emitIMUL(); void emitIMAD(); + void emitSHLADD(); void emitIMNMX(); void emitICMP(); void emitISET(); @@ -1813,6 +1814,34 @@ CodeEmitterGM107::emitIMAD() } void +CodeEmitterGM107::emitSHLADD() +{ + switch (insn->src(2).getFile()) { + case FILE_GPR: + emitInsn(0x5c18); + emitGPR (0x14, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c18); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + case FILE_IMMEDIATE: + emitInsn(0x3818); + emitIMMD(0x14, 19, insn->src(2)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitNEG (0x31, insn->src(0)); + emitNEG (0x30, insn->src(2)); + emitCC (0x2f); + emitIMMD(0x27, 5, insn->src(1)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void CodeEmitterGM107::emitIMNMX() { switch (insn->src(1).getFile()) { @@ -3098,6 +3127,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) emitIMAD(); } break; + case OP_SHLADD: + emitSHLADD(); + break; case OP_MIN: case OP_MAX: if (isFloatType(insn->dType)) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index d8ca6ab..c874b86 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -101,6 +101,7 @@ private: void emitDMUL(const Instruction *); 
void emitIMAD(const Instruction *); void emitISAD(const Instruction *); + void emitSHLADD(const Instruction *a); void emitFMAD(const Instruction *); void emitDMAD(const Instruction *); void emitMADSP(const Instruction *); @@ -759,6 +760,46 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i) } void +CodeEmitterNVC0::emitSHLADD(const Instruction *i) +{ + uint8_t addOp = + (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + const ImmediateValue *imm =
Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()
On 09/19/2016 11:59 PM, Brian Paul wrote: Seems OK here. Tested-by: Brian Paul Thanks for testing and reviewing, guys. On 09/19/2016 02:43 AM, Nicolai Hähnle wrote: Reviewed-by: Nicolai Hähnle However, you might want to check with the VMWare guys. I seem to recall that MSVC is a bit peculiar with some of these library functions. Cheers, Nicolai On 14.09.2016 20:37, Samuel Pitoiset wrote: This allows the use of hexadecimal numbers which are automatically detected by strtol() when the base is 0. Signed-off-by: Samuel Pitoiset --- src/gallium/auxiliary/util/u_debug.c | 25 - 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 4619526..dd3e167 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long dfault) const char *str; str = os_get_option(name); - if (!str) + if (!str) { result = dfault; - else { - long sign; - char c; - c = *str++; - if (c == '-') { - sign = -1; - c = *str++; - } - else { - sign = 1; - } - result = 0; - while ('0' <= c && c <= '9') { - result = result*10 + (c - '0'); - c = *str++; + } else { + char *endptr; + + result = strtol(str, &endptr, 0); + if (str == endptr) { + /* Restore the default value when no digits were found. */ + result = dfault; } - result *= sign; } if (debug_get_option_should_print()) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] nv50/ir: teach insnCanLoad() about SHLADD
Commutativity is not allowed with SHLADD, but src2 can accept loads. To allow the load propagation pass to do its job, add a special case like for SUCLAMP because src1 is always an immediate. This IMAD to SHLADD optimization helps a bunch of shaders from Tomb Raider, Victor Vran, UE4 demos (+15% perf with Elemental) and Shadow Warrior. GF100/GK104: total instructions in shared programs : 2838045 -> 2834712 (-0.12%) total gprs used in shared programs : 396684 -> 396386 (-0.08%) total local used in shared programs : 34416 -> 34416 (0.00%) local gpr inst bytes helped 0 326 1105 1105 hurt 0 55 3 3 Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index d8fa285..9bc5b8d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -334,6 +334,8 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, if (i->src(k).getFile() == FILE_IMMEDIATE) { if (k == 2 && i->op == OP_SUCLAMP) // special case continue; + if (k == 1 && i->op == OP_SHLADD) // special case +continue; if (i->getSrc(k)->reg.data.u64 != 0) return false; } else -- 2.10.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] nv50/ir: optimize SHLADD(a, b, c) to MOV((a << b) + c)
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 1b99ce7..75c448e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -778,6 +778,9 @@ ConstantFolding::expr(Instruction *i, } break; } + case OP_SHLADD: + res.data.u32 = (a->data.u32 << b->data.u32) + c->data.u32; + break; default: return; } -- 2.10.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] nv50/ir: optimize IMAD to SHLADD in presence of power of 2
If and only if src1 is a power of 2, we can replace IMAD by SHLADD. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 13 + 1 file changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 74a5a85..336f407 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -915,6 +915,7 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) void ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { + const Target *target = prog->getTarget(); const int t = !s; const operation op = i->op; Instruction *newi = i; @@ -1016,6 +1017,18 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->src(1).mod = i->src(2).mod; i->setSrc(2, NULL); i->op = OP_ADD; + } else + if (s == 1 && imm0.isPow2() && target->isOpSupported(i->op, i->dType)) { + int32_t v; + switch (i->dType) { + case TYPE_S32: v = util_last_bit_signed(imm0.reg.data.s32) - 1; break; + case TYPE_U32: v = util_last_bit(imm0.reg.data.u32) - 1; break; + default: +return; + } + bld.setPosition(i, false); + i->op = OP_SHLADD; + i->setSrc(1, bld.mkImm(v)); } break; case OP_ADD: -- 2.10.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] nv50/ir: optimize SHLADD(a, b, 0x0) to SHL(a, b)
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 336f407..1b99ce7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -907,6 +907,14 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) return; } break; + case OP_SHLADD: + if (imm2.isInteger(0)) { + i->op = OP_SHL; + i->setSrc(2, NULL); + foldCount++; + return; + } + break; default: return; } -- 2.10.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD
This instruction is available since SM20 (Fermi) and allow to do (a << b) + c in one shot. In some situations, IMAD should be replaced by SHLADD when b is a power of 2, and ADD+SHL should be replaced by SHLADD as well. Signed-off-by: Samuel Pitoiset--- src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 7 +-- 6 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d6011d9..bedbdcc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -57,6 +57,7 @@ enum operation OP_MAD, OP_FMA, OP_SAD, // abs(src0 - src1) + src2 + OP_SHLADD, OP_ABS, OP_NEG, OP_NOT, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 22f2f5d..dbd0f7d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = "mad", "fma", "sad", + "shladd", "abs", "neg", "not", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 7d7b315..273ec34 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, // NOP, PHI 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 1, 1, 2,// MOV, LOAD, STORE - 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, 
SHLADD 1, 1, 1,// ABS, NEG, NOT 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1,// MAX, MIN, SAT @@ -70,10 +70,10 @@ const OpClass Target::operationClass[] = OPCLASS_MOVE, OPCLASS_LOAD, OPCLASS_STORE, - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp index 6b8f767..cf8a08f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const case OP_DIV: case OP_MOD: return false; + case OP_SHLADD: + if (isFloatType(ty)) + return false; + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index b37ea73..5ab95fc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: + case OP_SHLADD: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index f75e395..d8fa285 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -105,6 +105,7 @@ static const struct 
opProperties _initProps[] = { OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint + { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 }, { OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, { OP_ABS,0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, { OP_NEG,0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, @@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR, // SET, SELP, SLCT - 0x0670ca00, 0x003f, 0x, 0x +
Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()
Seems OK here. Tested-by: Brian Paul On 09/19/2016 02:43 AM, Nicolai Hähnle wrote: Reviewed-by: Nicolai Hähnle However, you might want to check with the VMWare guys. I seem to recall that MSVC is a bit peculiar with some of these library functions. Cheers, Nicolai On 14.09.2016 20:37, Samuel Pitoiset wrote: This allows the use of hexadecimal numbers, which are automatically detected by strtol() when the base is 0. Signed-off-by: Samuel Pitoiset --- src/gallium/auxiliary/util/u_debug.c | 25 - 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 4619526..dd3e167 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long dfault) const char *str; str = os_get_option(name); - if (!str) + if (!str) { result = dfault; - else { - long sign; - char c; - c = *str++; - if (c == '-') { - sign = -1; - c = *str++; - } - else { - sign = 1; - } - result = 0; - while ('0' <= c && c <= '9') { - result = result*10 + (c - '0'); - c = *str++; + } else { + char *endptr; + + result = strtol(str, &endptr, 0); + if (str == endptr) { + /* Restore the default value when no digits were found. */ + result = dfault; } - result *= sign; } if (debug_get_option_should_print()) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/ir: Test thread dispatch packing assumptions.
Not intended for upstream. Should cause a GPU hang if some thread is executed with a non-contiguous dispatch mask breaking assumptions of brw_stage_has_packed_dispatch(). Doesn't cause any CTS, DEQP or Piglit regressions, while replacing brw_stage_has_packed_dispatch() with a dummy implementation that unconditionally returns true on top of this patch causes multiple GPU hangs. v2: Drop VEC4 test and clean up slightly for upstream (Jason). --- src/mesa/drivers/dri/i965/brw_fs.cpp | 30 ++ 1 file changed, 30 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 03d4f5f..c5fa3f7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6832,3 +6832,33 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, return g.get_assembly(final_assembly_size); } + +/** + * Test the dispatch mask packing assumptions of + * brw_stage_has_packed_dispatch(). Call this from e.g. the top of + * fs_visitor::emit_nir_code() to cause a GPU hang if any shader invocation is + * executed with an unexpected dispatch mask. + */ +static UNUSED void +brw_fs_test_dispatch_packing(const fs_builder ) +{ + const gl_shader_stage stage = bld.shader->stage; + + if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage, + bld.shader->stage_prog_data)) { + const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0); + const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : + brw_dmask_reg()); + + ubld.ADD(tmp, mask, brw_imm_ud(1)); + ubld.AND(tmp, mask, tmp); + + /* This will loop forever if the dispatch mask doesn't have the expected + * form '2^n-1', in which case tmp will be non-zero. 
+ */ + bld.emit(BRW_OPCODE_DO); + bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ); + set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE)); + } +} -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/12] anv: Implement HiZ for simple render passes
On Fri, Sep 02, 2016 at 05:27:11PM -0700, Jason Ekstrand wrote: > On Wed, Aug 31, 2016 at 8:29 PM, Nanley Cherywrote: > > > This series enables Hierarchical depth buffer rendering and fast depth > > clears > > for render passes with a single subpass running on platforms BDW+. > > Platforms > > pre-BDW can test this feature with an environment variable. The FPS of some > > demos are roughly estimated to increase by as much as ~50% on a SKL GT2. > > > > This feature was partially implemented by Chad and Jason. Where applicable, > > I've tried to accurately note the modifications I've made to their patches > > without being too verbose. I've also tried to maintain the authorship of > > their > > patches when the core of their work remained. > > > > The only patch which wasn't retained due to the core of the work being lost > > was a patch to create a HiZ surface. This was replaced with my patch to > > update > > an existing function which does so. This diverged enough for me to feel at > > risk of misrepresenting the original author's work. > > > > Any suggestions with respect to my annotating method, notices of > > incorrectly > > attributed credit, or general comments are welcome. > > > > Feel free to take more credit. :) Chad and I wrote sketchy, untested, > skeleton patches. You were the one who got it working! > > Patches 1, 3-7, and 10-12 have a few comments here and there. Assuming > those comments are addressed, those patches are > > Reviewed-by: Jason Ekstrand > I've made more updates to patch 10 and 12 than your comments so I'll wait for you to take a look at the V2 before applying your Rb. > We talked about 2 offline and I sent my little 6-patch series that makes > the original plan work. > > On patch 9, I gave a bunch of comments but one thing was clear: We need > tests. 
In the interest of merging patches, I think I'd recommend that we > disable HiZ for mipmapped surfaces (we can just not allocate the surface) > and don't do fast-clears for anything other than full-RT clears. That > seems like the shortest path to getting the patches merged quickly with > some guarantee of correctness. > I had a local patch to disable gen8 multisampled and BDW+ mipmapped HiZ in patch 9, but I prefer your plan of not allocating the surface at all. Partial clears are currently tested by the CTS. > For partial clears and mipmapped HiZ, I think we need more tests. There > may be CTS tests for partial depth clears (In particular, the subpass > tests) but I'm not sure. I'll leave it up to you as to whether you'd > rather write CTS tests or crucible tests. Crucible may be easier, but the > CTS needs those tests too, so maybe we should be good citizens and put them > there? > > Yes, we do need more tests. The CTS is steadily increasing its test coverage so I'm thinking of revisiting those cases once a test exists for it. If I do write a test, it'd likely be a crucible one. 
Nanley > > > > Chad Versace (4): > > anv: Add anv_image::hiz_surface > > anv: Add func anv_image_has_hiz() > > anv: Allocate hiz surface > > genX/cmd_buffer: Enable rendering to HiZ > > > > Jason Ekstrand (3): > > anv: Move BindImageMemory to anv_image.c > > anv/image: Memset hiz surfaces to 0 when binding memory > > anv/cmd_buffer: Add code for performing HZ operations > > > > Nanley Chery (5): > > isl: Correct a comment in the isl_format enum > > isl: Update isl_surf_get_hiz_surf() > > isl: Make MSAA pixel scaling function public > > genX/cmd_buffer: Enable fast depth clears > > anv/TODO: Update the HiZ task > > > > src/intel/isl/isl.c| 41 ++-- > > src/intel/isl/isl.h| 6 +- > > src/intel/vulkan/TODO | 2 +- > > src/intel/vulkan/anv_device.c | 20 -- > > src/intel/vulkan/anv_genX.h| 3 + > > src/intel/vulkan/anv_image.c | 67 ++- > > src/intel/vulkan/anv_pass.c| 11 +++ > > src/intel/vulkan/anv_private.h | 18 + > > src/intel/vulkan/gen7_cmd_buffer.c | 5 ++ > > src/intel/vulkan/gen8_cmd_buffer.c | 134 ++ > > +++ > > src/intel/vulkan/genX_cmd_buffer.c | 45 +++-- > > 11 files changed, 313 insertions(+), 39 deletions(-) > > > > -- > > 2.9.3 > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.
On 19 September 2016 at 16:38, Jason Ekstrand wrote: > It all looks fine to me. Feel free to add a > > Reviewed-by: Jason Ekstrand > > That said, my knowledge of the details of the DRI vfuncs is very limited so > I'd like to see Emil or Axel sign off on it too, especially since they were > the ones who had all the comments. > Thanks for double-checking, Jason. AFAICS the patches have a few outstanding style issues (mentioned last round), but I'll squash those just before committing tomorrow morning. Regards, Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)
Hi Nayan, On 16 September 2016 at 13:51, Nayan Deshmukh wrote: > + scrn->pipe = scrn->base.pscreen->context_create(scrn->base.pscreen, > + &scrn->base, 0); > + I think you're forgetting to destroy the context in vl_dri3_screen_destroy, and proper handling of a context_create failure seems to be missing. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/12] genX/cmd_buffer: Enable rendering to HiZ
On Fri, Sep 02, 2016 at 03:16:21PM -0700, Chad Versace wrote: > On Wed 31 Aug 2016, Nanley Chery wrote: > > From: Chad Versace> > > > Nanley Chery: > > (rebase) > > - Resolve conflicts with new anv_batch_emit macro > > (amend) > > - Remove wip! tag and handle a QPitch TODO > > - Emit 3DSTATE_HIER_DEPTH_BUFFER on pre-BDW systems > > - Only use HiZ for single-subpass renderpasses > > - Emit the HiZ instruction before the stencil instruction to follow the > >optimized clear sequence specified in the PRMs > > - Don't modify clear params > > - Enable resolves when a HiZ buffer is used to ensure depth buffer validity > > > > Provides an FPS increase of ~15% on the Sascha triangle and multisampling > > demos. > > > > Signed-off-by: Nanley Chery > > --- > > src/intel/vulkan/gen8_cmd_buffer.c | 4 > > src/intel/vulkan/genX_cmd_buffer.c | 41 > > ++ > > 2 files changed, 41 insertions(+), 4 deletions(-) > > > > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c > > b/src/intel/vulkan/gen8_cmd_buffer.c > > index 4f27350..7f65fe2 100644 > > --- a/src/intel/vulkan/gen8_cmd_buffer.c > > +++ b/src/intel/vulkan/gen8_cmd_buffer.c > > @@ -414,6 +414,10 @@ genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer > > *cmd_buffer, enum anv_hz_op op) > > if (iview == NULL || !anv_image_has_hiz(iview->image)) > >return; > > > > + /* FIXME: Implement multi-subpass HiZ */ > > + if (cmd_buffer->state.pass->subpass_count > 1) > > + return; > > + > > const uint32_t ds = cmd_state->subpass->depth_stencil_attachment; > > const bool full_surface_op = > > cmd_state->render_area.extent.width == iview->extent.width && > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > > b/src/intel/vulkan/genX_cmd_buffer.c > > index 95ed5f2..349d2a4 100644 > > --- a/src/intel/vulkan/genX_cmd_buffer.c > > +++ b/src/intel/vulkan/genX_cmd_buffer.c > > @@ -1040,6 +1040,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer > > *cmd_buffer) > >anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); > > const struct anv_image *image = 
iview ? iview->image : NULL; > > const bool has_depth = image && (image->aspects & > > VK_IMAGE_ASPECT_DEPTH_BIT); > > + const bool has_hiz = image != NULL && anv_image_has_hiz(image); > > const bool has_stencil = > >image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT); > > > > > @@ -1052,7 +1053,12 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer > > *cmd_buffer) > > db.SurfaceType = SURFTYPE_2D; > > db.DepthWriteEnable = true; > > db.StencilWriteEnable= has_stencil; > > - db.HierarchicalDepthBufferEnable = false; > > + > > + if (cmd_buffer->state.pass->subpass_count == 1) { > > +db.HierarchicalDepthBufferEnable = has_hiz; > > + } else { > > +anv_finishme("Multiple-subpass HiZ not implemented"); > > + } > > > > db.SurfaceFormat = isl_surf_get_depth_format(>isl_dev, > > > > >depth_surface.isl); > > @@ -1104,6 +1110,34 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer > > *cmd_buffer) > >} > > } > > > > + if (has_hiz) { > > Note: This codepath is hit sometimes when > 3DSTATE_DEPTH_BUFFER.HierarchicalDepthBufferEnable is false. > Specifically, when subpass_count > 1. It's weird, but I doubt it causes > any harm. After all, all the surface data programmed by > 3DSTATE_HIER_BUFFER is valid here regardless of the value of > HierarchicalDepthBufferEnable. 
> > > + anv_batch_emit(_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), > > hdb) { > > + hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS); > > + hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1; > > + hdb.SurfaceBaseAddress = (struct anv_address) { > > +.bo = image->bo, > > +.offset = image->offset + image->hiz_surface.offset, > > + }; > > +#if GEN_GEN >= 8 > > + /* From the SKL PRM Vol2a: > > + * > > + *The interpretation of this field is dependent on Surface > > Type > > + *as follows: > > + *- SURFTYPE_1D: distance in pixels between array slices > > + *- SURFTYPE_2D/CUBE: distance in rows between array slices > > + *- SURFTYPE_3D: distance in rows between R - slices > > + * > > + * ISL implements HiZ surfaces for 1D depth buffers as 2D. > > Therefore > > + * the depth buffer needs to be checked for the dimension. > > + */ > > + hdb.SurfaceQPitch = > > +image->depth_surface.isl.dim == ISL_SURF_DIM_1D ? > > + isl_surf_get_array_pitch_el(>hiz_surface.isl) >> 2 : > > + isl_surf_get_array_pitch_el_rows(>hiz_surface.isl) > > >> 2; > >
Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)
On 09/17/2016 07:33 AM, Nayan Deshmukh wrote: Hi Leo, Could you push the patches? I don't have the push access. Can you rebase all your reviewed patches, and add RB to it, and then you can send them to me ? Sorry for too busy to do this for you. Regards, Leo Regards, Nayan. On Fri, Sep 16, 2016 at 7:44 PM, Leo Liu> wrote: This Patch is Reviewed-by: Leo Liu > On 09/16/2016 08:51 AM, Nayan Deshmukh wrote: In case of prime when rendering is done on GPU other then the server GPU, use a seprate linear buffer for each back buffer which will be displayed using present extension. v2: Use a seprate linear buffer for each back buffer (Michel) v3: Change variable names and fix coding style (Leo and Emil) v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when a seprate linear buffer is used (Michel) v4.1: remove empty line Signed-off-by: Nayan Deshmukh > --- src/gallium/auxiliary/vl/vl_winsys_dri3.c | 61 --- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c index 3d596a6..e0aaad8 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -49,6 +49,7 @@ struct vl_dri3_buffer { struct pipe_resource *texture; + struct pipe_resource *linear_texture; uint32_t pixmap; uint32_t sync_fence; @@ -69,6 +70,8 @@ struct vl_dri3_screen xcb_present_event_t eid; xcb_special_event_t *special_event; + struct pipe_context *pipe; + struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM]; int cur_back; @@ -82,6 +85,7 @@ struct vl_dri3_screen int64_t last_ust, ns_frame, last_msc, next_msc; bool flushed; + bool is_different_gpu; }; static void @@ -102,6 +106,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn, xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence); xshmfence_unmap_shm(buffer->shm_fence); pipe_resource_reference(>texture, NULL); + if (buffer->linear_texture) + pipe_resource_reference(>linear_texture, NULL); FREE(buffer); } @@ -209,7 
+215,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) xcb_sync_fence_t sync_fence; struct xshmfence *shm_fence; int buffer_fd, fence_fd; - struct pipe_resource templ; + struct pipe_resource templ, *pixmap_buffer_texture; struct winsys_handle whandle; unsigned usage; @@ -226,8 +232,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) goto close_fd; memset(, 0, sizeof(templ)); - templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | -PIPE_BIND_SCANOUT | PIPE_BIND_SHARED; + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; templ.format = PIPE_FORMAT_B8G8R8X8_UNORM; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) templ.height0 = scrn->height; templ.depth0 = 1; templ.array_size = 1; - buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, -); - if (!buffer->texture) - goto unmap_shm; + if (scrn->is_different_gpu) { + buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, + ); + if (!buffer->texture) + goto unmap_shm; + + templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | +PIPE_BIND_LINEAR; + buffer->linear_texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, + ); + pixmap_buffer_texture = buffer->linear_texture; + + if (!buffer->linear_texture) + goto no_linear_texture; + } else { + templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED; + buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, + ); + if (!buffer->texture) + goto unmap_shm; + pixmap_buffer_texture = buffer->texture; + } memset(, 0,
Re: [Mesa-dev] [PATCH 11/12] genX/cmd_buffer: Enable fast depth clears
On Fri, Sep 02, 2016 at 05:12:58PM -0700, Jason Ekstrand wrote: > On Wed, Aug 31, 2016 at 8:29 PM, Nanley Cherywrote: > > > Provides an FPS increase of ~30% on the Sascha triangle and multisampling > > demos. > > > > Clears that happen within a render pass via vkCmdClearAttachments are safe > > even if the clear color changes. This is because the meta implementation > > does > > not use LOAD_OP_CLEAR which avoids any conflicts with 3DSTATE_CLEAR_PARAMS. > > > > Signed-off-by: Nanley Chery > > --- > > src/intel/vulkan/anv_pass.c| 11 +++ > > src/intel/vulkan/gen8_cmd_buffer.c | 6 ++ > > src/intel/vulkan/genX_cmd_buffer.c | 4 +--- > > 3 files changed, 18 insertions(+), 3 deletions(-) > > > > diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c > > index 69c3c7e..823f9cf 100644 > > --- a/src/intel/vulkan/anv_pass.c > > +++ b/src/intel/vulkan/anv_pass.c > > @@ -155,5 +155,16 @@ void anv_GetRenderAreaGranularity( > > VkRenderPassrenderPass, > > VkExtent2D* pGranularity) > > { > > + ANV_FROM_HANDLE(anv_render_pass, pass, renderPass); > > + > > + /* This granularity is needed for HiZ fast clears */ > > + for (unsigned i = 0; i < pass->subpass_count; ++i) { > > + if (pass->subpasses[i].depth_stencil_attachment != > > + VK_ATTACHMENT_UNUSED) { > > + *pGranularity = (VkExtent2D) { 8, 4 }; > > + return; > > + } > > + } > > > > Thanks for remembering this! As mentioned in an earlier e-mail, I'm not > sure this is needed for Sky Lake or Broadwell with a format other than > D16_UNORM but setting it all the time doesn't hurt. 8x4 isn't onerous and > once we get fast clears, it'll get much bigger anyway. > > Np. It's a bit unfortunate that the Vulkan apps I've tested don't call this function. I do need to update the comment as it's not exactly true (gen8 can have smaller alignments depending on the sample count). 
> > + > > *pGranularity = (VkExtent2D) { 1, 1 }; > > } > > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c > > b/src/intel/vulkan/gen8_cmd_buffer.c > > index 7f65fe2..ec91ecd 100644 > > --- a/src/intel/vulkan/gen8_cmd_buffer.c > > +++ b/src/intel/vulkan/gen8_cmd_buffer.c > > @@ -451,6 +451,12 @@ genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer > > *cmd_buffer, enum anv_hz_op op) > > cmd_state->render_area.extent.height % align_h) > > return; > >} > > + > > + anv_batch_emit(_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp) > > { > > + cp.DepthClearValueValid = true; > > + cp.DepthClearValue = > > +cmd_buffer->state.attachments[ds].clear_value.depthStencil. > > depth; > > + } > > > > Hrm... I'm not sure where the best place to set CLEAR_PARAMS is. It might > almost be better in BeginSubpass... In any case, I think this works. We > can move it later if we want. > > > >break; > > case ANV_HZ_OP_DEPTH_RESOLVE: > >if (cmd_buffer->state.pass->attachments[ds].store_op != > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > > b/src/intel/vulkan/genX_cmd_buffer.c > > index 349d2a4..7d2a6bd 100644 > > --- a/src/intel/vulkan/genX_cmd_buffer.c > > +++ b/src/intel/vulkan/genX_cmd_buffer.c > > @@ -1159,9 +1159,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer > > *cmd_buffer) > > } else { > >anv_batch_emit(_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), > > sb); > > } > > - > > - /* Clear the clear params. */ > > - anv_batch_emit(_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp); > > } > > > > /** > > @@ -1196,6 +1193,7 @@ void genX(CmdBeginRenderPass)( > > > > genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); > > genX(cmd_buffer_do_hz_op)(cmd_buffer, ANV_HZ_OP_HIZ_RESOLVE); > > + genX(cmd_buffer_do_hz_op)(cmd_buffer, ANV_HZ_OP_CLEAR); > > anv_cmd_buffer_clear_subpass(cmd_buffer); > > } > > > > -- > > 2.9.3 > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/12] anv/cmd_buffer: Add code for performing HZ operations
On Fri, Sep 02, 2016 at 05:01:28PM -0700, Jason Ekstrand wrote: > On Wed, Aug 31, 2016 at 8:29 PM, Nanley Cherywrote: > > > From: Jason Ekstrand > > > > First off, this is your patch not mine. The patch of mine you based this > on was little more than a skeleton that demonstrated how to use > PIPE_CONTROL. All of the interesting stuff in here is yours. > > Thanks! V2's commit message will be a lot simpler. > > Nanley Chery: > > (rebase) > > - Resolve conflicts with the new anv_batch_emit macro > > (amend) > > - Update commit title > > - Combine all HZ operations into one function > > - Add code for performing HiZ resolve operations > > - Add proper stencil and multisampling support > > - Set the proper clear rectangles > > - Add required cases for aborting an HZ operation > > > > Signed-off-by: Nanley Chery > > --- > > src/intel/vulkan/anv_genX.h| 3 + > > src/intel/vulkan/anv_private.h | 6 ++ > > src/intel/vulkan/gen7_cmd_buffer.c | 5 ++ > > src/intel/vulkan/gen8_cmd_buffer.c | 124 ++ > > +++ > > 4 files changed, 138 insertions(+) > > > > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > > index cf5a232..16de990 100644 > > --- a/src/intel/vulkan/anv_genX.h > > +++ b/src/intel/vulkan/anv_genX.h > > @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > > anv_cmd_buffer *cmd_buffer); > > > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > > *cmd_buffer); > > > > +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, > > + enum anv_hz_op op); > > + > > VkResult > > genX(graphics_pipeline_create)(VkDevice _device, > > struct anv_pipeline_cache *cache, > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > > private.h > > index 5718a19..40325fd 100644 > > --- a/src/intel/vulkan/anv_private.h > > +++ b/src/intel/vulkan/anv_private.h > > @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct > > anv_cmd_buffer *cmd_buffer); > > > > void anv_cmd_buffer_dump(struct 
anv_cmd_buffer *cmd_buffer); > > > > +enum anv_hz_op { > > + ANV_HZ_OP_CLEAR, > > + ANV_HZ_OP_HIZ_RESOLVE, > > + ANV_HZ_OP_DEPTH_RESOLVE, > > +}; > > > > Now that blorp is in its own folder, we could use the blorp_hiz_op enum > instead of rolling our own. That'll make it easier to add gen7 support. > > Sounds good. > > + > > struct anv_fence { > > struct anv_bo bo; > > struct drm_i915_gem_execbuffer2 execbuf; > > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c > > b/src/intel/vulkan/gen7_cmd_buffer.c > > index 61778aa..a057a04 100644 > > --- a/src/intel/vulkan/gen7_cmd_buffer.c > > +++ b/src/intel/vulkan/gen7_cmd_buffer.c > > @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct > > anv_cmd_buffer *cmd_buffer) > > cmd_buffer->state.dirty = 0; > > } > > > > +void > > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum > > anv_hz_op op) > > +{ > > +} > > + > > void genX(CmdSetEvent)( > > VkCommandBuffer commandBuffer, > > VkEvent event, > > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c > > b/src/intel/vulkan/gen8_cmd_buffer.c > > index e22b4e2..4f27350 100644 > > --- a/src/intel/vulkan/gen8_cmd_buffer.c > > +++ b/src/intel/vulkan/gen8_cmd_buffer.c > > @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct > > anv_cmd_buffer *cmd_buffer) > > genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); > > } > > > > + > > +/** > > + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section > > + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear." > > + */ > > +void > > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum > > anv_hz_op op) > > +{ > > + struct anv_cmd_state *cmd_state = _buffer->state; > > + const struct anv_image_view *iview = > > + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); > > + > > + if (iview == NULL || !anv_image_has_hiz(iview->image)) > > + return; > > > > This looks like something that would be better as an assert. 
Silently > doing nothing is probably fine for resolves. For clears on the other hand, > it means silently *not* clearing which would be bad. > > We don't silently skip clearing. Clears are marked as having been performed through the following line's execution later on in this function: /* Mark aspects as cleared */ cmd_state->attachments[ds].pending_clear_aspects = 0; > > + > > + const uint32_t ds = cmd_state->subpass->depth_stencil_attachment; > > + const bool full_surface_op = > > + cmd_state->render_area.extent.width == iview->extent.width > > && > > + cmd_state->render_area.extent.height == > > iview->extent.height; > > > > I think you also need
[Mesa-dev] [PATCH v5] clover: Introduce CLOVER_EXTRA_{COMPILER, LINKER}_OPTIONS
The options specified in the CLOVER_EXTRA_COMPILER_OPTIONS shell variable are appended to the compiler options specified by the OpenCL program, if any. Analogously, the options specified in the CLOVER_EXTRA_LINKER_OPTIONS variable are appended to the linker options and the options specified in the CLOVER_EXTRA_COMPILER_OPTIONS variable. v2: * rename to CLOVER_EXTRA_COMPILER_OPTIONS * use debug_get_option * append to linker options as well v3: code cleanups v4: separate CLOVER_EXTRA_LINKER_OPTIONS options v5: * fix documentation typo * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage Signed-off-by: Vedran Miletić Reviewed-by[v1]: Edward O'Callaghan --- docs/envvars.html | 13 + src/gallium/state_trackers/clover/llvm/invocation.cpp | 11 --- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/envvars.html b/docs/envvars.html index cf57ca5..252b783 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -235,6 +235,19 @@ Setting to "tgsi", for example, will print all the TGSI shaders. See src/mesa/state_tracker/st_debug.c for other options. +Clover state tracker environment variables + + +CLOVER_EXTRA_COMPILER_OPTIONS - allows specifying additional compiler +options. Specified options are appended after the options set by the OpenCL +program in clBuildProgram and/or clCompileProgram. +CLOVER_EXTRA_LINKER_OPTIONS - allows specifying additional linker +options. Specified options are appended after the options set by the OpenCL +linker in clBuildProgram and/or clLinkProgram and options set in the +CLOVER_EXTRA_COMPILER_OPTIONS variable. 
+ + + Softpipe driver environment variables SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index b5e8b52..68b9d2e 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -199,11 +199,13 @@ clover::llvm::compile_program(const std::string , const std::string , const std::string , std::string _log) { + const std::string all_opts = opts + " " + + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", ""); if (has_flag(debug::clc)) - debug::log(".cl", "// Options: " + opts + '\n' + source); + debug::log(".cl", "// Options: " + all_opts + '\n' + source); auto ctx = create_context(r_log); - auto c = create_compiler_instance(target, tokenize(opts + " input.cl"), + auto c = create_compiler_instance(target, tokenize(all_opts + " input.cl"), r_log); auto mod = compile(*ctx, *c, "input.cl", source, headers, target, opts, r_log); @@ -266,7 +268,10 @@ module clover::llvm::link_program(const std::vector , enum pipe_shader_ir ir, const std::string , const std::string , std::string _log) { - std::vector options = tokenize(opts + " input.cl"); + const std::string all_opts = opts + " " + + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", "") + + debug_get_option("CLOVER_EXTRA_LINKER_OPTIONS", ""); + std::vector options = tokenize(all_opts + " input.cl"); const bool create_library = count("-create-library", options); erase_if(equals("-create-library"), options); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] Rename the DEBUG macro to MESA_DEBUG
On 09/07/2016 06:52 PM, Vedran Miletić wrote: > LLVM and Mesa both define the DEBUG macro in incompatible ways. As a > general practice, we should avoid using such generic names when it is > possible to do so. > > This patch renames all occurrences of the DEBUG macro to MESA_DEBUG, > and removes workarounds previously used to enable building Mesa with > LLVM (pop_macro() and push_macro() function calls). > > v2: > * Rename remaining occurrences found by git grep '\' > * Use /* !MESA_DEBUG */ with #else instead of /* MESA_DEBUG */ > > Signed-off-by: Vedran Miletić > Acked-by: Christian König > --- Anyone? Regards, Vedran -- Vedran Miletić vedran.miletic.net ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] clover: Pass unquoted compiler arguments to Clang
OpenCL apps can quote arguments they pass to the OpenCL compiler, most commonly include paths containing spaces. If the Clang OpenCL compiler was called via a shell, the shell would split the arguments with respect to quotes and then remove quotes before passing the arguments to the compiler. Since we call Clang as a library, we have to split the argument with respect to quotes and then remove quotes before passing the arguments. v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS v3: simplify parsing logic, use more C++11 --- src/gallium/state_trackers/clover/llvm/util.hpp | 33 ++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp b/src/gallium/state_trackers/clover/llvm/util.hpp index 8db6f20..c770dd8 100644 --- a/src/gallium/state_trackers/clover/llvm/util.hpp +++ b/src/gallium/state_trackers/clover/llvm/util.hpp @@ -42,11 +42,36 @@ namespace clover { inline std::vector tokenize(const std::string ) { std::vector ss; - std::istringstream iss(s); - std::string t; + std::ostringstream oss; - while (getline(iss, t, ' ')) -ss.push_back(t); + // OpenCL programs can pass a single or double quoted argument, most + // frequently include path. This is useful so that the path containing + // spaces is treated as a single argument, but we should anyhow unquote + // quoted arguments before passing them to the compiler. + // We do not want to avoid using std::string::replace here, as include + // path can contain quotes in file names. 
+ bool escape_next = false; + bool in_quote_double = false; + bool in_quote_single = false; + for (auto c : s) { +if (escape_next) { + oss.put(c); + escape_next = false; +} else if (c == '\\') { + escape_next = true; +} else if (c == '"' && !in_quote_single) { + in_quote_double = !in_quote_double; +} else if (c == '\'' && !in_quote_double) { + in_quote_single = !in_quote_single; +} else if (c != ' ' || in_quote_single || in_quote_double) { + oss.put(c); +} else if (oss.tellp() > 0) { + ss.emplace_back(oss.str()); + oss.str(""); +} + } + if (oss.tellp() > 0) +ss.emplace_back(oss.str()); return ss; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2
2016-09-15 16:27 GMT-04:00 Marek Olšák: > Update your gcc I guess? Sorry, I don't know much about LLVM build > requirements. It works with gcc 5.4.0. I'm using a pretty recent gcc, and the 64-bit compilation works like a charm, it's only the 32-bit compilation that has trouble. Anyway, I've been advised to use a pre-build rep: https://copr.fedorainfracloud.org/coprs/mystro256/polaris-gfx/ And it works pretty well now! I do have a bug with the game (i.e.Alien: Isolation) though: my PC hangs for up to 2 minutes. From my experience, it seems to be because of the shaders compilation, but I'm not 100% sure. Is it a known bug? Is it a problem from the game or from the drivers? Thanks! Romain ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images
This looks good to me. Reviewed-by: Jason EkstrandDo we have tests for this? If not, we should write some. I know there are other image atomic tests in the CTS. They shouldn't be hard to extend. On Sep 19, 2016 9:36 AM, "Lionel Landwerlin" wrote: > OpAtomicLoad/Store should have pointer to images just like the rest of the > atomic operators. These couple of lines were poorly copied from the > ssbo/shared_vars cases (the only ones currently tests by the CTS). > > Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for OpAtomicLoad/Store" > Cc: Timothy Arceri > Cc: Jason Ekstrand > --- > src/compiler/spirv/spirv_to_nir.c | 13 +++-- > 1 file changed, 3 insertions(+), 10 deletions(-) > > diff --git a/src/compiler/spirv/spirv_to_nir.c > b/src/compiler/spirv/spirv_to_nir.c > index 49338b2..12b43ee 100644 > --- a/src/compiler/spirv/spirv_to_nir.c > +++ b/src/compiler/spirv/spirv_to_nir.c > @@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, > case SpvOpAtomicIDecrement: > case SpvOpAtomicIAdd: > case SpvOpAtomicISub: > + case SpvOpAtomicLoad: > case SpvOpAtomicSMin: > case SpvOpAtomicUMin: > case SpvOpAtomicSMax: > @@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp > opcode, >image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; >break; > > - case SpvOpAtomicLoad: { > - image.image = > - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; > - break; > - } > - > - case SpvOpAtomicStore: { > - image.image = > - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; > + case SpvOpAtomicStore: > + image = *vtn_value(b, w[1], vtn_value_type_image_pointer)->image; >break; > - } > > case SpvOpImageQuerySize: >image.image = > -- > 2.9.3 > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] clover: assert struct argument is compiled usably
On 09/19/2016 07:08 PM, Vedran Miletić wrote: > On 07/28/2016 07:52 AM, Francisco Jerez wrote: >> Emil Velikovwrites: >> >>> On 6 June 2016 at 00:02, Vedran Miletić wrote: On 06/04/2016 04:18 AM, Francisco Jerez wrote: > > Serge Martin writes: > >> From: Vedran Miletić >> >> Make sure that a struct argument did not get compiled into a pointer >> type with the byval attribute. If we try to handle the pointer with >> byval, we end up with the pointer size instead of the struct size. >> > Ugh, is that a bug in the code below? How are byval pointers supposed > to be handled here? Exactly as if the argument wasn't a pointer at all > by providing a copy of the pointed-to object as-is in the kernel input > buffer? In that case wouldn't the code below need to pass the correct > size of the pointed-to object as target/api size rather than the size of > the pointer? > Yes, byval+pointer should be handled as there is no pointer at all. I have tried passing the correct size, but IIRC LLVM AMDGPU backend does not generate correct asm for byval+pointer variant. The simple solution is to fail with an assert here unless Clang generates code both Clover and the backend can handle. >>> Gents, can anyone confirm if the series is still applicable for master >>> or it's been superseded ? >>> >> Hi Emil, I don't think PATCH 1 is useful, but v1.1 of PATCH 2 still >> makes sense. It looks like it's going to need some minor rework though >> for it to apply cleanly on master. >> >>> Thanks >>> Emil > > Hi Emil, Francisco, Serge, > > now that PATCH 1 is merged, can we also merge PATCH 2? > > Thanks, > Vedran > Oops, it's the other way round. Anyhow, Serge's patch "clover: fix getting struct args api size" got merged, and I am asking to merge this one. Regards, Vedran -- Vedran Miletić vedran.miletic.net ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] clover: assert struct argument is compiled usably
On 07/28/2016 07:52 AM, Francisco Jerez wrote: > Emil Velikovwrites: > >> On 6 June 2016 at 00:02, Vedran Miletić wrote: >>> On 06/04/2016 04:18 AM, Francisco Jerez wrote: Serge Martin writes: > From: Vedran Miletić > > Make sure that a struct argument did not get compiled into a pointer > type with the byval attribute. If we try to handle the pointer with > byval, we end up with the pointer size instead of the struct size. > Ugh, is that a bug in the code below? How are byval pointers supposed to be handled here? Exactly as if the argument wasn't a pointer at all by providing a copy of the pointed-to object as-is in the kernel input buffer? In that case wouldn't the code below need to pass the correct size of the pointed-to object as target/api size rather than the size of the pointer? >>> >>> Yes, byval+pointer should be handled as there is no pointer at all. >>> >>> I have tried passing the correct size, but IIRC LLVM AMDGPU backend does not >>> generate correct asm for byval+pointer variant. The simple solution is to >>> fail with an assert here unless Clang generates code both Clover and the >>> backend can handle. >>> >> Gents, can anyone confirm if the series is still applicable for master >> or it's been superseded ? >> > Hi Emil, I don't think PATCH 1 is useful, but v1.1 of PATCH 2 still > makes sense. It looks like it's going to need some minor rework though > for it to apply cleanly on master. > >> Thanks >> Emil Hi Emil, Francisco, Serge, now that PATCH 1 is merged, can we also merge PATCH 2? Thanks, Vedran -- Vedran Miletić vedran.miletic.net ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images
On 19/09/16 18:02, Jason Ekstrand wrote: This looks good to me. Reviewed-by: Jason Ekstrand> Do we have tests for this? If not, we should write some. I know there are other image atomic tests in the CTS. They shouldn't be hard to extend. Not that I saw. I'll add some. On Sep 19, 2016 9:36 AM, "Lionel Landwerlin" > wrote: OpAtomicLoad/Store should have pointer to images just like the rest of the atomic operators. These couple of lines were poorly copied from the ssbo/shared_vars cases (the only ones currently tests by the CTS). Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for OpAtomicLoad/Store" Cc: Timothy Arceri > Cc: Jason Ekstrand > --- src/compiler/spirv/spirv_to_nir.c | 13 +++-- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 49338b2..12b43ee 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, case SpvOpAtomicIDecrement: case SpvOpAtomicIAdd: case SpvOpAtomicISub: + case SpvOpAtomicLoad: case SpvOpAtomicSMin: case SpvOpAtomicUMin: case SpvOpAtomicSMax: @@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; break; - case SpvOpAtomicLoad: { - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - break; - } - - case SpvOpAtomicStore: { - image.image = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + case SpvOpAtomicStore: + image = *vtn_value(b, w[1], vtn_value_type_image_pointer)->image; break; - } case SpvOpImageQuerySize: image.image = -- 2.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images
OpAtomicLoad/Store should have pointer to images just like the rest of the atomic operators. These couple of lines were poorly copied from the ssbo/shared_vars cases (the only ones currently tested by the CTS). Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for OpAtomicLoad/Store" Cc: Timothy Arceri Cc: Jason Ekstrand --- src/compiler/spirv/spirv_to_nir.c | 13 +++-- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 49338b2..12b43ee 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, case SpvOpAtomicIDecrement: case SpvOpAtomicIAdd: case SpvOpAtomicISub: + case SpvOpAtomicLoad: case SpvOpAtomicSMin: case SpvOpAtomicUMin: case SpvOpAtomicSMax: @@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; break; - case SpvOpAtomicLoad: { - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - break; - } - - case SpvOpAtomicStore: { - image.image = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + case SpvOpAtomicStore: + image = *vtn_value(b, w[1], vtn_value_type_image_pointer)->image; break; - } case SpvOpImageQuerySize: image.image = -- 2.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more
https://bugs.freedesktop.org/show_bug.cgi?id=97863 --- Comment #2 from Elio--- Forgot to include the website: www.khronos.org/registry/webgl/conformance-suites/1.0.3/webgl-conformance-tests.html -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more
https://bugs.freedesktop.org/show_bug.cgi?id=97863 Eliochanged: What|Removed |Added Summary|[BXT] Webglc is failing a |[BXT] Webglc is failing a |lot of tests related to |lot of tests related to |extensions textures |extensions, textures, |uniforms and more |uniforms and more -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions textures uniforms and more
https://bugs.freedesktop.org/show_bug.cgi?id=97863 Bug ID: 97863 Summary: [BXT] Webglc is failing a lot of tests related to extensions textures uniforms and more Product: Mesa Version: unspecified Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: elio.martinez.mon...@intel.com QA Contact: mesa-dev@lists.freedesktop.org Created attachment 126626 --> https://bugs.freedesktop.org/attachment.cgi?id=126626=edit Results summary Webglc execution is suffering several failures during execution.(Google Chrome). Having about 1425 failures from 22644 available tests. Google chrome is sending the message "Rats! WebGL hit a snag." Software configuration: OS: Ubuntu 16.04 Kernel: 4.7.2 from kernel.org Graphic stack: Component : drm tag : libdrm-2.4.68 Component : mesa tag : mesa-12.0.1 Component : xf86-video-intel tag : 2.99.917-701-g205146b Component : libva tag : libva-1.7.2.pre1 Component : intel-driver tag : 1.7.2.pre1 Component : cairo tag : 1.15.2 Component : xserver tag : xorg-server-1.18.3 Component : macros tag : util-macros-1.19.0-2-gd7acec2 Component : intel-gpu-tools tag : intel-gpu-tools-1.16 Execution: 1.-Download google chrome latest stable version 2.-Open Google chrome with the following command line in terminal " google-chrome --enable-webgl --ignore-gpu-blacklist" 3.-On the browser click over "run test" button Expected result: The tests should run smoothly without pauses or error messages on the status bar. Actual result: As it is described before the browser sends a lot of failures on results html and is constantly sending "Rats! WebGL hit a snag." Attaching logs and results -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions textures uniforms and more
https://bugs.freedesktop.org/show_bug.cgi?id=97863 --- Comment #1 from Elio--- Created attachment 126627 --> https://bugs.freedesktop.org/attachment.cgi?id=126627=edit Dmesg -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle: > From: Dave Airlie> > This enables 64-bit integer support in gallivm and > llvmpipe. > > v2: add conversion opcodes. > v3: > - PIPE_CAP_INT64 is not there yet > - restrict DIV/MOD defaults to the CPU, as for 32 bits > - TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64 > > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.c| 2 + > src/gallium/auxiliary/gallivm/lp_bld_tgsi.h| 4 + > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 > + > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 40 +- > src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +- > 5 files changed, 515 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > index 1ef6ae4..b397261 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > @@ -357,20 +357,22 @@ lp_build_emit_fetch( > if (reg->Register.Absolute) { >switch (stype) { >case TGSI_TYPE_FLOAT: >case TGSI_TYPE_DOUBLE: >case TGSI_TYPE_UNTYPED: >/* modifiers on movs assume data is float */ > res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res); > break; >case TGSI_TYPE_UNSIGNED: >case TGSI_TYPE_SIGNED: > + case TGSI_TYPE_UNSIGNED64: > + case TGSI_TYPE_SIGNED64: >case TGSI_TYPE_VOID: >default: > /* abs modifier is only legal on floating point types */ > assert(0); > break; >} > } > > if (reg->Register.Negate) { >switch (stype) { > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > index de1150c..b6b3fe3 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > @@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct > lp_build_tgsi_context *, > unsigned); > > struct lp_build_tgsi_context > { > struct lp_build_context base; > > struct lp_build_context uint_bld; > struct lp_build_context int_bld; > > struct 
lp_build_context dbl_bld; > + > + struct lp_build_context uint64_bld; > + struct lp_build_context int64_bld; > + > /** This array stores functions that are used to transform TGSI opcodes to > * LLVM instructions. > */ > struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST]; > > /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action > * should compute 1 / sqrt (src0.x) */ > struct lp_build_tgsi_action rsq_action; > > struct lp_build_tgsi_action sqrt_action; > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > index 1ee9704..d924770 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > @@ -1086,20 +1086,230 @@ static void dfrac_emit( > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data) > { > LLVMValueRef tmp; > tmp = lp_build_floor(_base->dbl_bld, > emit_data->args[0]); > emit_data->output[emit_data->chan] = > LLVMBuildFSub(bld_base->base.gallivm->builder, > emit_data->args[0], > tmp, ""); > } > > +/* TGSI_OPCODE_U64MUL */ > +static void > +u64mul_emit( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + emit_data->output[emit_data->chan] = lp_build_mul(_base->uint64_bld, > + emit_data->args[0], emit_data->args[1]); > +} > + > +/* TGSI_OPCODE_U64MOD */ > +static void > +u64mod_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMValueRef div_mask = lp_build_cmp(_base->uint64_bld, > +PIPE_FUNC_EQUAL, emit_data->args[1], > +bld_base->uint64_bld.zero); > + /* We want to make sure that we never divide/mod by zero to not > +* generate sigfpe. We don't want to crash just because the > +* shader is doing something weird. 
*/ > + LLVMValueRef divisor = LLVMBuildOr(builder, > + div_mask, > + emit_data->args[1], ""); > + LLVMValueRef result = lp_build_mod(_base->uint64_bld, > + emit_data->args[0], divisor); > + /* umod by zero doesn't have a guaranteed return value chose
Re: [Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle: > From: Dave Airlie> > This just adds the basic support for 64-bit opcodes, > and the new types. > > v2: add conversion opcodes. > add documentation. > v3: > - make docs more consistent > - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64 > > Reviewed-by: Marek Olšák (v2) > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/tgsi/tgsi_info.c | 92 +-- > src/gallium/auxiliary/tgsi/tgsi_info.h | 4 +- > src/gallium/docs/source/tgsi.rst | 240 > + > src/gallium/include/pipe/p_shader_tokens.h | 46 -- > 4 files changed, 362 insertions(+), 20 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index 60e0f2c..18e1bc8 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, > { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, > + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, > { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, > { 1, 1, 0, 0, 0, 0, 0, 
REPL, "EX2", TGSI_OPCODE_EX2 }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, > - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, > { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, > { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, > { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, > { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, > { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, 
"UP4B", TGSI_OPCODE_UP4B }, > { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, > + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, > { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, > - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ > + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, > { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, > { 0, 0, 0, 0, 0, 0, 0, NONE, "RET",
Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.
It all looks fine to me. Feel free to add a Reviewed-by: Jason Ekstrand That said, my knowledge of the details of the DRI vfuncs is very limited so I'd like to see Emil or Axel sign off on it too, especially since they were the ones who had all the comments. --Jason On Mon, Sep 19, 2016 at 3:55 AM, Weng, Chuanbo wrote: > Seems they haven't got lost, because I see these patches in the > mailing-list webpage: > https://lists.freedesktop.org/archives/mesa-dev/2016-September/128847.html > https://lists.freedesktop.org/archives/mesa-dev/2016-September/128845.html > https://lists.freedesktop.org/archives/mesa-dev/2016-September/128846.html > https://lists.freedesktop.org/archives/mesa-dev/2016-September/128844.html > > And my gmail account also receives these patches. > > Thanks, > Chuanbo Weng > > > -Original Message- > From: Nicolai Hähnle [mailto:nhaeh...@gmail.com] > Sent: Monday, September 19, 2016 4:53 PM > To: Weng, Chuanbo ; mesa-dev@lists.freedesktop.org; > emil.l.veli...@gmail.com > Subject: Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA > return corresponding offset. > > Those patches got lost somehow? Maybe they weren't sent out as replies to > your first email, check the git configuration for sendemail.thread or the > git send-email --thread flag. > > Cheers, > Nicolai > > On 18.09.2016 09:04, Weng, Chuanbo wrote: > > Ping for review. Thanks. > > > > -Original Message- > > From: Weng, Chuanbo > > Sent: Wednesday, September 14, 2016 1:07 AM > > To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com > > Cc: Weng, Chuanbo > > Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return > corresponding offset. > > > > This patchset makes eglExportDMABUFImageMESA return corresponding offset > of EGLImage instead of 0 on intel platform with classic dri driver(i965). > > > > v2: Add version check of __DRIimageExtension implementation in egl > loader (Suggested by Axel Davy). 
> > > > v3: Don't add version check of __DRIimageExtension implementation in > > egl loader. Set the offset only when queryImage() succeeds. (Suggested > > by Emil > > Velikov) > > > > Chuanbo Weng (3): > > dri: add offset attribute and bump version of EGLImage extensions. > > egl: return corresponding offset of EGLImage instead of 0. > > i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET. > > > > include/GL/internal/dri_interface.h | 4 +++- > > src/egl/drivers/dri2/egl_dri2.c | 8 +++- > > src/mesa/drivers/dri/i965/intel_screen.c | 9 +++-- > > 3 files changed, 17 insertions(+), 4 deletions(-) > > > > -- > > 1.9.1 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 88354] glXSwapBuffers() can cause BadMatch or lock X when performed repeatedly
https://bugs.freedesktop.org/show_bug.cgi?id=88354 Eero Tamminenchanged: What|Removed |Added Status|NEW |NEEDINFO --- Comment #3 from Eero Tamminen --- On SKL with Ubuntu 16.04, using latest Mesa from Git everything seems to work fine, same with older Mesa 11.2 coming with Ubuntu. No crashes / locks either with Intel DDX or modesetting, with DRI3 or DRI2. I would think that the issue is either fixed in Mesa, or culprit is something else than Mesa. Can you try newer Mesa, and if that doesn't help, newer Intel DDX version? Btw. your test program shows interesting difference between DRI3 & DRI2. I increased the test loop count a bit. With Intel XX / DRI2, test goes through 10 000 rounds "instantly". With DRI3, it takes 5-10x longer, and perf says following of the Xorg 100% CPU usage: - 23.83% Xorg Xorg [.] SyncAddTriggerToSyncObject 18.20% Xorg intel_drv.so [.] 0x0010a5e5 16.83% Xorg Xorg [.] TimerSet 16.49% Xorg Xorg [.] present_pixmap 14.41% Xorg Xorg [.] present_event_notify 7.96% Xorg Xorg [.] SyncDeleteTriggerFromSyncObject ... - With modesetting instead of Intel DDX: - test takes even longer and seems to be limited to 60 FPS - with DRI2, CPU usage is ~1% (LIBGL_DRI3_DISABLE=1 Mesa option) - with DRI3, CPU usage is 100% - Overview: 98.99% Xorg 80.49% [kernel.kallsyms] 6.66% [unknown] (I think this is on kernel side also) 5.12% [vdso] 3.99% modesetting_drv.so 1.68% libc-2.23.so 1.64% libdrm.so.2.4.0 Details: 16.55% Xorg [kernel.kallsyms][k] copy_user_enhanced_fast_string 9.27% Xorg [kernel.kallsyms][k] do_sys_poll 8.13% Xorg [kernel.kallsyms][.] 
entry_SYSCALL_64_fastpath 5.43% Xorg [kernel.kallsyms][k] _raw_spin_unlock_irqrestore 4.51% Xorg [kernel.kallsyms][k] entry_SYSCALL_64_after_swapgs 4.44% Xorg [kernel.kallsyms][k] _raw_spin_lock_irqsave 4.20% Xorg [kernel.kallsyms][k] kfree 3.79% Xorg [kernel.kallsyms][k] drm_ioctl 3.48% Xorg [kernel.kallsyms][k] drm_wait_vblank 3.44% Xorg [kernel.kallsyms][k] kmem_cache_alloc_trace - In general, with DRI3, first (few thousand) swaps go faster than the later ones. -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 0/6] gallium/tgsi: 64-bit integer foundations
This series is, Reviewed-by: Edward O'Callaghan On 09/19/2016 11:08 PM, Nicolai Hähnle wrote: > Hi everybody, > > here's a v2 of the series. Compared to previously, I have now squashed my > changes in. I have also included Roland's comments on the tgsi.rst docs, > and, following his comment, I have changed the 32-bit to 64-bit conversion > so that there is now a TGSI_OPCODE_I2I64 and a TGSI_OPCODE_U2I64. The > former does sign extension, the latter does zero extension (and the latter > could be called U2U64). > > Since this doesn't actually turn any extensions on yet, I plan to push this > tomorrow unless there are objections or further comments before then. > > Cheers, > Nicolai > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/9] gallium/tgsi: 64-bit integer foundations
On 16.09.2016 19:19, Ian Romanick wrote: On 09/16/2016 06:48 AM, Nicolai Hähnle wrote: Hi all, this is really Dave's work, with a few touch-ups from me that I think make sense. I've kept those separate with the intention to squash. I'd like to land these in master even before the main ARB_gpu_shader_int64 stuff lands (that is currently in Ian's court). If you guys are comfortable enabling it in radeonsi, I think the rest of the code is close enough to ready to land. I'm sure that we'll find more bugs as more tests become available, but that's always the case. I've updated my arb_gpu_shader_int64 tree, but it's intertwined with some other stuff. I can de-tangle it easy enough. It makes sense to re-test with whatever additional test coverage you've come up with by now. Although I suppose the next release is far enough off that there's plenty of time to fix things up. Cheers, Nicolai The reason is that radeonsi's ARB_query_buffer_object support needs 64-bit integers in shaders, and for that it's convenient to have all the TGSI opcodes and gallivm bits in place already. Any objections? Reviews? Thanks, Nicolai ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 6/6] gallivm: support negation on 64-bit integers
From: Nicolai Hähnle This should be analogous to 32-bit integers. --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index b397261..68ac695 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -382,20 +382,24 @@ lp_build_emit_fetch( res = lp_build_negate( &bld_base->base, res ); break; case TGSI_TYPE_DOUBLE: /* no double build context */ assert(0); break; case TGSI_TYPE_SIGNED: case TGSI_TYPE_UNSIGNED: res = lp_build_negate( &bld_base->int_bld, res ); break; + case TGSI_TYPE_SIGNED64: + case TGSI_TYPE_UNSIGNED64: + res = lp_build_negate( &bld_base->int64_bld, res ); + break; case TGSI_TYPE_VOID: default: assert(0); break; } } /* * Swizzle the argument */ -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/6] tgsi/softpipe: prepare ARB_gpu_shader_int64 support. (v3)
From: Dave AirlieThis adds all the opcodes to tgsi_exec for softpipe to use. v2: add conversion opcodes. v3: - no PIPE_CAP_INT64 yet - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64 Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 673 ++--- 1 file changed, 541 insertions(+), 132 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 37f3fc7..7b5c56d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -687,25 +687,265 @@ micro_trunc(union tgsi_exec_channel *dst, static void micro_u2d(union tgsi_double_channel *dst, const union tgsi_exec_channel *src) { dst->d[0] = (double)src->u[0]; dst->d[1] = (double)src->u[1]; dst->d[2] = (double)src->u[2]; dst->d[3] = (double)src->u[3]; } +static void +micro_i64abs(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; + dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; + dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; + dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; +} + +static void +micro_i64sgn(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; + dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; + dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; + dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; +} + +static void +micro_i64neg(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = -src->i64[0]; + dst->i64[1] = -src->i64[1]; + dst->i64[2] = -src->i64[2]; + dst->i64[3] = -src->i64[3]; +} + +static void +micro_u64seq(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? 
~0U : 0U; + dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_u64sne(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64slt(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; +} + +static void +micro_i64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; +} + +static void +micro_u64sge(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; + dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; + dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; + dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? 
~0U : 0U; +} + +static void +micro_u64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; + dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; + dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; + dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; +} + +static void +micro_i64max(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; + dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; + dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; + dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] :
[Mesa-dev] [PATCH v2 5/6] radeonsi: prepare 64-bit integer support. (v2)
From: Dave Airliev2: - no PIPE_CAP_INT64 yet - emit DIV/MOD without the divide-by-zero workaround Reviewed-by: Marek Olšák (v1) Signed-off-by: Dave Airlie --- .../drivers/radeon/radeon_setup_tgsi_llvm.c| 69 +++--- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 4fa43cd..bcb3143 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -44,20 +44,23 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type) { LLVMContextRef ctx = bld_base->base.gallivm->context; switch (type) { case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: return LLVMInt32TypeInContext(ctx); + case TGSI_TYPE_UNSIGNED64: + case TGSI_TYPE_SIGNED64: + return LLVMInt64TypeInContext(ctx); case TGSI_TYPE_DOUBLE: return LLVMDoubleTypeInContext(ctx); case TGSI_TYPE_UNTYPED: case TGSI_TYPE_FLOAT: return LLVMFloatTypeInContext(ctx); default: break; } return 0; } @@ -1173,26 +1176,32 @@ void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base static void emit_icmp(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { unsigned pred; LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMContextRef context = bld_base->base.gallivm->context; switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break; - case TGSI_OPCODE_USNE: pred = LLVMIntNE; break; - case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break; - case TGSI_OPCODE_USLT: pred = LLVMIntULT; break; - case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break; - case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break; + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break; + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break; + case TGSI_OPCODE_USGE: + case 
TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break; + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break; + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break; + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break; default: assert(!"unknown instruction"); pred = 0; break; } LLVMValueRef v = LLVMBuildICmp(builder, pred, emit_data->args[0], emit_data->args[1],""); v = LLVMBuildSExtOrBitCast(builder, v, @@ -1434,21 +1443,26 @@ static void emit_xor(const struct lp_build_tgsi_action *action, } static void emit_ssg(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef cmp, val; - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) { + cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], ""); + cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), ""); + } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, ""); val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], ""); cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, ""); val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), ""); } else { // float SSG cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, ""); val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], ""); cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, ""); val = LLVMBuildSelect(builder, cmp, 
val, LLVMConstReal(bld_base->base.elem_type, -1), ""); } @@ -1698,29 +1712,33 @@ static void emit_minmax_int(const struct
[Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.
From: Dave AirlieThis enables 64-bit integer support in gallivm and llvmpipe. v2: add conversion opcodes. v3: - PIPE_CAP_INT64 is not there yet - restrict DIV/MOD defaults to the CPU, as for 32 bits - TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64 Signed-off-by: Dave Airlie --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c| 2 + src/gallium/auxiliary/gallivm/lp_bld_tgsi.h| 4 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 40 +- src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +- 5 files changed, 515 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 1ef6ae4..b397261 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -357,20 +357,22 @@ lp_build_emit_fetch( if (reg->Register.Absolute) { switch (stype) { case TGSI_TYPE_FLOAT: case TGSI_TYPE_DOUBLE: case TGSI_TYPE_UNTYPED: /* modifiers on movs assume data is float */ res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res); break; case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: + case TGSI_TYPE_UNSIGNED64: + case TGSI_TYPE_SIGNED64: case TGSI_TYPE_VOID: default: /* abs modifier is only legal on floating point types */ assert(0); break; } } if (reg->Register.Negate) { switch (stype) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index de1150c..b6b3fe3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *, unsigned); struct lp_build_tgsi_context { struct lp_build_context base; struct lp_build_context uint_bld; struct lp_build_context int_bld; struct lp_build_context dbl_bld; + + struct lp_build_context uint64_bld; + struct lp_build_context int64_bld; + /** This array stores functions that are used to transform TGSI 
opcodes to * LLVM instructions. */ struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST]; /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action * should compute 1 / sqrt (src0.x) */ struct lp_build_tgsi_action rsq_action; struct lp_build_tgsi_action sqrt_action; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 1ee9704..d924770 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -1086,20 +1086,230 @@ static void dfrac_emit( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { LLVMValueRef tmp; tmp = lp_build_floor(_base->dbl_bld, emit_data->args[0]); emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder, emit_data->args[0], tmp, ""); } +/* TGSI_OPCODE_U64MUL */ +static void +u64mul_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_mul(_base->uint64_bld, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_U64MOD */ +static void +u64mod_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef div_mask = lp_build_cmp(_base->uint64_bld, +PIPE_FUNC_EQUAL, emit_data->args[1], +bld_base->uint64_bld.zero); + /* We want to make sure that we never divide/mod by zero to not +* generate sigfpe. We don't want to crash just because the +* shader is doing something weird. */ + LLVMValueRef divisor = LLVMBuildOr(builder, + div_mask, + emit_data->args[1], ""); + LLVMValueRef result = lp_build_mod(_base->uint64_bld, + emit_data->args[0], divisor); + /* umod by zero doesn't have a guaranteed return value chose -1 for now. 
*/ + emit_data->output[emit_data->chan] = LLVMBuildOr(builder, +div_mask, +result, ""); +} + +/* TGSI_OPCODE_MOD (CPU Only) */ +static void +i64mod_emit_cpu( + const struct
[Mesa-dev] [PATCH v2 2/6] gallium/tgsi: add support for 64-bit integer immediates.
From: Dave AirlieThis adds support to TGSI for 64-bit integer immediates. Reviewed-by: Marek Olšák Reviewed-by: Nicolai Hähnle Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 14 ++ src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 ++ src/gallium/auxiliary/tgsi/tgsi_parse.c| 2 ++ src/gallium/auxiliary/tgsi/tgsi_text.c | 44 + src/gallium/auxiliary/tgsi/tgsi_ureg.c | 45 -- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 10 +++ src/gallium/include/pipe/p_shader_tokens.h | 2 ++ 7 files changed, 117 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d59b7ff..614bcb2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -247,20 +247,34 @@ dump_imm_data(struct tgsi_iterate_context *iter, assert( num_tokens <= 4 ); for (i = 0; i < num_tokens; i++) { switch (data_type) { case TGSI_IMM_FLOAT64: { union di d; d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; DBL( d.d ); i++; break; } + case TGSI_IMM_INT64: { + union di d; + d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.i ); + i++; + break; + } + case TGSI_IMM_UINT64: { + union di d; + d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32; + UID( d.ui ); + i++; + break; + } case TGSI_IMM_FLOAT32: if (ctx->dump_float_as_hex) HFLT( data[i].Float ); else FLT( data[i].Float ); break; case TGSI_IMM_UINT32: UID(data[i].Uint); break; case TGSI_IMM_INT32: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index aff35e6..37f3fc7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -70,20 +70,22 @@ #define FAST_MATH 0 #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT1 #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 union tgsi_double_channel { double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; + uint64_t u64[TGSI_QUAD_SIZE]; + int64_t i64[TGSI_QUAD_SIZE]; }; struct tgsi_double_vector { union 
tgsi_double_channel xy; union tgsi_double_channel zw; }; static void micro_abs(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 16564dd..940af7d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -148,26 +148,28 @@ tgsi_parse_token( switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: case TGSI_IMM_FLOAT64: for (i = 0; i < imm_count; i++) { next_token(ctx, >u[i].Float); } break; case TGSI_IMM_UINT32: + case TGSI_IMM_UINT64: for (i = 0; i < imm_count; i++) { next_token(ctx, >u[i].Uint); } break; case TGSI_IMM_INT32: + case TGSI_IMM_INT64: for (i = 0; i < imm_count; i++) { next_token(ctx, >u[i].Int); } break; default: assert( 0 ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 8bdec06..be80842 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -288,20 +288,56 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) v.dval = strtod(cur, (char**)pcur); if (*pcur == cur) return FALSE; *val0 = v.uval[0]; *val1 = v.uval[1]; return TRUE; } +static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + int64_t i64val; + uint32_t uval[2]; + } v; + + v.i64val = strtoll(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + +static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1) +{ + const char *cur = *pcur; + union { + uint64_t u64val; + uint32_t uval[2]; + } v; + + v.u64val = strtoull(cur, (char**)pcur, 0); + if (*pcur == cur) + return FALSE; + + *val0 = v.uval[0]; + *val1 = v.uval[1]; + + return TRUE; +} + struct translate_ctx { const char *text; const char *cur; struct tgsi_token *tokens; struct tgsi_token 
*tokens_cur; struct tgsi_token *tokens_end; struct tgsi_header *header; unsigned processor : 4; unsigned implied_array_size : 6;
[Mesa-dev] [PATCH v2 0/6] gallium/tgsi: 64-bit integer foundations
Hi everybody, here's a v2 of the series. Compared to previously, I have now squashed my changes in. I have also included Roland's comments on the tgsi.rst docs, and, following his comment, I have changed the 32-bit to 64-bit conversion so that there is now a TGSI_OPCODE_I2I64 and a TGSI_OPCODE_U2I64. The former does sign extension, the latter does zero extension (and the latter could be called U2U64). Since this doesn't actually turn any extensions on yet, I plan to push this tomorrow unless there are objections or further comments before then. Cheers, Nicolai ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)
From: Dave AirlieThis just adds the basic support for 64-bit opcodes, and the new types. v2: add conversion opcodes. add documentation. v3: - make docs more consistent - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64 Reviewed-by: Marek Olšák (v2) Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_info.c | 92 +-- src/gallium/auxiliary/tgsi/tgsi_info.h | 4 +- src/gallium/docs/source/tgsi.rst | 240 + src/gallium/include/pipe/p_shader_tokens.h | 46 -- 4 files changed, 362 insertions(+), 20 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 60e0f2c..18e1bc8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, { 
1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 }, { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY }, { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B }, { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ + { 1, 1, 0, 0, 0, 
0, 1, COMP, "U642F", TGSI_OPCODE_U642F }, + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D }, { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB",
Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter
Hi Andy, On Mon, Sep 19, 2016 at 3:27 PM, Andy Furniss wrote: > Nayan Deshmukh wrote: > >> Hi Andy, >> >> Thanks for testing the patches!! >> >> On Mon, Sep 19, 2016 at 5:24 AM, Andy Furniss >> wrote: >> >> Andy Furniss wrote: >>> >>> Nayan Deshmukh wrote: This series implements lanczos interpolation filter. > > Andy, I have made some changes to the code. Can you test the > patches? I hope the artifacts are reduced this time. > > The artifacts are still there. >>> :( >>> >> >> The higher levels involve a hell of a lot of calculations per pixel so the >> decreased fps is expected. I was thinking of having only 2 levels for >> lanczos filter, i.e. 2 and 4. >> >> Christian, will it be fine if we only have 2 levels (with kernel size >> 2 and 4) of lanczos filter corresponding to HIGH_QUALITY_SCALING_L2 >> and L3? >> > > 2 and 4 still have the offset issue, which does sometimes cause the > white line. > Hi Andy, I am able to reproduce the offset issue, I will try to work on the patch tonight. Regards, Nayan. > > I've found another issue with 2 and 4 = unscaled with a raster locked > res test the 1 pix detail will be lost. The detail does re-appear if you > scale up. > > bz2 compressed vid showing the issue - > > https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.
Seems they haven't got lost, because I see these patches in the mailing-list webpage: https://lists.freedesktop.org/archives/mesa-dev/2016-September/128847.html https://lists.freedesktop.org/archives/mesa-dev/2016-September/128845.html https://lists.freedesktop.org/archives/mesa-dev/2016-September/128846.html https://lists.freedesktop.org/archives/mesa-dev/2016-September/128844.html And my gmail account also receives these patches. Thanks, Chuanbo Weng -Original Message- From: Nicolai Hähnle [mailto:nhaeh...@gmail.com] Sent: Monday, September 19, 2016 4:53 PM To: Weng, Chuanbo; mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com Subject: Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset. Those patches got lost somehow? Maybe they weren't sent out as replies to your first email, check the git configuration for sendemail.thread or the git send-email --thread flag. Cheers, Nicolai On 18.09.2016 09:04, Weng, Chuanbo wrote: > Ping for review. Thanks. > > -Original Message- > From: Weng, Chuanbo > Sent: Wednesday, September 14, 2016 1:07 AM > To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com > Cc: Weng, Chuanbo > Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding > offset. > > This patchset makes eglExportDMABUFImageMESA return corresponding offset of > EGLImage instead of 0 on intel platform with classic dri driver(i965). > > v2: Add version check of __DRIimageExtension implementation in egl loader > (Suggested by Axel Davy). > > v3: Don't add version check of __DRIimageExtension implementation in > egl loader. Set the offset only when queryImage() succeeds. (Suggested > by Emil > Velikov) > > Chuanbo Weng (3): > dri: add offset attribute and bump version of EGLImage extensions. > egl: return corresponding offset of EGLImage instead of 0. > i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET. 
> > include/GL/internal/dri_interface.h | 4 +++- > src/egl/drivers/dri2/egl_dri2.c | 8 +++- > src/mesa/drivers/dri/i965/intel_screen.c | 9 +++-- > 3 files changed, 17 insertions(+), 4 deletions(-) > > -- > 1.9.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter
Andy Furniss wrote: Andy Furniss wrote: bz2 compressed vid showing the issue - https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms To be clear, this is just the test rez vid I used so you can possibly recreate the issue yourself. Hmm, interesting, with bicubic, hqscaling=1 this vid behaves differently with mplayer compared to mpv, the latter looks like it's scaling a bit, maybe there's an off-by-one somewhere in mpv code. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter
Andy Furniss wrote: bz2 compressed vid showing the issue - https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms To be clear, this is just the test rez vid I used so you can possibly recreate the issue yourself. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter
Nayan Deshmukh wrote: Hi Andy, Thanks for testing the patches!! On Mon, Sep 19, 2016 at 5:24 AM, Andy Furniss wrote: Andy Furniss wrote: Nayan Deshmukh wrote: This series implements lanczos interpolation filter. Andy, I have made some changes to the code. Can you test the patches? I hope the artifacts are reduced this time. The artifacts are still there. :( The higher levels involve a hell of a lot of calculations per pixel so the decreased fps is expected. I was thinking of having only 2 levels for lanczos filter, i.e. 2 and 4. Christian, will it be fine if we only have 2 levels (with kernel size 2 and 4) of lanczos filter corresponding to HIGH_QUALITY_SCALING_L2 and L3? 2 and 4 still have the offset issue, which does sometimes cause the white line. I've found another issue with 2 and 4 = unscaled with a raster locked res test the 1 pix detail will be lost. The detail does re-appear if you scale up. bz2 compressed vid showing the issue - https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.
Those patches got lost somehow? Maybe they weren't sent out as replies to your first email, check the git configuration for sendemail.thread or the git send-email --thread flag. Cheers, Nicolai On 18.09.2016 09:04, Weng, Chuanbo wrote: Ping for review. Thanks. -Original Message- From: Weng, Chuanbo Sent: Wednesday, September 14, 2016 1:07 AM To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com Cc: Weng, Chuanbo Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset. This patchset makes eglExportDMABUFImageMESA return corresponding offset of EGLImage instead of 0 on intel platform with classic dri driver (i965). v2: Add version check of __DRIimageExtension implementation in egl loader (Suggested by Axel Davy). v3: Don't add version check of __DRIimageExtension implementation in egl loader. Set the offset only when queryImage() succeeds. (Suggested by Emil Velikov) Chuanbo Weng (3): dri: add offset attribute and bump version of EGLImage extensions. egl: return corresponding offset of EGLImage instead of 0. i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET. include/GL/internal/dri_interface.h | 4 +++- src/egl/drivers/dri2/egl_dri2.c | 8 +++- src/mesa/drivers/dri/i965/intel_screen.c | 9 +++-- 3 files changed, 17 insertions(+), 4 deletions(-) -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()
Reviewed-by: Nicolai Hähnle However, you might want to check with the VMWare guys. I seem to recall that MSVC is a bit peculiar with some of these library functions. Cheers, Nicolai On 14.09.2016 20:37, Samuel Pitoiset wrote: This allows to use hexadecimal numbers which are automatically detected by strtol() when the base is 0. Signed-off-by: Samuel Pitoiset --- src/gallium/auxiliary/util/u_debug.c | 25 - 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 4619526..dd3e167 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long dfault) const char *str; str = os_get_option(name); - if (!str) + if (!str) { result = dfault; - else { - long sign; - char c; - c = *str++; - if (c == '-') { -sign = -1; -c = *str++; - } - else { -sign = 1; - } - result = 0; - while ('0' <= c && c <= '9') { -result = result*10 + (c - '0'); -c = *str++; + } else { + char *endptr; + + result = strtol(str, &endptr, 0); + if (str == endptr) { + /* Restore the default value when no digits were found. */ + result = dfault; } - result *= sign; } if (debug_get_option_should_print()) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97230] MATLAB hangs if DRI3 enabled with intel driver
https://bugs.freedesktop.org/show_bug.cgi?id=97230 Eero Tamminen changed: What|Removed |Added CC||eero.t.tammi...@intel.com Status|NEW |NEEDINFO -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 30/30] egl/dri2: set WL_bind_wayland_display in a consistent way
On 16 September 2016 at 18:02, Emil Velikov wrote: > Introduce a helper and use it throughout the platform code. This allows > us to reduce the amount of ifdef(s) and (potentially) use > kms_swrast_dri.so for !drm platforms (namely wayland and x11). Reviewed-by: Daniel Stone ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev