[Mesa-dev] [PATCH] i965: fix wrong cube/3D texture layout
Fix wrong cube/3D texture layout for the trailing levels whose width or height is smaller than the align unit. From 965 B-spec http://intellinuxgraphics.org/VOL_1_graphics_core.pdf at page 135: All of the LOD=0 q-planes are stacked vertically, then below that, the LOD=1 qplanes are stacked two-wide, then the LOD=2 qplanes are stacked four-wide below that, and so on. Thus we should always increase pack_x_nr, which may result in the pitch of LODn being greater than the pitch of LOD0. So we should update mt->total_width when needed. This would fix the following webgl test case on all gen4 platforms: conformance/textures/texture-size-cube-maps.html NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_tex_layout.c |5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 7a1b91f..8bf1d3d 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -115,6 +115,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) intel_miptree_set_image_offset(mt, level, q, x, y); x += pack_x_pitch; } +if (x > mt->total_width) + mt->total_width = x; x = 0; y += pack_y_pitch; @@ -135,10 +137,9 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) pack_x_nr <<= 1; } } else { +pack_x_nr <<= 1; if (pack_x_pitch > 4) { pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->total_width); } if (pack_y_pitch > 2) { -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4] i915: set SPRITE_POINT_ENABLE bit correctly
When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). And more, we currently handle varying inputs as texture coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. Thus we do set SPRITE_POINT_ENABLE bit only when all enabled tex coord units need do CoordReplace. Or fallback is needed to make sure the rendering is right. With handling the bit setup at i915_update_sprite_point_enable(), we don't need the relative code at i915Enable then. This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. v2: fallback just when all enabled tex coord units need do CoordReplace (Eric) v3: move the sprite point validate code at I915InvalidateState (Eric) v4: sprite point enable bit update based on _NEW_PROGRAM, too add relative _NEW-state comments to show what state is being used(Eric) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_context.c |2 + src/mesa/drivers/dri/i915/i915_context.h |2 + src/mesa/drivers/dri/i915/i915_state.c | 55 +++-- src/mesa/drivers/dri/i915/intel_tris.c |1 + 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 36563ef..dc32292 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -76,6 +76,8 @@ i915InvalidateState(struct gl_context * ctx, GLuint new_state) i915_update_provoking_vertex(ctx); if (new_state (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_program(ctx); + if (new_state (_NEW_PROGRAM | _NEW_POINT)) + i915_update_sprite_point_enable(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_context.h 
b/src/mesa/drivers/dri/i915/i915_context.h index 8167137..7037465 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -40,6 +40,7 @@ #define I915_FALLBACK_POINT_SMOOTH 0x8 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x10 #define I915_FALLBACK_DRAW_OFFSET 0x20 +#define I915_FALLBACK_COORD_REPLACE 0x40 #define I915_UPLOAD_CTX 0x1 #define I915_UPLOAD_BUFFERS 0x2 @@ -338,6 +339,7 @@ extern void i915InitStateFunctions(struct dd_function_table *functions); extern void i915InitState(struct i915_context *i915); extern void i915_update_stencil(struct gl_context * ctx); extern void i915_update_provoking_vertex(struct gl_context *ctx); +extern void i915_update_sprite_point_enable(struct gl_context *ctx); /*== diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 756001f..94c7327 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -652,6 +652,48 @@ i915PointParameterfv(struct gl_context * ctx, GLenum pname, const GLfloat *param } } +void +i915_update_sprite_point_enable(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + /* _NEW_PROGRAM */ + struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx-FragmentProgram._Current; + const GLbitfield64 inputsRead = p-FragProg.Base.InputsRead; + struct i915_context *i915 = i915_context(ctx); + GLuint s4 = i915-state.Ctx[I915_CTXREG_LIS4] ~S4_VFMT_MASK; + int i; + GLuint coord_replace_bits = 0x0; + GLuint tex_coord_unit_bits = 0x0; + + for (i = 0; i ctx-Const.MaxTextureCoordUnits; i++) { + /* _NEW_POINT */ + if (ctx-Point.CoordReplace[i] ctx-Point.PointSprite) + coord_replace_bits |= (1 i); + if (inputsRead FRAG_BIT_TEX(i)) + tex_coord_unit_bits |= (1 i); + } + + /* +* Here we can't enable the SPRITE_POINT_ENABLE bit when the mis-match +* of tex_coord_unit_bits and coord_replace_bits, or this will make all +* the other non-point-sprite coords(like 
varying inputs, as we now use +* tex coord to implement varying inputs) be replaced to value (0, 0)-(1, 1). +* +* Thus, do fallback when needed. +*/ + FALLBACK(intel, I915_FALLBACK_COORD_REPLACE, +coord_replace_bits coord_replace_bits != tex_coord_unit_bits); + + s4 = ~S4_SPRITE_POINT_ENABLE; + s4 |= (coord_replace_bits coord_replace_bits == tex_coord_unit_bits) ? + S4_SPRITE_POINT_ENABLE : 0; + if (s4 != i915-state.Ctx[I915_CTXREG_LIS4]) { + i915-state.Ctx[I915_CTXREG_LIS4] = s4; + I915_STATECHANGE(i915, I915_UPLOAD_CTX
Re: [Mesa-dev] [PATCH 1/2] i915: set SPRITE_POINT_ENABLE bit correctly
On Wed, Mar 28, 2012 at 01:21:18PM -0700, Eric Anholt wrote: On Sat, 17 Mar 2012 10:58:27 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: On Sat, Mar 17, 2012 at 1:57 AM, Eric Anholt e...@anholt.net wrote: On Mon, 12 Mar 2012 16:04:00 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: /**/ /* High level hooks for t_vb_render.c */ @@ -1070,6 +1112,15 @@ intelRunPipeline(struct gl_context * ctx) if (ctx-NewState) _mesa_update_state_locked(ctx); + /* + * Enable POINT_SPRITE_ENABLE bit when needed here + * + * Handle it at _draw_ time so that we can guarantee the CoordReplace + * changes handled well. And we must do it before the tnl pipeline is + * running so that we can fallback when finding something goes wrong. + */ + intel_validate_sprite_point_enable(intel); Other computed state happens in i915InvalidateState. Why does this one go here? A nice point. Yeah, I should do the stuff there. So, how about the following patch (note: I haven't tested the patch yet since I don't have hardware for testing at home, but it should work ;) (send from my gmail account, the format may not good, sorry for that) --- From 34964ef86aad7361cb4f3f5f73ae4e42928a4b31 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Sat, 17 Mar 2012 10:48:23 +0800 Subject: [PATCH] i915: set SPRITE_POINT_ENABLE bit correctly When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). And more, we currently handle varying inputs as texture coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. Thus we do set SPRITE_POINT_ENABLE bit only when all enabled tex coord units need do CoordReplace. Or fallback is needed to make sure the rendering is right. With handling the bit setup at i915_update_sprite_point_enable(), we don't need the relative code at i915Enable then. 
This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. v2: fallback just when all enabled tex coord units need do CoordReplace(Eric). v3: move the sprite point validate code at I915InvalidateState(Eric) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_context.c |2 + src/mesa/drivers/dri/i915/i915_context.h |2 + src/mesa/drivers/dri/i915/i915_state.c | 53 +++--- src/mesa/drivers/dri/i915/intel_tris.c |1 + 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 36563ef..d7785be 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -76,6 +76,8 @@ i915InvalidateState(struct gl_context * ctx, GLuint new_state) i915_update_provoking_vertex(ctx); if (new_state (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_program(ctx); + if (new_state _NEW_POINT) + i915_update_sprite_point_enable(ctx); } +void +i915_update_sprite_point_enable(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); In the next two lines, you make use of _NEW_PROGRAM-governed state, but you only call this function based on _NEW_POINT. In the i965 driver, we annotate state usage with /* _NEW_WHATEVER */ so the reader can see what state is being used, and make sure that it's reflected in the caller. Yes, will do that. 
+ struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx-FragmentProgram._Current; + const GLbitfield64 inputsRead = p-FragProg.Base.InputsRead; + struct i915_context *i915 = i915_context(ctx); + GLuint s4 = i915-state.Ctx[I915_CTXREG_LIS4] ~S4_VFMT_MASK; + int i; + GLuint coord_replace_bits = 0x0; + GLuint tex_coord_unit_bits = 0x0; + + for (i = 0; i ctx-Const.MaxTextureCoordUnits; i++) { + if (ctx-Point.CoordReplace[i] ctx-Point.PointSprite) + coord_replace_bits |= (1 i); + if (inputsRead FRAG_BIT_TEX(i)) + tex_coord_unit_bits |= (1 i); + } + + s4 = ~S4_SPRITE_POINT_ENABLE; + s4 |= (coord_replace_bits coord_replace_bits == tex_coord_unit_bits) ? + S4_SPRITE_POINT_ENABLE : 0; + if (s4 != i915-state.Ctx[I915_CTXREG_LIS4]) { + i915-state.Ctx[I915_CTXREG_LIS4] = s4; + I915_STATECHANGE(i915, I915_UPLOAD_CTX
Re: [Mesa-dev] [Intel-gfx] [PATCH] intel: Fix a case when mapping large texture fails
On Mon, Feb 27, 2012 at 11:45:46AM -0800, Anuj Phogat wrote: This patch handles a case when mapping a large texture fails in drm_intel_gem_bo_map_gtt(). These changes avoid assertion failure later in the driver as reported in following bugs: https://bugs.freedesktop.org/show_bug.cgi?id=44970 https://bugs.freedesktop.org/show_bug.cgi?id=46303 Signed-off-by: Anuj Phogat anuj.pho...@gmail.com -- [..] void diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index bc83649..982d6cb 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -124,7 +124,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region, */ _DBG(%s %p\n, __FUNCTION__, region); - if (!region-map_refcount++) { + if (!region-map_refcount) { intel_flush(intel-ctx); if (region-tiling != I915_TILING_NONE) @@ -133,7 +133,10 @@ intel_region_map(struct intel_context *intel, struct intel_region *region, drm_intel_bo_map(region-bo, true); region-map = region-bo-virtual; - ++intel-num_mapped_regions; + if (region-map) { + ++intel-num_mapped_regions; + region-map_refcount++; + } } Hi Anuj, The above change will make the map_refcount unbalanced, since you removed the increment for the successfully mapped region; here is a patch to fix this issue: From 3ce3f93d3378fd31df6dca24230edb52407cb9d8 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Tue, 27 Mar 2012 15:41:52 +0800 Subject: [PATCH] intel: fix unbalanced map_refcount issue This is a regression introduced by commit cdcfd5, which forgot to increase the map_refcount for a successfully-mapped region. Thus it caused an unbalanced map_refcount.
This would fix the regression found in the following two webglc test cases on the Pineview platform: texture-npot.html gl-max-texture-dimensions.html Cc: Anuj Phogat anuj.pho...@gmail.com Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/intel/intel_regions.c |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index d2b737b..abea2bd 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -133,10 +133,10 @@ intel_region_map(struct intel_context *intel, struct intel_region *region, drm_intel_bo_map(region-bo, true); region-map = region-bo-virtual; - if (region-map) { - intel-num_mapped_regions++; - region-map_refcount++; - } + } + if (region-map) { + intel-num_mapped_regions++; + region-map_refcount++; } return region-map; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glx: fix compile warnings
Fix 'set but not used' warnings; the gl_versions, gl_versions_profiles and glx_extensions variables are used only when HAVE_XCB_GLX_CREATE_CONTEXT is defined. Thus those warnings are shown when that macro isn't defined. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/glx/clientinfo.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/src/glx/clientinfo.c b/src/glx/clientinfo.c index 461030f..97d43ce 100644 --- a/src/glx/clientinfo.c +++ b/src/glx/clientinfo.c @@ -39,6 +39,7 @@ __glX_send_client_info(struct glx_display *glx_dpy) Bool any_screen_has_ARB_create_context = False; Bool any_screen_has_ARB_create_context_profile = False; unsigned i; +#ifdef HAVE_XCB_GLX_CREATE_CONTEXT static const uint32_t gl_versions[] = { 1, 4, }; @@ -47,6 +48,7 @@ __glX_send_client_info(struct glx_display *glx_dpy) }; static const char glx_extensions[] = GLX_ARB_create_context GLX_ARB_create_context_profile; +#endif /* There are three possible flavors of the client info structure that the * client could send to the server. The version sent depends on the -- 1.7.7.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] intel: Handle devid overrides using libdrm.
On Tue, Mar 20, 2012 at 10:36:24AM -0700, Eric Anholt wrote: On Mon, 19 Mar 2012 09:38:03 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: On Fri, Mar 16, 2012 at 04:26:43PM -0700, Eric Anholt wrote: --- src/mesa/drivers/dri/intel/intel_screen.c | 23 --- 1 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 7939c4d..3f1ef87 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -624,8 +624,7 @@ intel_init_bufmgr(struct intel_screen *intelScreen) __DRIscreen *spriv = intelScreen-driScrnPriv; int num_fences = 0; - intelScreen-no_hw = (getenv(INTEL_NO_HW) != NULL || - getenv(INTEL_DEVID_OVERRIDE) != NULL); + intelScreen-no_hw = getenv(INTEL_NO_HW) != NULL; Seems that we are doing duplicate things here in Mesa and Libdrm-intel: mesa will bypass hardware rendering if INTEL_NO_HW env is set libdrm-intel also will bypass hardware rendering if INTEL_DEVID_OVERRIDE is set They are doing the same thing, but by different env variable, is that necessary? INTEL_DEVID_OVERRIDE obviously implies INTEL_NO_HW, but INTEL_NO_HW is independently very useful for looking at debug output on your current hardware for a workload that hangs the GPU. Got it. Thanks, then Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glx:dri_common.c: check psc-driScreen-createDrawable return value
On Tue, Mar 20, 2012 at 05:24:06PM +0800, Wang YanQing wrote: On Tue, Mar 20, 2012 at 08:49:25AM +0100, Michel Dänzer wrote: Please use one of the *MessageF() functions instead of fprintf directly. createDrawable may return NULL value, we should check it, or it will make a segment failed. Signed-off-by: Wang YanQing udkni...@gmail.com --- src/glx/dri_common.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index 0e06d51..31e4d4d 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -403,6 +403,12 @@ driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable) pdraw = psc-driScreen-createDrawable(psc, glxDrawable, glxDrawable, gc-config); + + if (pdraw == NULL) { + ErrorMessageF(failed to create drawable\n); + return NULL; + } + Looks good to me, except the minor indent issue. Otherwise, Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com if (__glxHashInsert(priv-drawHash, glxDrawable, pdraw)) { (*pdraw-destroyDrawable) (pdraw); return NULL; -- 1.7.9.2.315.g25a78 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel:i915:i965: enable mipmap layout right mode
On Fri, Mar 16, 2012 at 11:13:23AM -0700, Eric Anholt wrote: On Thu, 15 Mar 2012 14:42:53 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: There are two mipmap layout modes: below and right. And we currently just use _below_ mode. And in some cases, like height is greater than width, it would be better to use the _right_ mode for saving memory. And it also fix some issues like the gl-max-texture-dimensions.html webglc test case on pineview in a hardware way(no fallback). Since when rendering with 1x2048 texture using below mode would make the draw offset exceed the max allowed size, but will not when using right mode. I think you should be able to get hardware rendering without changing layouts by taking advantage of intel_renderbuffer_get_tile_offsets(). diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 9022548..54f32a4 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -192,6 +192,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) MS4_MAX_LOD_SHIFT) | ((firstImage-Depth - 1) MS4_VOLUME_DEPTH_SHIFT)); + if (intel-is_945) + state[I915_TEXREG_MS4] |= intelObj-mt-layout MS4_MIP_LAYOUT_MODE_SHIFT; { GLuint minFilt, mipFilt, magFilt; You're checking 945 here, but your decision to choose right layout didn't check for 945. The decision to choose right layout is done at i945_miptree_choose_layout, that's where we did check. 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index a3de2e3..b6565fe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -654,7 +654,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) 6 * 4, 32, brw-wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj-Target) BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACE_MIPMAPLAYOUT_BELOW BRW_SURFACE_MIPLAYOUT_SHIFT | + mt-layout BRW_SURFACE_MIPLAYOUT_SHIFT | BRW_SURFACE_CUBEFACE_ENABLES | (translate_tex_format(mt-format, firstImage-InternalFormat, You didn't update gen7. Wierdly, I didn't find mipmap layout mode option at SURFRACE_STATE for IVB. But you remind me that I forgot to do that for gen6 hiz. diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 9082864..d175f50 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -215,6 +215,10 @@ struct intel_mipmap_tree /* These are also refcounted: */ GLuint refcount; + +#define INTEL_LAYOUT_BELOW 0 +#define INTEL_LAYOUT_RIGHT 1 + int layout; }; Make it a bool called miplayout_right or something instead of #defines. It's fine to me. 
But I guess it's less readable than using macros, say the following code: if ((mt-layout == INTEL_LAYOUT_BELOW level == mt-first_level + 1) || (mt-layout == INTEL_LAYOUT_RIGHT level == mt-first_level)) { x += ALIGN(width, mt-align_w); } else { y += img_height; } which translated to if ((mt-miplayout_right level == mt-first_level) || (!mt-miplayout_right level == mt-first_level + 1)) { x += ALIGN(width, mt-align_w); } else { y += img_height; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index 65645bc..4687dd3 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -138,7 +138,27 @@ intel_get_texture_alignment_unit(struct intel_context *intel, *h = intel_vertical_texture_alignment_unit(intel, format); } -void i945_miptree_layout_2d(struct intel_mipmap_tree *mt) +static int +i945_miptree_choose_layout(struct intel_context *intel, GLenum target, + GLuint width, GLuint height) +{ + int layout = INTEL_LAYOUT_BELOW; /* Use layout _below_ by default */ + + /* +* INTEL_LAYOUT_RIGHT is only for: +* GL_TEXTURE_1D, GL_TEXTURE_2D [945+, aka all platforms here] +* GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP [gen5+] +*/ + if (target == GL_TEXTURE_1D || target == GL_TEXTURE_2D || + (intel-gen = 5 target == GL_TEXTURE_CUBE_MAP)) { + if ((height 1) = width) + layout = INTEL_LAYOUT_RIGHT; + } + + return layout; +} You should explain why the choice is being made here. Yes, will add it. +void i945_miptree_layout_2d(struct intel_context *intel, struct intel_mipmap_tree *mt) { GLuint level
Re: [Mesa-dev] [PATCH 2/6] intel: Ask libdrm to dump an AUB file if INTEL_DEBUG=aub.
On Fri, Mar 16, 2012 at 04:26:42PM -0700, Eric Anholt wrote: It also asks for BMPs in the aub file at SwapBuffers time. --- src/mesa/drivers/dri/intel/intel_context.c |4 +++ src/mesa/drivers/dri/intel/intel_context.h |1 + src/mesa/drivers/dri/intel/intel_screen.c | 32 3 files changed, 37 insertions(+), 0 deletions(-) Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 7b2bdad..ff721fb 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -477,6 +477,7 @@ static const struct dri_debug_control debug_control[] = { { urb, DEBUG_URB }, { vs,DEBUG_VS }, { clip, DEBUG_CLIP }, + { aub, DEBUG_AUB }, { NULL,0 } }; @@ -754,6 +755,9 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG DEBUG_BUFMGR) dri_bufmgr_set_debug(intel-bufmgr, true); + if (INTEL_DEBUG DEBUG_AUB) + drm_intel_bufmgr_gem_set_aub_dump(intel-bufmgr, true); + intel_batchbuffer_init(intel); intel_fbo_init(intel); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index ef024b1..7b42009 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -472,6 +472,7 @@ extern int INTEL_DEBUG; #define DEBUG_URB 0x80 #define DEBUG_VS0x100 #define DEBUG_CLIP 0x200 +#define DEBUG_AUB 0x400 #define DBG(...) 
do {\ if (unlikely(INTEL_DEBUG FILE_DEBUG_FLAG))\ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 48762d0..7939c4d 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -125,6 +125,38 @@ intelDRI2Flush(__DRIdrawable *drawable) if (intel-batch.used) intel_batchbuffer_flush(intel); + + if (INTEL_DEBUG DEBUG_AUB) { + struct gl_framebuffer *fb = ctx-DrawBuffer; + + for (int i = 0; i fb-_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb = + intel_renderbuffer(fb-_ColorDrawBuffers[i]); + + if (irb irb-mt) { + enum aub_dump_bmp_format format; + + switch (irb-Base.Base.Format) { + case MESA_FORMAT_ARGB: + case MESA_FORMAT_XRGB: +format = AUB_DUMP_BMP_FORMAT_ARGB_; +break; + default: +continue; + } + + drm_intel_gem_bo_aub_dump_bmp(irb-mt-region-bo, + irb-draw_x, + irb-draw_y, + irb-Base.Base.Width, + irb-Base.Base.Height, + format, + irb-mt-region-pitch * + irb-mt-region-cpp, + 0); + } + } + } } static const struct __DRI2flushExtensionRec intelFlushExtension = { -- 1.7.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] intel: Handle devid overrides using libdrm.
On Fri, Mar 16, 2012 at 04:26:43PM -0700, Eric Anholt wrote: --- src/mesa/drivers/dri/intel/intel_screen.c | 23 --- 1 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 7939c4d..3f1ef87 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -624,8 +624,7 @@ intel_init_bufmgr(struct intel_screen *intelScreen) __DRIscreen *spriv = intelScreen-driScrnPriv; int num_fences = 0; - intelScreen-no_hw = (getenv(INTEL_NO_HW) != NULL || - getenv(INTEL_DEVID_OVERRIDE) != NULL); + intelScreen-no_hw = getenv(INTEL_NO_HW) != NULL; Seems that we are doing duplicate things here in Mesa and Libdrm-intel: mesa will bypass hardware rendering if INTEL_NO_HW env is set libdrm-intel also will bypass hardware rendering if INTEL_DEVID_OVERRIDE is set They are doing the same thing, but by different env variable, is that necessary? intelScreen-bufmgr = intel_bufmgr_gem_init(spriv-fd, BATCH_SZ); if (intelScreen-bufmgr == NULL) { @@ -736,7 +735,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) GLenum fb_format[3]; GLenum fb_type[3]; unsigned int api_mask; - char *devid_override; static const GLenum back_buffer_modes[] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML @@ -758,20 +756,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen-driScrnPriv = psp; psp-driverPrivate = (void *) intelScreen; - /* Determine chipset ID */ - if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID, - intelScreen-deviceID)) - return false; + if (!intel_init_bufmgr(intelScreen)) + return false; - /* Allow an override of the device ID for the purpose of making the -* driver produce dumps for debugging of new chipset enablement. -* This implies INTEL_NO_HW, to avoid programming your actual GPU -* incorrectly. 
-*/ - devid_override = getenv(INTEL_DEVID_OVERRIDE); - if (devid_override) { - intelScreen-deviceID = strtod(devid_override, NULL); - } + intelScreen-deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen-bufmgr); intelScreen-kernel_has_gen7_sol_reset = intel_get_boolean(intelScreen-driScrnPriv, @@ -818,9 +806,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) if (IS_9XX(intelScreen-deviceID) || IS_965(intelScreen-deviceID)) psp-api_mask = api_mask; - if (!intel_init_bufmgr(intelScreen)) - return false; - intelScreen-hw_has_swizzling = intel_detect_swizzling(intelScreen); psp-extensions = intelScreenExtensions; -- 1.7.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] softpipe: set max cube texture size to 4Kx4K
On Sun, Mar 18, 2012 at 10:49:00AM -0600, Brian Paul wrote: The max size was 16Kx16K so a 4 byte/pixel, six-sided cube would require 6 GBytes of memory. If mipmapped, 8 GB. Reduce the max size to 4K to make the total size more reasonable. Fixes a crash with the new piglit max-texture-size test. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com --- src/gallium/drivers/softpipe/sp_limits.h |1 + src/gallium/drivers/softpipe/sp_screen.c |2 +- 2 files changed, 2 insertions(+), 1 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_limits.h b/src/gallium/drivers/softpipe/sp_limits.h index a7a24c9..ada5c0c 100644 --- a/src/gallium/drivers/softpipe/sp_limits.h +++ b/src/gallium/drivers/softpipe/sp_limits.h @@ -32,6 +32,7 @@ #define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */ #define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */ +#define SP_MAX_TEXTURE_CUBE_LEVELS 13 /* 4K x 4K */ /** Max surface size */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index c983ef5..183ed9a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -94,7 +94,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return SP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return SP_MAX_TEXTURE_2D_LEVELS; + return SP_MAX_TEXTURE_CUBE_LEVELS; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_INDEP_BLEND_ENABLE: -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Doing 8.0.1 release?
On Thu, Mar 15, 2012 at 01:22:10PM +0800, Yuanhan Liu wrote: On Tue, Mar 13, 2012 at 07:29:02AM -0700, Jakob Bornecrantz wrote: - Original Message - On Mon, Mar 12, 2012 at 05:05:08PM -0700, Jakob Bornecrantz wrote: Hi all We are well overdue for an 8.0.1 release, so I thought we would do it aggressively this week. A quick rc tomorrow and a release on Thursday or Friday? Is that okay with people, comments please? Did you mean 8.1? 8.0.1 has been released for a while, see http://lists.freedesktop.org/archives/mesa-dev/2012-February/019167.html I meant 8.0.2, sorry for any confusion. Thanks. BTW, I have some commits which should be included in the next release (aka 8.0.2 here). Should I do the cherry-pick myself? (Usually, Ian will do that for me before). FYI, I have done it by myself. BTW, would someone inform me before the release? I have 2 more patches that would be good to include in the next release. I hope I can make it before the release. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Doing 8.0.1 release?
On Thu, Mar 15, 2012 at 08:32:34PM -0700, Jakob Bornecrantz wrote: - Original Message - On Thu, Mar 15, 2012 at 01:22:10PM +0800, Yuanhan Liu wrote: On Tue, Mar 13, 2012 at 07:29:02AM -0700, Jakob Bornecrantz wrote: - Original Message - On Mon, Mar 12, 2012 at 05:05:08PM -0700, Jakob Bornecrantz wrote: Hi all We are well overdue for an 8.0.1 release, so I thought we would do it aggressively this week. A quick rc tomorrow and a release on Thursday or Friday? Is that okay with people, comments please? Did you mean 8.1? 8.0.1 has been released for a while, see http://lists.freedesktop.org/archives/mesa-dev/2012-February/019167.html I meant 8.0.2, sorry for any confusion. Thanks. BTW, I have some commits which should be included in the next release (aka 8.0.2 here). Should I do the cherry-pick myself? (Usually, Ian will do that for me before). FYI, I have done it by myself. BTW, would someone inform me before the release? I have 2 more patches that would be good to include in the next release. I hope I can make it before the release. Thanks, I have postponed the 8.0.2 release to Monday next week. Thanks for the info. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Doing 8.0.1 release?
On Tue, Mar 13, 2012 at 07:29:02AM -0700, Jakob Bornecrantz wrote: - Original Message - On Mon, Mar 12, 2012 at 05:05:08PM -0700, Jakob Bornecrantz wrote: Hi all We're well overdue for an 8.0.1 release, so I thought we'd do it aggressively this week. A quick rc tomorrow and a release on Thursday or Friday? Is that okay with people, comments please? Did you mean 8.1? 8.0.1 has been released for a while, see http://lists.freedesktop.org/archives/mesa-dev/2012-February/019167.html I meant 8.0.2, sorry for any confusion. Thanks. BTW, I have some commits which should be included in the next release (aka 8.0.2 here). Should I do the cherry-pick myself? (Usually, Ian would do that for me before.) Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Doing 8.0.1 release?
On Mon, Mar 12, 2012 at 05:05:08PM -0700, Jakob Bornecrantz wrote: Hi all We're well overdue for an 8.0.1 release, so I thought we'd do it aggressively this week. A quick rc tomorrow and a release on Thursday or Friday? Is that okay with people, comments please? Did you mean 8.1? 8.0.1 has been released for a while, see http://lists.freedesktop.org/archives/mesa-dev/2012-February/019167.html Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i915: fallback when point sprite is enabled while handling varying inputs
On Mon, Mar 12, 2012 at 10:14:06AM +0800, Yuanhan Liu wrote: On Fri, Mar 09, 2012 at 10:35:33AM -0800, Eric Anholt wrote: On Thu, 8 Mar 2012 19:21:23 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: From ddd1a9d8f0d82c2f5fcb78a471608a005a6a077c Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Thu, 8 Mar 2012 18:48:54 +0800 Subject: [PATCH] i915: set SPRITE_POINT_ENABLE bit just when we need do coord replace When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). Since we currently handling varying inputs as tex coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. With handling the bit setup at i915ValidateFragmentProgram, we don't need the code at i915Enable then. This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |5 + src/mesa/drivers/dri/i915/i915_state.c| 13 + 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5b7e93e..8829e8d 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1379,7 +1379,12 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); } + s4 = ~S4_SPRITE_POINT_ENABLE; for (i = 0; i p-ctx-Const.MaxTextureCoordUnits; i++) { + /* Do set SPRITE_POINT_ENABLE bit when we need do coord replace */ + if (ctx-Point.CoordReplace[i] ctx-Point.PointSprite) + s4 |= S4_SPRITE_POINT_ENABLE; + if (inputsRead FRAG_BIT_TEX(i)) { int sz = VB-AttribPtr[_TNL_ATTRIB_TEX0 + i]-size; I don't think you've done anything to guarantee that this code is called when CoordReplace changes. Yes, you are right. Maybe we can do it at the final _draw_ time, say i915_reduced_primitive_state? Acutally, it seems we can guarantee that: since i915ValidateFragmentProgram is called at the _draw_ time, which makes sure the previous call of CoordReplace is handled. Right? Anyway, I made another 2 patches, please help to review it. Thanks, Yuanhan Liu A more general problem: you're turning on point sprite if coord replace is set on any texcoord. i915 replaces all texcoords with (0,0)-(1,1) when point sprite is enabled, which breaks any non-point-sprite coordinates, plus varyings as you noted. Well, I guess that's also what the current code do. Since SPRITE_POINT_ENABLE is just _one_ bit instead of a set of mask bits. If you need point sprite coordinates and actual texcoords, a fallback should be done. Yes, agreed acoording to the above state. 
(Well, if we did better compiling, we could route a couple of varyings through color/secondarycolor while still getting point sprite coordinates on the texcoords) That's a good hint. But I'd like to do the fallback first. Since I don't know how many works should be done to get the better compiling. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i915: set SPRITE_POINT_ENABLE bit correctly
When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). And more, we currently handle varying inputs as texture coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. Thus we do set SPRITE_POINT_ENABLE bit only when all enabled tex coord units need do CoordReplace. Or fallback is needed to make sure the rendering is right. As we need guarantee the CoordReplace changes handled well and be able to fallback when finding something wrong, I added another function to handle it at intelRunPipepline, where the drawing happened here and tnl pipeline hasn't started yet. With handling the bit setup at intel_validate_sprite_point_enable(), we don't need the relative code at i915Enable then. This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. v2: fallback just when all enabled tex coord units need do CoordReplace(Eric). 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_context.h |1 + src/mesa/drivers/dri/i915/i915_state.c | 13 +--- src/mesa/drivers/dri/i915/intel_tris.c | 52 ++ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 8167137..59eeb6e 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -40,6 +40,7 @@ #define I915_FALLBACK_POINT_SMOOTH 0x8 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x10 #define I915_FALLBACK_DRAW_OFFSET 0x20 +#define I915_FALLBACK_COORD_REPLACE 0x40 #define I915_UPLOAD_CTX 0x1 #define I915_UPLOAD_BUFFERS 0x2 diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 756001f..3c751e4 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -869,18 +869,7 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_POINT_SPRITE: - /* This state change is handled in i915_reduced_primitive_state because - * the hardware bit should only be set when rendering points. 
- */ -dw = i915-state.Ctx[I915_CTXREG_LIS4]; - if (state) -dw |= S4_SPRITE_POINT_ENABLE; - else -dw = ~S4_SPRITE_POINT_ENABLE; - if (dw != i915-state.Ctx[I915_CTXREG_LIS4]) { -i915-state.Ctx[I915_CTXREG_LIS4] = dw; -I915_STATECHANGE(i915, I915_UPLOAD_CTX); - } + /* Handle it at intel_validate_sprite_point_enable() */ break; case GL_POINT_SMOOTH: diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a36011a..58f6a59 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1052,6 +1052,48 @@ static const GLenum reduced_prim[GL_POLYGON + 1] = { GL_TRIANGLES }; +static void +intel_validate_sprite_point_enable(struct intel_context *intel) +{ + struct gl_context *ctx = intel-ctx; + struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx-FragmentProgram._Current; + const GLbitfield64 inputsRead = p-FragProg.Base.InputsRead; + struct i915_context *i915 = i915_context(ctx); + GLuint s4 = i915-state.Ctx[I915_CTXREG_LIS4] ~S4_VFMT_MASK; + int i; + GLuint coord_replace_bits = 0x0; + GLuint tex_coord_unit_bits = 0x0; + + for (i = 0; i ctx-Const.MaxTextureCoordUnits; i++) { + if (ctx-Point.CoordReplace[i] ctx-Point.PointSprite) + coord_replace_bits |= (1 i); + if (inputsRead FRAG_BIT_TEX(i)) + tex_coord_unit_bits |= (1 i); + } + + s4 = ~S4_SPRITE_POINT_ENABLE; + if (coord_replace_bits) { + if (coord_replace_bits != tex_coord_unit_bits) { + /* + * Here we can't enable the SPRITE_POINT_ENABLE bit due to the + * mis-match of tex_coord_unit_bits and coord_replace_bits, or + * this will make all the other non-point-sprite coords be + * replaced to value (0, 0)-(1, 1). + * + * Thus, a fallback is needed. 
+ */ + FALLBACK(intel, I915_FALLBACK_COORD_REPLACE, true); + } else { + s4 |= S4_SPRITE_POINT_ENABLE; + } + } + + if (s4 != i915-state.Ctx[I915_CTXREG_LIS4]) { + i915-state.Ctx[I915_CTXREG_LIS4] = s4; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } +} /**/ /* High level hooks for t_vb_render.c */ @@ -1070,6 +1112,15 @@ intelRunPipeline(struct gl_context * ctx
[Mesa-dev] [PATCH 2/2] i915: set SPRITE_POINT_ENABLE bit only for points
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_state.c |8 +++- src/mesa/drivers/dri/i915/i915_vtbl.c |9 + 2 files changed, 16 insertions(+), 1 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 3c751e4..d7c6918 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -869,7 +869,13 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_POINT_SPRITE: - /* Handle it at intel_validate_sprite_point_enable() */ + /* + * Handle it at intel_validate_sprite_point_enable() + * + * And final handle it in i915_reduced_primitive_state() + * because the hardware bit should only be set when + * rendering points + */ break; case GL_POINT_SMOOTH: diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index e78dbc8..b131b19 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -87,6 +87,15 @@ i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim) i915-intel.reduced_primitive = rprim; + /* Set SPRITE_POINT_ENABLE bit only for points */ + if (rpim != GL_POINTS + i915-state.Ctx[I915_CTXREG_LIS4] S4_SPRITE_POINT_ENABLE) { + INTEL_FIREVERTICES(intel); + + i915-state.Ctx[I915_CTXREG_LIS4] = ~S4_SPRITE_POINT_ENABLE; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } + if (st1 != i915-state.Stipple[I915_STPREG_ST1]) { INTEL_FIREVERTICES(intel); -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i915: set SPRITE_POINT_ENABLE bit only for points
On Mon, Mar 12, 2012 at 12:30:20PM -0700, Eric Anholt wrote: On Mon, 12 Mar 2012 16:04:01 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com Is there a reason for this change? What test does it fix? No big reason, and it doesn't fix anything so far. I made this patch because: 1. There is a long-standing comment at i915Enable() like this: /* This state change is handled in i915_reduced_primitive_state because * the hardware bit should only be set when rendering points. */ (From a quick look at the history, it seems that it was Ian who added this comment, but I see nowhere that he actually did it.) 2. It makes sense to me to clear those bits for non-point prims. The hardware docs I have say "This bit controls the generation of texture coordinates at the corners of point primitives. When ENABLED, the corners of the point primitive will be different..." suggesting that it doesn't affect non-points. Yes, I just confirmed that. So, I'm fine to drop this patch. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i915: fallback when point sprite is enabled while handling varying inputs
On Fri, Mar 09, 2012 at 10:35:33AM -0800, Eric Anholt wrote: On Thu, 8 Mar 2012 19:21:23 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: From ddd1a9d8f0d82c2f5fcb78a471608a005a6a077c Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Thu, 8 Mar 2012 18:48:54 +0800 Subject: [PATCH] i915: set SPRITE_POINT_ENABLE bit just when we need do coord replace When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). Since we currently handling varying inputs as tex coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. With handling the bit setup at i915ValidateFragmentProgram, we don't need the code at i915Enable then. This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |5 + src/mesa/drivers/dri/i915/i915_state.c| 13 + 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5b7e93e..8829e8d 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1379,7 +1379,12 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); } + s4 = ~S4_SPRITE_POINT_ENABLE; for (i = 0; i p-ctx-Const.MaxTextureCoordUnits; i++) { + /* Do set SPRITE_POINT_ENABLE bit when we need do coord replace */ + if (ctx-Point.CoordReplace[i] ctx-Point.PointSprite) + s4 |= S4_SPRITE_POINT_ENABLE; + if (inputsRead FRAG_BIT_TEX(i)) { int sz = VB-AttribPtr[_TNL_ATTRIB_TEX0 + i]-size; I don't think you've done anything to guarantee that this code is called when CoordReplace changes. Yes, you are right. Maybe we can do it at the final _draw_ time, say i915_reduced_primitive_state? A more general problem: you're turning on point sprite if coord replace is set on any texcoord. i915 replaces all texcoords with (0,0)-(1,1) when point sprite is enabled, which breaks any non-point-sprite coordinates, plus varyings as you noted. Well, I guess that's also what the current code do. Since SPRITE_POINT_ENABLE is just _one_ bit instead of a set of mask bits. If you need point sprite coordinates and actual texcoords, a fallback should be done. Yes, agreed acoording to the above state. (Well, if we did better compiling, we could route a couple of varyings through color/secondarycolor while still getting point sprite coordinates on the texcoords) That's a good hint. But I'd like to do the fallback first. Since I don't know how many works should be done to get the better compiling. 
Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i915: fallback when point sprite is enabled while handling varying inputs
On Thu, Mar 08, 2012 at 02:30:30PM +0800, Yuanhan Liu wrote: The current code would use tex coord to implement varying inputs. If point sprite is enabled(always enabled in chrome and firefox), the tex coord would be replaced with the value (x, y, 0, 1) where x and y vary from 0 to 1. Thus you will find that the value of the varying inputs doesn't work anymore. Why chrome(and firefox) would always enable GL_POINT_SPRITE to enable webglc, if you would ask, here is the answer I find from the code of chrome at file gpu/command_buffer/service/gles2_cmd_decoder.cc: // OpenGL ES 2.0 implicitly enables the desktop GL capability // VERTEX_PROGRAM_POINT_SIZE and doesn't expose this enum. This fact // isn't well documented; it was discovered in the Khronos OpenGL ES // mailing list archives. It also implicitly enables the desktop GL // capability GL_POINT_SPRITE to provide access to the gl_PointCoord // variable in fragment shaders. if (gfx::GetGLImplementation() != gfx::kGLImplementationEGLGLES2) { glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); glEnable(GL_POINT_SPRITE); } So, fallback when point sprite is enabled while handling varying inputs before finding a better way to not use tex coord to implement varying inputs. This would _really_ fix the following webglc case on pineview this time: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/conformance-suites/1.0.1/conformance/rendering/point-size.html NOTE: This is a candidate for stable release branches. 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c | 23 --- 1 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5b7e93e..c2390fe 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -132,9 +132,26 @@ src_vector(struct i915_fragment_program *p, case FRAG_ATTRIB_VAR0 + 5: case FRAG_ATTRIB_VAR0 + 6: case FRAG_ATTRIB_VAR0 + 7: - src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source-Index - FRAG_ATTRIB_VAR0), - D0_CHANNEL_ALL); + /* + * The current code would use tex coord to implement varying inputs. + * If point sprite is enabled(always enabled in chrome and firefox), + * the tex coord would be replaced with the value (x, y, 0, 1) where + * x and y vary from 0 to 1. Thus you will find that the value of the + * varying inputs doesn't work anymore. + * + * So, fallback when point sprite is enabled. + * + * FIXME: a better way to not use tex coord to add the support of + *varying inputs? + */ + if (p-ctx-Point.PointSprite) { + i915_program_error(p, Point Sprite is enabled while using + tex coord to implement varying inputs); + } else { + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source-Index - FRAG_ATTRIB_VAR0), + D0_CHANNEL_ALL); + } Honestly, I don't like this patch myself: it makes lots of webglc test case fallback to swrast. Though I somehow understand why you use tex coord to implement varying inputs. But I still don't figure a better way to handle varying inputs. Then I tried to get rid of such fallback and came the following patch, which I think is much better than this one. 
From ddd1a9d8f0d82c2f5fcb78a471608a005a6a077c Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Thu, 8 Mar 2012 18:48:54 +0800 Subject: [PATCH] i915: set SPRITE_POINT_ENABLE bit just when we need do coord replace When SPRITE_POINT_ENABLE bit is set, the texture coord would be replaced, and this is only needed when we called something like glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE). Since we currently handling varying inputs as tex coord, we would be careful when setting this bit and set it just when needed, or you will find the value of varying input is not right and changed. With handling the bit setup at i915ValidateFragmentProgram, we don't need the code at i915Enable then. This patch would _really_ fix the webglc point-size.html test case and of course, not regress piglit point-sprite and glean-pointSprite testcase. NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |5 + src/mesa/drivers/dri/i915/i915_state.c| 13 + 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5b7e93e..8829e8d 100644 --- a/src/mesa/drivers/dri
[Mesa-dev] about varying support for i915
Hi Eric, A few days ago, I wrote a patch (058fc6521e3bc483bc948cc90dc5ee3b08d6ec64) to fix an issue with specifying the point size in the vertex shader. And I mentioned it also fixed the point-size.html webglc case. Well, it does fix the piglit case glsl-vs-point-size, but it doesn't fix the webglc case. I don't know why I remembered that my patch fixed the webglc issue as well. I guess this is because it fixed the gl case I wrote for the webglc case (I wrote a local gl program to do exactly what the case does, for debugging). Really sorry for the mistake I made at first. Then I continued to figure out why my patch doesn't work. It took me quite a while to figure out why my local gl program passed while the webglc case failed, since I did write my gl program exactly like the webglc case. And finally, I found the root cause: chrome (and firefox as well) will call glEnable(GL_VERTEX_PROGRAM_POINT_SIZE) and glEnable(GL_POINT_SPRITE_ARB) first when initializing the GPU process. (Well, this turned out to mean that my former patch does work.) So, here is how the issue is triggered. Firstly, the case (I also attached another gl testcase of mine for reproducing this issue) uses a glsl varying var to assign gl_FragColor, like the following code: varying vec4 color; void main() { gl_FragColor = color; } which is translated to: fp: # Fragment Program/Shader 3 0: MOV OUTPUT[2], INPUT[16]; 1: END # Fragment Program/Shader 3 0: MOV OUTPUT[2], INPUT[16]; 1: END i915: BEGIN DCL T_TEX0 oC = MOV T_TEX0 END And I found that i915 varying support was added by you in commit f9f31b25740887373806cb489e5480dc9b261805, in which you just use the texture coord. So, when POINT_SPRITE_ARB is enabled (always enabled in chrome and firefox), the texture coord is replaced because point sprite is enabled. Thus the varying support doesn't work. Honestly speaking, I don't quite understand how the shader's handling of varying inputs works. Would you mind educating me a little bit? (I will try to understand that by reading more, of course). 
BTW, any thoughts on handling varying inputs better(I mean, don't use texture coord)? You can use the program I attached to reproduce this issue on pineview. Thanks, Yuanhan Liu #include stdio.h #include stdlib.h #include GL/glut.h #include assert.h static const char *vs = #version 120\n attribute vec3 pos;\n attribute vec4 colorIn;\n varying vec4 color;\n uniform float pointSize;\n void main()\n {\n color = colorIn;\n gl_Position = vec4(pos, 1.0);\n }\n; static const char *ps = #version 120\n varying vec4 color;\n void main()\n {\n gl_FragColor = color;\n }\n; static GLuint shader_stuff() { GLuint shaderProgram; GLuint vertexShader, fragmentShader; vertexShader = glCreateShader(GL_VERTEX_SHADER); fragmentShader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(vertexShader, 1, vs, NULL); glShaderSource(fragmentShader, 1, ps, NULL); glCompileShader(vertexShader); glCompileShader(fragmentShader); shaderProgram = glCreateProgram(); glAttachShader(shaderProgram, vertexShader); glAttachShader(shaderProgram, fragmentShader); glBindAttribLocation(shaderProgram, 0, pos); glBindAttribLocation(shaderProgram, 1, colorIn); glLinkProgram(shaderProgram); return shaderProgram; } void myDisplay(void) { GLuint program; GLuint vbo; GLfloat vertices [] = { 0.0, 0.0, 0.0, }; GLubyte colors [] = { 255, 0, 0, 255, }; /* A must to reprodue the wrong handling of varying inputs on pineview */ glEnable(GL_POINT_SPRITE_ARB); program = shader_stuff(); glUseProgram(program); glGenBuffers(1, vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(vertices) + sizeof(colors), NULL, GL_STATIC_DRAW); glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices); glBufferSubData(GL_ARRAY_BUFFER, sizeof(vertices), sizeof(colors), colors); glVertexAttribPointer(0, 3, GL_FLOAT, 0, 0, NULL); glEnableVertexAttribArray(0); glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, 0, 0, sizeof(vertices)); glEnableVertexAttribArray(1); glClearColor(0, 0, 0, 1); glClear(GL_COLOR_BUFFER_BIT); 
glPointSize(100); glDrawArrays(GL_POINTS, 0, 1); glFlush(); } int main(int argc,char** argv) { glutInit(argc,argv); glutInitDisplayMode(GLUT_SINGLE|GLUT_RGB); glutInitWindowSize(200, 200); glutInitWindowPosition(100,100); glutCreateWindow(varying-color); glutDisplayFunc(myDisplay); glutMainLoop(); return 0
[Mesa-dev] [PATCH] i915: fallback when point sprite is enabled while handling varying inputs
The current code would use tex coord to implement varying inputs. If point sprite is enabled(always enabled in chrome and firefox), the tex coord would be replaced with the value (x, y, 0, 1) where x and y vary from 0 to 1. Thus you will find that the value of the varying inputs doesn't work anymore. Why chrome(and firefox) would always enable GL_POINT_SPRITE to enable webglc, if you would ask, here is the answer I find from the code of chrome at file gpu/command_buffer/service/gles2_cmd_decoder.cc: // OpenGL ES 2.0 implicitly enables the desktop GL capability // VERTEX_PROGRAM_POINT_SIZE and doesn't expose this enum. This fact // isn't well documented; it was discovered in the Khronos OpenGL ES // mailing list archives. It also implicitly enables the desktop GL // capability GL_POINT_SPRITE to provide access to the gl_PointCoord // variable in fragment shaders. if (gfx::GetGLImplementation() != gfx::kGLImplementationEGLGLES2) { glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); glEnable(GL_POINT_SPRITE); } So, fallback when point sprite is enabled while handling varying inputs before finding a better way to not use tex coord to implement varying inputs. This would _really_ fix the following webglc case on pineview this time: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/conformance-suites/1.0.1/conformance/rendering/point-size.html NOTE: This is a candidate for stable release branches. 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c | 23 --- 1 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5b7e93e..c2390fe 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -132,9 +132,26 @@ src_vector(struct i915_fragment_program *p, case FRAG_ATTRIB_VAR0 + 5: case FRAG_ATTRIB_VAR0 + 6: case FRAG_ATTRIB_VAR0 + 7: - src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source-Index - FRAG_ATTRIB_VAR0), - D0_CHANNEL_ALL); + /* + * The current code would use tex coord to implement varying inputs. + * If point sprite is enabled(always enabled in chrome and firefox), + * the tex coord would be replaced with the value (x, y, 0, 1) where + * x and y vary from 0 to 1. Thus you will find that the value of the + * varying inputs doesn't work anymore. + * + * So, fallback when point sprite is enabled. + * + * FIXME: a better way to not use tex coord to add the support of + *varying inputs? + */ + if (p-ctx-Point.PointSprite) { + i915_program_error(p, Point Sprite is enabled while using + tex coord to implement varying inputs); + } else { + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source-Index - FRAG_ATTRIB_VAR0), + D0_CHANNEL_ALL); + } break; default: -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i915: fallback for NPOT cubemap texture
On Fri, Mar 02, 2012 at 01:52:00PM +0800, Yuanhan Liu wrote: On Thu, Mar 01, 2012 at 04:04:59PM +0800, Yuanhan Liu wrote: On Wed, Feb 29, 2012 at 11:44:59AM -0800, Eric Anholt wrote: On Wed, 29 Feb 2012 15:11:06 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: According to 3DSTATE_MAP_STATE at page of 104 in Bspec [snip]... + Pineview should be DevBLB derived, which doens't have so that requirement, Yeah, it's true according to Bspec: [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps must have all faces enabled. Well, we have all faces enabled in our code and it still failed. And it's ok when the height is power of 2. That's why I think we need do fallback for other platforms as well. so this doesn't really make sense to me. Have you looked at the rendering to see what's going on? Not much, and I'm tring now. Seems that the cube map texture layout is wrong. Making the width and height align of 4(although this doesn't make much sense) will fix this issue and also broke some others. Well, Bspec mentioned nothing about the layout alignment and layout for NPOT cube map. I guess the Bspec assume it's a POT cube map. Thus I don't know the _working_ texture layout of NPOT cube map texture layout. Thoughts? My thought is if we can't figure out the texture layout for NPOT cube map, I would like to do fallback as a workaround. Eric, does that make sense to you? Eric, does the following patch make sense to you? Well, honestlly speaking, I just added some more detailed comments. Thanks, Yuanhan Liu -- From 784b067924c05dc41c4ea56d1d9389aade0f076b Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 29 Feb 2012 15:04:45 +0800 Subject: [PATCH] i915: fallback for NPOT cubemap texture Although some hardware support NPOT cubemap, but it seems we don't know the right layout for NPOT cubemap. Thus seems we need do fallback for other platforms as well. See comments inline the code for more detailed info. 
v2: give a more detailed info about why we need fallback for other platfroms as well. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=4 NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_texstate.c | 22 ++ 1 files changed, 22 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 9022548..fd63a69 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -319,6 +319,28 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) ((wt != GL_CLAMP) (wt != GL_CLAMP_TO_EDGE return false; + /* + * According to 3DSTATE_MAP_STATE at page of 104 in Bspec + * Vol3d 3D Instructions: + * [DevGDG and DevAlv]: Must be a power of 2 for cube maps. + * [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps + * must have all faces enabled. + * + * But, as I tested on pineview(DevBLB derived), the rendering is + * bad(you will find the color isn't samplered right in some + * fragments). After checking, it seems that the texture layout is + * wrong: making the width and height align of 4(although this + * doesn't make much sense) will fix this issue and also broke some + * others. Well, Bspec mentioned nothing about the layout alignment + * and layout for NPOT cube map. I guess the Bspec just assume it's + * a POT cube map. + * + * Thus, I guess we need do this for other platforms as well. + */ + if (tObj-Target == GL_TEXTURE_CUBE_MAP_ARB + !is_power_of_two(firstImage-Height)) + return false; + state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ state[I915_TEXREG_SS3] |= -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: handle gl_PointCoord for Gen4 and Gen5 platforms
On Tue, Mar 06, 2012 at 08:25:10AM -0800, Eric Anholt wrote: On Mon, 27 Feb 2012 15:46:32 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6e63583..7950c47 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -125,6 +135,7 @@ brw_upload_sf_prog(struct brw_context *brw) { struct gl_context *ctx = brw-intel.ctx; struct brw_sf_prog_key key; There should be a /* _NEW_BUFFERS */ comment here to note the state flag dependency. Ok. + bool render_to_fbo = ctx-DrawBuffer-Name != 0; Other than that, Reviewed-by: Eric Anholt e...@anholt.net Thanks for the Reviewed-by, which I have been waiting for quite a while ;) Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i915: move the FALLBACK_DRAW_OFFSET check outside the drawing rect check
We have to fall back when the 'Clipped Drawing Rectangle X/Y Max' exceeds the hardware's limit, no matter whether the drawing rectangle offset changed or not. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=46665 NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_vtbl.c |7 +++ 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 11e8a35..e78dbc8 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -665,12 +665,11 @@ i915_set_draw_region(struct intel_context *intel, draw_offset = (draw_y 16) | draw_x; + FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET, +(ctx-DrawBuffer-Width + draw_x 2048) || +(ctx-DrawBuffer-Height + draw_y 2048)); /* When changing drawing rectangle offset, an MI_FLUSH is first required. */ if (draw_offset != i915-last_draw_offset) { - FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET, - (ctx-DrawBuffer-Width + draw_x 2048) || - (ctx-DrawBuffer-Height + draw_y 2048)); - state-Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE; i915-last_draw_offset = draw_offset; } else -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i915: fallback for NPOT cubemap texture
On Wed, Feb 29, 2012 at 11:44:59AM -0800, Eric Anholt wrote: On Wed, 29 Feb 2012 15:11:06 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: According to 3DSTATE_MAP_STATE at page of 104 in Bspec vol3d 3D Instructions: [DevGDG and DevAlv]: Must be a power of 2 for cube maps Well, it turned out to be that we need do this for other platforms as well, like pineview. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=4 NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_texstate.c | 12 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 0e500e2..e3ab50e 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -319,6 +319,18 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) ((wt != GL_CLAMP) (wt != GL_CLAMP_TO_EDGE return false; + /* + * According to 3DSTATE_MAP_STATE at page of 104 in Bspec + * Vol3d 3D Instructions: + * [DevGDG and DevAlv]: Must be a power of 2 for cube maps. + * + * Well, it turned out to be that we need do this for other + * platforms as well, like pineview. + */ + if (tObj-Target == GL_TEXTURE_CUBE_MAP_ARB + !is_power_of_two(firstImage-Height)) + return false; + Pineview should be DevBLB derived, which doesn't have that requirement, Yeah, it's true according to Bspec: [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps must have all faces enabled. Well, we have all faces enabled in our code and it still failed. And it's ok when the height is a power of 2. That's why I think we need to fall back for other platforms as well. so this doesn't really make sense to me. Have you looked at the rendering to see what's going on? Not much, and I'm trying now. 
Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i915: fix wrong rendering of gl_PointSize on Pineview
On Thu, Mar 01, 2012 at 09:54:46AM -0800, Eric Anholt wrote: On Thu, 23 Feb 2012 14:19:19 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: The current code would ignore the point size specified by gl_PointSize builtin variable in vertex shader on Pineview. This patch servers as fixing that. This patch fixes the following issues on Pineview: webglc: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/sdk/tests/conformance/rendering/point-size.html piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. v2: pick Eric's nice tip for fixing this issue in hardware rendering. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 4f016a3..2d60523 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1361,6 +1361,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); } + /* Handle gl_PointSize builtin var here */ + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) + EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 1); All the other EMIT_ATTRs specify the number of bytes as the last arg. It seems like this code shouldn't have worked. Actually, it does work ;) Well, if the last arg specify the size in _bytes_, then this patch was wrong. And here is the updated one: - From 5b4c96c0d8b57bf2759b9cdfd4dc1ddfcda2f56a Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Thu, 23 Feb 2012 14:19:19 +0800 Subject: [PATCH] i915: fix wrong rendering of gl_PointSize on Pineview The current code would ignore the point size specified by gl_PointSize builtin variable in vertex shader on Pineview. This patch servers as fixing that. 
This patch fixes the following issues on Pineview: webglc: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/sdk/tests/conformance/rendering/point-size.html piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. v2: pick Eric's nice tip for fixing this issue in hardware rendering. v3: the last arg of EMIT_ATTR specify the size in _byte_. (Eric) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 4f016a3..5b7e93e 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1361,6 +1361,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); } + /* Handle gl_PointSize builtin var here */ + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) + EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4); + if (inputsRead FRAG_BIT_COL0) { intel-coloroffset = offset / 4; EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: handle gl_PointCoord for Gen4 and Gen5 platforms
Ping.. Comments? Thanks, Yuanhan Liu On Mon, Feb 27, 2012 at 03:46:32PM +0800, Yuanhan Liu wrote: This patch add the support of gl_PointCoord gl builtin variable for platform gen4 and gen5(ILK). Unlike gen6+, we don't have a hardware support of gl_PointCoord, means hardware will not calculate the interpolation coefficient for you. Instead, you should handle it yourself in sf shader stage. But badly, gl_PointCoord is a FS instead of VS builtin variable, thus it's not included in c.vue_map generated in VS stage. Thus the current code doesn't aware of this attribute. And to handle it correctly, we need add it to c.vue_map manually to let SF shader generate the needed interpolation coefficient for FS shader. SF stage has it's own copy of vue_map, thus I think it's safe to do it manually. Since handling gl_PointCoord for gen4 and gen5 platforms is somehow a little special, I added a lot of comments and hope I didn't overdo it ;) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45975 Piglit: glsl-fs-pointcoord and fbo-gl_pointcoord NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_context.h |6 ++ src/mesa/drivers/dri/i965/brw_fs.cpp|9 + src/mesa/drivers/dri/i965/brw_sf.c | 26 ++ src/mesa/drivers/dri/i965/brw_sf.h |1 + src/mesa/drivers/dri/i965/brw_sf_emit.c |4 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 09d8373..7c794ad 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -296,6 +296,12 @@ typedef enum BRW_VERT_RESULT_NDC = VERT_RESULT_MAX, BRW_VERT_RESULT_HPOS_DUPLICATE, BRW_VERT_RESULT_PAD, + /* +* It's actually not a vert_result but just a _mark_ to let sf aware that +* he need do something special to handle gl_PointCoord builtin variable +* correctly. see compile_sf_prog() for more info. 
+*/ + BRW_VERT_RESULT_PNTC, BRW_VERT_RESULT_MAX } brw_vert_result; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bf59da3..5f3d79d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -710,6 +710,15 @@ fs_visitor::calculate_urb_setup() urb_setup[fp_index] = urb_next++; } } + + /* + * It's a FS only attribute, and we did interpolation for this attribute + * in SF thread. So, count it here, too. + * + * See compile_sf_prog() for more info. + */ + if (brw-fragment_program-Base.InputsRead BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) + urb_setup[FRAG_ATTRIB_PNTC] = urb_next++; } /* Each attribute is 4 setup channels, each of which is half a reg. */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6e63583..7950c47 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -64,6 +64,16 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.vue_map = brw-vs.prog_data-vue_map; + if (c.key.do_point_coord) { + /* + * gl_PointCoord is a FS instead of VS builtin variable, thus it's + * not included in c.vue_map generated in VS stage. Here we add + * it manually to let SF shader generate the needed interpolation + * coefficient for FS shader. 
+ */ + c.vue_map.vert_result_to_slot[BRW_VERT_RESULT_PNTC] = c.vue_map.num_slots; + c.vue_map.slot_to_vert_result[c.vue_map.num_slots++] = BRW_VERT_RESULT_PNTC; + } c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel); c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; @@ -125,6 +135,7 @@ brw_upload_sf_prog(struct brw_context *brw) { struct gl_context *ctx = brw-intel.ctx; struct brw_sf_prog_key key; + bool render_to_fbo = ctx-DrawBuffer-Name != 0; memset(key, 0, sizeof(key)); @@ -167,7 +178,15 @@ brw_upload_sf_prog(struct brw_context *brw) key.point_sprite_coord_replace |= (1 i); } } - key.sprite_origin_lower_left = (ctx-Point.SpriteOrigin == GL_LOWER_LEFT); + if (brw-fragment_program-Base.InputsRead BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) + key.do_point_coord = 1; + /* +* Window coordinates in a FBO are inverted, which means point +* sprite origin must be inverted, too. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) + key.sprite_origin_lower_left = true; + /* _NEW_LIGHT */ key.do_flat_shading = (ctx-Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx-Light.Enabled ctx-Light.Model.TwoSide
Re: [Mesa-dev] [PATCH 1/2] tnl: let _TNL_ATTRIB_POINTSIZE do not depend on ctx-VertexProgram._Enabled
Hi Brian, comments? Thanks, Yuanhan Liu On Thu, Feb 23, 2012 at 02:19:18PM +0800, Yuanhan Liu wrote: We may specify the point size in a glsl vertex shader. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=46311 piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/tnl/t_context.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 1ded44c..e38c0a3 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -151,8 +151,7 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state ) if (ctx-RenderMode == GL_FEEDBACK) tnl-render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_TEX0); - if (ctx-Point._Attenuated || - (ctx-VertexProgram._Enabled ctx-VertexProgram.PointSizeEnabled)) + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) tnl-render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_POINTSIZE); /* check for varying vars which are written by the vertex program */ -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i915: fix wrong rendering of gl_PointSize on Pineview
ping.. comments? Thanks, Yuanhan Liu On Thu, Feb 23, 2012 at 02:19:19PM +0800, Yuanhan Liu wrote: The current code would ignore the point size specified by gl_PointSize builtin variable in vertex shader on Pineview. This patch servers as fixing that. This patch fixes the following issues on Pineview: webglc: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/sdk/tests/conformance/rendering/point-size.html piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. v2: pick Eric's nice tip for fixing this issue in hardware rendering. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 4f016a3..2d60523 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1361,6 +1361,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); } + /* Handle gl_PointSize builtin var here */ + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) + EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 1); + if (inputsRead FRAG_BIT_COL0) { intel-coloroffset = offset / 4; EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: handle gl_PointCoord for Gen4 and Gen5 platform
On Thu, Feb 23, 2012 at 02:37:06PM +0800, Yuanhan Liu wrote: On Tue, Feb 21, 2012 at 11:59:17AM -0800, Eric Anholt wrote: On Sun, 19 Feb 2012 13:31:33 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: On Sun, Feb 19, 2012 at 8:54 AM, Eric Anholt e...@anholt.net wrote: On Sat, 18 Feb 2012 23:07:32 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: + /* + * gl_PointCoord is a FS instead of VS builtin variable, thus is not + * included in c-nr_setup_regs. But FS need SF do the interpolation, + * so that here padding the interpolation for gl_PointCoord in last. + */ + if (c-key.do_point_coord) + c-nr_setup_regs++; So you're writing an extra attribute of setup, but you haven't increased the size of the URB entry. If the URB is full except for pointcoord, you'd end up writing over the next URB entry and probably hanging the GPU. If you correctly allocate the URB size and that gets reflected in urb_read_length, you should be able to read your new attribute. That's maybe the place I don't understand quite well so far. Say, you write attributes into URB from SF thread to FS. I did increase the urb_read_length in wm_state. Is that the allocation you mean? Should I, say, allocate size for extra attributes just for FS in SF stage? If so, would you please tell me how? Since if I understand correctly, the urb_read_length in SF_STATE is counted from the attributes from VF stage. Thus at least urb_read_length in SF stage is not the right place for me to touch, right? urb_entry_size in the SF is the size of what the SF outputs and is what determines how much space is allocated by brw_urb.c Thanks for the info. Well, I was trying to figure out why this patch doesn't work when the urb read length per attribute is set to 2 at calculate_urb_setup(), but it does work when set to 4. I may not quite understand the urb layout now; and I'm trying to figure it out. OK, finally, I guess I understand that now. And I wrote another patch to fix this issue. 
And this time we don't need to write a single PS shader to handle it, thus it's much cleaner than this patch. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: handle gl_PointCoord for Gen4 and Gen5 platforms
This patch add the support of gl_PointCoord gl builtin variable for platform gen4 and gen5(ILK). Unlike gen6+, we don't have a hardware support of gl_PointCoord, means hardware will not calculate the interpolation coefficient for you. Instead, you should handle it yourself in sf shader stage. But badly, gl_PointCoord is a FS instead of VS builtin variable, thus it's not included in c.vue_map generated in VS stage. Thus the current code doesn't aware of this attribute. And to handle it correctly, we need add it to c.vue_map manually to let SF shader generate the needed interpolation coefficient for FS shader. SF stage has it's own copy of vue_map, thus I think it's safe to do it manually. Since handling gl_PointCoord for gen4 and gen5 platforms is somehow a little special, I added a lot of comments and hope I didn't overdo it ;) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45975 Piglit: glsl-fs-pointcoord and fbo-gl_pointcoord NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_context.h |6 ++ src/mesa/drivers/dri/i965/brw_fs.cpp|9 + src/mesa/drivers/dri/i965/brw_sf.c | 26 ++ src/mesa/drivers/dri/i965/brw_sf.h |1 + src/mesa/drivers/dri/i965/brw_sf_emit.c |4 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 09d8373..7c794ad 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -296,6 +296,12 @@ typedef enum BRW_VERT_RESULT_NDC = VERT_RESULT_MAX, BRW_VERT_RESULT_HPOS_DUPLICATE, BRW_VERT_RESULT_PAD, + /* +* It's actually not a vert_result but just a _mark_ to let sf aware that +* he need do something special to handle gl_PointCoord builtin variable +* correctly. see compile_sf_prog() for more info. 
+*/ + BRW_VERT_RESULT_PNTC, BRW_VERT_RESULT_MAX } brw_vert_result; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bf59da3..5f3d79d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -710,6 +710,15 @@ fs_visitor::calculate_urb_setup() urb_setup[fp_index] = urb_next++; } } + + /* + * It's a FS only attribute, and we did interpolation for this attribute + * in SF thread. So, count it here, too. + * + * See compile_sf_prog() for more info. + */ + if (brw-fragment_program-Base.InputsRead BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) + urb_setup[FRAG_ATTRIB_PNTC] = urb_next++; } /* Each attribute is 4 setup channels, each of which is half a reg. */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6e63583..7950c47 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -64,6 +64,16 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.vue_map = brw-vs.prog_data-vue_map; + if (c.key.do_point_coord) { + /* + * gl_PointCoord is a FS instead of VS builtin variable, thus it's + * not included in c.vue_map generated in VS stage. Here we add + * it manually to let SF shader generate the needed interpolation + * coefficient for FS shader. 
+ */ + c.vue_map.vert_result_to_slot[BRW_VERT_RESULT_PNTC] = c.vue_map.num_slots; + c.vue_map.slot_to_vert_result[c.vue_map.num_slots++] = BRW_VERT_RESULT_PNTC; + } c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel); c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; @@ -125,6 +135,7 @@ brw_upload_sf_prog(struct brw_context *brw) { struct gl_context *ctx = brw-intel.ctx; struct brw_sf_prog_key key; + bool render_to_fbo = ctx-DrawBuffer-Name != 0; memset(key, 0, sizeof(key)); @@ -167,7 +178,15 @@ brw_upload_sf_prog(struct brw_context *brw) key.point_sprite_coord_replace |= (1 i); } } - key.sprite_origin_lower_left = (ctx-Point.SpriteOrigin == GL_LOWER_LEFT); + if (brw-fragment_program-Base.InputsRead BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) + key.do_point_coord = 1; + /* +* Window coordinates in a FBO are inverted, which means point +* sprite origin must be inverted, too. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) + key.sprite_origin_lower_left = true; + /* _NEW_LIGHT */ key.do_flat_shading = (ctx-Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx-Light.Enabled ctx-Light.Model.TwoSide); @@ -176,10 +195,9 @@ brw_upload_sf_prog(struct brw_context *brw) if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon * face orientation, just as we invert
Re: [Mesa-dev] [PATCH] i915: fallback to swrast when gl_PointSize met
On Wed, Feb 22, 2012 at 12:24:11PM -0800, Eric Anholt wrote: On Tue, 21 Feb 2012 23:21:07 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: This patch(the way to fix this issue) is a little weird, as we already fell back to swrast when handling gl_PointSize(in vertex shader). Sounds like we didn't fall back enough;) This patch serves to fix the gl_PointSize issue on the Pineview platform. Since the patch is a little weird, I thought for a while about another fix: get the point size at intelRenderStart() (or somewhere else before vtbl.emit_state()) by reading the vertex program output. This is so tricky that I dropped this fix. Any better thoughts(or even fixes) are welcome and appreciated. This patch would fix piglit glsl-vs-point-size on Pineview(when the GL version 2.0 requirement is removed). It looks like to do this right in hardware, you just need to emit _TNL_ATTRIB_PSZ with S4_VFMT_POINT_WIDTH in i915ValidateFragmentProgram just before COLOR0. I tried it and it worked. This is so *cool*! Thanks for the nice tip. Will make a patch for that. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] tnl: let _TNL_ATTRIB_POINTSIZE do not depend on ctx-VertexProgram._Enabled
We may specify the point size in a glsl vertex shader. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=46311 piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/tnl/t_context.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 1ded44c..e38c0a3 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -151,8 +151,7 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state ) if (ctx-RenderMode == GL_FEEDBACK) tnl-render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_TEX0); - if (ctx-Point._Attenuated || - (ctx-VertexProgram._Enabled ctx-VertexProgram.PointSizeEnabled)) + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) tnl-render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_POINTSIZE); /* check for varying vars which are written by the vertex program */ -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i915: fix wrong rendering of gl_PointSize on Pineview
The current code would ignore the point size specified by the gl_PointSize builtin variable in the vertex shader on Pineview. This patch fixes that. This patch fixes the following issues on Pineview: webglc: https://cvs.khronos.org/svn/repos/registry/trunk/public/webgl/sdk/tests/conformance/rendering/point-size.html piglit: glsl-vs-point-size NOTE: This is a candidate for stable release branches. v2: pick Eric's nice tip for fixing this issue in hardware rendering. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i915/i915_fragprog.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 4f016a3..2d60523 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1361,6 +1361,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); } + /* Handle gl_PointSize builtin var here */ + if (ctx-Point._Attenuated || ctx-VertexProgram.PointSizeEnabled) + EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 1); + if (inputsRead FRAG_BIT_COL0) { intel-coloroffset = offset / 4; EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: handle gl_PointCoord for Gen4 and Gen5 platform
On Tue, Feb 21, 2012 at 11:59:17AM -0800, Eric Anholt wrote: On Sun, 19 Feb 2012 13:31:33 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: On Sun, Feb 19, 2012 at 8:54 AM, Eric Anholt e...@anholt.net wrote: On Sat, 18 Feb 2012 23:07:32 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: + /* + * gl_PointCoord is a FS instead of VS builtin variable, thus is not + * included in c-nr_setup_regs. But FS need SF do the interpolation, + * so that here padding the interpolation for gl_PointCoord in last. + */ + if (c-key.do_point_coord) + c-nr_setup_regs++; So you're writing an extra attribute of setup, but you haven't increased the size of the URB entry. If the URB is full except for pointcoord, you'd end up writing over the next URB entry and probably hanging the GPU. If you correctly allocate the URB size and that gets reflected in urb_read_length, you should be able to read your new attribute. That's maybe the place I don't understand quite well so far. Say, you write attributes into URB from SF thread to FS. I did increase the urb_read_length in wm_state. Is that the allocation you mean? Should I, say, allocate size for extra attributes just for FS in SF stage? If so, would you please tell me how? Since if I understand correctly, the urb_read_length in SF_STATE is counted from the attributes from VF stage. Thus at least urb_read_length in SF stage is not the right place for me to touch, right? urb_entry_size in the SF is the size of what the SF outputs and is what determines how much space is allocated by brw_urb.c Thanks for the info. Well, I was trying to figure out why this patch doesn't work when the urb read length per attribute is set to 2 at calculate_urb_setup(), but it does work when set to 4. I may not quite understand the urb layout now; and I'm trying to figure it out. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: handle gl_PointCoord for Gen4 and Gen5 platform
This patch add the support of gl_PointCoord gl builtin var for platform gen4 and gen5(ILK). We can get the point start coord and the current pixel coord, and the only left element needed is the point size. Thus I wrote another simple SF routine for that. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45975 --- I am somehow not quite sure that this is the right way to implement gl_PointCoord for gen4 and gen5 platform. But it does work on G45 and ILK(the two platform I've tested, Zhao jian will help to test more gen4 platforms). Thus comments are hugely welcome!!! NOTE: This is a candidate for stable release branches. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_fs.cpp | 27 + src/mesa/drivers/dri/i965/brw_fs.h |1 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |2 + src/mesa/drivers/dri/i965/brw_sf.c |6 ++- src/mesa/drivers/dri/i965/brw_sf.h |2 + src/mesa/drivers/dri/i965/brw_sf_emit.c | 53 ++ 6 files changed, 90 insertions(+), 1 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6ecaa6c..178b9c9c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -421,6 +421,33 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) } fs_reg * +fs_visitor::emit_pointcoord_interpolation_gen4(ir_variable *ir) +{ + fs_reg *reg = new(this-mem_ctx) fs_reg(this, ir-type); + fs_reg pntcoord = *reg; + int urb_start = c-nr_payload_regs + c-prog_data.curb_read_length; + fs_reg pnt_size_rcp = fs_reg(brw_vec1_grf(urb_start, 0)); + bool render_to_fbo = ctx-DrawBuffer-Name != 0; + + /* gl_PointCoord.x */ + emit(BRW_OPCODE_MUL, pntcoord, this-delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], pnt_size_rcp); + pntcoord.reg_offset++; + + /* gl_PointCoord.y */ + emit(BRW_OPCODE_MUL, pntcoord, this-delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], pnt_size_rcp); + /* +* Window coordinates in an FBO are inverted, which means coord in Y +* must be inverted, 
too. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { + pntcoord.negate = true; + emit(BRW_OPCODE_ADD, pntcoord, pntcoord, fs_reg(1.0f)); + } + + return reg; +} + +fs_reg * fs_visitor::emit_general_interpolation(ir_variable *ir) { fs_reg *reg = new(this-mem_ctx) fs_reg(this, ir-type); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5fdc055..782c695 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -517,6 +517,7 @@ public: void emit_dummy_fs(); fs_reg *emit_fragcoord_interpolation(ir_variable *ir); fs_reg *emit_frontfacing_interpolation(ir_variable *ir); + fs_reg *emit_pointcoord_interpolation_gen4(ir_variable *ir); fs_reg *emit_general_interpolation(ir_variable *ir); void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ea8cd37..ed53ad5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -63,6 +63,8 @@ fs_visitor::visit(ir_variable *ir) reg = emit_fragcoord_interpolation(ir); } else if (!strcmp(ir-name, gl_FrontFacing)) { reg = emit_frontfacing_interpolation(ir); + } else if (!strcmp(ir-name, gl_PointCoord) intel-gen 6) { + reg = emit_pointcoord_interpolation_gen4(ir); } else { reg = emit_general_interpolation(ir); } diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 54c27f9..69c507a 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -84,7 +84,9 @@ static void compile_sf_prog( struct brw_context *brw, break; case SF_POINTS: c.nr_verts = 1; - if (key-do_point_sprite) + if (key-do_point_sprite key-do_point_coord) + brw_emit_point_coord_setup( c, true ); + else if (key-do_point_sprite) brw_emit_point_sprite_setup( c, true ); else brw_emit_point_setup( c, true ); @@ -167,6 +169,8 @@ 
brw_upload_sf_prog(struct brw_context *brw) key.point_sprite_coord_replace |= (1 i); } } + if (brw-fragment_program-Base.InputsRead BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) + key.do_point_coord = 1; key.sprite_origin_lower_left = (ctx-Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx-Light.ShadeModel == GL_FLAT); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 4ef0240..d4c70e7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -52,6 +52,7 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1
Re: [Mesa-dev] [PATCH] mesa: add missing texture integer test in glTexSubImage()
On Wed, Feb 15, 2012 at 03:41:01PM -0700, Brian Paul wrote: If the texture format is integer, the incoming user data must also be integer (and similarly for non-integer textures). NOTE: This is a candidate for the stable branches. --- src/mesa/main/teximage.c | 11 +++ 1 files changed, 11 insertions(+), 0 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index e4eb7f6..a3ffb01 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1852,6 +1852,17 @@ subtexture_error_check2( struct gl_context *ctx, GLuint dimensions, } } + if (ctx-VersionMajor = 3 || ctx-Extensions.EXT_texture_integer) { + /* both source and dest must be integer-valued, or neither */ + if (_mesa_is_format_integer_color(destTex-TexFormat) != + _mesa_is_integer_format(format)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + glTexSubImage%d(integer/non-integer format mismatch), ^ I guess you missed one 'D' here. Other than that, Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com + dimensions); + return GL_TRUE; + } + } + return GL_FALSE; } -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
On Thu, Jan 19, 2012 at 09:51:32AM -0800, Eric Anholt wrote: On Thu, 19 Jan 2012 10:30:53 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 NOTE: This is a candidate for stable release branches. v2: add the simliar logic to ivb, too (comments from Ian) simplify the logic operation (comments from Brian) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h |1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 15 +-- src/mesa/drivers/dri/i965/gen7_sf_state.c | 20 +--- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE(1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT(0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT(1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..67c208b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + uint32_t point_sprite_origin; /* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,18 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 |= 
U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* +* When rendering to FBO, rendering is inverted. At the same time, +* we would also make sure the point sprite origin is inverted. +* Or, we will get an inverted result corresponding to rendering +* to the default/window FBO. +*/ I think this comment could be simplified to Window coordinates in an FBO are inverted, which means point sprite origin must be inverted. looks better, will change that. Thanks! + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) ^ render_to_fbo) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } Much better. That logic was hideous before. Yeah, agreed. Well, the logic before was somehow clear ;) Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
On Thu, Jan 19, 2012 at 10:32:30AM -0700, Brian Paul wrote: On 01/19/2012 10:17 AM, Ian Romanick wrote: On 01/18/2012 06:30 PM, Yuanhan Liu wrote: When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 NOTE: This is a candidate for stable release branches. v2: add the simliar logic to ivb, too (comments from Ian) simplify the logic operation (comments from Brian) Signed-off-by: Yuanhan Liuyuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 15 +-- src/mesa/drivers/dri/i965/gen7_sf_state.c | 20 +--- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE (1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..67c208b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + uint32_t point_sprite_origin; /* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,18 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 
|= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* + * When rendering to FBO, rendering is inverted. At the same time, + * we would also make sure the point sprite origin is inverted. + * Or, we will get an inverted result corresponding to rendering + * to the default/window FBO. + */ + if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) ^ render_to_fbo) { I (mostly) like that. I was trying to think of a way to simplify the if-statements in the original patch, but I couldn't think of a good way. However, using the bit-wise xor is not correct here. The compiler accepts it because everything is an integer in C. Some tools, like Coverity, will probably complain about this. You really want a ^^ (logical xor), but C doesn't have that. What C does have, that does exactly the same thing, is ==. I suggest changing this to Thanks for the information. if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == render_to_fbo) { It looks a bit weird, but it is correct. I suggested ^ to Yuanhan. Yes you have to be careful with it. Note that X ^ render_to_fbo is already used earlier in the function. But I think you want != in this case, not ==. Using '!=' is OK with me. Will change that. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 NOTE: This is a candidate for stable release branches. v2: add the simliar logic to ivb, too (comments from Ian) simplify the logic operation (comments from Brian) v3: pick a better comment from Eric use != for the logic instead of ^ (comments from Ian) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h |1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 13 +++-- src/mesa/drivers/dri/i965/gen7_sf_state.c | 18 +++--- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE(1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT(0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT(1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..163b54c 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + uint32_t point_sprite_origin; /* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,16 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if 
(ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* +* Window coordinates in an FBO are inverted, which means point +* sprite origin must be inverted, too. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } + dw1 |= point_sprite_origin; /* _NEW_LIGHT */ if (ctx-Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 7691cb2..da7ef81 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -48,6 +48,9 @@ upload_sbe_state(struct brw_context *brw) int urb_entry_read_offset = 1; bool userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); uint16_t attr_overrides[FRAG_ATTRIB_MAX]; + /* _NEW_BUFFERS */ + bool render_to_fbo = ctx-DrawBuffer-Name != 0; + uint32_t point_sprite_origin; brw_compute_vue_map(vue_map, intel, userclip_active, vs_outputs_written); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; @@ -65,9 +68,18 @@ upload_sbe_state(struct brw_context *brw) urb_entry_read_length GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; - /* _NEW_POINT */ - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* _NEW_POINT +* +* Window coordinates in an FBO are inverted, which means point +* sprite origin must be inverted. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } + dw1 |= point_sprite_origin; + dw10 = 0; dw11 = 0; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h |1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 19 +-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE(1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT(0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT(1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..d354a2b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + int point_sprite_origin; /* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,22 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* +* When rendering to FBO, rendering is inverted. At the same time, +* we would also make sure the point sprite origin is inverted. +* Or, we will get an inverted result corresponding to rendering +* to the default/window FBO. 
+*/ + if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + if (render_to_fbo) + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + if (render_to_fbo) + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } + dw1 |= point_sprite_origin; /* _NEW_LIGHT */ if (ctx-Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
On Wed, Jan 18, 2012 at 06:23:52PM +0800, Yuanhan Liu wrote: When rendering to FBO, rendering is inverted. At the same time, we must also make sure the point sprite origin is inverted. Otherwise, we will get an inverted result compared to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 Actually, I'm not sure about this patch, and I would like to treat this patch as a _workaround_. So, any comments are welcome. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
On Wed, Jan 18, 2012 at 12:13:28PM -0800, Ian Romanick wrote: On 01/18/2012 02:21 AM, Yuanhan Liu wrote: On Wed, Jan 18, 2012 at 06:23:52PM +0800, Yuanhan Liu wrote: When rendering to FBO, rendering is inverted. At the same time, we must also make sure the point sprite origin is inverted. Otherwise, we will get an inverted result compared to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 Actually, I'm not sure about this patch, and I would like to treat this patch as a _workaround_. So, any comments are welcome. Why? Do you have a different solution in mind? Since I tried to 'fix' this issue. Actually, this is the first workaround I thought of when I met this issue. I just didn't know why back then. Well, Nanhai's suggestion told me that this _might_ be the right fix for this issue. But I'm still a little unsure, so that's why. This patch fixes this problem the way I would have expected it to be fixed. Yes, it does. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
On Wed, Jan 18, 2012 at 11:53:20AM -0800, Ian Romanick wrote: On 01/18/2012 02:23 AM, Yuanhan Liu wrote: When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 Signed-off-by: Yuanhan Liuyuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h |1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 19 +-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE(1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT(1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..d354a2b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c I think gen7_sf_state.c needs similar changes. Yeah, you are right. @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + int point_sprite_origin; dw1 is a uint32_t, so this should be too. Yes. Will sent an updated patch soon. 
/* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,22 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* +* When rendering to FBO, rendering is inverted. At the same time, +* we would also make sure the point sprite origin is inverted. +* Or, we will get an inverted result corresponding to rendering +* to the default/window FBO. +*/ + if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + if (render_to_fbo) + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + if (render_to_fbo) + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } + dw1 |= point_sprite_origin; /* _NEW_LIGHT */ if (ctx-Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: fix inverted point sprite origin when rendering to FBO
When rendering to FBO, rendering is inverted. At the same time, we would also make sure the point sprite origin is inverted. Or, we will get an inverted result correspoinding to rendering to the default winsys FBO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44613 NOTE: This is a candidate for stable release branches. v2: add the simliar logic to ivb, too (comments from Ian) simplify the logic operation (comments from Brian) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h |1 + src/mesa/drivers/dri/i965/gen6_sf_state.c | 15 +-- src/mesa/drivers/dri/i965/gen7_sf_state.c | 20 +--- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4d90a99..029be87 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1128,6 +1128,7 @@ enum brw_message_target { /* DW1 (for gen6) */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE(1 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT(0 20) # define GEN6_SF_POINT_SPRITE_LOWERLEFT(1 20) # define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 548c5a3..67c208b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -129,6 +129,7 @@ upload_sf_state(struct brw_context *brw) float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; bool userclip_active; + uint32_t point_sprite_origin; /* _NEW_TRANSFORM */ userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); @@ -258,8 +259,18 @@ upload_sf_state(struct brw_context *brw) /* Clamp to the hardware limits and convert to fixed point */ dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* +* 
When rendering to FBO, rendering is inverted. At the same time, +* we would also make sure the point sprite origin is inverted. +* Or, we will get an inverted result corresponding to rendering +* to the default/window FBO. +*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) ^ render_to_fbo) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } + dw1 |= point_sprite_origin; /* _NEW_LIGHT */ if (ctx-Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 7691cb2..75a1cb0 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -48,6 +48,9 @@ upload_sbe_state(struct brw_context *brw) int urb_entry_read_offset = 1; bool userclip_active = (ctx-Transform.ClipPlanesEnabled != 0); uint16_t attr_overrides[FRAG_ATTRIB_MAX]; + /* _NEW_BUFFERS */ + bool render_to_fbo = brw-intel.ctx.DrawBuffer-Name != 0; + uint32_t point_sprite_origin; brw_compute_vue_map(vue_map, intel, userclip_active, vs_outputs_written); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; @@ -65,9 +68,20 @@ upload_sbe_state(struct brw_context *brw) urb_entry_read_length GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; - /* _NEW_POINT */ - if (ctx-Point.SpriteOrigin == GL_LOWER_LEFT) - dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + /* _NEW_POINT +* +* When rendering to FBO, rendering is inverted. At the same time, +* we would also make sure the point sprite origin is inverted. +* Or, we will get an inverted result corresponding to rendering +* to the default/window FBO. 
+*/ + if ((ctx-Point.SpriteOrigin == GL_LOWER_LEFT) ^ render_to_fbo) { + point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; + } else { + point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; + } + dw1 |= point_sprite_origin; + dw10 = 0; dw11 = 0; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: introduce vbo_get_minmax_indices function
On Tue, Jan 10, 2012 at 08:43:18PM -0700, Brian Paul wrote: On Tue, Jan 3, 2012 at 8:59 PM, Yuanhan Liu yuanhan@linux.intel.com wrote: On Wed, Jan 04, 2012 at 11:20:07AM +0800, Yuanhan Liu wrote: On Tue, Jan 03, 2012 at 08:25:31PM +0100, Roland Scheidegger wrote: Ah index scanning... I don't like that this will map/unmap the ib once for each prim, Me either :) though [snip].. +vbo_get_minmax_indices(struct gl_context *ctx, + const struct _mesa_prim *prims, + const struct _mesa_index_buffer *ib, + GLuint *min_index, + GLuint *max_index, + GLuint nr_prims) +{ + struct _mesa_prim start_prim; I think you could use a pointer for start_prim: const struct _mesa_prim *start_prim; to avoid copying 20 bytes per loop iteration below. The declaration could also be moved inside the loop. Aha, yes. I should do this. Thanks, here is the updated patch: From 66f309648a20736c932eb1d393ca7cad6679532a Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Sat, 31 Dec 2011 14:22:46 +0800 Subject: [PATCH] vbo: introduce vbo_get_minmax_indices function Introduce vbo_get_minmax_indices() function to handle the min/max index computation for nr_prims(= 1). The old code just compute the first prim's min/max index; this would results an error rendering if user called functions like glMultiDrawElements(). This patch servers as fixing this issue. As when nr_prims = 1, we can pass 1 to paramter nr_prims, thus I made vbo_get_minmax_index() static. v2: per Roland's suggestion, put the indices address compuation into vbo_get_minmax_index() instead. Also do comination if possible to reduce map/unmap count v3: per Brian's suggestion, use a pointer for start_prim to avoid structure copy per loop. 
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com Reviewed-by: Roland Scheidegger srol...@vmware.com Reviewed-by: Brian Paul bri...@vmware.com --- src/mesa/drivers/dri/i965/brw_draw.c |2 +- src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c |3 +- src/mesa/main/api_validate.c |2 +- src/mesa/state_tracker/st_draw.c |3 +- src/mesa/state_tracker/st_draw_feedback.c|2 +- src/mesa/tnl/t_draw.c|2 +- src/mesa/vbo/vbo.h |6 ++-- src/mesa/vbo/vbo_exec_array.c| 50 + 8 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 621195d..f50fffd 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -586,7 +586,7 @@ void brw_draw_prims( struct gl_context *ctx, if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) -vbo_get_minmax_index(ctx, prim, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prim, ib, min_index, max_index, nr_prims); /* Decide if we want to rebase. If so we end up recursing once * only into this function. 
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index de04d18..59f1542 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -437,7 +437,8 @@ TAG(vbo_render_prims)(struct gl_context *ctx, struct nouveau_render_state *render = to_render_state(ctx); if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, + nr_prims); vbo_choose_render_mode(ctx, arrays); vbo_choose_attrs(ctx, arrays); diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 945f127..b6871d0 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -184,7 +184,7 @@ check_index_bounds(struct gl_context *ctx, GLsizei count, GLenum type, ib.ptr = indices; ib.obj = ctx-Array.ArrayObj-ElementArrayBufferObj; - vbo_get_minmax_index(ctx, prim, ib, min, max); + vbo_get_minmax_indices(ctx, prim, ib, min, max, 1); if ((int)(min + basevertex) 0 || max + basevertex ctx-Array.ArrayObj-_MaxElement) { diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 6d6fc85..c0554cf 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -990,7 +990,8 @@ st_draw_vbo(struct gl_context *ctx, /* Gallium probably doesn't want this in some cases. */ if (!index_bounds_valid) if (!all_varyings_in_vbos(arrays)) -vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index
Re: [Mesa-dev] [PATCH] vbo: introduce vbo_get_minmax_indices function
On Wed, Jan 04, 2012 at 07:23:24PM +0100, Roland Scheidegger wrote: Am 04.01.2012 04:59, schrieb Yuanhan Liu: On Wed, Jan 04, 2012 at 11:20:07AM +0800, Yuanhan Liu wrote: On Tue, Jan 03, 2012 at 08:25:31PM +0100, Roland Scheidegger wrote: Ah index scanning... I don't like that this will map/unmap the ib once for each prim, Me neither :) though I don't really see a nice way to avoid that (I think if you have to Well, I thought about it for a while; we may be able to do some combining to reduce the number of map/unmap calls. Ok, here is the new patch, please help review it. And Brian, since it touches the Mesa core, it would be nice if you'd review it. Hi Brian, any comments? Thanks, Yuanhan Liu Looks good to me. Reviewed-by: Roland Scheidegger srol...@vmware.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: only map src/dest regions in _mesa_copy_buffer_subdata()
On Wed, Jan 04, 2012 at 02:55:44PM -0700, Brian Paul wrote: We were wastefully mapping the whole source/dest buffers before. --- src/mesa/main/bufferobj.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) Looks good to me. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 66957e4..c4a7b8d 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -502,19 +502,19 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size) { - GLubyte *srcPtr, *dstPtr; + void *srcPtr, *dstPtr; /* buffer should not already be mapped */ assert(!_mesa_bufferobj_mapped(src)); assert(!_mesa_bufferobj_mapped(dst)); - srcPtr = (GLubyte *) ctx-Driver.MapBufferRange(ctx, 0, src-Size, -GL_MAP_READ_BIT, src); - dstPtr = (GLubyte *) ctx-Driver.MapBufferRange(ctx, 0, dst-Size, -GL_MAP_WRITE_BIT, dst); + srcPtr = ctx-Driver.MapBufferRange(ctx, readOffset, size, + GL_MAP_READ_BIT, src); + dstPtr = ctx-Driver.MapBufferRange(ctx, writeOffset, size, + GL_MAP_WRITE_BIT, dst); if (srcPtr dstPtr) - memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); + memcpy(dstPtr, srcPtr, size); ctx-Driver.UnmapBuffer(ctx, src); ctx-Driver.UnmapBuffer(ctx, dst); -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: introduce vbo_get_minmax_indices function
On Tue, Jan 03, 2012 at 08:25:31PM +0100, Roland Scheidegger wrote: Ah index scanning... I don't like that this will map/unmap the ib once for each prim, Me neither :) though I don't really see a nice way to avoid that (I think if you have to Well, I thought about it for a while; we may be able to do some combining to reduce the number of map/unmap calls. actually map the ib, you lose anyway). Hopefully won't hit that performance hog often... A comment inline. Am 31.12.2011 07:32, schrieb Yuanhan Liu: [snip]... + for (i = 0; i < nr_prims; i++) { + tmp_ib.ptr = ib->ptr + prims[i].start * vbo_sizeof_ib_type(ib->type); I think you should not use a temporary ib. Figuring out the correct start offset clearly looks like it should be handled by vbo_get_minmax_index() itself (it should have done this previously probably, as there might never have been a guarantee that it is always 0 even if there's only a single primitive). Nice suggestion, thanks! Will fix it in the next patch. -- Yuanhan Liu + vbo_get_minmax_index(ctx, &prims[i], &tmp_ib, &tmp_min, &tmp_max); + *min_index = MIN2(*min_index, tmp_min); + *max_index = MAX2(*max_index, tmp_max); + } +} + /** * Check that element 'j' of the array has reasonable data. Otherwise looks ok to me. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: introduce vbo_get_minmax_indices function
On Wed, Jan 04, 2012 at 11:20:07AM +0800, Yuanhan Liu wrote: On Tue, Jan 03, 2012 at 08:25:31PM +0100, Roland Scheidegger wrote: Ah index scanning... I don't like that this will map/unmap the ib once for each prim, Me either :) though I don't really see a nice way to avoid that (I think if you have to Well, I thought a while, we may do some combine to reduce some map/unmap. Ok, here is the new patch, please help to review it. And Brian, since it touches the mesa core, it would be nice if you'd review it. Thanks, Yuanhan Liu -- From 7956b5c93bdfd0e94b6d3e25336c99cd7457f550 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Sat, 31 Dec 2011 14:22:46 +0800 Subject: [PATCH] vbo: introduce vbo_get_minmax_indices function Introduce vbo_get_minmax_indices() function to handle the min/max index computation for nr_prims(= 1). The old code just compute the first prim's min/max index; this would results an error rendering if user called functions like glMultiDrawElements(). This patch servers as fixing this issue. As when nr_prims = 1, we can pass 1 to paramter nr_prims, thus I made vbo_get_minmax_index() static. v2: per Roland's suggestion, put the indices address compuation into vbo_get_minmax_index() instead. 
Also do comination if possible to reduce map/unmap count Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_draw.c |2 +- src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c |3 +- src/mesa/main/api_validate.c |2 +- src/mesa/state_tracker/st_draw.c |3 +- src/mesa/state_tracker/st_draw_feedback.c|2 +- src/mesa/tnl/t_draw.c|2 +- src/mesa/vbo/vbo.h |6 ++-- src/mesa/vbo/vbo_exec_array.c| 49 + 8 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 621195d..f50fffd 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -586,7 +586,7 @@ void brw_draw_prims( struct gl_context *ctx, if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) -vbo_get_minmax_index(ctx, prim, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prim, ib, min_index, max_index, nr_prims); /* Decide if we want to rebase. If so we end up recursing once * only into this function. 
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index de04d18..59f1542 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -437,7 +437,8 @@ TAG(vbo_render_prims)(struct gl_context *ctx, struct nouveau_render_state *render = to_render_state(ctx); if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, + nr_prims); vbo_choose_render_mode(ctx, arrays); vbo_choose_attrs(ctx, arrays); diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 945f127..b6871d0 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -184,7 +184,7 @@ check_index_bounds(struct gl_context *ctx, GLsizei count, GLenum type, ib.ptr = indices; ib.obj = ctx-Array.ArrayObj-ElementArrayBufferObj; - vbo_get_minmax_index(ctx, prim, ib, min, max); + vbo_get_minmax_indices(ctx, prim, ib, min, max, 1); if ((int)(min + basevertex) 0 || max + basevertex ctx-Array.ArrayObj-_MaxElement) { diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 954f15a..6327a4c 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -933,7 +933,8 @@ st_draw_vbo(struct gl_context *ctx, /* Gallium probably doesn't want this in some cases. 
*/ if (!index_bounds_valid) if (!all_varyings_in_vbos(arrays)) -vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, + nr_prims); for (i = 0; i nr_prims; i++) { num_instances = MAX2(num_instances, prims[i].num_instances); diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index a99eb2b..f38f44c 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -119,7 +119,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, st_validate_state(st); if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, nr_prims); /* must get these after
Re: [Mesa-dev] [PATCH 2/2] i965: fix the wrong min/max_index for nr_prims > 1
On Thu, Dec 29, 2011 at 09:10:03AM +0100, Michel Dänzer wrote: On Don, 2011-12-29 at 10:03 +0800, Yuanhan Liu wrote: On Wed, Dec 28, 2011 at 12:07:08PM -0800, Eric Anholt wrote: On Wed, 28 Dec 2011 13:54:43 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: The current code would just calculate min/max_index for the first prim unconditionally, which is wrong if nr_prims > 1. This would break some cases, e.g. when the index is stored in an element array buffer object and drawn by glMultiDrawElements. Thus it fixes some intel oglc primbuff test cases. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com It does look like gallium has the same bug -- i965g? I just found that the whole i965g is deleted by commit 2c27f204f1ca6f09f9520712be1da9a13ed5c01d. this should probably be a vbo helper function. If you were talking about i965g and now it was deleted, should I make this be a vbo helper function? i965g was just one Gallium driver. Presumably, Eric was referring to the Gallium Mesa state tracker (src/mesa/state_tracker/), which translates between the Mesa and Gallium driver interfaces. Hi Michel, Thanks for the information. Will make some new patches. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] vbo: introduce vbo_get_minmax_indices function
Introduce vbo_get_minmax_indices() function to handle the min/max index computation for nr_prims(= 1). The old code just compute the first prim's min/max index; this would results an error rendering if user called functions like glMultiDrawElements(). This patch servers as fixing this issue. As when nr_prims = 1, we can pass 1 to paramter nr_prims, thus I made vbo_get_minmax_index() static. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_draw.c |2 +- src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c |3 +- src/mesa/main/api_validate.c |2 +- src/mesa/state_tracker/st_draw.c |3 +- src/mesa/state_tracker/st_draw_feedback.c|2 +- src/mesa/tnl/t_draw.c|2 +- src/mesa/vbo/vbo.h |6 ++-- src/mesa/vbo/vbo_exec_array.c| 29 +- 8 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 621195d..f50fffd 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -586,7 +586,7 @@ void brw_draw_prims( struct gl_context *ctx, if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) -vbo_get_minmax_index(ctx, prim, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prim, ib, min_index, max_index, nr_prims); /* Decide if we want to rebase. If so we end up recursing once * only into this function. 
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index de04d18..59f1542 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -437,7 +437,8 @@ TAG(vbo_render_prims)(struct gl_context *ctx, struct nouveau_render_state *render = to_render_state(ctx); if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, + nr_prims); vbo_choose_render_mode(ctx, arrays); vbo_choose_attrs(ctx, arrays); diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 945f127..b6871d0 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -184,7 +184,7 @@ check_index_bounds(struct gl_context *ctx, GLsizei count, GLenum type, ib.ptr = indices; ib.obj = ctx-Array.ArrayObj-ElementArrayBufferObj; - vbo_get_minmax_index(ctx, prim, ib, min, max); + vbo_get_minmax_indices(ctx, prim, ib, min, max, 1); if ((int)(min + basevertex) 0 || max + basevertex ctx-Array.ArrayObj-_MaxElement) { diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 954f15a..6327a4c 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -933,7 +933,8 @@ st_draw_vbo(struct gl_context *ctx, /* Gallium probably doesn't want this in some cases. 
*/ if (!index_bounds_valid) if (!all_varyings_in_vbos(arrays)) -vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); +vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, + nr_prims); for (i = 0; i nr_prims; i++) { num_instances = MAX2(num_instances, prims[i].num_instances); diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index a99eb2b..f38f44c 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -119,7 +119,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, st_validate_state(st); if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prims, ib, min_index, max_index, nr_prims); /* must get these after state validation! */ vp = st-vp; diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index f949c34..17042cf 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -418,7 +418,7 @@ void _tnl_vbo_draw_prims(struct gl_context *ctx, struct gl_transform_feedback_object *tfb_vertcount) { if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prim, ib, min_index, max_index); + vbo_get_minmax_indices(ctx, prim, ib, min_index, max_index, nr_prims); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index ed8fc17..bf925ab 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -127,9 +127,9 @@ int vbo_sizeof_ib_type(GLenum type); void -vbo_get_minmax_index(struct gl_context *ctx, const struct _mesa_prim *prim, -const struct _mesa_index_buffer *ib, -GLuint
Re: [Mesa-dev] [PATCH 2/2] i965: fix the wrong min/max_index for nr_prims > 1
On Wed, Dec 28, 2011 at 12:07:08PM -0800, Eric Anholt wrote: On Wed, 28 Dec 2011 13:54:43 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: The current code would just calculate min/max_index for the first prim unconditionally, which is wrong if nr_prims > 1. This would break some cases, e.g. when the index is stored in an element array buffer object and drawn by glMultiDrawElements. Thus it fixes some intel oglc primbuff test cases. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com It does look like gallium has the same bug -- i965g? I just found that the whole i965g is deleted by commit 2c27f204f1ca6f09f9520712be1da9a13ed5c01d. this should probably be a vbo helper function. If you were talking about i965g and now it was deleted, should I make this be a vbo helper function? Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] vbo: introduce vbo_sizeof_ib_type() function
introduce vbo_sizeof_ib_type() function to return the index data type size. I see some place use switch(ib-type) to get the index data type, which is sort of duplicate. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/state_tracker/st_draw.c | 15 + src/mesa/state_tracker/st_draw_feedback.c | 17 ++ src/mesa/tnl/t_draw.c | 20 +-- src/mesa/vbo/vbo.h|4 ++ src/mesa/vbo/vbo_exec_array.c | 52 ++-- 5 files changed, 29 insertions(+), 79 deletions(-) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 87a9978..954f15a 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -584,20 +584,7 @@ setup_index_buffer(struct gl_context *ctx, if (ib) { struct gl_buffer_object *bufobj = ib-obj; - switch (ib-type) { - case GL_UNSIGNED_INT: - ibuffer-index_size = 4; - break; - case GL_UNSIGNED_SHORT: - ibuffer-index_size = 2; - break; - case GL_UNSIGNED_BYTE: - ibuffer-index_size = 1; - break; - default: - assert(0); -return; - } + ibuffer-index_size = vbo_sizeof_ib_type(ib-type); /* get/create the index buffer object */ if (_mesa_is_bufferobj(bufobj)) { diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 4c1e674..a99eb2b 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -218,20 +218,9 @@ st_feedback_draw_vbo(struct gl_context *ctx, if (ib) { struct gl_buffer_object *bufobj = ib-obj; - switch (ib-type) { - case GL_UNSIGNED_INT: - ibuffer.index_size = 4; - break; - case GL_UNSIGNED_SHORT: - ibuffer.index_size = 2; - break; - case GL_UNSIGNED_BYTE: - ibuffer.index_size = 1; - break; - default: - assert(0); -goto out_unref_vertex; - } + ibuffer.index_size = vbo_sizeof_ib_type(ib-type); + if (ibuffer.index_size == 0) + goto out_unref_vertex; if (bufobj bufobj-Name) { struct st_buffer_object *stobj = st_buffer_object(bufobj); diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 
83ded19..f949c34 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -349,26 +349,10 @@ static void bind_indices( struct gl_context *ctx, if (_mesa_is_bufferobj(ib-obj) !_mesa_bufferobj_mapped(ib-obj)) { /* if the buffer object isn't mapped yet, map it now */ - unsigned map_size; - - switch (ib-type) { - case GL_UNSIGNED_BYTE: -map_size = ib-count * sizeof(GLubyte); -break; - case GL_UNSIGNED_SHORT: -map_size = ib-count * sizeof(GLushort); -break; - case GL_UNSIGNED_INT: -map_size = ib-count * sizeof(GLuint); -break; - default: -assert(0); -map_size = 0; - } - bo[*nr_bo] = ib-obj; (*nr_bo)++; - ptr = ctx-Driver.MapBufferRange(ctx, (GLsizeiptr) ib-ptr, map_size, + ptr = ctx-Driver.MapBufferRange(ctx, (GLsizeiptr) ib-ptr, + ib-count * vbo_sizeof_ib_type(ib-type), GL_MAP_READ_BIT, ib-obj); assert(ib-obj-Pointer); } else { diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 7384790..ed8fc17 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -122,6 +122,10 @@ void vbo_rebase_prims( struct gl_context *ctx, GLuint min_index, GLuint max_index, vbo_draw_func draw ); + +int +vbo_sizeof_ib_type(GLenum type); + void vbo_get_minmax_index(struct gl_context *ctx, const struct _mesa_prim *prim, const struct _mesa_index_buffer *ib, diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 2db85e2..fec49d3 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -75,6 +75,22 @@ vbo_check_buffers_are_unmapped(struct gl_context *ctx) assert(!_mesa_bufferobj_mapped(exec-vtx.bufferobj)); } +int +vbo_sizeof_ib_type(GLenum type) +{ + switch (type) { + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(!unsupported index data type); + /* In case assert is turned off */ + return 0; + } +} /** @@ -96,24 +112,8 @@ vbo_get_minmax_index(struct gl_context *ctx, GLuint i; if 
(_mesa_is_bufferobj(ib-obj)) { - unsigned map_size; - - switch (ib-type) { - case GL_UNSIGNED_INT: -map_size = count * sizeof(GLuint); -break; - case GL_UNSIGNED_SHORT: -map_size = count * sizeof(GLushort
[Mesa-dev] [PATCH 2/2] i965: fix the wrong min/max_index for nr_prims > 1
The current code would just calculate min/max_index for the first prim unconditionally, which is wrong if nr_prims > 1. This would break some cases, e.g. when the index is stored in an element array buffer object and drawn by glMultiDrawElements. Thus it fixes some intel oglc primbuff test cases. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_draw.c | 18 -- 1 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 621195d..3d0cc7c 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -585,8 +585,22 @@ void brw_draw_prims( struct gl_context *ctx, return; if (!vbo_all_varyings_in_vbos(arrays)) { - if (!index_bounds_valid) -vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + if (!index_bounds_valid) { + struct _mesa_index_buffer tmp_ib; + GLuint tmp_min, tmp_max; + int i; + + min_index = ~0; + max_index = 0; + tmp_ib = *ib; + + for (i = 0; i < nr_prims; i++) { + tmp_ib.ptr = ib->ptr + prim[i].start * vbo_sizeof_ib_type(ib->type); + vbo_get_minmax_index(ctx, &prim[i], &tmp_ib, &tmp_min, &tmp_max); + min_index = MIN2(min_index, tmp_min); + max_index = MAX2(max_index, tmp_max); + } + } /* Decide if we want to rebase. If so we end up recursing once * only into this function. -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: count min/max_index before vbo->draw_prims
On Tue, Dec 27, 2011 at 11:15:42AM -0800, Eric Anholt wrote: On Sun, 25 Dec 2011 12:26:25 +0800, Liu Aleaxander aleaxan...@gmail.com wrote: On Sun, Dec 25, 2011 at 8:03 AM, Eric Anholt e...@anholt.net wrote: On Thu, 22 Dec 2011 18:55:50 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: For the case that index data is stored in element array buffer object, and user called glMultiDrawElements, count the min/max_index before calling vbo-draw_prims. vbo_get_minmax_index() isn't friendly to this case. So do it while building the prim info. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com I'm really failing to understand what this commit is for. Does it fix some testcase? Sorry, my bad. I should note something more about this issue. And yes, it does fixes something. Say for following case: GLubyte indices_data[] = {0, 1, 2, 3, 4, 5, 6,1, 7, 8, 9, 10, 11}; GLsizei count[] = {7, 6} GLvoid * indices_off[2] = {0, 7}; /* here copy the indices data into Element Array Buffer Object */ .glBind..glBufferData /* Here call glMulitDrawElements */ glMultiDrawElements(GL_LINE_STRIP, count, GL_UNSIGNED_BYTE, indices_off, 2); The current code in brw_draw.c would just count the min_index and max_index of the first prim, thus got a min_index of 0, and max_index of 6. This is not right, thus the second prim(and any later, if you have more than 2 prim to render) rendered incorrectly. I found it's a little ugly to add those code similar in this patch at brw_draw_prims(), then I figured out a way to add it at the caller, vbo_validated_multidrawelements(). I guess it's reasonable, since the prim info is built at there. What about when the next caller that does multiple primitives comes along. We don't have the next caller besides glMultiDrawElements* _for_now_ , right? Would you add workaround code to it too? Please fix the actual broken code in brw_draw.c instead. Yes, anyway, you are right. I sent out a patch to fix this issue in brw_draw_prims(). 
And I found it isn't as ugly to fix it there as I thought. ;) Thanks, Yuanhan Liu This would be a good time to check for the same breakage in other callers of vbo_get_minmax_index(), and see if we want a function that does this multi-primitive vbo_get_minmax_index() logic. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] vbo: count min/max_index before vbo->draw_prims
For the case that index data is stored in element array buffer object, and user called glMultiDrawElements, count the min/max_index before calling vbo-draw_prims. vbo_get_minmax_index() isn't friendly to this case. So do it while building the prim info. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/vbo/vbo_exec_array.c | 14 +- 1 files changed, 13 insertions(+), 1 deletions(-) diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index a6e41e9..70efd3f 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -1147,11 +1147,18 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, fallback = GL_TRUE; if (!fallback) { + struct _mesa_index_buffer tmp_ib; + GLuint min_index = ~0; + GLuint max_index = 0; + GLuint tmp_min, tmp_max; + ib.count = (max_index_ptr - min_index_ptr) / index_type_size; ib.type = type; ib.obj = ctx-Array.ArrayObj-ElementArrayBufferObj; ib.ptr = (void *)min_index_ptr; + tmp_ib = ib; + for (i = 0; i primcount; i++) { prim[i].begin = (i == 0); prim[i].end = (i == primcount - 1); @@ -1166,11 +1173,16 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, prim[i].basevertex = basevertex[i]; else prim[i].basevertex = 0; + + tmp_ib.ptr = indices[i]; + vbo_get_minmax_index(ctx, prim[i], tmp_ib, tmp_min, tmp_max); + min_index = MIN2(min_index, tmp_min); + max_index = MAX2(max_index, tmp_max); } check_buffers_are_unmapped(exec-array.inputs); vbo-draw_prims(ctx, exec-array.inputs, prim, primcount, ib, - GL_FALSE, ~0, ~0, NULL); + GL_TRUE, min_index, max_index, NULL); } else { /* render one prim at a time */ for (i = 0; i primcount; i++) { -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/8] i965: dynamic eu instruction store size
On Thu, Dec 22, 2011 at 02:37:58PM -0800, Kenneth Graunke wrote: On 12/21/2011 01:33 AM, Yuanhan Liu wrote: Hi, this is a new series of patches for dynamic eu instruction store size. The first 4 is from Eric. I just grabed it to make it rebase to current repo. The last 4 patch is from mine which some are based on those patches from Eric. Please help to review it. BTW, I checked those patches with all oglc test cases, and found no regression. (Sandybridge only). Thanks, Yuanhan Liu -- Eric Anholt (4): i965: Drop unused do_insn argument from gen6_CONT(). i965: Don't make consumers of brw_DO()/brw_WHILE() track loop start i965: Don't make consumers of brw_WHILE do pre-gen6 BREAK/CONT patching i965: Don't make consumers of brw_CONT/brw_WHILE track if depth in loop Yuanhan Liu (4): i965: let the if_stack just store the instruction index i965: get the jmp distance by instruction index i965: call next_insn() before referencing a instruction by index Patches 1-7 (v2 of 6 and after changing to bool in 7) are: Reviewed-by: Kenneth Graunke kenn...@whitecape.org Thanks. i965: increase the brw eu instruction store size dynamically Patch 8 does not get a R-b just yet. Ok , will fix it. Thanks for doing this, Yuanhan, I'm really glad to see the arbitrary 1 limit die. Welcome and it's my pleasure. And Eric, thanks for cleaning up the rest of the control flow stack code---it's /so/ much nicer now! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/8] i965: increase the brw eu instruction store size dynamically
On Thu, Dec 22, 2011 at 02:33:03PM -0800, Kenneth Graunke wrote: On 12/21/2011 01:33 AM, Yuanhan Liu wrote: Here is the final patch to enable dynamic eu instruction store size: increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix something that 'GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 1'. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |7 +++ src/mesa/drivers/dri/i965/brw_eu.h |7 --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 12 +++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 9b4dde8..7d206f3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -174,6 +174,13 @@ void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p-brw = brw; + /* +* Set the initial instruction store array size to 1024, if found that +* isn't enough, then it will double the store size at brw_next_insn() +* until it meet the BRW_EU_MAX_INSN +*/ + p-store_size = 1024; + p-store = rzalloc_array(mem_ctx, struct brw_instruction, p-store_size); p-nr_insn = 0; p-current = p-stack; p-compressed = false; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9d3d7de..52567c2 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -100,11 +100,12 @@ struct brw_glsl_call; -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN (1024 * 1024) I'm actually surprised to see BRW_EU_MAX_INSN at all. As far as I know, there isn't an actual hardware limit on the number of instructions, Glad to know that. Thanks. so I'm not sure why we should cap it at all. Especially not to some arbitrary number. 
(I'm assuming that 1024 * 1024 is just something you came up with arbitrarily...) Aha, yes, you are right, I made it. :) Here is the fixed patch, please help to review it: From 66c30acdeae88cdba07ed85443b04d4bc6c56792 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 21 Dec 2011 15:38:44 +0800 Subject: [PATCH] i965: increase the brw eu instruction store size dynamically Here is the final patch to enable dynamic eu instruction store size: increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix something that 'GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 1'. v2: comments from ken, do not hardcode the eu limit to (1024 * 1024) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |7 +++ src/mesa/drivers/dri/i965/brw_eu.h |4 ++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 10 +- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 9b4dde8..2b0593a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -174,6 +174,13 @@ void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p-brw = brw; + /* +* Set the initial instruction store array size to 1024, if found that +* isn't enough, then it will double the store size at brw_next_insn() +* until out of memory. 
+*/ + p-store_size = 1024; + p-store = rzalloc_array(mem_ctx, struct brw_instruction, p-store_size); p-nr_insn = 0; p-current = p-stack; p-compressed = false; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index cc2f618..a41e988 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -101,10 +101,10 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; + struct brw_instruction *store; + int store_size; GLuint nr_insn; void *mem_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 829d92c..9288f9b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -691,7 +691,15 @@ brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; - assert(p-nr_insn + 1 BRW_EU_MAX_INSN); + if (p-nr_insn + 1 p-store_size) { + if (0) + printf(incresing the store size to %d\n, p-store_size 1); + p-store_size = 1; + p-store = reralloc(p-mem_ctx, p-store, + struct
Re: [Mesa-dev] [PATCH 7/8] i965: call next_insn() before referencing a instruction by index
On Thu, Dec 22, 2011 at 11:09:12AM -0800, Kenneth Graunke wrote: On 12/21/2011 01:33 AM, Yuanhan Liu wrote: [snip] + int emit_endif = 1; Please use bool and true/false rather than int. Yes, right. Will fix it. /* In single program flow mode, we can express IF and ELSE instructions * equivalently as ADD instructions that operate on IP. On platforms prior @@ -1219,14 +1211,32 @@ brw_ENDIF(struct brw_compile *p) * instructions to conditional ADDs. So we only do this trick on Gen4 and * Gen5. */ - if (intel-gen 6 p-single_program_flow) { + if (intel-gen 6 p-single_program_flow) + emit_endif = 0; You could actually just do this: /* In single program flow mode, we can express IF and ELSE ... */ bool emit_endif = !(intel-gen 6 p-single_program_flow); But I'm fine with bool emit_endif = true and emit_endif = false if you prefer that. Yes, I prefer that. From my point, in this case, with the comments, it can tell us why we can't emit endif clearly. Here is the fixed patch: From 7c8b8bc87846df9513a0c32cc8a388fb62f5476a Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 21 Dec 2011 15:32:02 +0800 Subject: [PATCH] i965: call next_insn() before referencing a instruction by index A single next_insn may change the base address of instruction store memory(p-store), so call it first before referencing the instruction store pointer from an index. This the final prepare work to enable the dynamic store size. 
v2: comments from Ken, define emit_endif as bool type Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com Reviewed-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 40 --- 1 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b2ab013..843d12f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1197,15 +1197,7 @@ brw_ENDIF(struct brw_compile *p) struct brw_instruction *else_inst = NULL; struct brw_instruction *if_inst = NULL; struct brw_instruction *tmp; - - /* Pop the IF and (optional) ELSE instructions from the stack */ - p-if_depth_in_loop[p-loop_stack_depth]--; - tmp = pop_if_stack(p); - if (tmp-header.opcode == BRW_OPCODE_ELSE) { - else_inst = tmp; - tmp = pop_if_stack(p); - } - if_inst = tmp; + bool emit_endif = true; /* In single program flow mode, we can express IF and ELSE instructions * equivalently as ADD instructions that operate on IP. On platforms prior @@ -1219,14 +1211,32 @@ brw_ENDIF(struct brw_compile *p) * instructions to conditional ADDs. So we only do this trick on Gen4 and * Gen5. */ - if (intel-gen 6 p-single_program_flow) { + if (intel-gen 6 p-single_program_flow) + emit_endif = false; + + /* +* A single next_insn() may change the base adress of instruction store +* memory(p-store), so call it first before referencing the instruction +* store pointer from an index +*/ + if (emit_endif) + insn = next_insn(p, BRW_OPCODE_ENDIF); + + /* Pop the IF and (optional) ELSE instructions from the stack */ + p-if_depth_in_loop[p-loop_stack_depth]--; + tmp = pop_if_stack(p); + if (tmp-header.opcode == BRW_OPCODE_ELSE) { + else_inst = tmp; + tmp = pop_if_stack(p); + } + if_inst = tmp; + + if (!emit_endif) { /* ENDIF is useless; don't bother emitting it. 
*/ convert_IF_ELSE_to_ADD(p, if_inst, else_inst); return; } - insn = next_insn(p, BRW_OPCODE_ENDIF); - if (intel-gen 6) { brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); @@ -1393,13 +1403,12 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) struct brw_instruction *insn, *do_insn; GLuint br = 1; - do_insn = get_inner_do_insn(p); - if (intel-gen = 5) br = 2; if (intel-gen = 7) { insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -1409,6 +1418,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) insn-header.execution_size = BRW_EXECUTE_8; } else if (intel-gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn, brw_imm_w(0)); insn-bits1.branch_gen6.jump_count = br * (do_insn - insn); @@ -1419,6 +1429,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) } else { if (p-single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn
Re: [Mesa-dev] [PATCH 8/8] i965: increase the brw eu instruction store size dynamically
On Thu, Dec 22, 2011 at 07:51:46PM -0800, Kenneth Graunke wrote: On 12/22/2011 07:04 PM, Yuanhan Liu wrote: On Thu, Dec 22, 2011 at 02:33:03PM -0800, Kenneth Graunke wrote: On 12/21/2011 01:33 AM, Yuanhan Liu wrote: [snip] -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN (1024 * 1024) I'm actually surprised to see BRW_EU_MAX_INSN at all. As far as I know, there isn't an actual hardware limit on the number of instructions, Glad to know that. Thanks. so I'm not sure why we should cap it at all. Especially not to some arbitrary number. (I'm assuming that 1024 * 1024 is just something you came up with arbitrarily...) Aha, yes, you are right, I made it. :) Here is the fixed patch, please help to review it: Reviewed-by: Kenneth Graunke kenn...@whitecape.org I'd wait for an ack from Eric before pushing, though. It's OK to me. Eric, comments? Or, can I get your reviewed-by for this series? Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/8] i965: dynamic eu instruction store size
Hi, this is a new series of patches for dynamic eu instruction store size. The first 4 are from Eric; I just grabbed them and rebased them onto the current repo. The last 4 patches are mine, some of which are based on those patches from Eric. Please help to review them. BTW, I checked those patches with all oglc test cases, and found no regression. (Sandybridge only). Thanks, Yuanhan Liu -- Eric Anholt (4): i965: Drop unused do_insn argument from gen6_CONT(). i965: Don't make consumers of brw_DO()/brw_WHILE() track loop start i965: Don't make consumers of brw_WHILE do pre-gen6 BREAK/CONT patching i965: Don't make consumers of brw_CONT/brw_WHILE track if depth in loop Yuanhan Liu (4): i965: let the if_stack just store the instruction index i965: get the jmp distance by instruction index i965: call next_insn() before referencing a instruction by index i965: increase the brw eu instruction store size dynamically src/mesa/drivers/dri/i965/brw_clip_line.c |5 +- src/mesa/drivers/dri/i965/brw_clip_tri.c | 15 +-- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 14 +-- src/mesa/drivers/dri/i965/brw_eu.c| 15 ++- src/mesa/drivers/dri/i965/brw_eu.h| 44 +-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 155 +++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 56 + src/mesa/drivers/dri/i965/brw_sf_emit.c |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 56 + src/mesa/drivers/dri/i965/brw_vs_emit.c | 43 +-- src/mesa/drivers/dri/i965/brw_wm_emit.c |2 +- 11 files changed, 201 insertions(+), 206 deletions(-) -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] i965: Drop unused do_insn argument from gen6_CONT().
From: Eric Anholt e...@anholt.net The branch distances get patched up later at the WHILE instruction. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu_emit.c |3 +-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c |2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index a46a81b..c4f3a37 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1249,8 +1249,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) return insn; } -struct brw_instruction *gen6_CONT(struct brw_compile *p, - struct brw_instruction *do_insn) +struct brw_instruction *gen6_CONT(struct brw_compile *p) { struct brw_instruction *insn; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 7f0e58e..2f5a026 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -813,7 +813,7 @@ fs_visitor::generate_code() case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. */ if (intel-gen = 6) - gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + gen6_CONT(p); else brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 54bbe13..becb89e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -827,7 +827,7 @@ vec4_visitor::generate_code() case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. 
*/ if (intel-gen = 6) - gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + gen6_CONT(p); else brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index bcaef04..6c96a48 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -2106,7 +2106,7 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); if (intel-gen = 6) { - gen6_CONT(p, loop_inst[loop_depth - 1]); + gen6_CONT(p); } else { brw_CONT(p, if_depth_in_loop[loop_depth]); } -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/8] i965: Don't make consumers of brw_DO()/brw_WHILE() track loop start
From: Eric Anholt e...@anholt.net This is a similar cleanup to what we did for brw_IF(), brw_ELSE(), brw_ENDIF() handling. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_clip_line.c |5 +-- src/mesa/drivers/dri/i965/brw_clip_tri.c | 15 +--- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 14 +-- src/mesa/drivers/dri/i965/brw_eu.c|4 +++ src/mesa/drivers/dri/i965/brw_eu.h| 15 +--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 30 ++-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c |2 +- 9 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 614849a..6cf2bd2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); @@ -160,7 +159,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { /* if (planemask 1) */ @@ -245,7 +244,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_SHR(p, c-reg.planemask, c-reg.planemask, brw_imm_ud(1)); } - brw_WHILE(p, plane_loop); + brw_WHILE(p); brw_ADD(p, c-reg.t, c-reg.t0, c-reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c-reg.t, brw_imm_f(1.0)); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 
12d6724..a29f8e0 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -232,8 +232,6 @@ void brw_clip_tri( struct brw_clip_compile *c ) struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *vertex_loop; GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); @@ -244,7 +242,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c-reg.vertex[3]) ); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { /* if (planemask 1) */ @@ -266,7 +264,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, c-reg.nr_verts, brw_imm_ud(0)); -vertex_loop = brw_DO(p, BRW_EXECUTE_1); +brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ @@ -364,7 +362,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c-reg.loopcount, c-reg.loopcount, brw_imm_d(-1)); } -brw_WHILE(p, vertex_loop); +brw_WHILE(p); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist @@ -396,7 +394,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_SHR(p, c-reg.planemask, c-reg.planemask, brw_imm_ud(1)); } - brw_WHILE(p, plane_loop); + brw_WHILE(p); } @@ -404,7 +402,6 @@ void brw_clip_tri( struct brw_clip_compile *c ) void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_compile *p = c-func; - struct brw_instruction *loop; /* for (loopcount = nr_verts-2; loopcount 0; loopcount--) */ @@ -429,7 +426,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, 
BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN URB_WRITE_PRIM_TYPE_SHIFT)); @@ -440,7 +437,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c-reg.loopcount, c-reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, loop
[Mesa-dev] [PATCH 3/8] i965: Don't make consumers of brw_WHILE do pre-gen6 BREAK/CONT patching
From: Eric Anholt e...@anholt.net The EU code itself can just do this work, since all the consumers were duplicating it. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 36 +- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 33 +++- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 33 +++- src/mesa/drivers/dri/i965/brw_vs_emit.c | 30 +++--- 4 files changed, 46 insertions(+), 86 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0e708d2..4562a2d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1343,7 +1343,36 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) } } - +/** + * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE + * instruction here. + * + * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop + * nesting, since it can always just point to the end of the block/current loop. + */ +static void +brw_patch_break_cont(struct brw_compile *p) +{ + struct intel_context *intel = p-brw-intel; + struct brw_instruction *while_inst = p-store[p-nr_insn]; + struct brw_instruction *do_inst = get_inner_do_insn(p); + struct brw_instruction *inst; + int br = (intel-gen == 5) ? 2 : 1; + + for (inst = while_inst - 1; inst != do_inst; inst--) { + /* If the jump count is != 0, that means that this instruction has already + * been patched because it's part of a loop inside of the one we're + * patching. 
+ */ + if (inst-header.opcode == BRW_OPCODE_BREAK + inst-bits3.if_else.jump_count == 0) { +inst-bits3.if_else.jump_count = br * ((while_inst - inst) + 1); + } else if (inst-header.opcode == BRW_OPCODE_CONTINUE +inst-bits3.if_else.jump_count == 0) { +inst-bits3.if_else.jump_count = br * (while_inst - inst); + } + } +} struct brw_instruction *brw_WHILE(struct brw_compile *p) { @@ -1352,7 +1381,6 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) GLuint br = 1; do_insn = get_inner_do_insn(p); - p-loop_stack_depth--; if (intel-gen = 5) br = 2; @@ -1396,11 +1424,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) insn-bits3.if_else.jump_count = br * (do_insn - insn + 1); insn-bits3.if_else.pop_count = 0; insn-bits3.if_else.pad0 = 0; + +brw_patch_break_cont(p); } } insn-header.compression_control = BRW_COMPRESSION_NONE; p-current-header.predicate_control = BRW_PREDICATE_NONE; + p-loop_stack_depth--; + return insn; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index ded58a2..91e6961 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -658,8 +658,6 @@ fs_visitor::generate_code() int loop_stack_array_size = 16; int loop_stack_depth = 0; - brw_instruction **loop_stack = - rzalloc_array(this-mem_ctx, brw_instruction *, loop_stack_array_size); int *if_depth_in_loop = rzalloc_array(this-mem_ctx, int, loop_stack_array_size); @@ -795,11 +793,10 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: -loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); +brw_DO(p, BRW_EXECUTE_8); +loop_stack_depth++; if (loop_stack_array_size = loop_stack_depth) { loop_stack_array_size *= 2; - loop_stack = reralloc(this-mem_ctx, loop_stack, brw_instruction *, - loop_stack_array_size); if_depth_in_loop = reralloc(this-mem_ctx, if_depth_in_loop, int, loop_stack_array_size); } @@ -819,31 +816,10 @@ fs_visitor::generate_code() brw_set_predicate_control(p, 
BRW_PREDICATE_NONE); break; - case BRW_OPCODE_WHILE: { -struct brw_instruction *inst0, *inst1; -GLuint br = 1; - -if (intel-gen = 5) - br = 2; - + case BRW_OPCODE_WHILE: assert(loop_stack_depth 0); loop_stack_depth--; -inst0 = inst1 = brw_WHILE(p); -if (intel-gen 6) { - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 loop_stack[loop_stack_depth]) { - inst0--; - if (inst0-header.opcode == BRW_OPCODE_BREAK - inst0-bits3.if_else.jump_count == 0) { - inst0-bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0-header.opcode == BRW_OPCODE_CONTINUE - inst0-bits3.if_else.jump_count == 0) { - inst0-bits3.if_else.jump_count = br * (inst1 - inst0
[Mesa-dev] [PATCH 4/8] i965: Don't make consumers of brw_CONT/brw_WHILE track if depth in loop
From: Eric Anholt e...@anholt.net The codegen backends all had this same tracking, so just do it at the EU level. Reviewed-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |1 + src/mesa/drivers/dri/i965/brw_eu.h | 10 -- src/mesa/drivers/dri/i965/brw_eu_emit.c | 13 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 23 ++- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 ++- src/mesa/drivers/dri/i965/brw_vs_emit.c | 13 +++-- 6 files changed, 25 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index c0126ff..83aae3b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -197,6 +197,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) p-loop_stack_depth = 0; p-loop_stack_array_size = 16; p-loop_stack = rzalloc_array(mem_ctx, int, p-loop_stack_array_size); + p-if_depth_in_loop = rzalloc_array(mem_ctx, int, p-loop_stack_array_size); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 321924e..a7d1b17 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -134,6 +134,12 @@ struct brw_compile { * encountered. */ int *loop_stack; + /** +* pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF +* blocks they were popping out of, to fix up the mask stack. This tracks +* the IF/ENDIF nesting in each current nested loop level. 
+*/ + int *if_depth_in_loop; int loop_stack_depth; int loop_stack_array_size; @@ -1019,8 +1025,8 @@ struct brw_instruction *brw_DO(struct brw_compile *p, struct brw_instruction *brw_WHILE(struct brw_compile *p); -struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); -struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); struct brw_instruction *gen6_CONT(struct brw_compile *p); /* Forward jumps: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4562a2d..7bc5469 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -918,10 +918,13 @@ push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) p-loop_stack_array_size *= 2; p-loop_stack = reralloc(p-mem_ctx, p-loop_stack, int, p-loop_stack_array_size); + p-if_depth_in_loop = reralloc(p-mem_ctx, p-if_depth_in_loop, int, +p-loop_stack_array_size); } p-loop_stack[p-loop_stack_depth] = inst - p-store; p-loop_stack_depth++; + p-if_depth_in_loop[p-loop_stack_depth] = 0; } static struct brw_instruction * @@ -980,6 +983,7 @@ brw_IF(struct brw_compile *p, GLuint execute_size) p-current-header.predicate_control = BRW_PREDICATE_NONE; push_if_stack(p, insn); + p-if_depth_in_loop[p-loop_stack_depth]++; return insn; } @@ -1187,6 +1191,7 @@ brw_ENDIF(struct brw_compile *p) struct brw_instruction *if_inst = NULL; /* Pop the IF and (optional) ELSE instructions from the stack */ + p-if_depth_in_loop[p-loop_stack_depth]--; p-if_stack_depth--; if (p-if_stack[p-if_stack_depth]-header.opcode == BRW_OPCODE_ELSE) { else_inst = p-if_stack[p-if_stack_depth]; @@ -1245,7 +1250,7 @@ brw_ENDIF(struct brw_compile *p) patch_IF_ELSE(p, if_inst, else_inst, insn); } -struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) +struct brw_instruction *brw_BREAK(struct brw_compile 
*p) { struct intel_context *intel = p-brw-intel; struct brw_instruction *insn; @@ -1260,7 +1265,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) brw_set_src0(p, insn, brw_ip_reg()); brw_set_src1(p, insn, brw_imm_d(0x0)); insn-bits3.if_else.pad0 = 0; - insn-bits3.if_else.pop_count = pop_count; + insn-bits3.if_else.pop_count = p-if_depth_in_loop[p-loop_stack_depth]; } insn-header.compression_control = BRW_COMPRESSION_NONE; insn-header.execution_size = BRW_EXECUTE_8; @@ -1284,7 +1289,7 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p) return insn; } -struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) +struct brw_instruction *brw_CONT(struct brw_compile *p) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); @@ -1295,7 +1300,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) insn-header.execution_size = BRW_EXECUTE_8; /* insn-header.mask_control = BRW_MASK_DISABLE; */ insn-bits3.if_else.pad0 = 0; - insn-bits3.if_else.pop_count = pop_count; + insn
[Mesa-dev] [PATCH 5/8] i965: let the if_stack just store the instruction index
If dynamic instruction store size is enabled, while after the brw_IF/ELSE() and before the brw_ENDIF() function, the eu instruction store base address(p-store) may change. Thus let if_stack just store the instruction index. This is somehow more flexible and safe than store the instruction memory address. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |3 +-- src/mesa/drivers/dri/i965/brw_eu.h |4 +++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 22 +++--- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 83aae3b..9b4dde8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -191,8 +191,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Set up control flow stack */ p-if_stack_depth = 0; p-if_stack_array_size = 16; - p-if_stack = - rzalloc_array(mem_ctx, struct brw_instruction *, p-if_stack_array_size); + p-if_stack = rzalloc_array(mem_ctx, int, p-if_stack_array_size); p-loop_stack_depth = 0; p-loop_stack_array_size = 16; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index a7d1b17..7fd387b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -123,8 +123,10 @@ struct brw_compile { /* Control flow stacks: * - if_stack contains IF and ELSE instructions which must be patched * (and popped) once the matching ENDIF instruction is encountered. +* +* Just store the instruction pointer(an index). 
*/ - struct brw_instruction **if_stack; + int *if_stack; int if_stack_depth; int if_stack_array_size; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 7bc5469..277bd74 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -901,16 +901,23 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, static void push_if_stack(struct brw_compile *p, struct brw_instruction *inst) { - p-if_stack[p-if_stack_depth] = inst; + p-if_stack[p-if_stack_depth] = inst - p-store; p-if_stack_depth++; if (p-if_stack_array_size = p-if_stack_depth) { p-if_stack_array_size *= 2; - p-if_stack = reralloc(p-mem_ctx, p-if_stack, struct brw_instruction *, + p-if_stack = reralloc(p-mem_ctx, p-if_stack, int, p-if_stack_array_size); } } +static struct brw_instruction * +pop_if_stack(struct brw_compile *p) +{ + p-if_stack_depth--; + return p-store[p-if_stack[p-if_stack_depth]]; +} + static void push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) { @@ -1189,15 +1196,16 @@ brw_ENDIF(struct brw_compile *p) struct brw_instruction *insn; struct brw_instruction *else_inst = NULL; struct brw_instruction *if_inst = NULL; + struct brw_instruction *tmp; /* Pop the IF and (optional) ELSE instructions from the stack */ p-if_depth_in_loop[p-loop_stack_depth]--; - p-if_stack_depth--; - if (p-if_stack[p-if_stack_depth]-header.opcode == BRW_OPCODE_ELSE) { - else_inst = p-if_stack[p-if_stack_depth]; - p-if_stack_depth--; + tmp = pop_if_stack(p); + if (tmp-header.opcode == BRW_OPCODE_ELSE) { + else_inst = tmp; + tmp = pop_if_stack(p); } - if_inst = p-if_stack[p-if_stack_depth]; + if_inst = tmp; /* In single program flow mode, we can express IF and ELSE instructions * equivalently as ADD instructions that operate on IP. On platforms prior -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] i965: get the jmp distance by instruction index
If dynamic instruction store size is enabled, while after the brw_JMPI() and before the brw_land_fwd_jump() function, the eu instruction store base address(p-store) may change. Thus, the safe way to reference the jmp instruction is by index instead of by the instruction address. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |8 +--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 17 - src/mesa/drivers/dri/i965/brw_sf_emit.c |2 +- src/mesa/drivers/dri/i965/brw_wm_emit.c |2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 7fd387b..9d3d7de 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -862,7 +862,6 @@ ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) -ALU2(JMPI) ALU2(ADD) ALU2(MUL) ALU1(FRC) @@ -1032,8 +1031,11 @@ struct brw_instruction *brw_CONT(struct brw_compile *p); struct brw_instruction *gen6_CONT(struct brw_compile *p); /* Forward jumps: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); +int brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1); +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 277bd74..87f1430 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -882,10 +882,10 @@ void brw_NOP(struct brw_compile *p) * Comparisons, if/else/endif */ -struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) +int brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -895,7 +895,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, 
p-current-header.predicate_control = BRW_PREDICATE_NONE; - return insn; + return insn - p-store; } static void @@ -1452,11 +1452,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) { struct intel_context *intel = p-brw-intel; - struct brw_instruction *landing = p-store[p-nr_insn]; + struct brw_instruction *jmp_insn = p-store[jmp_insn_idx]; GLuint jmpi = 1; if (intel-gen = 5) @@ -1465,7 +1464,7 @@ void brw_land_fwd_jump(struct brw_compile *p, assert(jmp_insn-header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn-bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); - jmp_insn-bits3.ud = jmpi * ((landing - jmp_insn) - 1); + jmp_insn-bits3.ud = jmpi * (p-nr_insn - jmp_insn_idx - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index fe3341c..ba890d4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -717,7 +717,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; - struct brw_instruction *jmp; + int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 5905ba9..44b14a5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1528,7 +1528,7 @@ void emit_fb_write(struct brw_wm_compile *c, else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); struct brw_reg ip = brw_ip_reg(); - struct brw_instruction *jmp; + int jmp; brw_set_compression_control(p, BRW_COMPRESSION_NONE); 
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/8] i965: get the jmp distance by instruction index
On Wed, Dec 21, 2011 at 05:57:35AM -0800, Eric Anholt wrote: On Wed, 21 Dec 2011 17:33:41 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: If dynamic instruction store size is enabled, while after the brw_JMPI() and before the brw_land_fwd_jump() function, the eu instruction store base address(p-store) may change. Thus, the safe way to reference the jmp instruction is by index instead of by the instruction address. Our other instructions return the instruction pointer, I don't think jmpi should be special in that respect. Right. Fixed and how about the following patch? -- From 6fed6e4800cdc72a6b235022b7fa9dabeecf9106 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 21 Dec 2011 15:10:40 +0800 Subject: [PATCH] i965: get the jmp distance by instruction index If dynamic instruction store size is enabled, while after the brw_JMPI() and before the brw_land_fwd_jump() function, the eu instruction store base address(p-store) may change. Thus, the safe way to reference the jmp instruction is by index instead of by the instruction address. 
v2: comments from Eric, don't change the prototype of brw_JMPI Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |3 +-- src/mesa/drivers/dri/i965/brw_eu_emit.c |7 +++ src/mesa/drivers/dri/i965/brw_sf_emit.c |8 src/mesa/drivers/dri/i965/brw_wm_emit.c |4 ++-- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 7fd387b..cc2f618 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1032,8 +1032,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p); struct brw_instruction *gen6_CONT(struct brw_compile *p); /* Forward jumps: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 277bd74..b2ab013 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1452,11 +1452,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) { struct intel_context *intel = p-brw-intel; - struct brw_instruction *landing = p-store[p-nr_insn]; + struct brw_instruction *jmp_insn = p-store[jmp_insn_idx]; GLuint jmpi = 1; if (intel-gen = 5) @@ -1465,7 +1464,7 @@ void brw_land_fwd_jump(struct brw_compile *p, assert(jmp_insn-header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn-bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); - jmp_insn-bits3.ud = jmpi * ((landing - jmp_insn) - 1); + jmp_insn-bits3.ud = jmpi * (p-nr_insn - jmp_insn_idx - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index fe3341c..1ee0098 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ 
b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -717,7 +717,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; - struct brw_instruction *jmp; + int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; @@ -738,7 +738,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1_3DPRIM_POLYGON) | (1_3DPRIM_RECTLIST) | (1_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p-store; { saveflag = p-flag_value; brw_push_insn_state(p); @@ -759,7 +759,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1_3DPRIM_LINESTRIP_CONT) | (1_3DPRIM_LINESTRIP_BF) | (1_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p-store; { saveflag = p-flag_value; brw_push_insn_state(p); @@ -772,7 +772,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p-store; { saveflag
[Mesa-dev] [PATCH 0/5] i965: dynamic eu instruction store size
If dynamic eu instruction store size is enabled, the eu instruction store base address (p->store) may change. Like, in the following situation: struct brw_instruction *jmp = brw_JMPI(p, ...); ... ... /* This somehow may change the p->store base address */ 1) brw_emit_tri_setup(c, false); ... /* * And finally get the jmp distance: * * The old code would do: * &p->store[p->nr_insn] - jmp * If p->store is changed at line 1), this may introduce * undefined results, such as a hang. */ brw_land_fwd_jump(p, jmp); So, the safe way to get the jmp distance is by instruction index. That's what the first 4 patches do. I checked those patches with all intel oglc testcases, and found no regressions. What's better, it fixed something. v2: do not take the brw_insn_of stuff, instead just handle some special cases here, like brw_DO/JMPI, to let them return the instruction index. -- Yuanhan Liu (5): i965: Add a help function brw_insn_index to get the instruction index i965: prepare work for dynamic instruction store size on IF/ELSE/ENDIF i965: prepare work for dynamic instruction store size on DO/WHILE i965: prepare work for dynamic instruction store size on JMPI i965: increase the brw eu instruction store size dynamically src/mesa/drivers/dri/i965/brw_clip_line.c |2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c |6 +- src/mesa/drivers/dri/i965/brw_clip_unfilled.c |4 +- src/mesa/drivers/dri/i965/brw_eu.c| 10 +++- src/mesa/drivers/dri/i965/brw_eu.h| 35 +++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 82 +++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 11 ++-- src/mesa/drivers/dri/i965/brw_sf_emit.c |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |9 +-- src/mesa/drivers/dri/i965/brw_vs_emit.c |6 +- src/mesa/drivers/dri/i965/brw_wm_emit.c |2 +- 11 files changed, 101 insertions(+), 68 deletions(-) -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] i965: Add a help function brw_insn_index to get the instruction index
The reason to add a helper function instead of just using 'insn - p->store' is that this helper function includes an assert. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index dcb1fc9..c7eefe3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -786,6 +786,13 @@ static INLINE struct brw_instruction *current_insn( struct brw_compile *p) return p-store[p-nr_insn]; } +static INLINE int +brw_insn_index(struct brw_compile *p, struct brw_instruction *insn) +{ + assert((insn - p-store (int)p-nr_insn) (insn - p-store 0)); + return insn - p-store; +} + void brw_pop_insn_state( struct brw_compile *p ); void brw_push_insn_state( struct brw_compile *p ); void brw_set_mask_control( struct brw_compile *p, GLuint value ); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] i965: prepare work for dynamic instruction store size on IF/ELSE/ENDIF
If dynamic instruction store size is enabled, the eu instruction store base address (p->store) may change after the brw_IF/ELSE() call and before the brw_ENDIF() call. Thus let if_stack just store the instruction pointer (an index). This is more flexible and safer than storing the instruction memory address. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |3 +-- src/mesa/drivers/dri/i965/brw_eu.h |4 +++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 16 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b5a858b..77eb2cf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -191,8 +191,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Set up control flow stack */ p-if_stack_depth = 0; p-if_stack_array_size = 16; - p-if_stack = - rzalloc_array(mem_ctx, struct brw_instruction *, p-if_stack_array_size); + p-if_stack = rzalloc_array(mem_ctx, int, p-if_stack_array_size); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index c7eefe3..607ab96 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -123,8 +123,10 @@ struct brw_compile { /* Control flow stacks: * - if_stack contains IF and ELSE instructions which must be patched * (and popped) once the matching ENDIF instruction is encountered. +* +* Just store the instruction pointer(an index). 
*/ - struct brw_instruction **if_stack; + int *if_stack; int if_stack_depth; int if_stack_array_size; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 60350ca..067111c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -896,14 +896,14 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, } static void -push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +push_if_stack(struct brw_compile *p, int inst) { p-if_stack[p-if_stack_depth] = inst; p-if_stack_depth++; if (p-if_stack_array_size = p-if_stack_depth) { p-if_stack_array_size *= 2; - p-if_stack = reralloc(p-mem_ctx, p-if_stack, struct brw_instruction *, + p-if_stack = reralloc(p-mem_ctx, p-if_stack, int, p-if_stack_array_size); } } @@ -957,7 +957,7 @@ brw_IF(struct brw_compile *p, GLuint execute_size) p-current-header.predicate_control = BRW_PREDICATE_NONE; - push_if_stack(p, insn); + push_if_stack(p, brw_insn_index(p, insn)); return insn; } @@ -989,7 +989,7 @@ gen6_IF(struct brw_compile *p, uint32_t conditional, if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, brw_insn_index(p, insn)); return insn; } @@ -1139,7 +1139,7 @@ brw_ELSE(struct brw_compile *p) if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, brw_insn_index(p, insn)); } void @@ -1152,11 +1152,11 @@ brw_ENDIF(struct brw_compile *p) /* Pop the IF and (optional) ELSE instructions from the stack */ p-if_stack_depth--; - if (p-if_stack[p-if_stack_depth]-header.opcode == BRW_OPCODE_ELSE) { - else_inst = p-if_stack[p-if_stack_depth]; + if (p-store[p-if_stack[p-if_stack_depth]].header.opcode == BRW_OPCODE_ELSE) { + else_inst = p-store[p-if_stack[p-if_stack_depth]]; p-if_stack_depth--; } - if_inst = p-if_stack[p-if_stack_depth]; + if_inst = p-store[p-if_stack[p-if_stack_depth]]; if 
(p-single_program_flow) { /* ENDIF is useless; don't bother emitting it. */ -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] i965: prepare work for dynamic instruction store size on DO/WHILE
If dynamic instruction store size is enabled, the eu instruction store base address (p->store) may change after the brw_DO() call and before the brw_WHILE() call. Thus let brw_DO return the instruction index and brw_WHILE take the do_insn index as the second parameter. Also let the loop_stack store the instruction index instead. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_clip_line.c |2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c |6 ++-- src/mesa/drivers/dri/i965/brw_clip_unfilled.c |4 +- src/mesa/drivers/dri/i965/brw_eu.h|9 ++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 27 ++-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 11 - src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |9 +++ src/mesa/drivers/dri/i965/brw_vs_emit.c |6 ++-- 8 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 75c64c0..c37ac53 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -132,7 +132,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; + int plane_loop; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index ffbfe94..3182a98 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -232,8 +232,8 @@ void brw_clip_tri( struct brw_clip_compile *c ) struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - 
struct brw_instruction *vertex_loop; + int plane_loop; + int vertex_loop; GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); @@ -404,7 +404,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_compile *p = c-func; - struct brw_instruction *loop; + int loop; /* for (loopcount = nr_verts-2; loopcount 0; loopcount--) */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index ae84e19..d057695 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -273,7 +273,7 @@ static void emit_lines(struct brw_clip_compile *c, bool do_offset) { struct brw_compile *p = c-func; - struct brw_instruction *loop; + int loop; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v1 = brw_indirect(1, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); @@ -338,7 +338,7 @@ static void emit_points(struct brw_clip_compile *c, bool do_offset ) { struct brw_compile *p = c-func; - struct brw_instruction *loop; + int loop; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 607ab96..19b919f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1009,16 +1009,13 @@ void brw_ENDIF(struct brw_compile *p); /* DO/WHILE loops: */ -struct brw_instruction *brw_DO(struct brw_compile *p, - GLuint execute_size); +int brw_DO(struct brw_compile *p, GLuint execute_size); -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *patch_insn); +struct brw_instruction *brw_WHILE(struct brw_compile *p, int patch_insn_idx); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); -struct brw_instruction 
*gen6_CONT(struct brw_compile *p, - struct brw_instruction *do_insn); +struct brw_instruction *gen6_CONT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 067111c..2acfacf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1220,8 +1220,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) return insn
[Mesa-dev] [PATCH 4/5] i965: prepare work for dynamic instruction store size on JMPI
If dynamic instruction store size is enabled, the eu instruction store base address (p->store) may change after the brw_JMPI() call and before the brw_land_fwd_jump() call. Thus, the safe way to reference the jmp instruction is by index instead of by the instruction address. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |8 +--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 17 - src/mesa/drivers/dri/i965/brw_sf_emit.c |2 +- src/mesa/drivers/dri/i965/brw_wm_emit.c |2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 19b919f..18dd9c7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -849,7 +849,6 @@ ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) -ALU2(JMPI) ALU2(ADD) ALU2(MUL) ALU1(FRC) @@ -1018,8 +1017,11 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); struct brw_instruction *gen6_CONT(struct brw_compile *p); /* Forward jumps: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); +int brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1); +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2acfacf..29dd623 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -879,10 +879,10 @@ void brw_NOP(struct brw_compile *p) * Comparisons, if/else/endif */ -struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) +int brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -892,7 +892,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, 
p-current-header.predicate_control = BRW_PREDICATE_NONE; - return insn; + return brw_insn_index(p, insn); } static void @@ -1360,11 +1360,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, int do_insn_idx) /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) { struct intel_context *intel = p-brw-intel; - struct brw_instruction *landing = p-store[p-nr_insn]; + struct brw_instruction *jmp_insn = p-store[jmp_insn_idx]; GLuint jmpi = 1; if (intel-gen = 5) @@ -1373,7 +1372,7 @@ void brw_land_fwd_jump(struct brw_compile *p, assert(jmp_insn-header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn-bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); - jmp_insn-bits3.ud = jmpi * ((landing - jmp_insn) - 1); + jmp_insn-bits3.ud = jmpi * ((p-nr_insn - jmp_insn_idx) - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index fe3341c..ba890d4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -717,7 +717,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; - struct brw_instruction *jmp; + int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index b5a4a4f..ad49244 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1528,7 +1528,7 @@ void emit_fb_write(struct brw_wm_compile *c, else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); struct brw_reg ip = brw_ip_reg(); - struct brw_instruction *jmp; + int jmp; brw_set_compression_control(p, 
BRW_COMPRESSION_NONE); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] i965: increase the brw eu instruction store size dynamically
Here is the final patch to enable dynamic eu instruction store size: increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix the problem that GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 10000. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |7 +++ src/mesa/drivers/dri/i965/brw_eu.h |7 --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 22 +++--- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 77eb2cf..f13affe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -174,6 +174,13 @@ void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p-brw = brw; + /* +* Set the initial instruction store array size to 1024, if found that +* isn't enough, then it will double the store size at brw_next_insn() +* until it meet the BRW_EU_MAX_INSN +*/ + p-store_size = 1024; + p-store = rzalloc_array(mem_ctx, struct brw_instruction, p-store_size); p-nr_insn = 0; p-current = p-stack; p-compressed = false; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 18dd9c7..e86efe6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -100,11 +100,12 @@ struct brw_glsl_call; -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN (1024 * 1024) struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; + struct brw_instruction *store; + int store_size; GLuint nr_insn; void *mem_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 29dd623..25f2feb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -688,7 +688,17 @@ brw_next_insn(struct brw_compile 
*p, GLuint opcode) { struct brw_instruction *insn; - assert(p-nr_insn + 1 BRW_EU_MAX_INSN); + if (p-nr_insn + 1 p-store_size) { + if (p-nr_insn + 1 BRW_EU_MAX_INSN) { + assert(!exceed max brw allowed eu instructions); + } else { + if (0) +printf(incresing the store size to %d\n, p-store_size 1); + p-store_size = 1; + p-store = reralloc(p-mem_ctx, p-store, + struct brw_instruction, p-store_size); + } + } insn = p-store[p-nr_insn++]; memcpy(insn, p-current, sizeof(*insn)); @@ -1150,6 +1160,14 @@ brw_ENDIF(struct brw_compile *p) struct brw_instruction *else_inst = NULL; struct brw_instruction *if_inst = NULL; + /* +* next_insn() may change the base address of instruction store +* memory(p-store), so call it first before referencing the +* instruction store pointer from an index. +*/ + if (!p-single_program_flow) + insn = next_insn(p, BRW_OPCODE_ENDIF); + /* Pop the IF and (optional) ELSE instructions from the stack */ p-if_stack_depth--; if (p-store[p-if_stack[p-if_stack_depth]].header.opcode == BRW_OPCODE_ELSE) { @@ -1164,8 +1182,6 @@ brw_ENDIF(struct brw_compile *p) return; } - insn = next_insn(p, BRW_OPCODE_ENDIF); - if (intel-gen 6) { brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/6] patches to increase the brw eu instruction store size dynamically
On Fri, Dec 02, 2011 at 11:25:55AM -0800, Eric Anholt wrote: On Thu, 1 Dec 2011 18:26:50 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: Actually the first 5 patches are all preparatory work for patch 6. I checked those patches with all intel oglc testcases, and found no regressions. What's better, it fixed something. Yuanhan Liu (6): i965: let all the brw_OPCODE functions return an instruction index instead I don't like the brw_insn_of() thing for the common case of "I just emitted an instruction and I want to tweak one parameter." Yeah, I somehow don't like it, either. I guess the old code didn't consider the case where the p->store memory could change, so it always references the instruction by pointer. Well, if p->store can change dynamically, we should use the instruction index instead of a pointer. Thus, I changed the prototype of brw_next_instruction() to let it return the instruction index. While making this change, I thought this was overdoing it a little. And, yes, it turned out to be overdoing it. As far as I can see, it should be possible to store the DO/WHILE/BRK/CONT information in struct brw_compile so that the only long-lived brw_instruction * pointers other than p->store go away. I may not get your idea. But I thought: since we are going to store the instruction index in the if_stack and loop_stack, it would be simple if we let brw_DO return the instruction index. Thus I made several new patches yesterday (and already sent them out). Please help to review them. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/6] patches to increase the brw eu instruction store size dynamically
Actually the first 5 patches are all preparatory work for patch 6. I checked those patches with all intel oglc testcases, and found no regressions. What's better, it fixed something. Yuanhan Liu (6): i965: let all the brw_OPCODE functions return an instruction index instead i965: remove the second unused parameter of gen6_CONT i965: let all the while loop stack to store an instruction index instead i965: let if_stack just store the instruction index i965: let brw_lan_fwd_jump() get the jmp_insn by the instruction index i965: increase the brw eu instruction store size dynamically src/mesa/drivers/dri/i965/brw_clip_line.c |2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c |6 +- src/mesa/drivers/dri/i965/brw_clip_unfilled.c |4 +- src/mesa/drivers/dri/i965/brw_eu.c| 10 +- src/mesa/drivers/dri/i965/brw_eu.h| 46 +++--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 235 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 15 +- src/mesa/drivers/dri/i965/brw_sf_emit.c |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 ++-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 15 +- src/mesa/drivers/dri/i965/brw_wm.h| 14 +- src/mesa/drivers/dri/i965/brw_wm_emit.c | 16 +- 12 files changed, 202 insertions(+), 186 deletions(-) -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] i965: let all the brw_OPCODE functions return an instruction index instead
Let all the brw_OPCODE functions return an instruction index instead, and use the brw_insn_of(p, index) macro to reference the instruction stored in p->store[]. This is preparatory work to let us increase the instruction store size dynamically with reralloc. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_clip_line.c |2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c |6 +- src/mesa/drivers/dri/i965/brw_clip_unfilled.c |6 +- src/mesa/drivers/dri/i965/brw_eu.h| 32 ++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 194 - src/mesa/drivers/dri/i965/brw_fs_emit.cpp |4 +- src/mesa/drivers/dri/i965/brw_sf_emit.c |6 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 +- src/mesa/drivers/dri/i965/brw_vs_emit.c |7 +- src/mesa/drivers/dri/i965/brw_wm.h| 14 +- src/mesa/drivers/dri/i965/brw_wm_emit.c | 16 +- 11 files changed, 146 insertions(+), 153 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 75c64c0..4313637 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -160,7 +160,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + plane_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { /* if (planemask 1) */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index ffbfe94..97eae35 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -244,7 +244,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c-reg.vertex[3]) ); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + plane_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { /* if (planemask 1) */ @@ -266,7 +266,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, c-reg.nr_verts, brw_imm_ud(0)); 
-vertex_loop = brw_DO(p, BRW_EXECUTE_1); +vertex_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { /* vtx = *input_ptr; */ @@ -427,7 +427,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - loop = brw_DO(p, BRW_EXECUTE_1); + loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN 2)); diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index ae84e19..2a984fe 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -285,7 +285,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c-reg.inlist)); - loop = brw_DO(p, BRW_EXECUTE_1); + loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); @@ -307,7 +307,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c-reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); - loop = brw_DO(p, BRW_EXECUTE_1); + loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); @@ -346,7 +346,7 @@ static void emit_points(struct brw_clip_compile *c, brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c-reg.inlist)); - loop = brw_DO(p, BRW_EXECUTE_1); + loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 8a446eb..61d3178 100644 
--- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -797,7 +797,7 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); -struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +int brw_next_insn(struct brw_compile *p, GLuint opcode); void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, struct brw_reg dest); void brw_set_src0(struct
[Mesa-dev] [PATCH 2/6] i965: remove the second unused parameter of gen6_CONT
Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c |2 +- src/mesa/drivers/dri/i965/brw_fs_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c |2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 61d3178..53c0383 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1006,7 +1006,7 @@ int brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); int brw_BREAK(struct brw_compile *p, int pop_count); int brw_CONT(struct brw_compile *p, int pop_count); -int gen6_CONT(struct brw_compile *p, struct brw_instruction *do_insn); +int gen6_CONT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4c0de2c..9b37e81 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1218,7 +1218,7 @@ int brw_BREAK(struct brw_compile *p, int pop_count) return insn_idx; } -int gen6_CONT(struct brw_compile *p, struct brw_instruction *do_insn) +int gen6_CONT(struct brw_compile *p) { int insn_idx = next_insn(p, BRW_OPCODE_CONTINUE); struct brw_instruction *insn = brw_insn_of(p, insn_idx); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 3b1577d..f78723e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -796,7 +796,7 @@ fs_visitor::generate_code() case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. 
*/ if (intel-gen = 6) - gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + gen6_CONT(p); else brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index a279e87..e36adae 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -827,7 +827,7 @@ vec4_visitor::generate_code() case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. */ if (intel-gen = 6) - gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + gen6_CONT(p); else brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 0a0cfbe..6d50fad 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -2107,7 +2107,7 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); if (intel-gen = 6) { - gen6_CONT(p, loop_inst[loop_depth - 1]); + gen6_CONT(p); } else { brw_CONT(p, if_depth_in_loop[loop_depth]); } -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] i965: let all the while loop stack to store an instruction index instead
Let all the while loop stacks just store the instruction index. This is more flexible than storing the instruction memory address. This is preparatory work to let us increase the instruction store size dynamically with reralloc. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_clip_line.c |4 ++-- src/mesa/drivers/dri/i965/brw_clip_tri.c | 12 ++-- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 10 +- src/mesa/drivers/dri/i965/brw_eu.h|2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 11 ++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 15 --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 15 --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 12 +++- 8 files changed, 43 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 4313637..c37ac53 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -132,7 +132,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; + int plane_loop; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); @@ -160,7 +160,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); - plane_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); + plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask 1) */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 97eae35..3182a98 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -232,8 +232,8 @@ void brw_clip_tri( struct brw_clip_compile *c ) struct brw_indirect inlist_ptr = brw_indirect(4, 
0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *vertex_loop; + int plane_loop; + int vertex_loop; GLuint hpos_offset = brw_vert_result_to_offset(c-vue_map, VERT_RESULT_HPOS); @@ -244,7 +244,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c-reg.vertex[3]) ); - plane_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); + plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask 1) */ @@ -266,7 +266,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, c-reg.nr_verts, brw_imm_ud(0)); -vertex_loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); +vertex_loop = brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ @@ -404,7 +404,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_compile *p = c-func; - struct brw_instruction *loop; + int loop; /* for (loopcount = nr_verts-2; loopcount 0; loopcount--) */ @@ -427,7 +427,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); + loop = brw_DO(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN 2)); diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 2a984fe..d057695 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -273,7 +273,7 @@ static void emit_lines(struct brw_clip_compile *c, bool do_offset) { struct brw_compile *p = c-func; - struct brw_instruction *loop; + int loop; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v1 = brw_indirect(1, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); @@ 
-285,7 +285,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_MOV(p, c-reg.loopcount, c-reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c-reg.inlist)); - loop = brw_insn_of(p, brw_DO(p, BRW_EXECUTE_1)); + loop = brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); @@ -307,7 +307,7 @@ static void
[Mesa-dev] [PATCH 4/6] i965: let if_stack just store the instruction index
Let if_stack just store the instruction pointer(an index). This is somehow more flexible than store the instruction memory address. This is a prepare work of let us increase the instruction store size dynamically by reralloc. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |3 +-- src/mesa/drivers/dri/i965/brw_eu.h |4 +++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 16 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b5a858b..77eb2cf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -191,8 +191,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Set up control flow stack */ p-if_stack_depth = 0; p-if_stack_array_size = 16; - p-if_stack = - rzalloc_array(mem_ctx, struct brw_instruction *, p-if_stack_array_size); + p-if_stack = rzalloc_array(mem_ctx, int, p-if_stack_array_size); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 638358f..cb324fe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -123,8 +123,10 @@ struct brw_compile { /* Control flow stacks: * - if_stack contains IF and ELSE instructions which must be patched * (and popped) once the matching ENDIF instruction is encountered. +* +* Just store the instruction pointer(an index). 
*/ - struct brw_instruction **if_stack; + int *if_stack; int if_stack_depth; int if_stack_array_size; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index a611a1b..352c72c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -898,14 +898,14 @@ int brw_JMPI(struct brw_compile *p, } static void -push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +push_if_stack(struct brw_compile *p, GLuint inst) { p-if_stack[p-if_stack_depth] = inst; p-if_stack_depth++; if (p-if_stack_array_size = p-if_stack_depth) { p-if_stack_array_size *= 2; - p-if_stack = reralloc(p-mem_ctx, p-if_stack, struct brw_instruction *, + p-if_stack = reralloc(p-mem_ctx, p-if_stack, int, p-if_stack_array_size); } } @@ -957,7 +957,7 @@ int brw_IF(struct brw_compile *p, GLuint execute_size) p-current-header.predicate_control = BRW_PREDICATE_NONE; - push_if_stack(p, insn); + push_if_stack(p, insn_idx); return insn_idx; } @@ -988,7 +988,7 @@ gen6_IF(struct brw_compile *p, uint32_t conditional, if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, insn_idx); return insn_idx; } @@ -1137,7 +1137,7 @@ brw_ELSE(struct brw_compile *p) if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, insn_idx); } void @@ -1150,11 +1150,11 @@ brw_ENDIF(struct brw_compile *p) /* Pop the IF and (optional) ELSE instructions from the stack */ p-if_stack_depth--; - if (p-if_stack[p-if_stack_depth]-header.opcode == BRW_OPCODE_ELSE) { - else_inst = p-if_stack[p-if_stack_depth]; + if (p-store[p-if_stack[p-if_stack_depth]].header.opcode == BRW_OPCODE_ELSE) { + else_inst = p-store[p-if_stack[p-if_stack_depth]]; p-if_stack_depth--; } - if_inst = p-if_stack[p-if_stack_depth]; + if_inst = p-store[p-if_stack[p-if_stack_depth]]; if (p-single_program_flow) { /* ENDIF is useless; don't bother emitting 
it. */ -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] i965: let brw_land_fwd_jump() get the jmp_insn by the instruction index
This is a prepare work of let us increase the instruction store size dynamically by reralloc. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.h |3 +-- src/mesa/drivers/dri/i965/brw_eu_emit.c |4 ++-- src/mesa/drivers/dri/i965/brw_sf_emit.c |8 src/mesa/drivers/dri/i965/brw_wm_emit.c |4 ++-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index cb324fe..4207238 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1011,8 +1011,7 @@ int brw_CONT(struct brw_compile *p, int pop_count); int gen6_CONT(struct brw_compile *p); /* Forward jumps: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); +void brw_land_fwd_jump(struct brw_compile *p, int jmp); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 352c72c..8f8a5a5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1346,11 +1346,11 @@ int brw_WHILE(struct brw_compile *p, int do_insn_idx) /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) +void brw_land_fwd_jump(struct brw_compile *p, int jmp) { struct intel_context *intel = p-brw-intel; struct brw_instruction *landing = p-store[p-nr_insn]; + struct brw_instruction *jmp_insn = brw_insn_of(p, jmp); GLuint jmpi = 1; if (intel-gen = 5) diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 6570ad0..ba890d4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -717,7 +717,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; - struct 
brw_instruction *jmp; + int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; @@ -738,7 +738,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1_3DPRIM_POLYGON) | (1_3DPRIM_RECTLIST) | (1_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_insn_of(p, brw_JMPI(p, ip, ip, brw_imm_d(0))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p-flag_value; brw_push_insn_state(p); @@ -759,7 +759,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1_3DPRIM_LINESTRIP_CONT) | (1_3DPRIM_LINESTRIP_BF) | (1_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_insn_of(p, brw_JMPI(p, ip, ip, brw_imm_d(0))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p-flag_value; brw_push_insn_state(p); @@ -772,7 +772,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1BRW_SPRITE_POINT_ENABLE)); - jmp = brw_insn_of(p, brw_JMPI(p, ip, ip, brw_imm_d(0))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p-flag_value; brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index c9e3bf6..9889f17 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1528,7 +1528,7 @@ void emit_fb_write(struct brw_wm_compile *c, else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); struct brw_reg ip = brw_ip_reg(); - struct brw_instruction *jmp; + int jmp; brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); @@ -1537,7 +1537,7 @@ void emit_fb_write(struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(126)); - jmp = brw_insn_of(p, brw_JMPI(p, ip, ip, brw_imm_w(0))); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965: increase the brw eu instruction store size dynamically
Here is the final patch to increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix something like 'GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 1'. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |7 +++ src/mesa/drivers/dri/i965/brw_eu.h |7 --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 12 +++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 77eb2cf..f13affe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -174,6 +174,13 @@ void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p-brw = brw; + /* +* Set the initial instruction store array size to 1024, if found that +* isn't enough, then it will double the store size at brw_next_insn() +* until it meet the BRW_EU_MAX_INSN +*/ + p-store_size = 1024; + p-store = rzalloc_array(mem_ctx, struct brw_instruction, p-store_size); p-nr_insn = 0; p-current = p-stack; p-compressed = false; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 4207238..88f1def 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -100,11 +100,12 @@ struct brw_glsl_call; -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN (1024 * 1024) struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; + struct brw_instruction *store; + int store_size; GLuint nr_insn; void *mem_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 8f8a5a5..0bcc015 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -686,7 +686,17 @@ int brw_next_insn(struct brw_compile *p, GLuint opcode) { int insn; - 
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + if (p->nr_insn + 1 > p->store_size) { + if (p->nr_insn + 1 > BRW_EU_MAX_INSN) { + assert(!"exceed max brw allowed eu instructions"); + } else { + if (0) +printf("incresing the store size to %d\n", p->store_size << 1); + p->store_size <<= 1; + p->store = reralloc(p->mem_ctx, p->store, + struct brw_instruction, p->store_size); + } + } insn = p->nr_insn++; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] glx: remove the unused var
Silence the compile warning Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/glx/drisw_glx.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c index a150c61..7ba491b 100644 --- a/src/glx/drisw_glx.c +++ b/src/glx/drisw_glx.c @@ -304,7 +304,6 @@ drisw_bind_tex_image(Display * dpy, struct glx_context *gc = __glXGetCurrentContext(); struct drisw_context *pcp = (struct drisw_context *) gc; __GLXDRIdrawable *base = GetGLXDRIDrawable(dpy, drawable); - struct glx_display *dpyPriv = __glXInitialize(dpy); struct drisw_drawable *pdraw = (struct drisw_drawable *) base; struct drisw_screen *psc; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] i965: let if_stack just store the instruction index
Let if_stack just store the instruction pointer(an index). This is somehow more flexible than store the instruction memory address. This patch is mainly for the next patch. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |3 +-- src/mesa/drivers/dri/i965/brw_eu.h |4 +++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 16 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b5a858b..d6e5c09 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -191,8 +191,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Set up control flow stack */ p-if_stack_depth = 0; p-if_stack_array_size = 16; - p-if_stack = - rzalloc_array(mem_ctx, struct brw_instruction *, p-if_stack_array_size); + p-if_stack = rzalloc_array(mem_ctx, GLuint, p-if_stack_array_size); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 8a446eb..9bc8a76 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -123,8 +123,10 @@ struct brw_compile { /* Control flow stacks: * - if_stack contains IF and ELSE instructions which must be patched * (and popped) once the matching ENDIF instruction is encountered. +* +* Just store the instruction pointer(an index). 
*/ - struct brw_instruction **if_stack; + GLuint *if_stack; int if_stack_depth; int if_stack_array_size; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 60350ca..b9feb7d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -896,14 +896,14 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, } static void -push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +push_if_stack(struct brw_compile *p, GLuint inst) { p-if_stack[p-if_stack_depth] = inst; p-if_stack_depth++; if (p-if_stack_array_size = p-if_stack_depth) { p-if_stack_array_size *= 2; - p-if_stack = reralloc(p-mem_ctx, p-if_stack, struct brw_instruction *, + p-if_stack = reralloc(p-mem_ctx, p-if_stack, GLuint, p-if_stack_array_size); } } @@ -957,7 +957,7 @@ brw_IF(struct brw_compile *p, GLuint execute_size) p-current-header.predicate_control = BRW_PREDICATE_NONE; - push_if_stack(p, insn); + push_if_stack(p, p-nr_insn - 1); return insn; } @@ -989,7 +989,7 @@ gen6_IF(struct brw_compile *p, uint32_t conditional, if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, p-nr_insn - 1); return insn; } @@ -1139,7 +1139,7 @@ brw_ELSE(struct brw_compile *p) if (!p-single_program_flow) insn-header.thread_control = BRW_THREAD_SWITCH; - push_if_stack(p, insn); + push_if_stack(p, p-nr_insn - 1); } void @@ -1152,11 +1152,11 @@ brw_ENDIF(struct brw_compile *p) /* Pop the IF and (optional) ELSE instructions from the stack */ p-if_stack_depth--; - if (p-if_stack[p-if_stack_depth]-header.opcode == BRW_OPCODE_ELSE) { - else_inst = p-if_stack[p-if_stack_depth]; + if (p-store[p-if_stack[p-if_stack_depth]].header.opcode == BRW_OPCODE_ELSE) { + else_inst = p-store[p-if_stack[p-if_stack_depth]]; p-if_stack_depth--; } - if_inst = p-if_stack[p-if_stack_depth]; + if_inst = p-store[p-if_stack[p-if_stack_depth]]; if (p-single_program_flow) { /* ENDIF is 
useless; don't bother emitting it. */ -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] i965: increase the brw eu instruction store size dynamically
Increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix something that 'GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 1'. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/drivers/dri/i965/brw_eu.c |7 +++ src/mesa/drivers/dri/i965/brw_eu.h |7 --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 12 +++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index d6e5c09..3de2a1e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -174,6 +174,13 @@ void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p-brw = brw; + /* +* Set the initial instruction store array size to 1024, if found that +* isn't enough, then it will double the store size at brw_next_insn() +* until it meet the BRW_EU_MAX_INSN +*/ + p-store_size = 1024; + p-store = rzalloc_array(mem_ctx, struct brw_instruction, p-store_size); p-nr_insn = 0; p-current = p-stack; p-compressed = false; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9bc8a76..cb5bf2a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -100,11 +100,12 @@ struct brw_glsl_call; -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1 +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN (1024 * 1024) struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; + struct brw_instruction *store; + GLuint store_size; GLuint nr_insn; void *mem_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b9feb7d..782261f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -688,7 +688,17 @@ brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; - assert(p-nr_insn 
+ 1 < BRW_EU_MAX_INSN); + if (p->nr_insn + 1 > p->store_size) { + if (p->nr_insn + 1 > BRW_EU_MAX_INSN) { + assert(!"exceed max brw allowed eu instructions"); + } else { + if (0) +printf("incresing the store size to %d\n", p->store_size << 1); + p->store_size <<= 1; + p->store = reralloc(p->mem_ctx, p->store, + struct brw_instruction, p->store_size); + } + } insn = &p->store[p->nr_insn++]; memcpy(insn, p->current, sizeof(*insn)); -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] i965: let if_stack just store the instruction index
On Tue, Nov 29, 2011 at 10:35:42AM -0800, Eric Anholt wrote: On Tue, 29 Nov 2011 16:08:38 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: Let if_stack just store the instruction pointer(an index). This is somehow more flexible than store the instruction memory address. I'd be more specific: This lets us realloc the instruction store. diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b5a858b..d6e5c09 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -191,8 +191,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Set up control flow stack */ p->if_stack_depth = 0; p->if_stack_array_size = 16; - p->if_stack = - rzalloc_array(mem_ctx, struct brw_instruction *, p->if_stack_array_size); + p->if_stack = rzalloc_array(mem_ctx, GLuint, p->if_stack_array_size); } Please use plain types instead of the awful GL-decorated types, unless it's something directly exposed in the GL API. Agreed. But I somehow followed the type usage of 'GLuint nr_insn' defined in brw_compile. Since the if_stack is used to store instruction index, and the index is defined in a GLuint type. That's why I originally used GLuint. So, I should also change the type of nr_insn to something like int, too? (BTW, I guess int would be enough, but for good semantics, a positive index, should I use uint32_t?) Thanks, Yuanhan Liu (if you want sized types, the standard ones in inttypes.h are good) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] i965: increase the brw eu instruction store size dynamically
On Tue, Nov 29, 2011 at 10:40:46AM -0800, Eric Anholt wrote: On Tue, 29 Nov 2011 16:08:39 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: Increase the brw eu instruction store size dynamically instead of just allocating it statically with a constant limit. This would fix something that 'GL_MAX_PROGRAM_INSTRUCTIONS_ARB was 16384 while the driver would limit it to 1'. I was going to caution against the other assumptions on insn pointers staying valid in pre-gen6 code in case someone reduced the initial store size at some point, and you'd already reduced the initial store size :) So, in brw_clip_emit.c I presume you were talking about brw_vec4_emit.cpp and brw_sf_emit.c there are a few cases (particularly loops) where the brw_instruction * is taken from one instruction emit and used later. I'd be comfortable with this patch if those were converted to using an instruction index as well. Right, I missed those. Will fix it. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: move ElementArrayBufferObj to gl_array_object
On Sat, Nov 26, 2011 at 09:01:51AM -0700, Brian Paul wrote: On 11/23/2011 06:15 PM, Yuanhan Liu wrote: On Wed, Nov 23, 2011 at 08:25:59AM -0700, Brian Paul wrote: On 11/23/2011 02:26 AM, Yuanhan Liu wrote: According to opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515, the element buffer object is listed in the vertex array object. So, move the ElementArrayBufferObj inside gl_array_object to make element buffer object per-vao. This would fix most of (3 left) the intel oglc vao test failures Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/main/api_arrayelt.c |2 +- src/mesa/main/api_validate.c | 14 ++-- src/mesa/main/arrayobj.c |4 +++ src/mesa/main/attrib.c|7 ++--- src/mesa/main/bufferobj.c |9 ++- src/mesa/main/context.c |1 - src/mesa/main/get.c |2 +- src/mesa/main/mtypes.h|3 +- src/mesa/vbo/vbo_exec_array.c | 42 src/mesa/vbo/vbo_save_api.c |4 +- 10 files changed, 44 insertions(+), 44 deletions(-) I presume you've done a piglit run to check for regressions. Nope. But I did test this patch with all intel oglc testcases, and with no regression. Is this OK? You should probably find and run the piglit tests that use GL_ELEMENT_ARRAY_BUFFER, at least. Done and found no piglit regression. Can I push it? If no objection, I will push it tomorrow. Thanks, Yuanhan Liu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Piglit] [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao
On Thu, Nov 24, 2011 at 11:25:23AM -0800, Eric Anholt wrote: On Wed, 23 Nov 2011 12:24:37 -0700, Brian Paul bri...@vmware.com wrote: On 11/23/2011 12:12 PM, Eric Anholt wrote: On Wed, 23 Nov 2011 17:34:30 +0800, Yuanhan Liuyuanhan@linux.intel.com wrote: From 9a1da8748f0faa23f34398213ff7ee45fda6bf36 Mon Sep 17 00:00:00 2001 From: Yuanhan Liuyuanhan@linux.intel.com Date: Wed, 23 Nov 2011 17:37:33 +0800 Subject: [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao According opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515: the element buffer object is listed in vertex array object. Add a testcase to test that. v2: fix n careless 'always-return-PIGLIT_PASS' fault. Signed-off-by: Yuanhan Liuyuanhan@linux.intel.com diff --git a/tests/general/vao-element-array-buffer.c b/tests/general/vao-element-array-buffer.c new file mode 100644 index 000..8803bff --- /dev/null +++ b/tests/general/vao-element-array-buffer.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Yuanhan Liuyuanhan@linux.intel.com + */ Generally, the style I advocate is to not include the Authors line in copyright messages. git records who the author was already, and will provide a more accurate view of who wrote the current code if someone wants to know some time down the line. I don't know how many times, 5 years later, I've had emails from someone asking about some code I'd written that just had my name in the header and nothing else really of mine, from back when we were including Authors lines in the CVS days. +glutSwapBuffers(); If you switched this to piglit_present_results, this test could be an add_concurrent_test() instead. Under what circumstances can't glutSwapBuffers() be replaced by piglit_present_results()? I'm guessing many/most of the tests that use the former could be changed to use the later. That might be a good project for someone who wants to contribute. I guess everything using the framework could be trivially converted to it. Doesn't mean all of those are ready to be made concurrent (things explicitly using the window system framebuffer would still need work), but it might save a measurable amount of time to skip that many swaps. Got it. Thanks. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: move ElementArrayBufferObj to gl_array_object
According opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515, the element buffer object is listed in vertex array object. So, move the ElementArrayBufferObj inside gl_array_object to make element buffer object per-vao. This would fix most of(3 left) intel oglc vao test fail Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/main/api_arrayelt.c |2 +- src/mesa/main/api_validate.c | 14 ++-- src/mesa/main/arrayobj.c |4 +++ src/mesa/main/attrib.c|7 ++--- src/mesa/main/bufferobj.c |9 ++- src/mesa/main/context.c |1 - src/mesa/main/get.c |2 +- src/mesa/main/mtypes.h|3 +- src/mesa/vbo/vbo_exec_array.c | 42 src/mesa/vbo/vbo_save_api.c |4 +- 10 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index b93a057..4d9ff43 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -1580,7 +1580,7 @@ static void _ae_update_state( struct gl_context *ctx ) aa++; } - check_vbo(actx, ctx-Array.ElementArrayBufferObj); + check_vbo(actx, arrayObj-ElementArrayBufferObj); ASSERT(at - actx-attribs = VERT_ATTRIB_MAX); ASSERT(aa - actx-arrays 32); diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 1fcf5cd..4c7baca 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -182,7 +182,7 @@ check_index_bounds(struct gl_context *ctx, GLsizei count, GLenum type, memset(ib, 0, sizeof(ib)); ib.type = type; ib.ptr = indices; - ib.obj = ctx-Array.ElementArrayBufferObj; + ib.obj = ctx-Array.ArrayObj-ElementArrayBufferObj; vbo_get_minmax_index(ctx, prim, ib, min, max); @@ -254,10 +254,10 @@ _mesa_validate_DrawElements(struct gl_context *ctx, return GL_FALSE; /* Vertex buffer object tests */ - if (_mesa_is_bufferobj(ctx-Array.ElementArrayBufferObj)) { + if (_mesa_is_bufferobj(ctx-Array.ArrayObj-ElementArrayBufferObj)) { /* use indices in the buffer object */ /* make sure count doesn't go outside buffer bounds */ - if 
(index_bytes(type, count) ctx-Array.ElementArrayBufferObj-Size) { + if (index_bytes(type, count) ctx-Array.ArrayObj-ElementArrayBufferObj-Size) { _mesa_warning(ctx, glDrawElements index out of buffer bounds); return GL_FALSE; } @@ -315,10 +315,10 @@ _mesa_validate_DrawRangeElements(struct gl_context *ctx, GLenum mode, return GL_FALSE; /* Vertex buffer object tests */ - if (_mesa_is_bufferobj(ctx-Array.ElementArrayBufferObj)) { + if (_mesa_is_bufferobj(ctx-Array.ArrayObj-ElementArrayBufferObj)) { /* use indices in the buffer object */ /* make sure count doesn't go outside buffer bounds */ - if (index_bytes(type, count) ctx-Array.ElementArrayBufferObj-Size) { + if (index_bytes(type, count) ctx-Array.ArrayObj-ElementArrayBufferObj-Size) { _mesa_warning(ctx, glDrawRangeElements index out of buffer bounds); return GL_FALSE; } @@ -454,10 +454,10 @@ _mesa_validate_DrawElementsInstanced(struct gl_context *ctx, return GL_FALSE; /* Vertex buffer object tests */ - if (_mesa_is_bufferobj(ctx-Array.ElementArrayBufferObj)) { + if (_mesa_is_bufferobj(ctx-Array.ArrayObj-ElementArrayBufferObj)) { /* use indices in the buffer object */ /* make sure count doesn't go outside buffer bounds */ - if (index_bytes(type, count) ctx-Array.ElementArrayBufferObj-Size) { + if (index_bytes(type, count) ctx-Array.ArrayObj-ElementArrayBufferObj-Size) { _mesa_warning(ctx, glDrawElementsInstanced index out of buffer bounds); return GL_FALSE; diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 1283940..a0c9b11 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -133,6 +133,7 @@ _mesa_delete_array_object( struct gl_context *ctx, struct gl_array_object *obj ) { (void) ctx; unbind_array_object_vbos(ctx, obj); + _mesa_reference_buffer_object(ctx, obj-ElementArrayBufferObj, NULL); _glthread_DESTROY_MUTEX(obj-Mutex); free(obj); } @@ -252,6 +253,9 @@ _mesa_initialize_array_object( struct gl_context *ctx, #if FEATURE_point_size_array init_array(ctx, obj-PointSize, 1, 
GL_FLOAT); #endif + + _mesa_reference_buffer_object(ctx, obj-ElementArrayBufferObj, + ctx-Shared-NullBufferObj); } diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index f368eec..30297de 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1385,8 +1385,8 @@ save_array_attrib(struct gl_context *ctx, /* Just reference them here */ _mesa_reference_buffer_object(ctx, dest-ArrayBufferObj, src-ArrayBufferObj); - _mesa_reference_buffer_object(ctx
[Mesa-dev] [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao
According opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515: the element buffer object is listed in vertex array object. Add a testcase to test that. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/vao-element-array-buffer.c | 94 ++ 3 files changed, 96 insertions(+), 0 deletions(-) create mode 100644 tests/general/vao-element-array-buffer.c diff --git a/tests/all.tests b/tests/all.tests index 851db11..ad68d71 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -305,6 +305,7 @@ add_plain_test(general, 'two-sided-lighting-separate-specular') add_plain_test(general, 'user-clip') add_plain_test(general, 'vao-01') add_plain_test(general, 'vao-02') +add_plain_test(general, 'vao-element-array-buffer') add_plain_test(general, 'varray-disabled') add_plain_test(general, 'vbo-bufferdata') add_plain_test(general, 'vbo-map-remap') diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt index 58cbaa1..185f59d 100644 --- a/tests/general/CMakeLists.gl.txt +++ b/tests/general/CMakeLists.gl.txt @@ -111,6 +111,7 @@ add_executable (user-clip user-clip.c) add_executable (varray-disabled varray-disabled.c) add_executable (vao-01 vao-01.c) add_executable (vao-02 vao-02.c) +add_executable (vao-element-array-buffer vao-element-array-buffer.c) add_executable (vbo-map-remap vbo-map-remap.c) add_executable (vbo-bufferdata vbo-bufferdata.c) add_executable (vbo-subdata-zero vbo-subdata-zero.c) diff --git a/tests/general/vao-element-array-buffer.c b/tests/general/vao-element-array-buffer.c new file mode 100644 index 000..32600b8 --- /dev/null +++ b/tests/general/vao-element-array-buffer.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Yuanhan Liu yuanhan@linux.intel.com + */ + +/** + * @file vao-element-buffer.c + * + * A simple test case to test that GL_ELEMENT_ARRAY_BUFFER is part of vao + * + */ + +#include piglit-util.h + +int piglit_width = 100; +int piglit_height = 100; +int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE; + +static GLuint vao; + +enum piglit_result +piglit_display(void) +{ + GLboolean pass = GL_TRUE; + GLfloat expected[4] = {1, 0, 0, 1}; + + glClear(GL_COLOR_BUFFER_BIT); + + glBindVertexArray(vao); + + glColor3f(1, 0, 0); + glDrawElements(GL_QUADS, 4, GL_UNSIGNED_BYTE, NULL); + pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height, expected); + + glutSwapBuffers(); + + return PIGLIT_PASS; +} + +void +piglit_init(int argc, char **argv) +{ + GLuint vbo; + GLuint element; + GLfloat vertics[] = { + -1, -1, 0, +1, -1, 0, +1, 1, 0, + -1, 1, 0, + }; + GLubyte indics[] = {0, 1, 2, 3}; + + piglit_require_extension(GL_ARB_vertex_array_object); + + glClearColor(0, 0, 0, 1); + + glGenBuffers(1, vbo); + glGenBuffers(1, element); + + glGenVertexArrays(1, vao); + 
glBindVertexArray(vao); + + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertics), vertics, GL_STATIC_DRAW); + glVertexPointer(3, GL_FLOAT, 0, NULL); + glEnableClientState(GL_VERTEX_ARRAY); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, element); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indics), indics, GL_STATIC_DRAW); + + glBindVertexArray(0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao
On Wed, Nov 23, 2011 at 05:27:32PM +0800, Yuanhan Liu wrote: According opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515: the element buffer object is listed in vertex array object. Add a testcase to test that. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/vao-element-array-buffer.c | 94 ++ 3 files changed, 96 insertions(+), 0 deletions(-) create mode 100644 tests/general/vao-element-array-buffer.c diff --git a/tests/all.tests b/tests/all.tests index 851db11..ad68d71 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -305,6 +305,7 @@ add_plain_test(general, 'two-sided-lighting-separate-specular') add_plain_test(general, 'user-clip') add_plain_test(general, 'vao-01') add_plain_test(general, 'vao-02') +add_plain_test(general, 'vao-element-array-buffer') add_plain_test(general, 'varray-disabled') add_plain_test(general, 'vbo-bufferdata') add_plain_test(general, 'vbo-map-remap') diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt index 58cbaa1..185f59d 100644 --- a/tests/general/CMakeLists.gl.txt +++ b/tests/general/CMakeLists.gl.txt @@ -111,6 +111,7 @@ add_executable (user-clip user-clip.c) add_executable (varray-disabled varray-disabled.c) add_executable (vao-01 vao-01.c) add_executable (vao-02 vao-02.c) +add_executable (vao-element-array-buffer vao-element-array-buffer.c) add_executable (vbo-map-remap vbo-map-remap.c) add_executable (vbo-bufferdata vbo-bufferdata.c) add_executable (vbo-subdata-zero vbo-subdata-zero.c) diff --git a/tests/general/vao-element-array-buffer.c b/tests/general/vao-element-array-buffer.c new file mode 100644 index 000..32600b8 --- /dev/null +++ b/tests/general/vao-element-array-buffer.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Yuanhan Liu yuanhan@linux.intel.com + */ + +/** + * @file vao-element-buffer.c + * + * A simple test case to test that GL_ELEMENT_ARRAY_BUFFER is part of vao + * + */ + +#include piglit-util.h + +int piglit_width = 100; +int piglit_height = 100; +int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE; + +static GLuint vao; + +enum piglit_result +piglit_display(void) +{ + GLboolean pass = GL_TRUE; + GLfloat expected[4] = {1, 0, 0, 1}; + + glClear(GL_COLOR_BUFFER_BIT); + + glBindVertexArray(vao); + + glColor3f(1, 0, 0); + glDrawElements(GL_QUADS, 4, GL_UNSIGNED_BYTE, NULL); + pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height, expected); + + glutSwapBuffers(); + + return PIGLIT_PASS; Oops, my fault, here is the new one: -- From 9a1da8748f0faa23f34398213ff7ee45fda6bf36 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 23 Nov 2011 17:37:33 +0800 Subject: [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao According opengl spec 4.2.pdf table 6.12 
(Vertex Array Object State) at page 515: the element buffer object is listed in the vertex array object. Add a testcase to test that. v2: fix a careless 'always-return-PIGLIT_PASS' fault. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/vao-element-array-buffer.c | 94 ++ 3 files changed, 96 insertions(+), 0 deletions(-) create mode 100644 tests/general/vao-element-array-buffer.c diff --git a/tests/all.tests b/tests/all.tests index 851db11..ad68d71 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -305,6 +305,7 @@ add_plain_test(general, 'two-sided-lighting-separate
Re: [Mesa-dev] [Piglit] [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao
On Wed, Nov 23, 2011 at 11:12:19AM -0800, Eric Anholt wrote: On Wed, 23 Nov 2011 17:34:30 +0800, Yuanhan Liu yuanhan@linux.intel.com wrote: From 9a1da8748f0faa23f34398213ff7ee45fda6bf36 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Wed, 23 Nov 2011 17:37:33 +0800 Subject: [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao According opengl spec 4.2.pdf table 6.12 (Vertex Array Object State) at page 515: the element buffer object is listed in vertex array object. Add a testcase to test that. v2: fix n careless 'always-return-PIGLIT_PASS' fault. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com diff --git a/tests/general/vao-element-array-buffer.c b/tests/general/vao-element-array-buffer.c new file mode 100644 index 000..8803bff --- /dev/null +++ b/tests/general/vao-element-array-buffer.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Yuanhan Liu yuanhan@linux.intel.com + */ Generally, the style I advocate is to not include the Authors line in copyright messages. It's OK to me. git records who the author was already, and will provide a more accurate view of who wrote the current code if someone wants to know some time down the line. Agreed. I don't know how many times, 5 years later, I've had emails from someone asking about some code I'd written that just had my name in the header and nothing else really of mine, from back when we were including Authors lines in the CVS days. Aha, interesting. + glutSwapBuffers(); If you switched this to piglit_present_results, this test could be an add_concurrent_test() instead. + GLfloat vertics[] = { + -1, -1, 0, +1, -1, 0, +1, 1, 0, + -1, 1, 0, + }; + GLubyte indics[] = {0, 1, 2, 3}; vertices and indices Oops, sorry for the typos. Will fix it in the next patch. Thanks, Yuanhan Liu Other than these silly nitpicks, Reviewed-by: Eric Anholt e...@anholt.net Thanks! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Piglit] [PATCH] Add a simple testcase to test that GL_ELEMENT_ARRAY_BUFFER is per vao
On Wed, Nov 23, 2011 at 08:33:00AM -0700, Brian Paul wrote: On 11/23/2011 02:34 AM, Yuanhan Liu wrote: +GLuint element; +GLfloat vertics[] = { minor nit: s/vertics/vertices/ +-1, -1, 0, + 1, -1, 0, + 1, 1, 0, +-1, 1, 0, +}; +GLubyte indics[] = {0, 1, 2, 3}; and s/indics/indices/ Sorry for the typos, will fix it in the next patch. Thanks, Yuanhan Liu + +piglit_require_extension(GL_ARB_vertex_array_object); + +glClearColor(0, 0, 0, 1); + +glGenBuffers(1,vbo); +glGenBuffers(1,element); + +glGenVertexArrays(1,vao); +glBindVertexArray(vao); + +glBindBuffer(GL_ARRAY_BUFFER, vbo); +glBufferData(GL_ARRAY_BUFFER, sizeof(vertics), vertics, GL_STATIC_DRAW); +glVertexPointer(3, GL_FLOAT, 0, NULL); +glEnableClientState(GL_VERTEX_ARRAY); + +glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, element); +glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indics), indics, GL_STATIC_DRAW); + +glBindVertexArray(0); +glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} Looks good otherwise. Reviewed-by: Brian Paul bri...@vmware.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: attach ElementArrayBufferObj to vertex array object
This would make the ElementArrayBufferObj binding per vertex array object. I didn't find anything to support this at the spec page[0], but I did find something that supports it at the OpenGL wiki page[1]. NOTE: in my tests, both NV's and ATI's closed-source OpenGL implementations attach the element array buffer to the VAO. This would fix most (3 left) of the Intel oglc vao test failures. [0]: http://www.opengl.org/registry/specs/ARB/vertex_array_object.txt [1]: http://www.opengl.org/wiki/Vertex_Array_Object Cc: i...@freedesktop.org Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- src/mesa/main/arrayobj.c |2 ++ src/mesa/main/bufferobj.c |4 src/mesa/main/mtypes.h|2 ++ 3 files changed, 8 insertions(+), 0 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 1283940..765a7d5 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -382,6 +382,8 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, GLboolean genRequired) ctx-NewState |= _NEW_ARRAY; ctx-Array.NewState |= _NEW_ARRAY_ALL; _mesa_reference_array_object(ctx, ctx-Array.ArrayObj, newObj); + if (newObj-ElementArrayBufferObj) + _mesa_reference_buffer_object(ctx, ctx-Array.ElementArrayBufferObj, newObj-ElementArrayBufferObj); /* Pass BindVertexArray call to device driver */ if (ctx-Driver.BindArrayObject newObj) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 4c77397..d96dc5f 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -603,6 +603,10 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer) /* bind new buffer */ _mesa_reference_buffer_object(ctx, bindTarget, newBufObj); + if (target == GL_ELEMENT_ARRAY_BUFFER) { + _mesa_reference_buffer_object(ctx, ctx-Array.ArrayObj-ElementArrayBufferObj, newBufObj); + } + /* Pass BindBuffer call to device driver */ if (ctx-Driver.BindBuffer) ctx-Driver.BindBuffer( ctx, target, newBufObj ); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 
b3427da..5cd1084 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1620,6 +1620,8 @@ struct gl_array_object * we can determine the max legal (in bounds) glDrawElements array index. */ GLuint _MaxElement; + + struct gl_buffer_object *ElementArrayBufferObj; }; -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Add a draw-pixel-with-texture testcase
Add a draw-pixel-with-texture testcase to check whether texture sampling happens while drawing pixels with glDrawPixels. v2: use piglit_probe_rect_rgba instead of just sampling a set of pixels (comments from Eric) Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/draw-pixel-with-texture.c | 77 +++ 3 files changed, 79 insertions(+), 0 deletions(-) create mode 100644 tests/general/draw-pixel-with-texture.c diff --git a/tests/all.tests b/tests/all.tests index 48ce2cb..851db11 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -242,6 +242,7 @@ general['draw-elements-user'] = PlainExecTest(['draw-elements', '-auto', 'user'] add_plain_test(general, 'draw-elements-vs-inputs') add_plain_test(general, 'draw-instanced') add_plain_test(general, 'draw-instanced-divisor') +add_plain_test(general, 'draw-pixel-with-texture') add_plain_test(general, 'draw-vertices') general['draw-vertices-user'] = PlainExecTest(['draw-vertices', '-auto', 'user']) add_plain_test(general, 'draw-vertices-half-float') diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt index 2cfc7be..58cbaa1 100644 --- a/tests/general/CMakeLists.gl.txt +++ b/tests/general/CMakeLists.gl.txt @@ -42,6 +42,7 @@ ENDIF (UNIX) add_executable (draw-elements-vs-inputs draw-elements-vs-inputs.c) add_executable (draw-instanced draw-instanced.c) add_executable (draw-instanced-divisor draw-instanced-divisor.c) +add_executable (draw-pixel-with-texture draw-pixel-with-texture.c) add_executable (draw-sync draw-sync.c) add_executable (draw-vertices draw-vertices.c) add_executable (draw-vertices-half-float draw-vertices-half-float.c) diff --git a/tests/general/draw-pixel-with-texture.c b/tests/general/draw-pixel-with-texture.c new file mode 100644 index 000..73e04c5 --- /dev/null +++ b/tests/general/draw-pixel-with-texture.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *Yuanhan Liu yuanhan@linux.intel.com + */ + +#include piglit-util.h + +int piglit_width = 100, piglit_height = 100; +int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE; + +#define SCREEN_SIZE_IN_PIXELS (piglit_width * piglit_height * 4) + + +enum piglit_result +piglit_display(void) +{ + GLboolean pass = GL_TRUE; + GLfloat tex_data[2 * 2 * 4] = { + 1, 0, 0, 1, 1, 0, 0, 1, + 1, 0, 0, 1, 1, 0, 0, 1, + }; + GLfloat pixels[SCREEN_SIZE_IN_PIXELS]; + GLfloat expected[4] = {0.2, 0, 0, 1}; + int i; + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_FLOAT, tex_data); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); + + glTexCoord2f(0.5, 0.5); + glEnable(GL_TEXTURE_2D); + + for (i = 0; i SCREEN_SIZE_IN_PIXELS; i += 4) { + pixels[i + 0] = 0.2; + pixels[i + 1] = 1; + pixels[i + 2] = 0; + pixels[i + 3] = 1; + } + + glClear(GL_COLOR_BUFFER_BIT); + glDrawPixels(piglit_width, piglit_height, GL_RGBA, GL_FLOAT, pixels); + + pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height, expected); + + glutSwapBuffers(); + + return pass ? PIGLIT_PASS : PIGLIT_FAIL; +} + + +void +piglit_init(int argc, char **argv) +{ + glClearColor(0.0, 0.0, 0.0, 1.0); +} -- 1.7.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] Patches to try to fix draw-pixel-with-textures in swrast
These two patches try to fix an issue that happens when calling glDrawPixels with texturing enabled. I have attached a piglit testcase for this issue. Yuanhan Liu (2): swrast: simplify the prototype of function texture_combine swrast: fix unmatched span-array-ChanType src/mesa/swrast/s_texcombine.c | 24 ++-- 1 files changed, 14 insertions(+), 10 deletions(-) -- 1.7.4.4 Here is the piglit testcase: -- From c199828cddae5bd0f8e96d586b91be6ad423dbce Mon Sep 17 00:00:00 2001 From: Yuanhan Liu yuanhan@linux.intel.com Date: Fri, 18 Nov 2011 15:37:33 +0800 Subject: [PATCH] Add a draw-pixel-with-texture testcase Add a draw-pixel-with-texture testcase to check whether texture sampling happens while drawing pixels with glDrawPixels. Signed-off-by: Yuanhan Liu yuanhan@linux.intel.com --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/draw-pixel-with-texture.c | 80 +++ 3 files changed, 82 insertions(+), 0 deletions(-) create mode 100644 tests/general/draw-pixel-with-texture.c diff --git a/tests/all.tests b/tests/all.tests index 48ce2cb..851db11 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -242,6 +242,7 @@ general['draw-elements-user'] = PlainExecTest(['draw-elements', '-auto', 'user'] add_plain_test(general, 'draw-elements-vs-inputs') add_plain_test(general, 'draw-instanced') add_plain_test(general, 'draw-instanced-divisor') +add_plain_test(general, 'draw-pixel-with-texture') add_plain_test(general, 'draw-vertices') general['draw-vertices-user'] = PlainExecTest(['draw-vertices', '-auto', 'user']) add_plain_test(general, 'draw-vertices-half-float') diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt index 2cfc7be..58cbaa1 100644 --- a/tests/general/CMakeLists.gl.txt +++ b/tests/general/CMakeLists.gl.txt @@ -42,6 +42,7 @@ ENDIF (UNIX) add_executable (draw-elements-vs-inputs draw-elements-vs-inputs.c) add_executable (draw-instanced draw-instanced.c) add_executable (draw-instanced-divisor draw-instanced-divisor.c) +add_executable 
(draw-pixel-with-texture draw-pixel-with-texture.c) add_executable (draw-sync draw-sync.c) add_executable (draw-vertices draw-vertices.c) add_executable (draw-vertices-half-float draw-vertices-half-float.c) diff --git a/tests/general/draw-pixel-with-texture.c b/tests/general/draw-pixel-with-texture.c new file mode 100644 index 000..c39d35e --- /dev/null +++ b/tests/general/draw-pixel-with-texture.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *Yuanhan Liu yuanhan@linux.intel.com + */ + +#include piglit-util.h + +int piglit_width = 100, piglit_height = 100; +int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE; + + +enum piglit_result +piglit_display(void) +{ + GLboolean pass = GL_TRUE; + GLfloat tex_data[2 * 2 * 4] = { + 1, 0, 0, 1, 1, 0, 0, 1, + 1, 0, 0, 1, 1, 0, 0, 1, + }; + GLfloat pixels[20 * 20 * 4]; + GLfloat expected[4] = {0.2, 0, 0, 1}; + int i; + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_FLOAT, tex_data); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); + + glTexCoord2f(0.5, 0.5); + glEnable(GL_TEXTURE_2D); + + for (i = 0; i 20 * 20 * 4; i += 4) { + pixels[i + 0] = 0.2; + pixels[i + 1] = 1; + pixels[i + 2] = 0; + pixels[i + 3] = 1; + } + + glClear(GL_COLOR_BUFFER_BIT); + + glDrawPixels(20, 20, GL_RGBA, GL_FLOAT, pixels); + + /* Here just sample a small set of pixels */ + pass