On Friday, January 23, 2009 4:25 pm Eric Anholt wrote: > On Fri, 2009-01-23 at 14:12 -0800, Jesse Barnes wrote: > > Adds code to set up fence registers at execbuf time on pre-965 chips > > as > > necessary. Also fixes up a few bugs in the pre-965 tile register > > support > > (get_order != ffs). The number of fences available to the kernel > > defaults > > to the hw limit minus 3 (for legacy X front/back/depth), but a new > > parameter > > allows userspace to override that as needed. > > > > We should probably be smarter here, and make sure the all the buffers > > are > > covered by fences before hanging the chip, and there may be other > > steps > > required in the fence eviction code (iirc Eric mentioned that some > > additional > > flushing may be required). > > Looks pretty good, and thanks for the flushing reminder. Basically we > need to flush the gpu write domain before waiting for rendering (or > we'll bug!), and we need to clear the read domain flags (or the next > person to come along after setting up their new fence register may get > stale speculated garbage from reads that occurred while the fence reg > wasn't in place). > > > -- > > Jesse Barnes, Intel Open Source Technology Center > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c > > b/drivers/gpu/drm/i915/i915_gem.c > > index 96316fd..41749cd 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -1445,21 +1445,27 @@ static void i915_write_fence_reg(struct > > drm_i915_fence_reg *reg) drm_i915_private_t *dev_priv = dev->dev_private; > > struct drm_i915_gem_object *obj_priv = obj->driver_private; > > int regnum = obj_priv->fence_reg; > > + int tile_width = 512; > > uint32_t val; > > uint32_t pitch_val; > > > > if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || > > (obj_priv->gtt_offset & (obj->size - 1))) { > > - WARN(1, "%s: object not 1M or size aligned\n", __func__); > > + WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n", > > + __func__, obj_priv->gtt_offset, obj->size); > > return; > > } > > > > if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) || > > IS_I945GM(dev) || > > IS_G33(dev))) > > - pitch_val = (obj_priv->stride / 128) - 1; > > - else > > - pitch_val = (obj_priv->stride / 512) - 1; > > + tile_width = 128; > > + > > + pitch_val = obj_priv->stride / tile_width; > > + WARN(pitch_val & (pitch_val - 1), > > + "pitch value not a power of two tile widths\n"); > > SET_TILING ioctl should just reject the tiling request if a bad stride > or a bad tiling type for a given chipset was passed in, so we don't have > to validate and WARN here. > > Hmm, actually all the fence management code probably ought to live in > i915_gem_tiling.c. Something for the next kernel, I guess.
Ok, here's what I've got based on the comments above. It looked like there was already to do what we wanted in the out of fence regs path, so I used it (i915_gem_object_set_to_gtt_domain seems to do what we want). And I moved the tile stride checks to set_tiling time and tried to make things clear there. -- Jesse Barnes, Intel Open Source Technology Center Adds code to set up fence registers at execbuf time on pre-965 chips as necessary. Also fixes up a few bugs in the pre-965 tile register support (get_order != ffs). The number of fences available to the kernel defaults to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter allows userspace to override that as needed. Signed-off-by: Jesse Barnes <jbar...@virtuousgeek.org> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e219ef7..f4c3f65 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_GEM: value = dev_priv->has_gem; break; + case I915_PARAM_NUM_FENCES_AVAIL: + value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; + break; default: DRM_ERROR("Unknown parameter %d\n", param->param); return -EINVAL; @@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data, case I915_SETPARAM_ALLOW_BATCHBUFFER: dev_priv->allow_batchbuffer = param->value; break; + case I915_SETPARAM_NUM_USED_FENCES: + if (param->value > dev_priv->num_fence_regs || + param->value < 0) + return -EINVAL; + /* Userspace can use first N regs */ + dev_priv->fence_reg_start = param->value; + break; default: DRM_ERROR("unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 899c92f..814f38f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); -static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj); +static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_evict_something(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, @@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) pgoff_t page_offset; unsigned long pfn; int ret = 0; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); /* We don't use vmf->pgoff since that has the fake offset */ page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> @@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Need a new fence register? */ if (obj_priv->fence_reg == I915_FENCE_REG_NONE && obj_priv->tiling_mode != I915_TILING_NONE) - i915_gem_object_get_fence_reg(obj); + i915_gem_object_get_fence_reg(obj, write); pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + page_offset; @@ -1445,21 +1446,25 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; int regnum = obj_priv->fence_reg; + int tile_width = 512; uint32_t val; uint32_t pitch_val; if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { - WARN(1, "%s: object not 1M or size aligned\n", __func__); + WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n", + __func__, obj_priv->gtt_offset, obj->size); return; } if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))) - pitch_val = (obj_priv->stride / 128) - 1; - else - pitch_val = (obj_priv->stride / 512) - 1; + tile_width = 128; + + /* Note: pitch better be a power of two tile widths */ + pitch_val = obj_priv->stride / tile_width; + pitch_val = ffs(pitch_val) - 1; val = obj_priv->gtt_offset; if (obj_priv->tiling_mode == I915_TILING_Y) @@ -1483,7 +1488,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { - WARN(1, "%s: object not 1M or size aligned\n", __func__); + WARN(1, "%s: object 0x%08x not 1M or size aligned\n", + __func__, obj_priv->gtt_offset); return; } @@ -1503,6 +1509,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) /** * i915_gem_object_get_fence_reg - set up a fence reg for an object * @obj: object to map through a fence reg + * @write: object is about to be written * * When mapping objects through the GTT, userspace wants to be able to write * to them without having to worry about swizzling if the object is tiled. @@ -1514,7 +1521,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) * and tiling format. */ static void -i915_gem_object_get_fence_reg(struct drm_gem_object *obj) +i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) { struct drm_device *dev = obj->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1527,12 +1534,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) WARN(1, "allocating a fence for non-tiled object?\n"); break; case I915_TILING_X: - WARN(obj_priv->stride & (512 - 1), - "object is X tiled but has non-512B pitch\n"); + if (!obj_priv->stride) + return; + WARN((obj_priv->stride & (512 - 1)), + "object 0x%08x is X tiled but has non-512B pitch\n", + obj_priv->gtt_offset); break; case I915_TILING_Y: - WARN(obj_priv->stride & (128 - 1), - "object is Y tiled but has non-128B pitch\n"); + if (!obj_priv->stride) + return; + WARN((obj_priv->stride & (128 - 1)), + "object 0x%08x is Y tiled but has non-128B pitch\n", + obj_priv->gtt_offset); break; } @@ -1563,9 +1576,9 @@ try_again: * objects to finish before trying again. */ if (i == dev_priv->num_fence_regs) { - ret = i915_gem_object_wait_rendering(reg->obj); + ret = i915_gem_object_set_to_gtt_domain(reg->obj, write); if (ret) { - WARN(ret, "wait_rendering failed: %d\n", ret); + WARN(ret, "switch to GTT domain failed: %d\n", ret); return; } goto try_again; @@ -1631,7 +1644,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) if (dev_priv->mm.suspended) return -EBUSY; if (alignment == 0) - alignment = PAGE_SIZE; + alignment = i915_gem_get_gtt_alignment(obj); if (alignment & (PAGE_SIZE - 1)) { DRM_ERROR("Invalid object alignment requested %u\n", alignment); return -EINVAL; @@ -2652,6 +2665,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) DRM_ERROR("Failure to bind: %d", ret); return ret; } + /* + * Pre-965 chips need a fence register set up in order to + * properly handle tiled surfaces. + */ + if (!IS_I965G(dev) && + obj_priv->fence_reg == I915_FENCE_REG_NONE && + obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_get_fence_reg(obj, true); } obj_priv->pin_count++; @@ -3291,7 +3312,7 @@ i915_gem_load(struct drm_device *dev) /* Old X drivers will take 0-2 for front, back, depth buffers */ dev_priv->fence_reg_start = 3; - if (IS_I965G(dev)) + if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) dev_priv->num_fence_regs = 16; else dev_priv->num_fence_regs = 8; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 241f39b..6847518 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -173,6 +173,36 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) dev_priv->mm.bit_6_swizzle_y = swizzle_y; } +/* Check pitch constriants for all chips & tiling formats */ +static bool +i915_tiling_ok(struct drm_device *dev, int stride, int tiling_mode) +{ + int tile_width = 512; + + /* Linear is always fine */ + if (tiling_mode == I915_TILING_NONE) + return true; + + if (tiling_mode == I915_TILING_Y) + tile_width = 128; + + /* 965+ just needs multiples of tile width */ + if (IS_I965G(dev)) { + if (stride & (tile_width - 1)) + return false; + return true; + } + + /* Pre-965 needs power of two tile widths */ + if (stride < tile_width) + return false; + + if (stride & (stride - 1)) + return false; + + return true; +} + /** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. @@ -191,6 +221,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; obj_priv = obj->driver_private; + if (!i915_tiling_ok(dev, args->stride, args->tiling_mode)) + return -EINVAL; + mutex_lock(&dev->struct_mutex); if (args->tiling_mode == I915_TILING_NONE) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f632b9..7b2f9c3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,12 +186,12 @@ #define FENCE_REG_830_0 0x2000 #define I830_FENCE_START_MASK 0x07f80000 #define I830_FENCE_TILING_Y_SHIFT 12 -#define I830_FENCE_SIZE_BITS(size) ((get_order(size >> 19) - 1) << 8) +#define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8) #define I830_FENCE_PITCH_SHIFT 4 #define I830_FENCE_REG_VALID (1<<0) #define I915_FENCE_START_MASK 0x0ff00000 -#define I915_FENCE_SIZE_BITS(size) ((get_order(size >> 20) - 1) << 8) +#define I915_FENCE_SIZE_BITS(size) ((ffs((size) >> 20) - 1) << 8) #define FENCE_REG_965_0 0x03000 #define I965_FENCE_PITCH_SHIFT 2 diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index b3bcf72..912cd52 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_LAST_DISPATCH 3 #define I915_PARAM_CHIPSET_ID 4 #define I915_PARAM_HAS_GEM 5 +#define I915_PARAM_NUM_FENCES_AVAIL 6 typedef struct drm_i915_getparam { int param; @@ -272,6 +273,7 @@ typedef struct drm_i915_getparam { #define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 #define I915_SETPARAM_ALLOW_BATCHBUFFER 3 +#define I915_SETPARAM_NUM_USED_FENCES 4 typedef struct drm_i915_setparam { int param; ------------------------------------------------------------------------------ This SF.net email is sponsored by: SourcForge Community SourceForge wants to tell your story. http://p.sf.net/sfu/sf-spreadtheword -- _______________________________________________ Dri-devel mailing list Dri-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/dri-devel