The diff below is a fairly large diff that moves the gtt management into the inteldrm driver. While this diff might fix some of the issues people have been reporting with inteldrm, I don't expect it to fix most of those issues. This brings our codebase closer to what Linux has though, and this will enable other changes that will address those issues.
There is one functional change though. This will take over the gtt entries that are mapping "stolen" graphics memory. This is what Linux does as well, so it should be safe. And it will give you a bit more space to map graphics objects into the aperture. If you didn't understand all that, don't worry. Just give this diff a try and test for regressions. And that means I just want to know about things that stopped working with this diff applied. Thanks, Mark Index: agp_i810.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/agp_i810.c,v retrieving revision 1.78 diff -u -p -r1.78 agp_i810.c --- agp_i810.c 6 Jun 2013 16:14:26 -0000 1.78 +++ agp_i810.c 17 Nov 2013 20:42:36 -0000 @@ -570,6 +570,8 @@ agp_i810_attach(struct device *parent, s printf(": no preallocated video memory\n"); #endif + isc->stolen = 0; + /* GATT address is already in there, make sure it's enabled */ gatt->ag_physical = READ4(AGP_I810_PGTBL_CTL) & ~1; break; Index: drm/drmP.h =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/drmP.h,v retrieving revision 1.152 diff -u -p -r1.152 drmP.h --- drm/drmP.h 7 Nov 2013 05:33:05 -0000 1.152 +++ drm/drmP.h 17 Nov 2013 18:28:25 -0000 @@ -71,6 +71,7 @@ #include "drm_linux_list.h" #include "drm.h" +#include "drm_mm.h" #include "drm_atomic.h" #include "agp.h" @@ -778,7 +779,6 @@ struct drm_device { atomic_t obj_count; u_int obj_name; atomic_t obj_memory; - uint32_t gtt_total; SPLAY_HEAD(drm_name_tree, drm_obj) name_tree; struct pool objpl; Index: drm/i915/i915_dma.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_dma.c,v retrieving revision 1.10 diff -u -p -r1.10 i915_dma.c --- drm/i915/i915_dma.c 7 Aug 2013 19:49:05 -0000 1.10 +++ drm/i915/i915_dma.c 17 Nov 2013 18:31:49 -0000 @@ -371,8 +371,6 @@ cleanup_gem_stolen: void i915_driver_lastclose(struct drm_device *dev) { - struct 
inteldrm_softc *dev_priv = dev->dev_private; - struct vm_page *p; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) { @@ -383,21 +381,6 @@ i915_driver_lastclose(struct drm_device ret = i915_gem_idle(dev); if (ret) DRM_ERROR("failed to idle hardware: %d\n", ret); - - if (dev_priv->agpdmat != NULL) { - /* - * make sure we nuke everything, we may have mappings that we've - * unrefed, but uvm has a reference to them for maps. Make sure - * they get unbound and any accesses will segfault. - * XXX only do ones in GEM. - */ - for (p = dev_priv->pgs; p < dev_priv->pgs + - (dev->agp->info.ai_aperture_size / PAGE_SIZE); p++) - pmap_page_protect(p, VM_PROT_NONE); - agp_bus_dma_destroy((struct agp_softc *)dev->agp->agpdev, - dev_priv->agpdmat); - } - dev_priv->agpdmat = NULL; } int Index: drm/i915/i915_drv.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_drv.c,v retrieving revision 1.47 diff -u -p -r1.47 i915_drv.c --- drm/i915/i915_drv.c 17 Nov 2013 18:47:13 -0000 1.47 +++ drm/i915/i915_drv.c 17 Nov 2013 19:08:38 -0000 @@ -1035,6 +1035,7 @@ inteldrm_attach(struct device *parent, s */ if (INTEL_INFO(dev)->gen < 3 || IS_I915G(dev) || IS_I915GM(dev)) ri->ri_flg |= RI_WRONLY; + ri->ri_flg |= RI_WRONLY; inteldrm_stdscreen.capabilities = ri->ri_caps; inteldrm_stdscreen.nrows = ri->ri_rows; Index: drm/i915/i915_drv.h =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_drv.h,v retrieving revision 1.33 diff -u -p -r1.33 i915_drv.h --- drm/i915/i915_drv.h 17 Nov 2013 18:47:13 -0000 1.33 +++ drm/i915/i915_drv.h 17 Nov 2013 19:56:32 -0000 @@ -248,6 +248,8 @@ struct intel_opregion { #define I915_FENCE_REG_NONE -1 #define I915_MAX_NUM_FENCES 16 +/* 16 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 5 struct drm_i915_fence_reg { struct list_head lru_list; @@ -505,7 +507,6 @@ struct intel_l3_parity { struct inteldrm_softc { 
struct device dev; struct device *drmdev; - bus_dma_tag_t agpdmat; /* tag from intagp for GEM */ bus_dma_tag_t dmat; bus_space_tag_t bst; struct agp_map *agph; @@ -651,6 +652,10 @@ struct inteldrm_softc { bool modeset_on_lid; struct { + /** Bridge to intel-gtt-ko */ + struct intel_gtt *gtt; + /** Memory allocator for GTT */ + struct drm_mm gtt_space; /** List of all objects in gtt_space. Used to restore gtt * mappings on resume */ struct list_head bound_list; @@ -891,6 +896,8 @@ struct inteldrm_file { struct drm_i915_gem_object { struct drm_obj base; + /** Current space allocated to this object in the GTT, if any. */ + struct drm_mm_node *gtt_space; struct list_head gtt_list; /** This object's place on the active/flushing/inactive lists */ @@ -898,25 +905,6 @@ struct drm_i915_gem_object { struct list_head mm_list; /** This object's place in the batchbuffer or on the eviction list */ struct list_head exec_list; - /* GTT binding. */ - bus_dmamap_t dmamap; - /* Current offset of the object in GTT space. */ - bus_addr_t gtt_offset; - struct intel_ring_buffer *ring; - u_int32_t *bit_17; - /* extra flags to bus_dma */ - int dma_flags; - /* Fence register for this object. needed for tiling. */ - int fence_reg; - - /** Breadcrumb of last rendering to the buffer. */ - u_int32_t last_read_seqno; - u_int32_t last_write_seqno; - /** Breadcrumb of last fenced GPU access to the buffer. */ - u_int32_t last_fenced_seqno; - /** Current tiling mode for the object. */ - u_int32_t tiling_mode; - u_int32_t stride; /** * This is set if the object is on the active lists (has pending @@ -932,11 +920,22 @@ struct drm_i915_gem_object { unsigned int dirty:1; /** + * Fence register bits (if any) for this object. Will be set + * as needed when mapped into the GTT. + * Protected by dev->struct_mutex. + */ + signed int fence_reg:I915_MAX_NUM_FENCE_BITS; + + /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; /** + * Current tiling mode for the object. 
+ */ + unsigned int tiling_mode:2; + /** * Whether the tiling parameters for the currently associated fence * register have changed. Note that for the purposes of tracking * tiling changes we also treat the unfenced register, the register @@ -981,6 +980,7 @@ struct drm_i915_gem_object { unsigned int has_aliasing_ppgtt_mapping:1; unsigned int has_global_gtt_mapping:1; + unsigned int has_dma_mapping:1; bus_dma_segment_t *pages; int pages_pin_count; @@ -992,6 +992,27 @@ struct drm_i915_gem_object { unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; + /** + * Current offset of the object in GTT space. + * + * This is the same as gtt_space->start + */ + uint32_t gtt_offset; + + struct intel_ring_buffer *ring; + + /** Breadcrumb of last rendering to the buffer. */ + uint32_t last_read_seqno; + uint32_t last_write_seqno; + /** Breadcrumb of last fenced GPU access to the buffer. */ + uint32_t last_fenced_seqno; + + /** Current tiling stride for the object, if it's tiled. */ + uint32_t stride; + + /** Record of address bit 17 of each page at last unbind. 
*/ + unsigned long *bit_17; + /** User space pin count and filp owning the pin */ uint32_t user_pin_count; struct drm_file *pin_filp; @@ -1294,8 +1315,15 @@ static inline void intel_opregion_enable /* i915_gem_gtt.c */ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); -void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level); +int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); +void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level); +void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj); +void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); +void i915_gem_init_global_gtt(struct drm_device *dev, + unsigned long start, + unsigned long mappable_end, + unsigned long end); /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); Index: drm/i915/i915_gem.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_gem.c,v retrieving revision 1.43 diff -u -p -r1.43 i915_gem.c --- drm/i915/i915_gem.c 17 Nov 2013 18:47:13 -0000 1.43 +++ drm/i915/i915_gem.c 17 Nov 2013 19:56:19 -0000 @@ -170,46 +170,29 @@ int i915_mutex_lock_interruptible(struct static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) { - return obj->dmamap && !obj->active; + return obj->gtt_space && !obj->active; } int i915_gem_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct inteldrm_softc *dev_priv = dev->dev_private; - struct drm_i915_gem_init *args = data; + struct drm_i915_gem_init *args = data; if (drm_core_check_feature(dev, DRIVER_MODESET)) return -ENODEV; - DRM_LOCK(); - if (args->gtt_start >= args->gtt_end || - args->gtt_end > dev->agp->info.ai_aperture_size || - (args->gtt_start & PAGE_MASK) != 0 || - (args->gtt_end & PAGE_MASK) != 0) { - DRM_UNLOCK(); + 
(args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) return -EINVAL; - } - /* - * putting stuff in the last page of the aperture can cause nasty - * problems with prefetch going into unassigned memory. Since we put - * a scratch page on all unused aperture pages, just leave the last - * page as a spill to prevent gpu hangs. - */ - if (args->gtt_end == dev->agp->info.ai_aperture_size) - args->gtt_end -= 4096; - - if (agp_bus_dma_init((struct agp_softc *)dev->agp->agpdev, - dev->agp->base + args->gtt_start, dev->agp->base + args->gtt_end, - &dev_priv->agpdmat) != 0) { - DRM_UNLOCK(); - return -ENOMEM; - } - dev->gtt_total = (uint32_t)(args->gtt_end - args->gtt_start); + /* GEM with user mode setting was never supported on ilk and later. */ + if (INTEL_INFO(dev)->gen >= 5) + return -ENODEV; + DRM_LOCK(); + i915_gem_init_global_gtt(dev, args->gtt_start, + args->gtt_end, args->gtt_end); DRM_UNLOCK(); return 0; @@ -228,7 +211,7 @@ i915_gem_get_aperture_ioctl(struct drm_d DRM_LOCK(); list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) if (obj->pin_count) - pinned += obj->dmamap->dm_segs[0].ds_len; + pinned += obj->gtt_space->size; DRM_UNLOCK(); args->aper_size = dev_priv->mm.gtt_total; @@ -1820,7 +1803,7 @@ i915_gem_object_put_pages(struct drm_i91 if (obj->pages == NULL) return 0; - BUG_ON(obj->dmamap); + BUG_ON(obj->gtt_space); if (obj->pages_pin_count) return -EBUSY; @@ -2615,12 +2598,7 @@ i915_gem_object_unbind(struct drm_i915_g drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret = 0; - DRM_ASSERT_HELD(&obj->base); - /* - * if it's already unbound, or we've already done lastclose, just - * let it happen. XXX does this fail to unwire? - */ - if (obj->dmamap == NULL || dev_priv->agpdmat == NULL) + if (obj->gtt_space == NULL) return 0; if (obj->pin_count) @@ -2645,21 +2623,23 @@ i915_gem_object_unbind(struct drm_i915_g trace_i915_gem_object_unbind(obj); - /* - * unload the map, then unwire the backing object. 
- */ - bus_dmamap_unload(dev_priv->agpdmat, obj->dmamap); + if (obj->has_global_gtt_mapping) + i915_gem_gtt_unbind_object(obj); +#ifdef notyet + if (obj->has_aliasing_ppgtt_mapping) { + i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); + obj->has_aliasing_ppgtt_mapping = 0; + } +#endif + i915_gem_gtt_finish_object(obj); list_del(&obj->mm_list); list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); /* Avoid an unnecessary call to unbind on rebind. */ obj->map_and_fenceable = true; - obj->has_global_gtt_mapping = 0; - - /* XXX persistent dmamap worth the memory? */ - bus_dmamap_destroy(dev_priv->agpdmat, obj->dmamap); - obj->dmamap = NULL; + drm_mm_put_block(obj->gtt_space); + obj->gtt_space = NULL; obj->gtt_offset = 0; /* XXX Until we've hooked up the shrinking functions. */ @@ -2718,7 +2698,7 @@ static void i965_write_fence_reg(struct POSTING_READ(fence_reg); if (obj) { - u32 size = obj->dmamap->dm_segs[0].ds_len; + u32 size = obj->gtt_space->size; uint64_t val; val = (uint64_t)((obj->gtt_offset + size - 4096) & @@ -2747,7 +2727,7 @@ static void i915_write_fence_reg(struct u32 val; if (obj) { - u32 size = obj->dmamap->dm_segs[0].ds_len; + u32 size = obj->gtt_space->size; int pitch_val; int tile_width; @@ -2791,7 +2771,7 @@ static void i830_write_fence_reg(struct uint32_t val; if (obj) { - u32 size = obj->dmamap->dm_segs[0].ds_len; + u32 size = obj->gtt_space->size; uint32_t pitch_val; WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || @@ -2993,7 +2973,35 @@ i915_gem_object_get_fence(struct drm_i91 return 0; } -// i915_gem_valid_gtt_space +static bool i915_gem_valid_gtt_space(struct drm_device *dev, + struct drm_mm_node *gtt_space, + unsigned long cache_level) +{ + struct drm_mm_node *other; + + /* On non-LLC machines we have to be careful when putting differing + * types of snoopable memory together to avoid the prefetcher + * crossing memory domains and dieing. 
+ */ + if (HAS_LLC(dev)) + return true; + + if (gtt_space == NULL) + return true; + + if (list_empty(&gtt_space->node_list)) + return true; + + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); + if (other->allocated && !other->hole_follows && other->color != cache_level) + return false; + + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) + return false; + + return true; +} static void i915_gem_verify_gtt(struct drm_device *dev) { @@ -3010,12 +3018,10 @@ i915_gem_object_bind_to_gtt(struct drm_i { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_mm_node *node; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; int ret; - int flags; - - DRM_ASSERT_HELD(&obj->base); if (obj->madv != I915_MADV_WILLNEED) { DRM_ERROR("Attempting to bind a purgeable object\n"); @@ -3058,68 +3064,66 @@ i915_gem_object_bind_to_gtt(struct drm_i i915_gem_object_pin_pages(obj); - ret = -bus_dmamap_create(dev_priv->agpdmat, size, 1, - size, 0, BUS_DMA_WAITOK, &obj->dmamap); - if (ret) { - DRM_ERROR("Failed to create dmamap\n"); + node = malloc(sizeof(*node), M_DRM, M_NOWAIT | M_ZERO); + if (node == NULL) { i915_gem_object_unpin_pages(obj); /* XXX Until we've hooked up the shrinking functions. 
*/ i915_gem_object_put_pages(obj); - return ret; + return -ENOMEM; } - agp_bus_dma_set_alignment(dev_priv->agpdmat, obj->dmamap, - alignment); search_free: - switch (obj->cache_level) { - case I915_CACHE_NONE: - flags = BUS_DMA_GTT_NOCACHE; - break; - case I915_CACHE_LLC: - flags = BUS_DMA_GTT_CACHE_LLC; - break; - case I915_CACHE_LLC_MLC: - flags = BUS_DMA_GTT_CACHE_LLC_MLC; - break; - default: - BUG(); - } - ret = -bus_dmamap_load_raw(dev_priv->agpdmat, obj->dmamap, obj->pages, - obj->base.size / PAGE_SIZE, obj->base.size, - BUS_DMA_WAITOK | obj->dma_flags | flags); + if (map_and_fenceable) + ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level, + 0, dev_priv->mm.gtt_mappable_end); + else + ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level); if (ret) { - ret = i915_gem_evict_something(dev_priv, obj->base.size); - if (ret) - goto error; - goto search_free; +#if 0 + ret = i915_gem_evict_something(dev, size, alignment, + obj->cache_level, + map_and_fenceable, + nonblocking); +#else + ret = i915_gem_evict_something(dev_priv, size); +#endif + if (ret == 0) + goto search_free; + + i915_gem_object_unpin_pages(obj); + /* XXX Until we've hooked up the shrinking functions. */ + i915_gem_object_put_pages(obj); + free(node, M_DRM); + return ret; + } + if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { + i915_gem_object_unpin_pages(obj); + /* XXX Until we've hooked up the shrinking functions. */ + i915_gem_object_put_pages(obj); + drm_mm_put_block(node); + return -EINVAL; } - /* - * Create a mapping that wraps around once; the second half - * maps to the same set of physical pages as the first half. - * Used to implement fast vertical scrolling in inteldrm(4). - * - * XXX This is an ugly hack that wastes pages and abuses the - * internals of the scatter gather DMA code. 
- */ - if (obj->dma_flags & BUS_DMA_GTT_WRAPAROUND) { - struct sg_page_map *spm = obj->dmamap->_dm_cookie; - int i; - - for (i = spm->spm_pagecnt / 2; i < spm->spm_pagecnt; i++) - spm->spm_map[i].spe_pa = - spm->spm_map[i - spm->spm_pagecnt / 2].spe_pa; - agp_bus_dma_rebind(dev_priv->agpdmat, obj->dmamap, flags); + ret = i915_gem_gtt_prepare_object(obj); + if (ret) { + i915_gem_object_unpin_pages(obj); + /* XXX Until we've hooked up the shrinking functions. */ + i915_gem_object_put_pages(obj); + drm_mm_put_block(node); + return ret; } list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - obj->gtt_offset = obj->dmamap->dm_segs[0].ds_addr - dev->agp->base; + obj->gtt_space = node; + obj->gtt_offset = node->start; fenceable = - obj->dmamap->dm_segs[0].ds_len == fence_size && - (obj->dmamap->dm_segs[0].ds_addr & (fence_alignment - 1)) == 0; + node->size == fence_size && + (node->start & (fence_alignment - 1)) == 0; mappable = obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; @@ -3130,32 +3134,16 @@ i915_gem_object_bind_to_gtt(struct drm_i trace_i915_gem_object_bind(obj, map_and_fenceable); i915_gem_verify_gtt(dev); return 0; - -error: - i915_gem_object_unpin_pages(obj); - /* XXX Until we've hooked up the shrinking functions. */ - i915_gem_object_put_pages(obj); - bus_dmamap_destroy(dev_priv->agpdmat, obj->dmamap); - obj->dmamap = NULL; - obj->gtt_offset = 0; - return ret; } void i915_gem_clflush_object(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. - * - * XXX On OpenBSD we check if we have a DMA mapping instead, - * as the bus_dmamap_sync(9) call below needs one. If we're - * not pinned to the GPU, we don't have a DMA mapping either. 
*/ - if (obj->dmamap == NULL) + if (obj->pages == NULL) return; /* If the GPU is snooping the contents of the CPU cache, @@ -3169,8 +3157,28 @@ i915_gem_clflush_object(struct drm_i915_ if (obj->cache_level != I915_CACHE_NONE) return; - bus_dmamap_sync(dev_priv->agpdmat, obj->dmamap, 0, - obj->base.size, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); +#if 0 + trace_i915_gem_object_clflush(obj); + + drm_clflush_sg(obj->pages); +#else +{ + bus_dma_segment_t *segp; + int page_count = obj->base.size >> PAGE_SHIFT; + int i, n; + + segp = &obj->pages[0]; + n = 0; + for (i = 0; i < page_count; i++) { + pmap_flush_page(segp->ds_addr + n); + n += PAGE_SIZE; + if (n >= segp->ds_len) { + n = 0; + segp++; + } + } +} +#endif } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3235,7 +3243,7 @@ i915_gem_object_set_to_gtt_domain(struct DRM_ASSERT_HELD(&obj->base); /* Not valid to be called on unbound objects. */ - if (obj->dmamap == NULL) + if (obj->gtt_space == NULL) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) @@ -3287,7 +3295,13 @@ int i915_gem_object_set_cache_level(stru return -EBUSY; } - if (obj->dmamap) { + if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + } + + if (obj->gtt_space) { ret = i915_gem_object_finish_gpu(obj); if (ret) return ret; @@ -3305,12 +3319,14 @@ int i915_gem_object_set_cache_level(stru } if (obj->has_global_gtt_mapping) - i915_gem_gtt_rebind_object(obj, cache_level); + i915_gem_gtt_bind_object(obj, cache_level); #ifdef notyet if (obj->has_aliasing_ppgtt_mapping) i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, obj, cache_level); #endif + + obj->gtt_space->color = cache_level; } if (cache_level == I915_CACHE_NONE) { @@ -3583,12 +3599,10 @@ i915_gem_object_pin(struct drm_i915_gem_ { int ret; - DRM_ASSERT_HELD(&obj->base); - if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if (obj->dmamap != NULL) { 
+ if (obj->gtt_space != NULL) { if ((alignment && obj->gtt_offset & (alignment - 1)) || (map_and_fenceable && !obj->map_and_fenceable)) { WARN(obj->pin_count, @@ -3604,7 +3618,7 @@ i915_gem_object_pin(struct drm_i915_gem_ } } - if (obj->dmamap == NULL) { + if (obj->gtt_space == NULL) { #ifdef notyet struct drm_i915_private *dev_priv = obj->base.dev->dev_private; #endif @@ -3618,11 +3632,11 @@ i915_gem_object_pin(struct drm_i915_gem_ #ifdef notyet if (!dev_priv->mm.aliasing_ppgtt) #endif - i915_gem_gtt_rebind_object(obj, obj->cache_level); + i915_gem_gtt_bind_object(obj, obj->cache_level); } if (!obj->has_global_gtt_mapping && map_and_fenceable) - i915_gem_gtt_rebind_object(obj, obj->cache_level); + i915_gem_gtt_bind_object(obj, obj->cache_level); obj->pin_count++; obj->pin_mappable |= map_and_fenceable; @@ -3634,7 +3648,7 @@ void i915_gem_object_unpin(struct drm_i915_gem_object *obj) { BUG_ON(obj->pin_count == 0); - BUG_ON(obj->dmamap == NULL); + BUG_ON(obj->gtt_space == NULL); if (--obj->pin_count == 0) obj->pin_mappable = false; @@ -4109,37 +4123,43 @@ intel_enable_ppgtt(struct drm_device *de int i915_gem_init(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint64_t gtt_start, gtt_end; - struct agp_softc *asc; - int ret; - - DRM_LOCK(); + unsigned long gtt_size, mappable_size; + int ret; - asc = (struct agp_softc *)dev->agp->agpdev; - gtt_start = asc->sc_stolen_entries * 4096; + gtt_size = dev->agp->info.ai_aperture_size; + mappable_size = dev->agp->info.ai_aperture_size; - /* - * putting stuff in the last page of the aperture can cause nasty - * problems with prefetch going into unassigned memory. Since we put - * a scratch page on all unused aperture pages, just leave the last - * page as a spill to prevent gpu hangs. 
- */ - gtt_end = dev->agp->info.ai_aperture_size - 4096; - - if (agp_bus_dma_init(asc, - dev->agp->base + gtt_start, dev->agp->base + gtt_end, - &dev_priv->agpdmat) != 0) { - DRM_UNLOCK(); - return -ENOMEM; + DRM_LOCK(); +#if 0 + if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { + /* PPGTT pdes are stolen from global gtt ptes, so shrink the + * aperture accordingly when using aliasing ppgtt. */ + gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; + + i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); + + ret = i915_gem_init_aliasing_ppgtt(dev); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return ret; + } + } else { +#endif + /* Let GEM Manage all of the aperture. + * + * However, leave one page at the end still bound to the scratch + * page. There are a number of places where the hardware + * apparently prefetches past the end of the object, and we've + * seen multiple hangs with the GPU head pointer stuck in a + * batchbuffer bound at the last page of the aperture. One page + * should be enough to keep any prefetching inside of the + * aperture. 
+ */ + i915_gem_init_global_gtt(dev, 0, mappable_size, + gtt_size); +#if 0 } - - dev->gtt_total = (uint32_t)(gtt_end - gtt_start); - - dev_priv->mm.gtt_start = gtt_start; - dev_priv->mm.gtt_mappable_end = gtt_end; - dev_priv->mm.gtt_end = gtt_end; - dev_priv->mm.gtt_total = gtt_end - gtt_start; +#endif ret = i915_gem_init_hw(dev); DRM_UNLOCK(); Index: drm/i915/i915_gem_context.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_gem_context.c,v retrieving revision 1.4 diff -u -p -r1.4 i915_gem_context.c --- drm/i915/i915_gem_context.c 11 Nov 2013 02:43:20 -0000 1.4 +++ drm/i915/i915_gem_context.c 17 Nov 2013 13:51:23 -0000 @@ -418,7 +418,7 @@ static int do_switch(struct i915_hw_cont } if (!to->obj->has_global_gtt_mapping) - i915_gem_gtt_rebind_object(to->obj, to->obj->cache_level); + i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); if (!to->is_initialized || is_default_context(to)) hw_flags |= MI_RESTORE_INHIBIT; Index: drm/i915/i915_gem_execbuffer.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_gem_execbuffer.c,v retrieving revision 1.17 diff -u -p -r1.17 i915_gem_execbuffer.c --- drm/i915/i915_gem_execbuffer.c 16 Nov 2013 18:24:59 -0000 1.17 +++ drm/i915/i915_gem_execbuffer.c 17 Nov 2013 13:51:23 -0000 @@ -144,7 +144,7 @@ i915_gem_execbuffer_relocate_entry(struc if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { - i915_gem_gtt_rebind_object(target_i915_obj, + i915_gem_gtt_bind_object(target_i915_obj, target_i915_obj->cache_level); } @@ -421,7 +421,7 @@ i915_gem_execbuffer_unreserve_object(str { struct drm_i915_gem_exec_object2 *entry; - if (obj->dmamap == NULL) + if (obj->gtt_space == NULL) return; entry = obj->exec_entry; @@ -493,7 +493,7 @@ i915_gem_execbuffer_reserve(struct intel struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 
bool need_fence, need_mappable; - if (obj->dmamap == NULL) + if (obj->gtt_space == NULL) continue; need_fence = @@ -513,7 +513,7 @@ i915_gem_execbuffer_reserve(struct intel /* Bind fresh objects */ list_for_each_entry(obj, objects, exec_list) { - if (obj->dmamap != NULL) + if (obj->gtt_space != NULL) continue; ret = i915_gem_execbuffer_reserve_object(obj, ring); @@ -1058,7 +1058,7 @@ i915_gem_do_execbuffer(struct drm_device * hsw should have this fixed, but let's be paranoid and do it * unconditionally for now. */ if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) - i915_gem_gtt_rebind_object(batch_obj, batch_obj->cache_level); + i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); if (ret) Index: drm/i915/i915_gem_gtt.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_gem_gtt.c,v retrieving revision 1.4 diff -u -p -r1.4 i915_gem_gtt.c --- drm/i915/i915_gem_gtt.c 5 Oct 2013 07:30:06 -0000 1.4 +++ drm/i915/i915_gem_gtt.c 17 Nov 2013 18:12:28 -0000 @@ -29,9 +29,9 @@ #include "i915_trace.h" #include "intel_drv.h" -#ifdef notyet typedef uint32_t gtt_pte_t; +#ifdef notyet /* PPGTT stuff */ #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) @@ -344,11 +344,13 @@ void i915_gem_init_ppgtt(struct drm_devi I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); } } +#endif static bool do_idling(struct drm_i915_private *dev_priv) { bool ret = dev_priv->mm.interruptible; +#if 0 if (unlikely(dev_priv->mm.gtt->do_idle_maps)) { dev_priv->mm.interruptible = false; if (i915_gpu_idle(dev_priv->dev)) { @@ -357,17 +359,21 @@ static bool do_idling(struct drm_i915_pr udelay(10); } } +#endif return ret; } static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) { +#if 0 if (unlikely(dev_priv->mm.gtt->do_idle_maps)) dev_priv->mm.interruptible = interruptible; +#endif } +#if 0 static void 
i915_ggtt_clear_range(struct drm_device *dev, unsigned first_entry, unsigned num_entries) @@ -393,35 +399,57 @@ static void i915_ggtt_clear_range(struct iowrite32(scratch_pte, &gtt_base[i]); readl(gtt_base); } -#endif /* notyet */ +#else +static void i915_ggtt_clear_range(struct drm_device *dev, + unsigned first_entry, + unsigned num_entries) +{ + struct agp_softc *sc = dev->agp->agpdev; + bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT); + int i; + + for (i = 0; i < num_entries; i++) { + sc->sc_methods->unbind_page(sc->sc_chipc, addr); + addr += PAGE_SIZE; + } + agp_flush_cache(); + sc->sc_methods->flush_tlb(sc->sc_chipc); +} +#endif void i915_gem_restore_gtt_mappings(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; + /* First fill our portion of the GTT with scratch pages */ + i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE, + (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); + list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { i915_gem_clflush_object(obj); - i915_gem_gtt_rebind_object(obj, obj->cache_level); + i915_gem_gtt_bind_object(obj, obj->cache_level); } i915_gem_chipset_flush(dev); } -#ifdef notyet int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) { if (obj->has_dma_mapping) return 0; +#if 0 if (!dma_map_sg(&obj->base.dev->pdev->dev, obj->pages->sgl, obj->pages->nents, PCI_DMA_BIDIRECTIONAL)) return -ENOSPC; +#endif return 0; } +#ifdef notyet /* * Binds an object into the global gtt with the specified cache level. 
The object * will be accessible to the GPU via commands whose operands reference offsets @@ -470,7 +498,9 @@ static void gen6_ggtt_bind_object(struct I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); } +#endif +#if 0 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { @@ -487,6 +517,51 @@ void i915_gem_gtt_bind_object(struct drm obj->has_global_gtt_mapping = 1; } +#else +void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) +{ + struct drm_device *dev = obj->base.dev; + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + 0 : BUS_DMA_COHERENT; + struct agp_softc *sc = dev->agp->agpdev; + bus_dma_segment_t *segp; + bus_addr_t addr = sc->sc_apaddr + obj->gtt_space->start; + int page_count = obj->base.size >> PAGE_SHIFT; + int i, n; + + switch (cache_level) { + case I915_CACHE_NONE: + flags |= BUS_DMA_GTT_NOCACHE; + break; + case I915_CACHE_LLC: + flags |= BUS_DMA_GTT_CACHE_LLC; + break; + case I915_CACHE_LLC_MLC: + flags |= BUS_DMA_GTT_CACHE_LLC_MLC; + break; + default: + BUG(); + } + + segp = &obj->pages[0]; + n = 0; + for (i = 0; i < page_count; i++) { + sc->sc_methods->bind_page(sc->sc_chipc, addr, + segp->ds_addr + n, flags); + n += PAGE_SIZE; + if (n >= segp->ds_len) { + n = 0; + segp++; + } + addr += PAGE_SIZE; + } + agp_flush_cache(); + sc->sc_methods->flush_tlb(sc->sc_chipc); + + obj->has_global_gtt_mapping = 1; +} +#endif void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { @@ -505,10 +580,12 @@ void i915_gem_gtt_finish_object(struct d interruptible = do_idling(dev_priv); +#ifdef notyet if (!obj->has_dma_mapping) dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, PCI_DMA_BIDIRECTIONAL); +#endif undo_idling(dev_priv, interruptible); } @@ -552,6 +629,7 @@ void i915_gem_init_global_gtt(struct drm i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE); } +#ifdef notyet static int 
setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -712,30 +790,3 @@ void i915_gem_gtt_fini(struct drm_device kfree(dev_priv->mm.gtt); } #endif /* notyet */ - -void -i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) -{ - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - int flags = obj->dma_flags; - - switch (cache_level) { - case I915_CACHE_NONE: - flags |= BUS_DMA_GTT_NOCACHE; - break; - case I915_CACHE_LLC: - flags |= BUS_DMA_GTT_CACHE_LLC; - break; - case I915_CACHE_LLC_MLC: - flags |= BUS_DMA_GTT_CACHE_LLC_MLC; - break; - default: - BUG(); - } - - agp_bus_dma_rebind(dev_priv->agpdmat, obj->dmamap, flags); - - obj->has_global_gtt_mapping = 1; -} Index: drm/i915/i915_gem_tiling.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/i915_gem_tiling.c,v retrieving revision 1.8 diff -u -p -r1.8 i915_gem_tiling.c --- drm/i915/i915_gem_tiling.c 29 Oct 2013 06:30:57 -0000 1.8 +++ drm/i915/i915_gem_tiling.c 10 Nov 2013 14:11:57 -0000 @@ -304,7 +304,7 @@ i915_gem_object_fence_ok(struct drm_i915 while (size < obj->base.size) size <<= 1; - if (obj->dmamap->dm_segs[0].ds_len != size) + if (obj->gtt_space->size != size) return false; if (obj->gtt_offset & (size - 1)) @@ -388,7 +388,7 @@ i915_gem_set_tiling(struct drm_device *d */ obj->map_and_fenceable = - obj->dmamap == NULL || + obj->gtt_space == NULL || (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); Index: drm/i915/intel_fb.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/intel_fb.c,v retrieving revision 1.6 diff -u -p -r1.6 intel_fb.c --- drm/i915/intel_fb.c 13 Aug 2013 10:23:51 -0000 1.6 +++ drm/i915/intel_fb.c 10 Nov 2013 14:38:14 -0000 @@ -59,14 +59,13 @@ static int 
intelfb_create(struct intel_f sizes->surface_depth); size = mode_cmd.pitches[0] * mode_cmd.height; - size = roundup2(size, PAGE_SIZE) * 2; + size = roundup2(size, PAGE_SIZE); obj = i915_gem_alloc_object(dev, size); if (!obj) { DRM_ERROR("failed to allocate framebuffer\n"); ret = -ENOMEM; goto out; } - obj->dma_flags |= BUS_DMA_GTT_WRAPAROUND; DRM_LOCK(); Index: drm/i915/intel_ringbuffer.c =================================================================== RCS file: /home/cvs/src/sys/dev/pci/drm/i915/intel_ringbuffer.c,v retrieving revision 1.6 diff -u -p -r1.6 intel_ringbuffer.c --- drm/i915/intel_ringbuffer.c 17 Nov 2013 18:47:13 -0000 1.6 +++ drm/i915/intel_ringbuffer.c 17 Nov 2013 19:08:39 -0000 @@ -467,13 +467,6 @@ init_pipe_control(struct intel_ring_buff i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - /* - * snooped gtt mapping please . - * Normally this flag is only to dmamem_map, but it's been overloaded - * for the agp mapping - */ - obj->dma_flags = BUS_DMA_COHERENT | BUS_DMA_READ; - ret = i915_gem_object_pin(obj, 4096, true, false); if (ret) goto err_unref; @@ -1106,13 +1099,6 @@ static int init_status_page(struct intel i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - /* - * snooped gtt mapping please . 
- * Normally this flag is only to dmamem_map, but it's been overloaded - * for the agp mapping - */ - obj->dma_flags = BUS_DMA_COHERENT | BUS_DMA_READ; - ret = i915_gem_object_pin(obj, 4096, true, false); if (ret != 0) { goto err_unref; @@ -1174,25 +1160,9 @@ static int init_phys_hws_pga(struct inte u32 intel_read_status_page(struct intel_ring_buffer *ring, int reg) { - struct inteldrm_softc *dev_priv = ring->dev->dev_private; - struct drm_device *dev = ring->dev; - struct drm_i915_gem_object *obj_priv; - bus_dma_tag_t tag; - bus_dmamap_t map; u32 val; - if (I915_NEED_GFX_HWS(dev)) { - obj_priv = ring->status_page.obj; - map = obj_priv->dmamap; - tag = dev_priv->agpdmat; - } else { - map = dev_priv->status_page_dmah->map; - tag = dev->dmat; - } - /* Ensure that the compiler doesn't optimize away the load. */ - bus_dmamap_sync(tag, map, 0, PAGE_SIZE, BUS_DMASYNC_POSTREAD); val = ((volatile u_int32_t *)(ring->status_page.page_addr))[reg]; - bus_dmamap_sync(tag, map, 0, PAGE_SIZE, BUS_DMASYNC_PREREAD); return (val); }