The movntdqa instruction requires 16-byte alignment for the source pointer.
Avoid falling back to clflush if the source pointer is misaligned by doing
a small uncached memcpy to fix up the alignment.

Signed-off-by: Chris Wilson <[email protected]>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 +++++++++++++++++---------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 6cf4e336461b..2977316d64ae 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1132,8 +1132,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
        unsigned int src_needs_clflush;
        unsigned int dst_needs_clflush;
-       void *dst, *src;
-       int ret;
+       void *dst, *src, *ptr;
+       int ret, len;
 
        ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
        if (ret)
@@ -1150,19 +1150,30 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                return ERR_PTR(ret);
        }
 
+       ptr = dst;
        src = ERR_PTR(-ENODEV);
-       if (src_needs_clflush &&
-           i915_can_memcpy_from_wc(NULL, offset, 0)) {
+       if (src_needs_clflush && i915_has_memcpy_from_wc()) {
                src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
                if (!IS_ERR(src)) {
-                       i915_memcpy_from_wc(dst,
-                                           src + offset,
-                                           ALIGN(length, 16));
+                       src += offset;
+
+                       if (!IS_ALIGNED(offset, 16)) {
+                               len = min(ALIGN(offset, 16) - offset, length);
+
+                               memcpy(ptr, src, len);
+
+                               offset += len;
+                               length -= len;
+                               ptr += len;
+                               src += len;
+                       }
+                       GEM_BUG_ON(!IS_ALIGNED((unsigned long)src, 16));
+
+                       i915_memcpy_from_wc(ptr, src, ALIGN(length, 16));
                        i915_gem_object_unpin_map(src_obj);
                }
        }
        if (IS_ERR(src)) {
-               void *ptr;
                int x, n;
 
                /*
@@ -1177,10 +1188,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                        length = round_up(length,
                                          boot_cpu_data.x86_clflush_size);
 
-               ptr = dst;
                x = offset_in_page(offset);
                for (n = offset >> PAGE_SHIFT; length; n++) {
-                       int len = min_t(int, length, PAGE_SIZE - x);
+                       len = min_t(int, length, PAGE_SIZE - x);
 
                        src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
                        if (src_needs_clflush)
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to