Re: [Intel-gfx] [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm

2021-05-24 Thread Thomas Hellström
On Mon, 2021-05-24 at 17:45 +0100, Matthew Auld wrote:
> On Fri, 21 May 2021 at 16:33, Thomas Hellström wrote:
> > 
> > Memcpy from wc will be used as well by TTM memcpy.
> > Move it to core drm, and make the interface do the right thing
> > even on !X86.
> > 
> > Cc: Christian König 
> > Cc: Daniel Vetter 
> > Cc: Dave Airlie 
> > Signed-off-by: Thomas Hellström 
> > ---
> 
> 
> 
> > +
> > +#ifdef CONFIG_X86
> > +bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len);
> > +bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
> > +   const struct dma_buf_map *src,
> > +   unsigned long len);
> > +void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
> > +
> > +/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
> > + * as well as SSE4.1 support. drm_memcpy_from_wc() will report if it cannot
> > + * perform the operation. To check beforehand, pass in the parameters to
> > + * drm_can_memcpy_from_wc() - since we only care about the low 4 bits,
> > + * you only need to pass in the minor offsets, page-aligned pointers are
> > + * always valid.
> > + *
> > + * For just checking for SSE4.1, in the foreknowledge that the future use
> > + * will be correctly aligned, just use drm_has_memcpy_from_wc().
> > + */
> > +#define drm_can_memcpy_from_wc(dst, src, len) \
> > +   drm_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
> > +
> > +#define drm_has_memcpy_from_wc() \
> > +   drm_memcpy_from_wc(NULL, NULL, 0)
> > +
> > +void drm_memcpy_init_early(void);
> > +
> > +#else
> > +
> > +#define drm_memcpy_from_wc(_dst, _src, _len) (false)
> > +#define drm_memcpy_from_wc_dbm(_dst, _src, _len) (false)
> > +#define drm_can_memcpy_from_wc(_dst, _src, _len) (false)
> > +#define drm_has_memcpy_from_wc() (false)
> 
> Does the compiler not complain for these on !x86, if called without
> checking the result of the statement? Maybe just make these function
> stubs?
> 
> Otherwise,
> Reviewed-by: Matthew Auld 

Hmm, you're right. I'll fix. Thanks for reviewing!

/Thomas


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm

2021-05-24 Thread Matthew Auld
On Fri, 21 May 2021 at 16:33, Thomas Hellström wrote:
>
> Memcpy from wc will be used as well by TTM memcpy.
> Move it to core drm, and make the interface do the right thing
> even on !X86.
>
> Cc: Christian König 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Signed-off-by: Thomas Hellström 
> ---



> +
> +#ifdef CONFIG_X86
> +bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len);
> +bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
> +   const struct dma_buf_map *src,
> +   unsigned long len);
> +void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
> +
> +/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
> + * as well as SSE4.1 support. drm_memcpy_from_wc() will report if it cannot
> + * perform the operation. To check beforehand, pass in the parameters to
> + * drm_can_memcpy_from_wc() - since we only care about the low 4 bits,
> + * you only need to pass in the minor offsets, page-aligned pointers are
> + * always valid.
> + *
> + * For just checking for SSE4.1, in the foreknowledge that the future use
> + * will be correctly aligned, just use drm_has_memcpy_from_wc().
> + */
> +#define drm_can_memcpy_from_wc(dst, src, len) \
> +   drm_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
> +
> +#define drm_has_memcpy_from_wc() \
> +   drm_memcpy_from_wc(NULL, NULL, 0)
> +
> +void drm_memcpy_init_early(void);
> +
> +#else
> +
> +#define drm_memcpy_from_wc(_dst, _src, _len) (false)
> +#define drm_memcpy_from_wc_dbm(_dst, _src, _len) (false)
> +#define drm_can_memcpy_from_wc(_dst, _src, _len) (false)
> +#define drm_has_memcpy_from_wc() (false)

Does the compiler not complain for these on !x86, if called without
checking the result of the statement? Maybe just make these function
stubs?

Otherwise,
Reviewed-by: Matthew Auld 

> +#define drm_unaligned_memcpy_from_wc(_dst, _src, _len) WARN_ON(1)
> +#define drm_memcpy_init_early() do {} while (0)
> +#endif /* CONFIG_X86 */
> +#endif /* __DRM_MEMCPY_H__ */
> --
> 2.31.1
>
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm

2021-05-21 Thread Thomas Hellström
Memcpy from wc will be used as well by TTM memcpy.
Move it to core drm, and make the interface do the right thing
even on !X86.

Cc: Christian König 
Cc: Daniel Vetter 
Cc: Dave Airlie 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/Makefile  |  2 +-
 drivers/gpu/drm/drm_drv.c |  2 +
 .../drm/{i915/i915_memcpy.c => drm_memcpy.c}  | 63 ++-
 drivers/gpu/drm/i915/Makefile |  1 -
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  5 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  7 ++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c| 11 ++--
 drivers/gpu/drm/i915/i915_cmd_parser.c|  4 +-
 drivers/gpu/drm/i915/i915_drv.c   |  2 -
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +--
 drivers/gpu/drm/i915/i915_memcpy.h| 34 --
 .../drm/i915/selftests/intel_memory_region.c  |  7 ++-
 include/drm/drm_memcpy.h  | 47 ++
 14 files changed, 121 insertions(+), 76 deletions(-)
 rename drivers/gpu/drm/{i915/i915_memcpy.c => drm_memcpy.c} (70%)
 delete mode 100644 drivers/gpu/drm/i915/i915_memcpy.h
 create mode 100644 include/drm/drm_memcpy.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a91cc7684904..f3ab8586c3d7 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -18,7 +18,7 @@ drm-y   :=drm_aperture.o drm_auth.o drm_cache.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
-   drm_managed.o drm_vblank_work.o
+   drm_managed.o drm_vblank_work.o drm_memcpy.o \
 
 drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \
drm_legacy_misc.o drm_lock.o drm_memory.o drm_scatter.o \
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3d8d68a98b95..351cc2900cf1 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include <drm/drm_memcpy.h>
 #include 
 #include 
 
@@ -1041,6 +1042,7 @@ static int __init drm_core_init(void)
 
drm_connector_ida_init();
idr_init(&drm_minors_idr);
+   drm_memcpy_init_early();
 
ret = drm_sysfs_init();
if (ret < 0) {
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/drm_memcpy.c
similarity index 70%
rename from drivers/gpu/drm/i915/i915_memcpy.c
rename to drivers/gpu/drm/drm_memcpy.c
index 1b021a4902de..740377749caa 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/drm_memcpy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2016 Intel Corporation
  *
@@ -22,16 +23,12 @@
  *
  */
 
+#ifdef CONFIG_X86
+#include 
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
 
-#include "i915_memcpy.h"
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-#define CI_BUG_ON(expr) BUG_ON(expr)
-#else
-#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
-#endif
+#include "drm/drm_memcpy.h"
 
 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 
@@ -94,23 +91,24 @@ static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
 }
 
 /**
- * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * drm_memcpy_from_wc: perform an accelerated *aligned* read from WC
  * @dst: destination pointer
  * @src: source pointer
  * @len: how many bytes to copy
  *
- * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * drm_memcpy_from_wc copies @len bytes from @src to @dst using
  * non-temporal instructions where available. Note that all arguments
  * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  * of 16.
  *
  * To test whether accelerated reads from WC are supported, use
- * i915_memcpy_from_wc(NULL, NULL, 0);
+ * drm_memcpy_from_wc(NULL, NULL, 0);
+ * This interface is intended for memremapped memory without the __iomem tag.
  *
  * Returns true if the copy was successful, false if the preconditions
  * are not met.
  */
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
return false;
@@ -123,24 +121,53 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 
return false;
 }
+EXPORT_SYMBOL(drm_memcpy_from_wc);
 
 /**
- * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
+ * drm_memcpy_from_wc_dbm: perform an accelerated *aligned* read from WC with
+ * struct dma_buf_map arguments.
+ * @dst: destination map
+ * @src: source map
+ * @len: how many bytes to copy
+ *
+ * This is identical to drm_memcpy_from_wc, except it's intended for
+ * potentially ioremapped memory rather than memremapped memory.
+ *
+ * Returns