Re: [PATCH v2 1/2] dma-mapping: add benchmark support for streaming DMA APIs

2020-11-01 Thread kernel test robot
Hi Barry,

I love your patch! Yet something to improve:

[auto build test ERROR on kselftest/next]
[also build test ERROR on linus/master v5.10-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Barry-Song/dma-mapping-provide-a-benchmark-for-streaming-DMA-mapping/20201101-182009
base:   
https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
config: h8300-allyesconfig (attached as .config)
compiler: h8300-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/b9abda38be7f32b9420c27b6c24eff2e69defa87
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Barry-Song/dma-mapping-provide-a-benchmark-for-streaming-DMA-mapping/20201101-182009
git checkout b9abda38be7f32b9420c27b6c24eff2e69defa87
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=h8300 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   h8300-linux-ld: kernel/dma/map_benchmark.o: in function `.L28':
   map_benchmark.c:(.text+0x283): undefined reference to `__udivdi3'
>> h8300-linux-ld: map_benchmark.c:(.text+0x2c1): undefined reference to 
>> `__udivdi3'
   h8300-linux-ld: map_benchmark.c:(.text+0x327): undefined reference to 
`__udivdi3'
   h8300-linux-ld: kernel/dma/map_benchmark.o: in function `.L26':
   map_benchmark.c:(.text+0x3d7): undefined reference to `__udivdi3'
   h8300-linux-ld: kernel/dma/map_benchmark.o: in function `.L44':
   map_benchmark.c:(.text+0x799): undefined reference to `__divdi3'
   h8300-linux-ld: kernel/dma/map_benchmark.o: in function `.L45':
   map_benchmark.c:(.text+0x7f5): undefined reference to `__divdi3'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 1/2] dma-mapping: add benchmark support for streaming DMA APIs

2020-11-01 Thread kernel test robot
Hi Barry,

I love your patch! Yet something to improve:

[auto build test ERROR on kselftest/next]
[also build test ERROR on linus/master v5.10-rc1 next-20201030]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Barry-Song/dma-mapping-provide-a-benchmark-for-streaming-DMA-mapping/20201101-182009
base:   
https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/b9abda38be7f32b9420c27b6c24eff2e69defa87
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Barry-Song/dma-mapping-provide-a-benchmark-for-streaming-DMA-mapping/20201101-182009
git checkout b9abda38be7f32b9420c27b6c24eff2e69defa87
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   arch/mips/kernel/head.o: in function `dtb_found':
   (.ref.text+0xe0): relocation truncated to fit: R_MIPS_26 against 
`start_kernel'
   init/main.o: in function `set_reset_devices':
   main.c:(.init.text+0x20): relocation truncated to fit: R_MIPS_26 against 
`_mcount'
   main.c:(.init.text+0x30): relocation truncated to fit: R_MIPS_26 against 
`__sanitizer_cov_trace_pc'
   init/main.o: in function `debug_kernel':
   main.c:(.init.text+0x9c): relocation truncated to fit: R_MIPS_26 against 
`_mcount'
   main.c:(.init.text+0xac): relocation truncated to fit: R_MIPS_26 against 
`__sanitizer_cov_trace_pc'
   init/main.o: in function `quiet_kernel':
   main.c:(.init.text+0x118): relocation truncated to fit: R_MIPS_26 against 
`_mcount'
   main.c:(.init.text+0x128): relocation truncated to fit: R_MIPS_26 against 
`__sanitizer_cov_trace_pc'
   init/main.o: in function `init_setup':
   main.c:(.init.text+0x1a4): relocation truncated to fit: R_MIPS_26 against 
`_mcount'
   main.c:(.init.text+0x1c8): relocation truncated to fit: R_MIPS_26 against 
`__sanitizer_cov_trace_pc'
   main.c:(.init.text+0x1e8): relocation truncated to fit: R_MIPS_26 against 
`__sanitizer_cov_trace_pc'
   main.c:(.init.text+0x1fc): additional relocation overflows omitted from the 
output
   mips-linux-ld: kernel/dma/map_benchmark.o: in function 
`map_benchmark_thread':
>> map_benchmark.c:(.text.map_benchmark_thread+0x1f4): undefined reference to 
>> `__divdi3'
>> mips-linux-ld: map_benchmark.c:(.text.map_benchmark_thread+0x218): undefined 
>> reference to `__divdi3'
   mips-linux-ld: kernel/dma/map_benchmark.o: in function `do_map_benchmark':
>> map_benchmark.c:(.text.do_map_benchmark+0x260): undefined reference to 
>> `__udivdi3'
>> mips-linux-ld: map_benchmark.c:(.text.do_map_benchmark+0x284): undefined 
>> reference to `__udivdi3'
   mips-linux-ld: map_benchmark.c:(.text.do_map_benchmark+0x2b4): undefined 
reference to `__udivdi3'
   mips-linux-ld: map_benchmark.c:(.text.do_map_benchmark+0x300): undefined 
reference to `__udivdi3'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH v2 1/2] dma-mapping: add benchmark support for streaming DMA APIs

2020-11-01 Thread Barry Song
Nowadays, there are increasing requirements to benchmark the performance
of dma_map and dma_unmap, particularly while the device is attached to an
IOMMU.

This patch enables the support. Users can run a specified number of threads
to do dma_map_page and dma_unmap_page on a specific NUMA node for a
specified duration. Then dma_map_benchmark will calculate the average
latency for map and unmap.

A difficulty for this benchmark is that dma_map/unmap APIs must run on
a particular device. Each device might have a different backend, either
IOMMU or non-IOMMU.

So we use the driver_override to bind dma_map_benchmark to a particular
device by:
For platform devices:
echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override
echo xxx > /sys/bus/platform/drivers/xxx/unbind
echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind

For PCI devices:
echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override
echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind
echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind

Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Shuah Khan 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Signed-off-by: Barry Song 
---
-v2:
  * add PCI support; v1 supported platform devices only
  * replace ssleep by msleep_interruptible() to permit users to exit
benchmark before it is completed
  * many changes according to Robin's suggestions, thanks! Robin
- add standard deviation output to reflect the worst case
- check users' parameters strictly like the number of threads
- make cache dirty before dma_map
- fix unpaired dma_map_page and dma_unmap_single;
- remove redundant "long long" before ktime_to_ns();
- use devm_add_action();
- wakeup all threads together after they are ready

 kernel/dma/Kconfig |   8 +
 kernel/dma/Makefile|   1 +
 kernel/dma/map_benchmark.c | 295 +
 3 files changed, 304 insertions(+)
 create mode 100644 kernel/dma/map_benchmark.c

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c99de4a21458..949c53da5991 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -225,3 +225,11 @@ config DMA_API_DEBUG_SG
  is technically out-of-spec.
 
  If unsure, say N.
+
+config DMA_MAP_BENCHMARK
+   bool "Enable benchmarking of streaming DMA mapping"
+   help
+ Provides /sys/kernel/debug/dma_map_benchmark that helps with testing
+ performance of dma_(un)map_page.
+
+ See tools/testing/selftests/dma/dma_map_benchmark.c
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index dc755ab68aab..7aa6b26b1348 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_DMA_API_DEBUG)   += debug.o
 obj-$(CONFIG_SWIOTLB)  += swiotlb.o
 obj-$(CONFIG_DMA_COHERENT_POOL)+= pool.o
 obj-$(CONFIG_DMA_REMAP)+= remap.o
+obj-$(CONFIG_DMA_MAP_BENCHMARK)+= map_benchmark.o
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
new file mode 100644
index ..ac397758087b
--- /dev/null
+++ b/kernel/dma/map_benchmark.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#define pr_fmt(fmt)KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DMA_MAP_BENCHMARK  _IOWR('d', 1, struct map_benchmark)
+#define DMA_MAP_MAX_THREADS1024
+#define DMA_MAP_MAX_SECONDS300
+
+struct map_benchmark {
+   __u64 avg_map_100ns; /* average map latency in 100ns */
+   __u64 map_stddev; /* standard deviation of map latency */
+   __u64 avg_unmap_100ns; /* as above */
+   __u64 unmap_stddev;
+   __u32 threads; /* how many threads will do map/unmap in parallel */
+   __u32 seconds; /* how long the test will last */
+   int node; /* which numa node this benchmark will run on */
+   __u64 expansion[10];/* For future use */
+};
+
+struct map_benchmark_data {
+   struct map_benchmark bparam;
+   struct device *dev;
+   struct dentry  *debugfs;
+   atomic64_t sum_map_100ns;
+   atomic64_t sum_unmap_100ns;
+   atomic64_t sum_square_map;
+   atomic64_t sum_square_unmap;
+   atomic64_t loops;
+};
+
+static int map_benchmark_thread(void *data)
+{
+   void *buf;
+   dma_addr_t dma_addr;
+   struct map_benchmark_data *map = data;
+   int ret = 0;
+
+   buf = (void *)__get_free_page(GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   while (!kthread_should_stop())  {
+   __u64 map_100ns, unmap_100ns, map_square, unmap_square;
+   ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
+
+   /*
+* for a non-coherent device, if we don't stain them in the 
cache,
+* this will give an underes