[PATCH v6 3/8] test_firmware: add partial read support for request_firmware_into_buf

2020-06-05 Thread Scott Branden
Add additional hooks to test_firmware to support partial file reads
using request_firmware_into_buf (see the usage sketch below):
buf_size: size of buffer to request firmware into
partial: indicates that a partial file request is being made
file_offset: offset into the file at which to start the request
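
For illustration, the new knobs are expected to combine from userspace
roughly as follows (a sketch against the test_firmware sysfs interface;
the path matches the one used by the selftests):

  DIR=/sys/devices/virtual/misc/test_firmware
  echo -n "test-firmware.bin" >$DIR/config_name
  echo 1 >$DIR/config_into_buf     # use request_firmware_into_buf()
  echo 1 >$DIR/config_partial      # request a partial read
  echo 6 >$DIR/config_buf_size     # buffer (and read) size in bytes
  echo 1 >$DIR/config_file_offset  # start reading at this file offset
  echo 1 >$DIR/trigger_batched_requests
  cat $DIR/read_firmware           # bytes 1..6 of the firmware file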

Signed-off-by: Scott Branden 
---
 lib/test_firmware.c | 146 +---
 1 file changed, 136 insertions(+), 10 deletions(-)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index af747660fe29..1a79611cae78 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -50,6 +50,9 @@ struct test_batched_req {
  * @name: the name of the firmware file to look for
  * @into_buf: when the into_buf is used if this is true
  * request_firmware_into_buf() will be used instead.
+ * @buf_size: size of buf to allocate when into_buf is true
+ * @file_offset: file offset to request when calling request_firmware_into_buf
+ * @partial: partial read opt when calling request_firmware_into_buf
  * @sync_direct: when the sync trigger is used if this is true
  * request_firmware_direct() will be used instead.
  * @send_uevent: whether or not to send a uevent for async requests
@@ -89,6 +92,9 @@ struct test_batched_req {
 struct test_config {
char *name;
bool into_buf;
+   size_t buf_size;
+   size_t file_offset;
+   bool partial;
bool sync_direct;
bool send_uevent;
u8 num_requests;
@@ -183,6 +189,9 @@ static int __test_firmware_config_init(void)
test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS;
test_fw_config->send_uevent = true;
test_fw_config->into_buf = false;
+   test_fw_config->buf_size = TEST_FIRMWARE_BUF_SIZE;
+   test_fw_config->file_offset = 0;
+   test_fw_config->partial = false;
test_fw_config->sync_direct = false;
test_fw_config->req_firmware = request_firmware;
test_fw_config->test_result = 0;
@@ -236,28 +245,35 @@ static ssize_t config_show(struct device *dev,
dev_name(dev));
 
if (test_fw_config->name)
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"name:\t%s\n",
test_fw_config->name);
else
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"name:\tEMTPY\n");
 
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"num_requests:\t%u\n", test_fw_config->num_requests);
 
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"send_uevent:\t\t%s\n",
test_fw_config->send_uevent ?
"FW_ACTION_HOTPLUG" :
"FW_ACTION_NOHOTPLUG");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"into_buf:\t\t%s\n",
test_fw_config->into_buf ? "true" : "false");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "buf_size:\t%zu\n", test_fw_config->buf_size);
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "file_offset:\t%zu\n", test_fw_config->file_offset);
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "partial:\t\t%s\n",
+   test_fw_config->partial ? "true" : "false");
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"sync_direct:\t\t%s\n",
test_fw_config->sync_direct ? "true" : "false");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
 
mutex_unlock(&test_fw_mutex);
@@ -315,6 +331,30 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static int test_dev_config_update_size_t(const char *buf,
+size_t size,
+size_t *cfg)
+{
+   int ret;
+   long new;
+
+   ret = kstrtol(buf, 10, &new);
+   if (ret)
+   return ret;
+
+   mutex_lock(&test_fw_mutex);
+   *(size_t *)cfg = new;
+   mutex_unlock(&test_fw_mutex);
+
+   /* Always return full write size even if we didn't consume all */
+   return size;
+}
+
+static ssize_t test_dev_config_show_size_t(char *buf, size_t val)
+{
+   return snprintf(buf, PAGE_SIZE, "%zu\n", val);
+}
+
 static ssize_t test_dev_config_show_int(char *buf, int val)
 {
return snprintf(buf, PAGE_SIZE, "%d\n", val);
@@ -400,6 +440,83 @@ static ssize_t 

[PATCH v6 6/8] misc: bcm-vk: add Broadcom VK driver

2020-06-05 Thread Scott Branden
Add Broadcom VK offload engine driver.
This driver interfaces to the VK PCIe offload engine to perform
such offload functions as video transcoding on multiple streams
in parallel.  The VK device is booted from files loaded using the
request_firmware_into_buf mechanism.  After boot, the card status is
updated and messages can then be sent to the card.
Such messages contain scatter-gather lists of addresses
to pull data from the host to perform operations on.

Signed-off-by: Scott Branden 
Signed-off-by: Desmond Yan 
Signed-off-by: James Hu 
---
 drivers/misc/Kconfig |1 +
 drivers/misc/Makefile|1 +
 drivers/misc/bcm-vk/Kconfig  |   29 +
 drivers/misc/bcm-vk/Makefile |   11 +
 drivers/misc/bcm-vk/bcm_vk.h |  408 +
 drivers/misc/bcm-vk/bcm_vk_dev.c | 1312 +++
 drivers/misc/bcm-vk/bcm_vk_msg.c | 1438 ++
 drivers/misc/bcm-vk/bcm_vk_msg.h |  201 +
 drivers/misc/bcm-vk/bcm_vk_sg.c  |  271 ++
 drivers/misc/bcm-vk/bcm_vk_sg.h  |   60 ++
 drivers/misc/bcm-vk/bcm_vk_tty.c |  352 
 11 files changed, 4084 insertions(+)
 create mode 100644 drivers/misc/bcm-vk/Kconfig
 create mode 100644 drivers/misc/bcm-vk/Makefile
 create mode 100644 drivers/misc/bcm-vk/bcm_vk.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_dev.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_tty.c

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index edd5dd5ebfdc..986fea8ea0f3 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -469,6 +469,7 @@ source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
 source "drivers/misc/ocxl/Kconfig"
+source "drivers/misc/bcm-vk/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c7bd01ac6291..766837e4b961 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_ECHO)+= echo/
 obj-$(CONFIG_CXL_BASE) += cxl/
 obj-$(CONFIG_PCI_ENDPOINT_TEST)+= pci_endpoint_test.o
 obj-$(CONFIG_OCXL) += ocxl/
+obj-$(CONFIG_BCM_VK)   += bcm-vk/
 obj-y  += cardreader/
 obj-$(CONFIG_PVPANIC)  += pvpanic.o
 obj-$(CONFIG_HABANA_AI)+= habanalabs/
diff --git a/drivers/misc/bcm-vk/Kconfig b/drivers/misc/bcm-vk/Kconfig
new file mode 100644
index ..a3a020b19e3b
--- /dev/null
+++ b/drivers/misc/bcm-vk/Kconfig
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Broadcom VK device
+#
+config BCM_VK
+   tristate "Support for Broadcom VK Accelerators"
+   depends on PCI_MSI
+   help
+ Select this option to enable support for Broadcom
+ VK Accelerators.  VK is used for performing
+ specific offload processing.
+ This driver enables userspace programs to access these
+ accelerators via /dev/bcm-vk.N devices.
+
+ If unsure, say N.
+
+if BCM_VK
+
+config BCM_VK_QSTATS
+   bool "VK Queue Statistics"
+   help
+ Turn on to enable Queue Statistics.
+ These are useful for debugging purposes.
+ Some performance loss by enabling this debug config.
+ For properly operating PCIe hardware no need to enable this.
+
+ If unsure, say N.
+
+endif
diff --git a/drivers/misc/bcm-vk/Makefile b/drivers/misc/bcm-vk/Makefile
new file mode 100644
index ..05cb213ee826
--- /dev/null
+++ b/drivers/misc/bcm-vk/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Broadcom VK driver
+#
+
+obj-$(CONFIG_BCM_VK) += bcm_vk.o
+bcm_vk-objs := \
+   bcm_vk_dev.o \
+   bcm_vk_msg.o \
+   bcm_vk_sg.o \
+   bcm_vk_tty.o
diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h
new file mode 100644
index ..1a241074296d
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk.h
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_H
+#define BCM_VK_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bcm_vk_msg.h"
+
+#define DRV_MODULE_NAME"bcm-vk"
+
+/*
+ * Load Image is completed in two stages:
+ *
+ * 1) When the VK device boots up, the M7 CPU runs and executes the BootROM.
+ * The Secure Boot Loader (SBL) as part of the BootROM will run
+ * to open up ITCM for host to push BOOT1 image.
+ * SBL will authenticate the image before jumping to BOOT1 image.
+ *
+ * 2) Because BOOT1 image is a secured image, we also call it the
+ * Secure Boot Image (SBI). At second stage, SBI 

[PATCH v6 1/8] fs: introduce kernel_pread_file* support

2020-06-05 Thread Scott Branden
Add kernel_pread_file* support to the kernel to allow partial reads
of files at an offset into the file.  The existing kernel_read_file
functions call the new kernel_pread_file functions with offset=0 and
opt=KERNEL_PREAD_WHOLE.  A caller sketch follows below.
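
As a caller sketch (hypothetical path and values; the signatures and the
KERNEL_PREAD_PART/KERNEL_PREAD_WHOLE options are the ones introduced by
this patch):

	void *buf = NULL;
	loff_t size = 0;
	int ret;

	/* Read up to 4096 bytes starting at offset 512 of the file. */
	ret = kernel_pread_file_from_path("/lib/firmware/example.bin",
					  &buf, &size, 512, 4096,
					  KERNEL_PREAD_PART,
					  READING_FIRMWARE);
	if (!ret) {
		/* ... use the data ... */
		vfree(buf);	/* the helper vmalloc'ed the buffer */
	}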

Signed-off-by: Scott Branden 
---
 fs/exec.c  | 95 --
 include/linux/fs.h | 29 ++
 2 files changed, 103 insertions(+), 21 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index de90a66587ab..e5c241c07b75 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -925,10 +925,15 @@ struct file *open_exec(const char *name)
 }
 EXPORT_SYMBOL(open_exec);
 
-int kernel_read_file(struct file *file, void **buf, loff_t *size,
-loff_t max_size, enum kernel_read_file_id id)
-{
-   loff_t i_size, pos;
+int kernel_pread_file(struct file *file, void **buf, loff_t *size,
+ loff_t pos, loff_t max_size,
+ enum kernel_pread_opt opt,
+ enum kernel_read_file_id id)
+{
+   loff_t alloc_size;
+   loff_t buf_pos;
+   loff_t read_end;
+   loff_t i_size;
ssize_t bytes = 0;
int ret;
 
@@ -948,21 +953,31 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
ret = -EINVAL;
goto out;
}
-   if (i_size > SIZE_MAX || (max_size > 0 && i_size > max_size)) {
+
+   /* Default read to end of file */
+   read_end = i_size;
+
+   /* Allow reading partial portion of file */
+   if ((opt == KERNEL_PREAD_PART) &&
+   (i_size > (pos + max_size)))
+   read_end = pos + max_size;
+
+   alloc_size = read_end - pos;
+   if (i_size > SIZE_MAX || (max_size > 0 && alloc_size > max_size)) {
ret = -EFBIG;
goto out;
}
 
if (id != READING_FIRMWARE_PREALLOC_BUFFER)
-   *buf = vmalloc(i_size);
+   *buf = vmalloc(alloc_size);
if (!*buf) {
ret = -ENOMEM;
goto out;
}
 
-   pos = 0;
-   while (pos < i_size) {
-   bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
+   buf_pos = 0;
+   while (pos < read_end) {
+   bytes = kernel_read(file, *buf + buf_pos, read_end - pos, &pos);
if (bytes < 0) {
ret = bytes;
goto out_free;
@@ -970,14 +985,16 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 
if (bytes == 0)
break;
+
+   buf_pos += bytes;
}
 
-   if (pos != i_size) {
+   if (pos != read_end) {
ret = -EIO;
goto out_free;
}
 
-   ret = security_kernel_post_read_file(file, *buf, i_size, id);
+   ret = security_kernel_post_read_file(file, *buf, alloc_size, id);
if (!ret)
*size = pos;
 
@@ -993,10 +1010,20 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
allow_write_access(file);
return ret;
 }
+
+int kernel_read_file(struct file *file, void **buf, loff_t *size,
+loff_t max_size, enum kernel_read_file_id id)
+{
+   return kernel_pread_file(file, buf, size, 0, max_size,
+KERNEL_PREAD_WHOLE, id);
+}
 EXPORT_SYMBOL_GPL(kernel_read_file);
 
-int kernel_read_file_from_path(const char *path, void **buf, loff_t *size,
-  loff_t max_size, enum kernel_read_file_id id)
+int kernel_pread_file_from_path(const char *path, void **buf,
+   loff_t *size, loff_t pos,
+   loff_t max_size,
+   enum kernel_pread_opt opt,
+   enum kernel_read_file_id id)
 {
struct file *file;
int ret;
@@ -1008,15 +1035,24 @@ int kernel_read_file_from_path(const char *path, void **buf, loff_t *size,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, size, max_size, id);
+   ret = kernel_pread_file(file, buf, size, pos, max_size, opt, id);
fput(file);
return ret;
 }
+
+int kernel_read_file_from_path(const char *path, void **buf, loff_t *size,
+  loff_t max_size, enum kernel_read_file_id id)
+{
+   return kernel_pread_file_from_path(path, buf, size, 0, max_size,
+  KERNEL_PREAD_WHOLE, id);
+}
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
 
-int kernel_read_file_from_path_initns(const char *path, void **buf,
- loff_t *size, loff_t max_size,
- enum kernel_read_file_id id)
+extern int kernel_pread_file_from_path_initns(const char *path, void **buf,
+ loff_t *size, loff_t pos,
+ loff_t max_size,
+ 

[PATCH v6 4/8] firmware: test partial file reads of request_firmware_into_buf

2020-06-05 Thread Scott Branden
Add firmware tests for partial file reads of request_firmware_into_buf.

Signed-off-by: Scott Branden 
---
 .../selftests/firmware/fw_filesystem.sh   | 80 +++
 1 file changed, 80 insertions(+)

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..38e89ba1b4d3 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
echo 0 >  $DIR/config_into_buf
 }
 
+config_set_buf_size()
+{
+   echo $1 >  $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+   echo $1 >  $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+   echo 1 >  $DIR/config_partial
+}
+
+config_unset_partial()
+{
+   echo 0 >  $DIR/config_partial
+}
+
 config_set_sync_direct()
 {
echo 1 >  $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
done
 }
 
+read_firmwares_partial()
+{
+   if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+   fwfile="${FW_INTO_BUF}"
+   else
+   fwfile="${FW}"
+   fi
+
+   if [ "$1" = "xzonly" ]; then
+   fwfile="${fwfile}-orig"
+   fi
+
+   # Strip fwfile down to match partial offset and length
+   partial_data="$(cat $fwfile)"
+   partial_data="${partial_data:$2:$3}"
+
+   for i in $(seq 0 3); do
+   config_set_read_fw_idx $i
+
+   read_firmware="$(cat $DIR/read_firmware)"
+
+   # Verify the contents are what we expect.
+   if [ $read_firmware != $partial_data ]; then
+   echo "request #$i: partial firmware was not loaded" >&2
+   exit 1
+   fi
+   done
+}
+
 read_firmwares_expect_nofile()
 {
for i in $(seq 0 3); do
@@ -319,6 +368,21 @@ test_batched_request_firmware_into_buf()
echo "OK"
 }
 
+test_batched_request_firmware_into_buf_partial()
+{
+   echo -n "Batched request_firmware_into_buf_partial() $2 off=$3 size=$4 
try #$1: "
+   config_reset
+   config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+   config_set_into_buf
+   config_set_partial
+   config_set_buf_size $4
+   config_set_file_offset $3
+   config_trigger_sync
+   read_firmwares_partial $2 $3 $4
+   release_all_firmware
+   echo "OK"
+}
+
 test_batched_request_firmware_direct()
 {
echo -n "Batched request_firmware_direct() $2 try #$1: "
@@ -371,6 +435,22 @@ for i in $(seq 1 5); do
test_batched_request_firmware_into_buf $i normal
 done
 
+for i in $(seq 1 5); do
+   test_batched_request_firmware_into_buf_partial $i normal 0 10
+done
+
+for i in $(seq 1 5); do
+   test_batched_request_firmware_into_buf_partial $i normal 0 5
+done
+
+for i in $(seq 1 5); do
+   test_batched_request_firmware_into_buf_partial $i normal 1 6
+done
+
+for i in $(seq 1 5); do
+   test_batched_request_firmware_into_buf_partial $i normal 2 10
+done
+
 for i in $(seq 1 5); do
test_batched_request_firmware_direct $i normal
 done
-- 
2.17.1



[PATCH v6 2/8] firmware: add offset to request_firmware_into_buf

2020-06-05 Thread Scott Branden
Add an offset to request_firmware_into_buf to allow portions
of a firmware file to be read into a buffer.  This is necessary where
firmware needs to be loaded in portions from a file on memory
constrained systems.  A hypothetical caller sketch follows below.
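
As a sketch (names and values here are illustrative only; the actual
extended prototype and partial-read flag are the ones this patch adds
to include/linux/firmware.h):

	const struct firmware *fw;
	int ret;

	/* Read a 64 KiB window starting at file offset 1 MiB. */
	ret = request_firmware_into_buf(&fw, "example.bin", dev,
					buf, SZ_64K, SZ_1M,
					FW_PARTIAL_READ /* illustrative name */);
	if (!ret)
		release_firmware(fw);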

Signed-off-by: Scott Branden 
---
 drivers/base/firmware_loader/firmware.h |  5 +++
 drivers/base/firmware_loader/main.c | 53 +
 drivers/soc/qcom/mdt_loader.c   |  7 +++-
 include/linux/firmware.h|  8 +++-
 lib/test_firmware.c |  4 +-
 5 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index 933e2192fbe8..c9b6ba8d29d8 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -32,6 +32,8 @@
  * @FW_OPT_FALLBACK_PLATFORM: Enable fallback to device fw copy embedded in
  * the platform's main firmware. If both this fallback and the sysfs
  *  fallback are enabled, then this fallback will be tried first.
+ * @FW_OPT_PARTIAL: Allow partial read of firmware instead of needing to read
+ * entire file.
  */
 enum fw_opt {
FW_OPT_UEVENT   = BIT(0),
@@ -41,6 +43,7 @@ enum fw_opt {
FW_OPT_NOCACHE  = BIT(4),
FW_OPT_NOFALLBACK_SYSFS = BIT(5),
FW_OPT_FALLBACK_PLATFORM= BIT(6),
+   FW_OPT_PARTIAL  = BIT(7),
 };
 
 enum fw_status {
@@ -68,6 +71,8 @@ struct fw_priv {
void *data;
size_t size;
size_t allocated_size;
+   size_t offset;
+   enum kernel_pread_opt opt;
 #ifdef CONFIG_FW_LOADER_PAGED_BUF
bool is_paged_buf;
struct page **pages;
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index ca871b13524e..93e7fee42cd4 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -167,7 +167,9 @@ static int fw_cache_piggyback_on_request(const char *name);
 
 static struct fw_priv *__allocate_fw_priv(const char *fw_name,
  struct firmware_cache *fwc,
- void *dbuf, size_t size)
+ void *dbuf, size_t size,
+ size_t offset,
+ enum kernel_pread_opt opt)
 {
struct fw_priv *fw_priv;
 
@@ -185,6 +187,8 @@ static struct fw_priv *__allocate_fw_priv(const char *fw_name,
fw_priv->fwc = fwc;
fw_priv->data = dbuf;
fw_priv->allocated_size = size;
+   fw_priv->offset = offset;
+   fw_priv->opt = opt;
fw_state_init(fw_priv);
 #ifdef CONFIG_FW_LOADER_USER_HELPER
INIT_LIST_HEAD(_priv->pending_list);
@@ -210,9 +214,11 @@ static struct fw_priv *__lookup_fw_priv(const char *fw_name)
 static int alloc_lookup_fw_priv(const char *fw_name,
struct firmware_cache *fwc,
struct fw_priv **fw_priv, void *dbuf,
-   size_t size, u32 opt_flags)
+   size_t size, u32 opt_flags,
+   size_t offset)
 {
struct fw_priv *tmp;
+   enum kernel_pread_opt pread_opt;
 
spin_lock(>lock);
if (!(opt_flags & FW_OPT_NOCACHE)) {
@@ -226,7 +232,12 @@ static int alloc_lookup_fw_priv(const char *fw_name,
}
}
 
-   tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size);
+   if (opt_flags & FW_OPT_PARTIAL)
+   pread_opt = KERNEL_PREAD_PART;
+   else
+   pread_opt = KERNEL_PREAD_WHOLE;
+
+   tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size, offset, pread_opt);
if (tmp) {
INIT_LIST_HEAD(>list);
if (!(opt_flags & FW_OPT_NOCACHE))
@@ -495,8 +506,10 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
fw_priv->size = 0;
 
/* load firmware files from the mount namespace of init */
-   rc = kernel_read_file_from_path_initns(path, &buffer,
-  &size, msize, id);
+   rc = kernel_pread_file_from_path_initns(path, &buffer,
+   &size, fw_priv->offset,
+   msize,
+   fw_priv->opt, id);
if (rc) {
if (rc != -ENOENT)
dev_warn(device, "loading %s failed with error 
%d\n",
@@ -683,7 +696,7 @@ int assign_fw(struct firmware *fw, struct device *device, u32 opt_flags)
 static int
 _request_firmware_prepare(struct firmware **firmware_p, const char *name,
  struct device *device, void *dbuf, size_t size,
- u32 opt_flags)
+ u32 opt_flags, size_t 

[PATCH v6 0/8] firmware: add partial read support in request_firmware_into_buf

2020-06-05 Thread Scott Branden
This patch series adds partial read support in request_firmware_into_buf.
In order for the enhanced API to be accepted, it has been requested that
kernel selftests and an upstreamed driver utilize the API enhancement,
so both are included in this patch series.

Also in this patch series is the addition of a new Broadcom VK driver
utilizing the new request_firmware_into_buf enhanced API.

Further review comments led to adding IMA support for the partial reads
originating from request_firmware_into_buf calls.

Changes from v5:
 - add IMA FIRMWARE_PARTIAL_READ support
 - change kernel pread flags to enum
 - removed legacy support from driver
 - driver fixes
Changes from v4:
 - handle reset issues if card crashes
 - allow driver to have min required msix
 - add card utilization information
Changes from v3:
 - fix sparse warnings
 - fix printf format specifiers for size_t
 - fix 32-bit shift warnings reported when cross-compiling for 32-bit
 - use readl/writel[_relaxed] to access PCI ioremapped memory;
  removed memory barriers and volatile keyword with this change
 - driver optimizations for interrupt/poll functionalities
Changes from v2:
 - remove unnecessary code and mutex locks in lib/test_firmware.c
 - remove VK_IOCTL_ACCESS_BAR support from driver and use pci sysfs instead
 - remove bitfields
 - remove Kconfig default m
 - adjust formatting and some naming based on feedback
 - fix error handling conditions
 - use appropriate return codes
 - use memcpy_toio instead of direct access to PCIE bar

Scott Branden (8):
  fs: introduce kernel_pread_file* support
  firmware: add offset to request_firmware_into_buf
  test_firmware: add partial read support for request_firmware_into_buf
  firmware: test partial file reads of request_firmware_into_buf
  bcm-vk: add bcm_vk UAPI
  misc: bcm-vk: add Broadcom VK driver
  MAINTAINERS: bcm-vk: add maintainer for Broadcom VK Driver
  ima: add FIRMWARE_PARTIAL_READ support

 MAINTAINERS   |7 +
 drivers/base/firmware_loader/firmware.h   |5 +
 drivers/base/firmware_loader/main.c   |   59 +-
 drivers/misc/Kconfig  |1 +
 drivers/misc/Makefile |1 +
 drivers/misc/bcm-vk/Kconfig   |   29 +
 drivers/misc/bcm-vk/Makefile  |   11 +
 drivers/misc/bcm-vk/bcm_vk.h  |  408 +
 drivers/misc/bcm-vk/bcm_vk_dev.c  | 1312 +++
 drivers/misc/bcm-vk/bcm_vk_msg.c  | 1438 +
 drivers/misc/bcm-vk/bcm_vk_msg.h  |  201 +++
 drivers/misc/bcm-vk/bcm_vk_sg.c   |  271 
 drivers/misc/bcm-vk/bcm_vk_sg.h   |   60 +
 drivers/misc/bcm-vk/bcm_vk_tty.c  |  352 
 drivers/soc/qcom/mdt_loader.c |7 +-
 fs/exec.c |  101 +-
 include/linux/firmware.h  |8 +-
 include/linux/fs.h|   30 +
 include/uapi/linux/misc/bcm_vk.h  |   99 ++
 lib/test_firmware.c   |  144 +-
 security/integrity/ima/ima_main.c |   24 +-
 .../selftests/firmware/fw_filesystem.sh   |   80 +
 22 files changed, 4595 insertions(+), 53 deletions(-)
 create mode 100644 drivers/misc/bcm-vk/Kconfig
 create mode 100644 drivers/misc/bcm-vk/Makefile
 create mode 100644 drivers/misc/bcm-vk/bcm_vk.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_dev.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.c
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.h
 create mode 100644 drivers/misc/bcm-vk/bcm_vk_tty.c
 create mode 100644 include/uapi/linux/misc/bcm_vk.h

-- 
2.17.1



[PATCH v6 7/8] MAINTAINERS: bcm-vk: add maintainer for Broadcom VK Driver

2020-06-05 Thread Scott Branden
Add maintainer entry for new Broadcom VK Driver

Signed-off-by: Scott Branden 
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index b045b70e54df..9fbf255fe093 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3656,6 +3656,13 @@ L:   net...@vger.kernel.org
 S: Supported
 F: drivers/net/ethernet/broadcom/tg3.*
 
+BROADCOM VK DRIVER
+M: Scott Branden 
+L: bcm-kernel-feedback-l...@broadcom.com
+S: Supported
+F: drivers/misc/bcm-vk/
+F: include/uapi/linux/misc/bcm_vk.h
+
 BROCADE BFA FC SCSI DRIVER
 M: Anil Gurumurthy 
 M: Sudarsana Kalluru 
-- 
2.17.1



Re: [PATCHSET v5 0/12] Add support for async buffered reads

2020-06-05 Thread Jens Axboe
On 6/5/20 4:54 PM, Andres Freund wrote:
> Hi,
> 
> On 2020-06-05 16:49:24 -0600, Jens Axboe wrote:
>> Yes that's expected, if we have to fallback to ->readpage(), then it'll
>> go to a worker. read-ahead is what drives the async nature of it, as we
>> issue the range (plus more, depending on RA window) as read-ahead for
>> the normal read, then wait for it.
> 
> But I assume async would still work for files with POSIX_FADV_RANDOM
> set, or not? Assuming the system wide setting isn't zero, of course.

Yes it'll work if FADV_RANDOM is set. But just not if read-ahead is
totally disabled. I guess we could make that work too, though not sure
that it's super important.
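
For reference, the userspace hint under discussion boils down to:

	#include <fcntl.h>

	/* Mark the fd as random access; the kernel then scales back
	 * read-ahead for it. */
	posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);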

-- 
Jens Axboe



Re: [PATCHSET v5 0/12] Add support for async buffered reads

2020-06-05 Thread Andres Freund
Hi,

On 2020-06-05 16:49:24 -0600, Jens Axboe wrote:
> Yes that's expected, if we have to fallback to ->readpage(), then it'll
> go to a worker. read-ahead is what drives the async nature of it, as we
> issue the range (plus more, depending on RA window) as read-ahead for
> the normal read, then wait for it.

But I assume async would still work for files with POSIX_FADV_RANDOM
set, or not? Assuming the system wide setting isn't zero, of course.

Greetings,

Andres Freund


Re: [PATCH] iomap: Handle I/O errors gracefully in page_mkwrite

2020-06-05 Thread Matthew Wilcox
On Sat, Jun 06, 2020 at 07:48:41AM +1000, Dave Chinner wrote:
> On Fri, Jun 05, 2020 at 05:48:26AM -0700, Matthew Wilcox wrote:
> > ... I don't think that's the interesting path.  I mean, that's
> > the submission path, and usually we discover errors in the completion
> > path, not the submission path.
> 
> Where in the iomap write IO completion path do we call
> ClearPageUptodate()?

Oh, I misread.  You're right, I was looking at the read completion path.

So, this is also inconsistent.  We clear PageUptodate on errors we
discover during submission, but not for errors we discover during
completion.  That doesn't make sense.

> This comes back to my original, underlying worry about the fragility
> of the page fault path: the page fault path is not even checking for
> PageError during faults, and I'm betting that almost no
> ->page_mkwrite implementation is checking it, either

I think it's a reasonable assumption that user page tables should never
contain a PTE for a page which is !Uptodate.  Otherwise the user can
read stale data.

> > I don't see why it can't be done from the submission path.
> > unmap_mapping_range() calls i_mmap_lock_write(), which is
> > down_write(i_mmap_rwsem) in drag.  There might be a lock ordering
> > issue there, although lockdep should find it pretty quickly.
> > 
> > The bigger problem is the completion path.  We're in softirq context,
> > so that will have to punt to a thread that can take mutexes.
> 
> Punt to workqueue if we aren't already in a workqueue context -
> for a lot of writes on XFS we already will be running completion in
> a workqueue context

Yep.


Re: [PATCHSET v5 0/12] Add support for async buffered reads

2020-06-05 Thread Jens Axboe
On 6/5/20 4:36 PM, Andres Freund wrote:
> Hi,
> 
> On 2020-06-05 15:30:44 -0700, Andres Freund wrote:
>> On 2020-06-05 15:21:34 -0600, Jens Axboe wrote:
> I can reproduce this, and I see what it is. I'll send out a patch soonish.

 Thinko, can you try with this on top?
>>>
>>> Sorry that was incomplete, please use this one!
>>
>> That seems to fix it! Yay.
>>
>>
>> Bulk buffered reads somehow don't quite seem to be performing that well
>> though, looking into it. Could be on the pg side too.
> 
> While looking into that, I played with setting
> /sys//queue/read_ahead_kb to 0 and noticed that seems to result in
> all/most IO done in workers. Is that to be expected?

Yes that's expected, if we have to fallback to ->readpage(), then it'll
go to a worker. read-ahead is what drives the async nature of it, as we
issue the range (plus more, depending on RA window) as read-ahead for
the normal read, then wait for it.

-- 
Jens Axboe



Re: [PATCH v1 5/5] kselftests: cgroup: add perpcu memory accounting test

2020-06-05 Thread Roman Gushchin
On Fri, Jun 05, 2020 at 08:07:51PM +, Dennis Zhou wrote:
> On Thu, May 28, 2020 at 04:25:08PM -0700, Roman Gushchin wrote:
> > Add a simple test to check the percpu memory accounting.
> > The test creates a cgroup tree with 1000 child cgroups
> > and checks values of memory.current and memory.stat::percpu.
> > 
> > Signed-off-by: Roman Gushchin 
> > ---
> >  tools/testing/selftests/cgroup/test_kmem.c | 59 ++
> >  1 file changed, 59 insertions(+)
> > 
> > diff --git a/tools/testing/selftests/cgroup/test_kmem.c 
> > b/tools/testing/selftests/cgroup/test_kmem.c
> > index 5224dae216e5..a0d4f1a3137d 100644
> > --- a/tools/testing/selftests/cgroup/test_kmem.c
> > +++ b/tools/testing/selftests/cgroup/test_kmem.c
> > @@ -331,6 +331,64 @@ static int test_kmem_dead_cgroups(const char *root)
> > return ret;
> >  }
> >  
> > +/*
> > + * This test creates a sub-tree with 1000 memory cgroups.
> > + * Then it checks that the memory.current on the parent level
> > + * is greater than 0 and approximates matches the percpu value
> > + * from memory.stat.
> > + */
> > +static int test_percpu_basic(const char *root)
> > +{
> > +   int ret = KSFT_FAIL;
> > +   char *parent, *child;
> > +   long current, percpu;
> > +   int i;
> > +
> > +   parent = cg_name(root, "percpu_basic_test");
> > +   if (!parent)
> > +   goto cleanup;
> > +
> > +   if (cg_create(parent))
> > +   goto cleanup;
> > +
> > +   if (cg_write(parent, "cgroup.subtree_control", "+memory"))
> > +   goto cleanup;
> > +
> > +   for (i = 0; i < 1000; i++) {
> > +   child = cg_name_indexed(parent, "child", i);
> > +   if (!child)
> > +   return -1;
> > +
> > +   if (cg_create(child))
> > +   goto cleanup_children;
> > +
> > +   free(child);
> > +   }
> > +
> > +   current = cg_read_long(parent, "memory.current");
> > +   percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
> > +
> > +   if (current > 0 && percpu > 0 && abs(current - percpu) <
> > +   4096 * 32 * get_nprocs())
> 
> So this is checking that we've allocated less than 32 pages per cpu over
> 1000 child cgroups that's not percpu memory? Is there a more definitive
> measurement or at least a comment we can leave saying why this limit was
> chosen.

It simply means that "current" should be approximately equal to the "percpu"
statistic.
Both the charging and vmstat paths use percpu batching, and the batch size is
32 pages.

I'll add a comment to make it more obvious.
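
For instance, something along these lines (a sketch, not the final
wording):

	/*
	 * Both the charging path and the vmstat path batch updates in
	 * chunks of up to 32 pages per CPU, so memory.current and
	 * memory.stat::percpu may diverge by up to 32 pages * PAGE_SIZE
	 * per online CPU at any given time.
	 */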

Thanks!

> 
> > +   ret = KSFT_PASS;
> > +   else
> > +   printf("memory.current %ld\npercpu %ld\n",
> > +  current, percpu);
> > +
> > +cleanup_children:
> > +   for (i = 0; i < 1000; i++) {
> > +   child = cg_name_indexed(parent, "child", i);
> > +   cg_destroy(child);
> > +   free(child);
> > +   }
> > +
> > +cleanup:
> > +   cg_destroy(parent);
> > +   free(parent);
> > +
> > +   return ret;
> > +}
> > +
> >  #define T(x) { x, #x }
> >  struct kmem_test {
> > int (*fn)(const char *root);
> > @@ -341,6 +399,7 @@ struct kmem_test {
> > T(test_kmem_proc_kpagecgroup),
> > T(test_kmem_kernel_stacks),
> > T(test_kmem_dead_cgroups),
> > +   T(test_percpu_basic),
> >  };
> >  #undef T
> >  
> > -- 
> > 2.25.4
> > 
> > 


Re: [PATCH v1 2/5] mm: memcg/percpu: account percpu memory to memory cgroups

2020-06-05 Thread Roman Gushchin
On Fri, Jun 05, 2020 at 07:49:53PM +, Dennis Zhou wrote:
> On Thu, May 28, 2020 at 04:25:05PM -0700, Roman Gushchin wrote:
> > Percpu memory is becoming more and more widely used by various
> > subsystems, and the total amount of memory controlled by the percpu
> > allocator can make a good part of the total memory.
> > 
> > As an example, bpf maps can consume a lot of percpu memory,
> > and they are created by a user. Also, some cgroup internals
> > (e.g. memory controller statistics) can be quite large.
> > On a machine with many CPUs and big number of cgroups they
> > can consume hundreds of megabytes.
> > 
> > So the lack of memcg accounting is creating a breach in the memory
> > isolation. Similar to the slab memory, percpu memory should be
> > accounted by default.
> > 
> > To implement the perpcu accounting it's possible to take the slab
> > memory accounting as a model to follow. Let's introduce two types of
> > percpu chunks: root and memcg. What makes memcg chunks different is
> > an additional space allocated to store memcg membership information.
> > If __GFP_ACCOUNT is passed on allocation, a memcg chunk should be be
> > used. If it's possible to charge the corresponding size to the target
> > memory cgroup, allocation is performed, and the memcg ownership data
> > is recorded. System-wide allocations are performed using root chunks,
> > so there is no additional memory overhead.
> > 
> > To implement a fast reparenting of percpu memory on memcg removal,
> > we don't store mem_cgroup pointers directly: instead we use obj_cgroup
> > API, introduced for slab accounting.
> > 
> > Signed-off-by: Roman Gushchin 
> > ---
> >  mm/percpu-internal.h |  57 -
> >  mm/percpu-km.c   |   5 +-
> >  mm/percpu-stats.c|  36 +
> >  mm/percpu-vm.c   |   5 +-
> >  mm/percpu.c  | 186 ++-
> >  5 files changed, 248 insertions(+), 41 deletions(-)
> > 
> > diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
> > index 0468ba500bd4..0cf36337eb47 100644
> > --- a/mm/percpu-internal.h
> > +++ b/mm/percpu-internal.h
> > @@ -5,6 +5,27 @@
> >  #include 
> >  #include 
> >  
> > +/*
> > + * There are two chunk types: root and memcg-aware.
> > + * Chunks of each type have separate slots list.
> > + *
> > + * Memcg-aware chunks have an attached vector of obj_cgroup
> > + * pointers, which is used to store memcg membership data
> > + * of a percpu object. Obj_cgroups are ref-counted pointers
> > + * to a memory cgroup with an ability to switch dynamically
> > + * to the parent memory cgroup. This allows to reclaim a deleted
> > + * memory cgroup without reclaiming of all outstanding objects,
> > + * which do hold a reference at it.
> > + */
> 
> nit: do you mind reflowing this to 80 characters and doing 2 spaces
> after each period to keep the formatting uniform.
> 
> > +enum pcpu_chunk_type {
> > +   PCPU_CHUNK_ROOT,
> > +#ifdef CONFIG_MEMCG_KMEM
> > +   PCPU_CHUNK_MEMCG,
> > +#endif
> > +   PCPU_NR_CHUNK_TYPES,
> > +   PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES
> > +};
> > +
> >  /*
> >   * pcpu_block_md is the metadata block struct.
> >   * Each chunk's bitmap is split into a number of full blocks.
> > @@ -54,6 +75,9 @@ struct pcpu_chunk {
> > int end_offset; /* additional area required to
> >have the region end page
> >aligned */
> > +#ifdef CONFIG_MEMCG_KMEM
> > +   struct obj_cgroup   **obj_cgroups;  /* vector of object cgroups */
> > +#endif
> >  
> > int nr_pages;   /* # of pages served by this 
> > chunk */
> > int nr_populated;   /* # of populated pages */
> > @@ -63,7 +87,7 @@ struct pcpu_chunk {
> >  
> >  extern spinlock_t pcpu_lock;
> >  
> > -extern struct list_head *pcpu_slot;
> > +extern struct list_head *pcpu_chunk_lists;
> >  extern int pcpu_nr_slots;
> >  extern int pcpu_nr_empty_pop_pages;
> >  
> > @@ -106,6 +130,37 @@ static inline int pcpu_chunk_map_bits(struct 
> > pcpu_chunk *chunk)
> > return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
> >  }
> >  
> > +#ifdef CONFIG_MEMCG_KMEM
> > +static enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
> > +{
> > +   if (chunk->obj_cgroups)
> > +   return PCPU_CHUNK_MEMCG;
> > +   return PCPU_CHUNK_ROOT;
> > +}
> > +
> > +static bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
> > +{
> > +   return chunk_type == PCPU_CHUNK_MEMCG;
> > +}
> > +
> > +#else
> > +static enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
> > +{
> > +   return PCPU_CHUNK_ROOT;
> > +}
> > +
> > +static bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
> > +{
> > +   return false;
> > +}
> > +#endif
> > +
> > +static struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type)
> > +{
> > +   return _chunk_lists[pcpu_nr_slots *
> > +

[PATCHv2 2/6] power: supply: gpio-charger: Make gpios optional

2020-06-05 Thread Sebastian Reichel
While strongly recommended, not all devices have a GPIO to
detect if the charger is connected. This moves the 'gpios'
property from the required to the optional section.
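
A charger without a presence GPIO then becomes expressible as, e.g.
(a device tree sketch; the phandle is a placeholder):

	charger {
		compatible = "gpio-charger";
		charger-type = "mains";

		/* no presence 'gpios' -- valid after this change */
		charge-status-gpios = <&gpc 27 GPIO_ACTIVE_LOW>;
	};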

This also modifies error handling for the GPIO a bit: We
no longer fallback to pdata, if a GPIO is specified using
GPIO descriptor tables. This is a bit cleaner and does
not have any real impact: There are only two mainline pdata
users (arm/mach-sa1100/collie.c, arm/mach-pxa/tosa.c) and
none of them specify the GPIO via gpiod descriptor tables.
Once both have been converted the driver's support for
specifying GPIO numbers in pdata will be dropped.

Signed-off-by: Sebastian Reichel 
---
 .../bindings/power/supply/gpio-charger.yaml   |  7 +++-
 drivers/power/supply/gpio-charger.c   | 38 ---
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
index 78b167c62ab1..30eabbb14ef3 100644
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
@@ -41,7 +41,12 @@ properties:
 
 required:
   - compatible
-  - gpios
+
+anyOf:
+  - required:
+- gpios
+  - required:
+- charge-status-gpios
 
 additionalProperties: false
 
diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c
index 1b959c7f8b0e..875735d50716 100644
--- a/drivers/power/supply/gpio-charger.c
+++ b/drivers/power/supply/gpio-charger.c
@@ -112,9 +112,14 @@ static int gpio_charger_get_irq(struct device *dev, void 
*dev_id,
return irq;
 }
 
+/*
+ * The entries will be overwritten by driver's probe routine depending
+ * on the available features. This list ensures that the array is big
+ * enough for all optional features.
+ */
 static enum power_supply_property gpio_charger_properties[] = {
POWER_SUPPLY_PROP_ONLINE,
-   POWER_SUPPLY_PROP_STATUS /* Must always be last in the array. */
+   POWER_SUPPLY_PROP_STATUS,
 };
 
 static int gpio_charger_probe(struct platform_device *pdev)
@@ -128,6 +133,7 @@ static int gpio_charger_probe(struct platform_device *pdev)
int charge_status_irq;
unsigned long flags;
int ret;
+   int num_props = 0;
 
if (!pdata && !dev->of_node) {
dev_err(dev, "No platform data\n");
@@ -142,13 +148,13 @@ static int gpio_charger_probe(struct platform_device 
*pdev)
 * This will fetch a GPIO descriptor from device tree, ACPI or
 * boardfile descriptor tables. It's good to try this first.
 */
-   gpio_charger->gpiod = devm_gpiod_get(dev, NULL, GPIOD_IN);
+   gpio_charger->gpiod = devm_gpiod_get_optional(dev, NULL, GPIOD_IN);
 
/*
-* If this fails and we're not using device tree, try the
-* legacy platform data method.
+* Fallback to legacy platform data method, if no GPIO is specified
+* using boardfile descriptor tables.
 */
-   if (IS_ERR(gpio_charger->gpiod) && !dev->of_node) {
+   if (!gpio_charger->gpiod && pdata) {
/* Non-DT: use legacy GPIO numbers */
if (!gpio_is_valid(pdata->gpio)) {
dev_err(dev, "Invalid gpio pin in pdata\n");
@@ -173,17 +179,23 @@ static int gpio_charger_probe(struct platform_device 
*pdev)
return PTR_ERR(gpio_charger->gpiod);
}
 
+   if (gpio_charger->gpiod) {
+   gpio_charger_properties[num_props] = POWER_SUPPLY_PROP_ONLINE;
+   num_props++;
+   }
+
charge_status = devm_gpiod_get_optional(dev, "charge-status", GPIOD_IN);
-   gpio_charger->charge_status = charge_status;
-   if (IS_ERR(gpio_charger->charge_status))
-   return PTR_ERR(gpio_charger->charge_status);
+   if (IS_ERR(charge_status))
+   return PTR_ERR(charge_status);
+   if (charge_status) {
+   gpio_charger->charge_status = charge_status;
+   gpio_charger_properties[num_props] = POWER_SUPPLY_PROP_STATUS;
+   num_props++;
+   }
 
charger_desc = _charger->charger_desc;
charger_desc->properties = gpio_charger_properties;
-   charger_desc->num_properties = ARRAY_SIZE(gpio_charger_properties);
-   /* Remove POWER_SUPPLY_PROP_STATUS from the supported properties. */
-   if (!gpio_charger->charge_status)
-   charger_desc->num_properties -= 1;
+   charger_desc->num_properties = num_props;
charger_desc->get_property = gpio_charger_get_property;
 
psy_cfg.of_node = dev->of_node;
@@ -269,6 +281,6 @@ static struct platform_driver gpio_charger_driver = {
 module_platform_driver(gpio_charger_driver);
 
 MODULE_AUTHOR("Lars-Peter Clausen ");
-MODULE_DESCRIPTION("Driver for chargers which report their online status 
through a GPIO");
+MODULE_DESCRIPTION("Driver for chargers only communicating via GPIO(s)");
 

[PATCHv2 3/6] power: supply: gpio-charger: add charge-current-limit feature

2020-06-05 Thread Sebastian Reichel
Add new charge-current-limit feature to gpio-charger.

Signed-off-by: Sebastian Reichel 
---
 .../bindings/power/supply/gpio-charger.yaml   |  31 
 drivers/power/supply/gpio-charger.c   | 140 ++
 2 files changed, 171 insertions(+)

diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
index 30eabbb14ef3..e11cfdc68a51 100644
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
@@ -39,6 +39,25 @@ properties:
 maxItems: 1
 description: GPIO indicating the charging status
 
+  charge-current-limit-gpios:
+minItems: 1
+maxItems: 32
+description: GPIOs used for current limiting
+
+  charge-current-limit-mapping:
+description: List of tuples with current in uA and a GPIO bitmap (in
+  this order). The tuples must be provided in descending order of the
+  current limit.
+$ref: /schemas/types.yaml#/definitions/uint32-matrix
+items:
+  items:
+- description:
+Current limit in uA
+- description:
+Encoded GPIO setting. Bit 0 represents last GPIO from the
+charge-current-limit-gpios property. Bit 1 second to last
+GPIO and so on.
+
 required:
   - compatible
 
@@ -47,6 +66,12 @@ anyOf:
 - gpios
   - required:
 - charge-status-gpios
+  - required:
+- charge-current-limit-gpios
+
+dependencies:
+  charge-current-limit-gpios: [ charge-current-limit-mapping ]
+  charge-current-limit-mapping: [ charge-current-limit-gpios ]
 
 additionalProperties: false
 
@@ -60,4 +85,10 @@ examples:
 
   gpios = < 28 GPIO_ACTIVE_LOW>;
   charge-status-gpios = < 27 GPIO_ACTIVE_LOW>;
+
+  charge-current-limit-gpios = < 11 GPIO_ACTIVE_HIGH>,
+   < 12 GPIO_ACTIVE_HIGH>;
+  charge-current-limit-mapping = <250 0x00>, // 2.5 A => both GPIOs low
+ <70 0x01>, // 700 mA => GPIO A.12 high
+ <0 0x02>; // 0 mA => GPIO A.11 high
 };
diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c
index 875735d50716..74fc664c01e3 100644
--- a/drivers/power/supply/gpio-charger.c
+++ b/drivers/power/supply/gpio-charger.c
@@ -18,7 +18,13 @@
 
 #include 
 
+struct gpio_mapping {
+   u32 limit_ua;
+   u32 gpiodata;
+} __packed;
+
 struct gpio_charger {
+   struct device *dev;
unsigned int irq;
unsigned int charge_status_irq;
bool wakeup_enabled;
@@ -27,6 +33,11 @@ struct gpio_charger {
struct power_supply_desc charger_desc;
struct gpio_desc *gpiod;
struct gpio_desc *charge_status;
+
+   struct gpio_descs *current_limit_gpios;
+   struct gpio_mapping *current_limit_map;
+   u32 current_limit_map_size;
+   u32 charge_current_limit;
 };
 
 static irqreturn_t gpio_charger_irq(int irq, void *devid)
@@ -43,6 +54,35 @@ static inline struct gpio_charger *psy_to_gpio_charger(struct power_supply *psy)
return power_supply_get_drvdata(psy);
 }
 
+static int set_charge_current_limit(struct gpio_charger *gpio_charger, int val)
+{
+   struct gpio_mapping mapping;
+   int ndescs = gpio_charger->current_limit_gpios->ndescs;
+   struct gpio_desc **gpios = gpio_charger->current_limit_gpios->desc;
+   int i;
+
+   if (!gpio_charger->current_limit_map_size)
+   return -EINVAL;
+
+   for (i = 0; i < gpio_charger->current_limit_map_size; i++) {
+   if (gpio_charger->current_limit_map[i].limit_ua <= val)
+   break;
+   }
+   mapping = gpio_charger->current_limit_map[i];
+
+   for (i = 0; i < ndescs; i++) {
+   bool val = (mapping.gpiodata >> i) & 1;
+   gpiod_set_value_cansleep(gpios[ndescs-i-1], val);
+   }
+
+   gpio_charger->charge_current_limit = mapping.limit_ua;
+
+   dev_dbg(gpio_charger->dev, "set charge current limit to %d (requested: 
%d)\n",
+   gpio_charger->charge_current_limit, val);
+
+   return 0;
+}
+
 static int gpio_charger_get_property(struct power_supply *psy,
enum power_supply_property psp, union power_supply_propval *val)
 {
@@ -58,6 +98,9 @@ static int gpio_charger_get_property(struct power_supply *psy,
else
val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
break;
+   case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+   val->intval = gpio_charger->charge_current_limit;
+   break;
default:
return -EINVAL;
}
@@ -65,6 +108,34 @@ static int gpio_charger_get_property(struct power_supply *psy,
return 0;
 }
 
+static int gpio_charger_set_property(struct power_supply *psy,
+   enum power_supply_property psp, const union 

[PATCHv2 0/6] misc. gpio-charger patches

2020-06-05 Thread Sebastian Reichel
Hi,

This is v2 of a patchset for gpio-charger. The patches are
mostly unrelated to each other, but have some dependencies.

Merge plan is:

Assuming there are no huge blockers, the plan is to merge
patches 1-3 through power-supply subsystem for 5.9. The
ARM patches can go via ARM subsystem for 5.9 and the final
patch can simply be postponed for 5.10.

Changelog since PATCHv1:
 * Fixed the YAML binding as suggested by Rob
 * Implemented the suggestions from Emil
 * Split making "gpios" optional into separate patch, which
   simplifies reviewing and follows "one change per patch"
   style
 * Add two new patches converting platform data users to
   use GPIO descriptor tables
 * Add final patch removing gpio from platform data

-- Sebastian

Sebastian Reichel (6):
  dt-bindings: power: supply: gpio-charger: convert to yaml
  power: supply: gpio-charger: Make gpios optional
  power: supply: gpio-charger: add charge-current-limit feature
  ARM: pxa: Use GPIO descriptor for gpio-charger
  ARM: sa1100: Use GPIO descriptor for gpio-charger
  power: supply: gpio-charger: drop legacy GPIO support

 .../bindings/power/supply/gpio-charger.txt|  31 ---
 .../bindings/power/supply/gpio-charger.yaml   |  94 
 arch/arm/mach-pxa/tosa.c  |  24 +--
 arch/arm/mach-sa1100/collie.c |  11 +-
 drivers/power/supply/gpio-charger.c   | 200 ++
 include/linux/power/gpio-charger.h|   5 -
 6 files changed, 275 insertions(+), 90 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/power/supply/gpio-charger.txt
 create mode 100644 Documentation/devicetree/bindings/power/supply/gpio-charger.yaml

-- 
2.26.2



[PATCHv2 4/6] ARM: pxa: Use GPIO descriptor for gpio-charger

2020-06-05 Thread Sebastian Reichel
Provide AC detect GPIO via gpiod table instead of
legacy platform data so that legacy GPIO support
can be removed from the driver.

Also remove useless IRQ resource, which is not
used by the driver.

Due to lack of hardware this has only been compile
tested.

Signed-off-by: Sebastian Reichel 
---
 arch/arm/mach-pxa/tosa.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 3d2c108e911e..e4da2b4c5055 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -369,6 +369,14 @@ static struct pxaficp_platform_data 
tosa_ficp_platform_data = {
 /*
  * Tosa AC IN
  */
+static struct gpiod_lookup_table tosa_charger_gpiod_table = {
+   .dev_id = "gpio-charger",
+   .table = {
+   GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_AC_IN, NULL, GPIO_ACTIVE_LOW),
+   {},
+   },
+};
+
 static char *tosa_ac_supplied_to[] = {
"main-battery",
"backup-battery",
@@ -378,29 +386,14 @@ static char *tosa_ac_supplied_to[] = {
 static struct gpio_charger_platform_data tosa_power_data = {
.name   = "charger",
.type   = POWER_SUPPLY_TYPE_MAINS,
-   .gpio   = TOSA_GPIO_AC_IN,
-   .gpio_active_low= 1,
.supplied_to= tosa_ac_supplied_to,
.num_supplicants= ARRAY_SIZE(tosa_ac_supplied_to),
 };
 
-static struct resource tosa_power_resource[] = {
-   {
-   .name   = "ac",
-   .start  = PXA_GPIO_TO_IRQ(TOSA_GPIO_AC_IN),
-   .end= PXA_GPIO_TO_IRQ(TOSA_GPIO_AC_IN),
-   .flags  = IORESOURCE_IRQ |
- IORESOURCE_IRQ_HIGHEDGE |
- IORESOURCE_IRQ_LOWEDGE,
-   },
-};
-
 static struct platform_device tosa_power_device = {
.name   = "gpio-charger",
.id = -1,
.dev.platform_data  = &tosa_power_data,
-   .resource   = tosa_power_resource,
-   .num_resources  = ARRAY_SIZE(tosa_power_resource),
 };
 
 /*
@@ -950,6 +943,7 @@ static void __init tosa_init(void)
 
clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL);
 
+   gpiod_add_lookup_table(&tosa_charger_gpiod_table);
gpiod_add_lookup_table(&tosa_udc_gpiod_table);
platform_add_devices(devices, ARRAY_SIZE(devices));
 }
-- 
2.26.2



[PATCHv2 5/6] ARM: sa1100: Use GPIO descriptor for gpio-charger

2020-06-05 Thread Sebastian Reichel
Provide AC detect GPIO via gpiod table instead of
legacy platform data so that legacy GPIO support
can be removed from the driver.

Due to lack of hardware this has only been compile
tested.

Signed-off-by: Sebastian Reichel 
---
 arch/arm/mach-sa1100/collie.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 3cc2b71e16f0..3e871a3db3b0 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -131,6 +132,14 @@ static struct irda_platform_data collie_ir_data = {
 /*
  * Collie AC IN
  */
+static struct gpiod_lookup_table collie_charger_gpiod_table = {
+   .dev_id = "gpio-charger",
+   .table = {
+   GPIO_LOOKUP("gpio", COLLIE_GPIO_AC_IN, NULL, GPIO_ACTIVE_HIGH),
+   {},
+   },
+};
+
 static char *collie_ac_supplied_to[] = {
"main-battery",
"backup-battery",
@@ -140,7 +149,6 @@ static char *collie_ac_supplied_to[] = {
 static struct gpio_charger_platform_data collie_power_data = {
.name   = "charger",
.type   = POWER_SUPPLY_TYPE_MAINS,
-   .gpio   = COLLIE_GPIO_AC_IN,
.supplied_to= collie_ac_supplied_to,
.num_supplicants= ARRAY_SIZE(collie_ac_supplied_to),
 };
@@ -386,6 +394,7 @@ static void __init collie_init(void)
 
platform_scoop_config = &collie_pcmcia_config;
 
+   gpiod_add_lookup_table(&collie_charger_gpiod_table);
ret = platform_add_devices(devices, ARRAY_SIZE(devices));
if (ret) {
printk(KERN_WARNING "collie: Unable to register LoCoMo 
device\n");
-- 
2.26.2



[PATCHv2 1/6] dt-bindings: power: supply: gpio-charger: convert to yaml

2020-06-05 Thread Sebastian Reichel
Convert the gpio-charger bindings from text format to
new YAML based representation.

Signed-off-by: Sebastian Reichel 
---
 .../bindings/power/supply/gpio-charger.txt| 31 --
 .../bindings/power/supply/gpio-charger.yaml   | 58 +++
 2 files changed, 58 insertions(+), 31 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/power/supply/gpio-charger.txt
 create mode 100644 Documentation/devicetree/bindings/power/supply/gpio-charger.yaml

diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.txt b/Documentation/devicetree/bindings/power/supply/gpio-charger.txt
deleted file mode 100644
index 0fb33b2c62a6..
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-gpio-charger
-
-Required properties :
- - compatible : "gpio-charger"
- - gpios : GPIO indicating the charger presence.
-   See GPIO binding in bindings/gpio/gpio.txt .
- - charger-type : power supply type, one of
- unknown
- battery
- ups
- mains
- usb-sdp (USB standard downstream port)
- usb-dcp (USB dedicated charging port)
- usb-cdp (USB charging downstream port)
- usb-aca (USB accessory charger adapter)
-
-Optional properties:
- - charge-status-gpios: GPIO indicating whether a battery is charging.
-
-Example:
-
-   usb_charger: charger {
-   compatible = "gpio-charger";
-   charger-type = "usb-sdp";
-   gpios = < 28 GPIO_ACTIVE_LOW>;
-   charge-status-gpios = < 27 GPIO_ACTIVE_LOW>;
-   };
-
-   battery {
-   power-supplies = <_charger>;
-   };
diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
new file mode 100644
index ..78b167c62ab1
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/gpio-charger.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: simple battery chargers only communicating through GPIOs
+
+maintainers:
+  - Sebastian Reichel 
+
+description:
+  This binding is for all chargers, which are working more or less
+  autonomously, only providing some status GPIOs and possibly some
+  GPIOs for limited control over the charging process.
+
+properties:
+  compatible:
+const: gpio-charger
+
+  charger-type:
+enum:
+  - unknown
+  - battery
+  - ups
+  - mains
+  - usb-sdp   # USB standard downstream port
+  - usb-dcp   # USB dedicated charging port
+  - usb-cdp   # USB charging downstream port
+  - usb-aca   # USB accessory charger adapter
+description:
+  Type of the charger, e.g. "mains" for a wall charger.
+
+  gpios:
+maxItems: 1
+description: GPIO indicating the charger presence
+
+  charge-status-gpios:
+maxItems: 1
+description: GPIO indicating the charging status
+
+required:
+  - compatible
+  - gpios
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+
+charger {
+  compatible = "gpio-charger";
+  charger-type = "usb-sdp";
+
+  gpios = < 28 GPIO_ACTIVE_LOW>;
+  charge-status-gpios = < 27 GPIO_ACTIVE_LOW>;
+};
-- 
2.26.2



[PATCHv2 6/6] power: supply: gpio-charger: drop legacy GPIO support

2020-06-05 Thread Sebastian Reichel
All board files have been converted to use boardfile GPIO
descriptor tables, so GPIO support can be removed from
platform data.

Signed-off-by: Sebastian Reichel 
---
 drivers/power/supply/gpio-charger.c | 30 +
 include/linux/power/gpio-charger.h  |  5 -
 2 files changed, 1 insertion(+), 34 deletions(-)

diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c
index 74fc664c01e3..8c9f69f3d13e 100644
--- a/drivers/power/supply/gpio-charger.c
+++ b/drivers/power/supply/gpio-charger.c
@@ -258,7 +258,6 @@ static int gpio_charger_probe(struct platform_device *pdev)
struct power_supply_desc *charger_desc;
struct gpio_desc *charge_status;
int charge_status_irq;
-   unsigned long flags;
int ret;
int num_props = 0;
 
@@ -272,41 +271,14 @@ static int gpio_charger_probe(struct platform_device 
*pdev)
return -ENOMEM;
gpio_charger->dev = dev;
 
-   /*
-* This will fetch a GPIO descriptor from device tree, ACPI or
-* boardfile descriptor tables. It's good to try this first.
-*/
gpio_charger->gpiod = devm_gpiod_get_optional(dev, NULL, GPIOD_IN);
-
-   /*
-* Fallback to legacy platform data method, if no GPIO is specified
-* using boardfile descriptor tables.
-*/
-   if (!gpio_charger->gpiod && pdata) {
-   /* Non-DT: use legacy GPIO numbers */
-   if (!gpio_is_valid(pdata->gpio)) {
-   dev_err(dev, "Invalid gpio pin in pdata\n");
-   return -EINVAL;
-   }
-   flags = GPIOF_IN;
-   if (pdata->gpio_active_low)
-   flags |= GPIOF_ACTIVE_LOW;
-   ret = devm_gpio_request_one(dev, pdata->gpio, flags,
-   dev_name(dev));
-   if (ret) {
-   dev_err(dev, "Failed to request gpio pin: %d\n", ret);
-   return ret;
-   }
-   /* Then convert this to gpiod for now */
-   gpio_charger->gpiod = gpio_to_desc(pdata->gpio);
-   } else if (IS_ERR(gpio_charger->gpiod)) {
+   if (IS_ERR(gpio_charger->gpiod)) {
/* Just try again if this happens */
if (PTR_ERR(gpio_charger->gpiod) == -EPROBE_DEFER)
return -EPROBE_DEFER;
dev_err(dev, "error getting GPIO descriptor\n");
return PTR_ERR(gpio_charger->gpiod);
}
-
if (gpio_charger->gpiod) {
gpio_charger_properties[num_props] = POWER_SUPPLY_PROP_ONLINE;
num_props++;
diff --git a/include/linux/power/gpio-charger.h b/include/linux/power/gpio-charger.h
index 5a5a8de98181..3081391e93c9 100644
--- a/include/linux/power/gpio-charger.h
+++ b/include/linux/power/gpio-charger.h
@@ -13,8 +13,6 @@
  * struct gpio_charger_platform_data - platform_data for gpio_charger devices
  * @name:  Name for the chargers power_supply device
  * @type:  Type of the charger
- * @gpio:  GPIO which is used to indicate the chargers status
- * @gpio_active_low:   Should be set to 1 if the GPIO is active low otherwise 0
  * @supplied_to:   Array of battery names to which this chargers supplies 
power
  * @num_supplicants:   Number of entries in the supplied_to array
  */
@@ -22,9 +20,6 @@ struct gpio_charger_platform_data {
const char *name;
enum power_supply_type type;
 
-   int gpio;
-   int gpio_active_low;
-
char **supplied_to;
size_t num_supplicants;
 };
-- 
2.26.2



Re: [PATCH] x86/umip: Add emulation/spoofing for SLDT and STR instructions

2020-06-05 Thread Ricardo Neri
On Fri, Jun 05, 2020 at 11:58:13AM -0700, Brendan Shanks wrote:
> 
> > On Jun 3, 2020, at 9:39 PM, Andy Lutomirski  wrote:
> > 
> > On Wed, Jun 3, 2020 at 5:12 PM Ricardo Neri wrote:
> >> 
> >> On Tue, Jun 02, 2020 at 11:42:12AM -0700, Brendan Shanks wrote:
> >>> Add emulation/spoofing of SLDT and STR for both 32- and 64-bit
> >>> processes.
> >>> 
> >>> Wine users have found a small number of Windows apps using SLDT that
> >>> were crashing when run on UMIP-enabled systems.
> >>> 
> >>> Reported-by: Andreas Rammhold 
> >>> Originally-by: Ricardo Neri 
> >>> Signed-off-by: Brendan Shanks 
> >>> ---
> >>> arch/x86/kernel/umip.c | 23 ++-
> >>> 1 file changed, 14 insertions(+), 9 deletions(-)
> >>> 
> >>> diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
> >>> index 8d5cbe1bbb3b..59dfceac5cc0 100644
> >>> --- a/arch/x86/kernel/umip.c
> >>> +++ b/arch/x86/kernel/umip.c
> >>> @@ -64,6 +64,8 @@
> >>> #define UMIP_DUMMY_GDT_BASE 0xfffeULL
> >>> #define UMIP_DUMMY_IDT_BASE 0xULL
> >>> 
> >>> +#define UMIP_DUMMY_TASK_REGISTER_SELECTOR 0x40
> >>> +
> >>> /*
> >>>  * The SGDT and SIDT instructions store the contents of the global 
> >>> descriptor
> >>>  * table and interrupt table registers, respectively. The destination is a
> >>> @@ -244,16 +246,24 @@ static int emulate_umip_insn(struct insn *insn, int 
> >>> umip_inst,
> >>>  *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
> >>>  memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
> >>> 
> >>> - } else if (umip_inst == UMIP_INST_SMSW) {
> >>> - unsigned long dummy_value = CR0_STATE;
> >>> + } else if (umip_inst == UMIP_INST_SMSW || umip_inst == 
> >>> UMIP_INST_SLDT ||
> >>> +umip_inst == UMIP_INST_STR) {
> >>> + unsigned long dummy_value;
> >>> +
> >>> + if (umip_inst == UMIP_INST_SMSW)
> >>> + dummy_value = CR0_STATE;
> >>> + else if (umip_inst == UMIP_INST_STR)
> >>> + dummy_value = UMIP_DUMMY_TASK_REGISTER_SELECTOR;
> >>> + else
> >>> + dummy_value = 0;
> >> 
> >> Perhaps you can return a non-zero value for SLDT if it has an LDT, as
> >> Andy had suggested. Maybe this can be implemented by looking at
> >> current->mm->context.ldt
> >> 
> >> I guess the non-zero value can be (GDT_ENTRY_LDT*8).
> > 
> > You could probably even get away with always returning a nonzero
> > value.  After all, an empty LDT is quite similar to no LDT.
> 
> 
> Is something like this what you both had in mind?

> I don’t have any software handy to test the LDT-present case though.

Perhaps you can insert a test in the kernel selftest. Something like
this (based on Andreas' test program):

--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -220,12 +220,23 @@ static void install_invalid(const struct user_desc *desc, 
bool oldmode)
}
 }

+unsigned long test(void)
+{
+unsigned char ldtr[5] = "\xef\xbe\xad\xde";
+unsigned long ldt = 0;
+asm("sldt %0\n" : "=m" (ldtr));
+ldt = *((unsigned long *)&ldtr[0]);
+printf ("LDT base: 0x%lx\n", ldt);
+return (ldt);
+}
+
 static int safe_modify_ldt(int func, struct user_desc *ptr,
   unsigned long bytecount)
 {
int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
if (ret < -1)
errno = -ret;
+   test();
return ret;
 }

Thanks and BR,
Ricardo
> 
> 
> else if (umip_inst == UMIP_INST_STR)
> dummy_value = UMIP_DUMMY_TASK_REGISTER_SELECTOR;
> else if (umip_inst == UMIP_INST_SLDT)
> {
> #ifdef CONFIG_MODIFY_LDT_SYSCALL
> down_read(&current->mm->context.ldt_usr_sem);
> if (current->mm->context.ldt)
> dummy_value = GDT_ENTRY_LDT * 8;
> else
> dummy_value = 0;
> up_read(&current->mm->context.ldt_usr_sem);
> #else
> dummy_value = 0;
> #endif
> 

It looks fine to me. Perhaps Andy prefers a simpler, always-non-zero
implementation?

Thanks and BR,
Ricardo


Hang on wireless removal..

2020-06-05 Thread Linus Torvalds
So I think there's something wrong with wireless networking, and
(likely) in particular turning off wireless. And I think the problem
came in this merge window, because now my machine hangs on shutdown.

My new desktop is otherwise working fine, but it has some unnecessary
wireless capability on the motherboard, in the form of a Intel Wi-Fi 6
AX200 module that I don't use (since I end up using wired gig ethernet
instead).

And while debugging the shutdown hang (symptom: systemd waits forever
for NetworkManager and WPA supplicant), I turned off the WiFi.

And what do you know, things went all sideways.

They went sideways because everything that wants the rtnl lock seems
to just hang.

Example:

  kworker/57:2D0  1592  2 0x80004080
  Workqueue: events_power_efficient reg_check_chans_work [cfg80211]
  Call Trace:
   __schedule+0x30b/0x4b0
   ? schedule+0x77/0xa0
   ? schedule_preempt_disabled+0xa/0x10
   ? __mutex_lock+0x264/0x410
   ? psi_group_change+0x44/0x260
   ? reg_check_chans_work+0x1d/0x300 [cfg80211]
   ? __switch_to_asm+0x42/0x70
   ? process_one_work+0x1fa/0x3f0
   ? worker_thread+0x25d/0x480
   ? kthread+0x121/0x130
   ? process_one_work+0x3f0/0x3f0
   ? kthread_blkcg+0x30/0x30
   ? ret_from_fork+0x22/0x30
  kworker/60:2D0  1926  2 0x80004000
  Workqueue: ipv6_addrconf addrconf_verify_work
  Call Trace:
   __schedule+0x30b/0x4b0
   ? schedule+0x77/0xa0
   ? schedule_preempt_disabled+0xa/0x10
   ? __mutex_lock+0x264/0x410
   ? addrconf_verify_work+0xa/0x20
   ? process_one_work+0x1fa/0x3f0
   ? worker_thread+0x25d/0x480
   ? kthread+0x121/0x130
   ? process_one_work+0x3f0/0x3f0
   ? kthread_blkcg+0x30/0x30
   ? ret_from_fork+0x22/0x30
  NetworkManager  D0  4329  1 0x4000
  Call Trace:
   __schedule+0x30b/0x4b0
   ? schedule+0x77/0xa0
   ? schedule_preempt_disabled+0xa/0x10
   ? __mutex_lock+0x264/0x410
   ? __netlink_dump_start+0xa7/0x300
   ? rtnl_dellink+0x3c0/0x3c0
   ? rtnetlink_rcv_msg+0x375/0x3d0
   ? poll_freewait+0x35/0xa0
   ? do_sys_poll+0x58f/0x5f0
   ? rtnl_dellink+0x3c0/0x3c0
   ? __ia32_compat_sys_ppoll_time64+0x120/0x120
   ? ip_output+0x6a/0xd0
   ? ip_mc_finish_output+0x120/0x120
   ? avc_has_perm+0x34/0xa0
   ? rtnetlink_bind+0x30/0x30
   ? netlink_rcv_skb+0xfb/0x130
   ? netlink_unicast+0x1bf/0x2e0
   ? netlink_sendmsg+0x385/0x410
   ? __sys_sendto+0x21f/0x230
   ? move_addr_to_user+0x97/0xc0
   ? alloc_file_pseudo+0x9b/0xd0
   ? sock_alloc_file+0xc4/0x100
   ? __x64_sys_sendto+0x22/0x30
   ? do_syscall_64+0x5e/0xd0
   ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

and perhaps most interestingly, wpa_supplicant is waiting for some of
those workqueues that are waiting for the lock:

  wpa_supplicant  D0  2162  1 0x4000
  Call Trace:
   __schedule+0x30b/0x4b0
   ? schedule+0x77/0xa0
   ? schedule_timeout+0x22/0x150
   ? ttwu_queue+0xf4/0x120
   ? wait_for_common+0xac/0x110
   ? __flush_work+0x200/0x230
   ? put_pwq+0x70/0x70
   ? __cfg80211_unregister_wdev+0x95/0x130 [cfg80211]
   ? ieee80211_if_remove+0xa3/0xe0 [mac80211]
   ? ieee80211_del_iface+0xe/0x20 [mac80211]
   ? rdev_del_virtual_intf+0x2b/0xc0 [cfg80211]
   ? genl_rcv_msg+0x451/0x570
   ? genl_unbind+0xb0/0xb0
   ? netlink_rcv_skb+0xfb/0x130
   ? genl_rcv+0x24/0x40
   ? netlink_unicast+0x1bf/0x2e0
   ? netlink_sendmsg+0x385/0x410
   ? sys_sendmsg+0x26b/0x290
   ? __sys_sendmsg+0x128/0x180
   ? selinux_socket_setsockopt+0xc3/0xd0
   ? __cgroup_bpf_run_filter_setsockopt+0x99/0x290
   ? netlink_setsockopt+0x38/0x4d0
   ? __sys_setsockopt+0x11b/0x1b0
   ? do_syscall_64+0x5e/0xd0
   ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

which explains why systemd waits for that one too.

So something seems to have never released the rtnl lock.

In fact, I suspect it's exactly that wpa_supplicant itself that
deadlocks on it and holds the rtnl lock while it does that
"flush_work()". Which in turn waits for things to go away, but they'll
never go away because they need the rtnl lock. That wpa_supplicant is
holding.
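
Schematically, the guess is a flush-vs-lock inversion, sketched below with
names abbreviated from the traces above (a theory, not verified code):

/*
 * wpa_supplicant (netlink, holds rtnl)     kworkers (cfg80211 work items)
 * ------------------------------------     ------------------------------
 * rtnl_lock()
 * __cfg80211_unregister_wdev()             reg_check_chans_work()
 *   flush_work(&...update_wk)                rtnl_lock()  <- blocks forever,
 *     <- waits forever for work items          rtnl is held on the left
 *        stuck behind the rtnl lock
 */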

If I were a betting man, I'd suspect it's due to commit 6cd536fe62ef
("cfg80211: change internal management frame registration API"), which
seems to move that

flush_work(>mgmt_registrations_update_wk);

into __cfg80211_unregister_wdev(). But honestly, that's just a guess.

I'd bisect this and verify things, but I'm really hoping I don't have to.

I still have a number of pull requests for the merge window, so
instead I'm sending this email out with my current guesses, and I hope
somebody will say "Yeah, you're right, the fix is already pending", or
"No Linus, you're barking up completely the wrong tree, but I think I
know what the problem is".

Btw, I'm not a networking person, but I have to say, I've seen rtnl
lock problems enough over time even as an outsider to have grown to
really hate that thing. Am I wrong? It really seems to get involved
much too much, and held in really awkward places.

Am I wrong?

 Linus


Re: [PATCHSET v5 0/12] Add support for async buffered reads

2020-06-05 Thread Andres Freund
Hi,

On 2020-06-05 15:30:44 -0700, Andres Freund wrote:
> On 2020-06-05 15:21:34 -0600, Jens Axboe wrote:
> > >> I can reproduce this, and I see what it is. I'll send out a patch 
> > >> soonish.
> > > 
> > > Thinko, can you try with this on top?
> > 
> > Sorry that was incomplete, please use this one!
> 
> That seems to fix it! Yay.
> 
> 
> Bulk buffered reads somehow don't quite seem to be performing that well
> though, looking into it. Could be on the pg side too.

While looking into that, I played with setting
/sys//queue/read_ahead_kb to 0 and noticed that seems to result in
all/most IO done in workers. Is that to be expected?

Greetings,

Andres Freund


Re: [PATCH v2] rtc: fsl-ftm-alarm: fix freeze(s2idle) failed to wake

2020-06-05 Thread Alexandre Belloni
On Mon, 1 Jun 2020 15:19:14 +0800, Ran Wang wrote:
> Use dev_pm_set_wake_irq() instead of flag IRQF_NO_SUSPEND to enable
> wakeup system feature for both freeze(s2idle) and mem(deep).

Applied, thanks!

[1/1] rtc: fsl-ftm-alarm: fix freeze(s2idle) failed to wake
  commit: 3a8ce46ce15accad53b39837735c12d886964211

Best regards,
-- 
Alexandre Belloni 


Re: [PATCH v5 5/7] blktrace: fix debugfs use after free

2020-06-05 Thread Luis Chamberlain
On Thu, Jun 04, 2020 at 09:48:43PM -0700, Bart Van Assche wrote:
> On 2020-06-01 10:05, Luis Chamberlain wrote:
> > diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> > index a55cbfd060f5..5b0310f38e11 100644
> > --- a/kernel/trace/blktrace.c
> > +++ b/kernel/trace/blktrace.c
> > @@ -511,6 +511,11 @@ static int do_blk_trace_setup(struct request_queue *q, 
> > char *name, dev_t dev,
> >  */
> > if (bdev && bdev != bdev->bd_contains) {
> > dir = bdev->bd_part->debugfs_dir;
> > +   } else if (q->sg_debugfs_dir &&
> > +  strlen(buts->name) == strlen(q->sg_debugfs_dir->d_name.name)
> > +  && strcmp(buts->name, q->sg_debugfs_dir->d_name.name) == 0) {
> > +   /* scsi-generic requires use of its own directory */
> > +   dir = q->sg_debugfs_dir;
> > } else {
> > /*
> >  * For queues that do not have a gendisk attached to them, that
> > 
> 
> Please Cc Martin Petersen for patches that modify SCSI code.

Sure thing.
> The string comparison check looks fragile to me. Is the purpose of that
> check perhaps to verify whether tracing is being activated through the
> SCSI generic interface?

Yes.

> If so, how about changing that test into
> something like the following?
> 
>   MAJOR(dev) == SCSI_GENERIC_MAJOR

Sure.
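
For reference, a sketch of what the suggested test could look like (the
helper name here is invented, not from the patch):

#include <linux/kdev_t.h>
#include <linux/major.h>

/* Key off the device major rather than comparing directory name strings;
 * requests arriving via /dev/sg* always carry SCSI_GENERIC_MAJOR. */
static bool blk_trace_setup_via_sg(dev_t dev)
{
	return MAJOR(dev) == SCSI_GENERIC_MAJOR;
}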

  Luis


Re: [PATCHSET v5 0/12] Add support for async buffered reads

2020-06-05 Thread Andres Freund
Hi,

On 2020-06-05 15:21:34 -0600, Jens Axboe wrote:
> >> I can reproduce this, and I see what it is. I'll send out a patch soonish.
> > 
> > Thinko, can you try with this on top?
> 
> Sorry that was incomplete, please use this one!

That seems to fix it! Yay.


Bulk buffered reads somehow don't quite seem to be performing that well
though, looking into it. Could be on the pg side too.

Greetings,

Andres Freund


Re: [PATCH] leds: mt6360: Add LED driver for MT6360

2020-06-05 Thread Jacek Anaszewski

Hi Gene,

Thank you for the patch. Please find my comments in the code below.

On 6/4/20 8:26 AM, Gene Chen wrote:

From: Gene Chen 

Add MT6360 LED driver include 2-channel Flash LED with torch/strobe mode,
and 3-channel RGB LED support Register/Flash/Breath Mode

Signed-off-by: Gene Chen 
base-commit: 098c4adf249c198519a4abebe482b1e6b8c50e47
---
  drivers/leds/Kconfig   |   11 +
  drivers/leds/Makefile  |1 +
  drivers/leds/leds-mt6360.c | 1061 
  include/linux/mfd/mt6360.h |6 +-
  4 files changed, 1078 insertions(+), 1 deletion(-)
  create mode 100644 drivers/leds/leds-mt6360.c

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index c664d84..c47be91 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -229,6 +229,17 @@ config LEDS_MT6323
  This option enables support for on-chip LED drivers found on
  Mediatek MT6323 PMIC.
  
+config LEDS_MT6360
+   tristate "LED Support for Mediatek MT6360 PMIC"
+   depends on LEDS_CLASS_FLASH && OF


I can't find DT bindings for this driver. Neither in this patch,
nor in mfd bindings.


+   depends on V4L2_FLASH_LED_CLASS || !V4L2_FLASH_LED_CLASS
+   depends on MFD_MT6360
+   help
+ This option enables support for dual Flash LED drivers found on
+ Mediatek MT6360 PMIC.
+ Support Torch and Strobe mode independently current source.
+ Include Low-VF and short protection.
+
  config LEDS_S3C24XX
tristate "LED Support for Samsung S3C24XX GPIO LEDs"
depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 45235d5..2883b4d 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_LEDS_MIKROTIK_RB532) += leds-rb532.o
  obj-$(CONFIG_LEDS_MLXCPLD)+= leds-mlxcpld.o
  obj-$(CONFIG_LEDS_MLXREG) += leds-mlxreg.o
  obj-$(CONFIG_LEDS_MT6323) += leds-mt6323.o
+obj-$(CONFIG_LEDS_MT6360)  += leds-mt6360.o
  obj-$(CONFIG_LEDS_NET48XX)+= leds-net48xx.o
  obj-$(CONFIG_LEDS_NETXBIG)+= leds-netxbig.o
  obj-$(CONFIG_LEDS_NIC78BX)+= leds-nic78bx.o
diff --git a/drivers/leds/leds-mt6360.c b/drivers/leds/leds-mt6360.c
new file mode 100644
index 000..3e62547
--- /dev/null
+++ b/drivers/leds/leds-mt6360.c
@@ -0,0 +1,1061 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ *
+ * Author: Gene Chen 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+enum {
+   MT6360_LED_ISINK1 = 0,
+   MT6360_LED_ISINK2,
+   MT6360_LED_ISINK3,
+   MT6360_LED_ISINK4,
+   MT6360_LED_MAX,
+};
+
+enum {
+   MT6360_LEDMODE_PWM = 0,
+   MT6360_LEDMODE_BREATH,
+   MT6360_LEDMODE_CC,
+   MT6360_LEDMODE_MAX,
+};
+
+enum {
+   MT6360_FLED_CH1 = 0,
+   MT6360_FLED_CH2,
+   MT6360_FLED_MAX,
+};
+
+/* ILED setting/reg */
+#define MT6360_SINKCUR_MAX1    (0x0d)
+#define MT6360_SINKCUR_MAX2    (0x0d)
+#define MT6360_SINKCUR_MAX3    (0x0d)
+#define MT6360_SINKCUR_MAX4    (0x1f)


There is much redundancy and needless code obscurity in this
approach. I propose to provide macros for calculating particular
register value basing on passed argument.

e.g.

#define MT6360_SINKCUR_MAX(val) (((val) == 4) ? 0x1f : 0x0d)


+#define MT6360_CURRSEL_REG1    (MT6360_PMU_RGB1_ISNK)
+#define MT6360_CURRSEL_REG2    (MT6360_PMU_RGB2_ISNK)
+#define MT6360_CURRSEL_REG3    (MT6360_PMU_RGB3_ISNK)
+#define MT6360_CURRSEL_REG4    (MT6360_PMU_RGB_ML_ISNK)
+#define MT6360_CURRSEL_MASK1   (0x0f)
+#define MT6360_CURRSEL_MASK2   (0x0f)
+#define MT6360_CURRSEL_MASK3   (0x0f)
+#define MT6360_CURRSEL_MASK4   (0x1f)
+#define MT6360_LEDMODE_REG1    (MT6360_PMU_RGB1_ISNK)
+#define MT6360_LEDMODE_REG2    (MT6360_PMU_RGB2_ISNK)
+#define MT6360_LEDMODE_REG3    (MT6360_PMU_RGB3_ISNK)
+#define MT6360_LEDMODE_REG4    (0)
+#define MT6360_LEDMODE_MASK1   (0xc0)
+#define MT6360_LEDMODE_MASK2   (0xc0)
+#define MT6360_LEDMODE_MASK3   (0xc0)
+#define MT6360_LEDMODE_MASK4   (0)
+#define MT6360_PWMDUTY_REG1    (MT6360_PMU_RGB1_DIM)
+#define MT6360_PWMDUTY_REG2    (MT6360_PMU_RGB2_DIM)
+#define MT6360_PWMDUTY_REG3    (MT6360_PMU_RGB3_DIM)
+#define MT6360_PWMDUTY_REG4    (0)
+#define MT6360_PWMDUTY_MASK1   (0xff)
+#define MT6360_PWMDUTY_MASK2   (0xff)
+#define MT6360_PWMDUTY_MASK3   (0xff)
+#define MT6360_PWMDUTY_MASK4   (0)
+#define MT6360_PWMFREQ_REG1    (MT6360_PMU_RGB12_Freq)
+#define MT6360_PWMFREQ_REG2    (MT6360_PMU_RGB12_Freq)
+#define MT6360_PWMFREQ_REG3    (MT6360_PMU_RGB34_Freq)
+#define MT6360_PWMFREQ_REG4    (0)
+#define MT6360_PWMFREQ_MASK1   (0xe0)
+#define MT6360_PWMFREQ_MASK2   (0x1c)
+#define MT6360_PWMFREQ_MASK3   (0xe0)
+#define MT6360_PWMFREQ_MASK4   (0)
+#define MT6360_BREATH_REGBASE1 (MT6360_PMU_RGB1_Tr)
+#define MT6360_BREATH_REGBASE2 (MT6360_PMU_RGB2_Tr)
+#define MT6360_BREATH_REGBASE3 

[rcu:dev.2020.06.02a 85/90] smp.c:undefined reference to `__udivdi3'

2020-06-05 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 
dev.2020.06.02a
head:   5216948905dd07a84cef8a7dc72c2ec076802efd
commit: 92ebbb71443dced2019cd24b737ce60b03a29e10 [85/90] EXP kernel/smp: 
Provide CSD lock timeout diagnostics
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
reproduce (this is a W=1 build):
git checkout 92ebbb71443dced2019cd24b737ce60b03a29e10
# save the attached .config to linux build tree
make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>, old ones prefixed by <<):

ld: kernel/smp.o: in function `smp_call_function_single':
>> smp.c:(.text+0x55a): undefined reference to `__udivdi3'
>> ld: smp.c:(.text+0x5a3): undefined reference to `__udivdi3'
>> ld: smp.c:(.text+0x648): undefined reference to `__umoddi3'
ld: smp.c:(.text+0x65f): undefined reference to `__udivdi3'
ld: smp.c:(.text+0x71c): undefined reference to `__udivdi3'
ld: smp.c:(.text+0x75e): undefined reference to `__udivdi3'
ld: smp.c:(.text+0x80b): undefined reference to `__umoddi3'
ld: smp.c:(.text+0x822): undefined reference to `__udivdi3'
ld: kernel/smp.o: in function `smp_call_function_many_cond':
smp.c:(.text+0xbd6): undefined reference to `__udivdi3'
ld: smp.c:(.text+0xc1e): undefined reference to `__udivdi3'
ld: smp.c:(.text+0xcc5): undefined reference to `__umoddi3'
ld: smp.c:(.text+0xcdc): undefined reference to `__udivdi3'
ld: smp.c:(.text+0xe9d): undefined reference to `__udivdi3'
ld: smp.c:(.text+0xede): undefined reference to `__udivdi3'
ld: smp.c:(.text+0xf85): undefined reference to `__umoddi3'
ld: smp.c:(.text+0xf9c): undefined reference to `__udivdi3'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org




[PATCH v1] arm64/module: Optimize module load time by optimizing PLT counting

2020-06-05 Thread Saravana Kannan
When loading a module, module_frob_arch_sections() tries to figure out
the number of PLTs that'll be needed to handle all the RELAs. While
doing this, it tries to dedupe PLT allocations for multiple
R_AARCH64_CALL26 relocations to the same symbol. It does the same for
R_AARCH64_JUMP26 relocations too.

To make checks for duplicates easier/faster, it sorts the relocation
list by type, symbol and addend. That way, to check for a duplicate
relocation, it just needs to compare with the previous entry.
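
In other words, once sorted, the duplicate check reduces to a compare with
the previous element, along these lines (a sketch, not the actual
count_plts() code):

#include <linux/elf.h>

/* Two CALL26/JUMP26 relocations can share one PLT slot iff their type,
 * symbol index and addend all match; sorting makes duplicates adjacent. */
static bool is_dup_rela(const Elf64_Rela *rela, int i)
{
	return i > 0 &&
	       ELF64_R_TYPE(rela[i].r_info) == ELF64_R_TYPE(rela[i - 1].r_info) &&
	       ELF64_R_SYM(rela[i].r_info) == ELF64_R_SYM(rela[i - 1].r_info) &&
	       rela[i].r_addend == rela[i - 1].r_addend;
}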

However, sorting the entire relocation array is unnecessary and
expensive (O(n log n)) because there are a lot of other relocation types
that don't need deduping or can't be deduped.

So this commit partitions the array into entries that need deduping and
those that don't. And then sorts just the part that needs deduping. And
when CONFIG_RANDOMIZE_BASE is disabled, the sorting is skipped entirely
because PLTs are not allocated for R_AARCH64_CALL26 and R_AARCH64_JUMP26
if it's disabled.

This gives significant reduction in module load time for modules with
large number of relocations with no measurable impact on modules with a
small number of relocations. In my test setup with CONFIG_RANDOMIZE_BASE
enabled, the load time for one module went down from 268ms to 100ms.
Another module went down from 143ms to 83ms.

This commit also disables the sorting if CONFIG_RANDOMIZE_BASE is
disabled because it looks like PLTs are not allocated for
R_AARCH64_CALL26 and R_AARCH64_JUMP26 if it's disabled.

Cc: Ard Biesheuvel 
Signed-off-by: Saravana Kannan 
---
 arch/arm64/kernel/module-plts.c | 37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 65b08a74aec6..bf5118b3b828 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -253,6 +253,36 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
return ret;
 }
 
+static bool rela_needs_dedup(Elf64_Rela *rela)
+{
+   return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26
+  || ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group the CALL26/JUMP26 relas toward the beginning of the array. */
+static int partition_dedup_relas(Elf64_Rela *rela, int numrels)
+{
+   int i = 0, j = numrels - 1;
+   Elf64_Rela t;
+
+   while (i < j) {
+   while (rela_needs_dedup(rela + i) && i < j)
+   i++;
+   while (!rela_needs_dedup(rela + j) && i < j)
+   j--;
+   if (i < j) {
+   t = *(rela + j);
+   *(rela + j) = *(rela + i);
+   *(rela + i) = t;
+   }
+   }
+   /* If the entire array needs dedup, make sure i == numrels */
+   if (rela_needs_dedup(rela + i))
+   i++;
+
+   return i;
+}
+
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
  char *secstrings, struct module *mod)
 {
@@ -291,6 +321,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr 
*sechdrs,
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+   int num_dedup_rels = 0;
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
 
if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,8 +331,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
 
+   if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+   num_dedup_rels = partition_dedup_relas(rels, numrels);
/* sort by type, symbol index and addend */
-   sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+   if (num_dedup_rels)
+   sort(rels, num_dedup_rels, sizeof(Elf64_Rela),
+cmp_rela, NULL);
 
if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
core_plts += count_plts(syms, rels, numrels,
-- 
2.27.0.278.ge193c7cf3a9-goog



Re: 5.7.0 / BUG: kernel NULL pointer dereference / setup_cpu_watcher

2020-06-05 Thread Christian Kujau
On Fri, 5 Jun 2020, Andrew Cooper wrote:
> PVH domains don't have the emulated platform device, so Linux will be
> finding ~0 when it goes looking in config space.
> 
> The diagnostic should be skipped in that case, to avoid giving the false
> impression that something is wrong.

Understood, thanks for explaining that. I won't be able to edit 
arch/x86/xen/platform-pci-unplug.c to fix that though :-\
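
Presumably the skip would be something along these lines near the top of
xen_unplug_emulated_devices() in that file (a sketch only; exact placement
is up to the Xen maintainers):

#include <xen/xen.h>

	/* PVH guests have no emulated platform device, so the unplug
	 * magic probe would just read ~0 from config space; skip it
	 * instead of warning. */
	if (xen_pvh_domain())
		return;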

Christian.
-- 
BOFH excuse #134:

because of network lag due to too many people playing deathmatch


Re: [PATCH 4.19 00/28] 4.19.127-rc1 review

2020-06-05 Thread Shuah Khan

On 6/5/20 8:15 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.19.127 release.
There are 28 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun, 07 Jun 2020 13:54:56 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.127-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.19.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah



Re: [PATCH 5.4 00/38] 5.4.45-rc1 review

2020-06-05 Thread Shuah Khan

On 6/5/20 8:14 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 5.4.45 release.
There are 38 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun, 07 Jun 2020 13:54:56 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.45-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-5.4.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah


[PATCH 0/2] DRA7 timer/mailbox dts fixes

2020-06-05 Thread Suman Anna
Hi Tony,

The following 2 patches are a couple of minor fixes that clean up
a couple of commits from the "ARM: dts: dra7/am57xx: remoteproc
support" series [1]. Please pick these for the 5.8-rc fixes.

The issues look to be the result of incorrect rebase-conflict resolution
of the downstream TI patches based on the 5.4 kernel.

regards
Suman

[1] https://patchwork.kernel.org/cover/11508091/

Suman Anna (2):
  ARM: dts: dra7: Fix timer nodes properly for timer_sys_ck clocks
  ARM: dts: dra7-evm-common: Fix duplicate mailbox nodes

 arch/arm/boot/dts/dra7-evm-common.dtsi | 20 ---
 arch/arm/boot/dts/dra7-l4.dtsi | 34 --
 2 files changed, 16 insertions(+), 38 deletions(-)

-- 
2.26.0



[PATCH 1/2] ARM: dts: dra7: Fix timer nodes properly for timer_sys_ck clocks

2020-06-05 Thread Suman Anna
The commit 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries
for IPU/DSP timers") was added to allow the OMAP clocksource timer
driver to use the clock aliases when reconfiguring the parent clock
source for the timer functional clocks after the timer_sys_ck clock
aliases got cleaned up in commit a8202cd5174d ("clk: ti: dra7: drop
unnecessary clock aliases").

The above patch however missed adding the entries for a couple of
timers (14, 15 and 16), and also erroneously added them in the parent
ti-sysc nodes of a couple of timers (4, 5 and 6). Fix these
properly, so that any of these timers can be used with OMAP remoteproc
IPU and DSP devices. The always-on timers 1 and 12 are not expected
to use this clock source, so they are not modified.

Fixes: 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries for IPU/DSP 
timers")
Signed-off-by: Suman Anna 
---
 arch/arm/boot/dts/dra7-l4.dtsi | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 62ca89551219..0c6f26605506 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -1207,9 +1207,8 @@ target-module@36000 { /* 0x48036000, ap 9 4e.0 */
,
;
/* Domains (P, C): l4per_pwrdm, l4per_clkdm */
-   clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>,
-<&timer_sys_clk_div>;
-   clock-names = "fck", "timer_sys_ck";
+   clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>;
+   clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x36000 0x1000>;
@@ -3352,8 +3351,8 @@ target-module@2 { /* 0x4882, ap 5 08.0 */
,
;
/* Domains (P, C): ipu_pwrdm, ipu_clkdm */
-   clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>, <&timer_sys_clk_div>;
-   clock-names = "fck", "timer_sys_ck";
+   clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>;
+   clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x2 0x1000>;
@@ -3361,8 +3360,8 @@ target-module@2 { /* 0x4882, ap 5 08.0 */
timer5: timer@0 {
compatible = "ti,omap5430-timer";
reg = <0x0 0x80>;
-   clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>;
-   clock-names = "fck";
+   clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>, <&timer_sys_clk_div>;
+   clock-names = "fck", "timer_sys_ck";
interrupts = ;
};
};
@@ -3379,9 +3378,8 @@ target-module@22000 { /* 0x48822000, ap 7 24.0 */
,
;
/* Domains (P, C): ipu_pwrdm, ipu_clkdm */
-   clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>,
-<&timer_sys_clk_div>;
-   clock-names = "fck", "timer_sys_ck";
+   clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>;
+   clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x22000 0x1000>;
@@ -3389,8 +3387,8 @@ target-module@22000 { /* 0x48822000, ap 7 24.0 */
timer6: timer@0 {
compatible = "ti,omap5430-timer";
reg = <0x0 0x80>;
-   clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 24>;
-   clock-names = "fck";
+   clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 24>, <&timer_sys_clk_div>;
+   clock-names = "fck", "timer_sys_ck";
interrupts = ;
};
};
@@ -3498,8 +3496,8 @@ target-module@2a000 { /* 0x4882a000, ap 15 10.0 */
timer14: timer@0 {
compatible = "ti,omap5430-timer";
reg = <0x0 0x80>;
-   clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>;
-   clock-names = "fck";
+   clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>, 

Re: [PATCH 5.6 00/43] 5.6.17-rc1 review

2020-06-05 Thread Shuah Khan

On 6/5/20 8:14 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 5.6.17 release.
There are 43 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun, 07 Jun 2020 13:54:56 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.6.17-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-5.6.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah



[PATCH 2/2] ARM: dts: dra7-evm-common: Fix duplicate mailbox nodes

2020-06-05 Thread Suman Anna
The mailbox nodes defined in various dts files have been moved to
common dra7-ipu-dsp-common.dtsi and dra74-ipu-dsp-common.dtsi files
in commit a11a2f73b32d ("ARM: dts: dra7-ipu-dsp-common: Move mailboxes
into common files"), but the nodes were erroneously left out in the
dra7-evm-common.dtsi file. Fix this by removing these duplicate nodes.

Signed-off-by: Suman Anna 
---
 arch/arm/boot/dts/dra7-evm-common.dtsi | 20 
 1 file changed, 20 deletions(-)

diff --git a/arch/arm/boot/dts/dra7-evm-common.dtsi b/arch/arm/boot/dts/dra7-evm-common.dtsi
index f89a64cbcd53..2cf6a529d4ad 100644
--- a/arch/arm/boot/dts/dra7-evm-common.dtsi
+++ b/arch/arm/boot/dts/dra7-evm-common.dtsi
@@ -245,26 +245,6 @@ &mcasp3 {
rx-num-evt = <32>;
 };
 
-&mailbox5 {
-   status = "okay";
-   mbox_ipu1_ipc3x: mbox_ipu1_ipc3x {
-   status = "okay";
-   };
-   mbox_dsp1_ipc3x: mbox_dsp1_ipc3x {
-   status = "okay";
-   };
-};
-
-&mailbox6 {
-   status = "okay";
-   mbox_ipu2_ipc3x: mbox_ipu2_ipc3x {
-   status = "okay";
-   };
-   mbox_dsp2_ipc3x: mbox_dsp2_ipc3x {
-   status = "okay";
-   };
-};
-
 &pcie1_rc {
status = "okay";
 };
-- 
2.26.0



Re: [rcu:dev.2020.06.02a 85/90] kernel/smp.c:122: undefined reference to `__udivdi3'

2020-06-05 Thread Paul E. McKenney
On Sat, Jun 06, 2020 at 05:38:31AM +0800, kernel test robot wrote:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 
> dev.2020.06.02a
> head:   5216948905dd07a84cef8a7dc72c2ec076802efd
> commit: 92ebbb71443dced2019cd24b737ce60b03a29e10 [85/90] EXP kernel/smp: 
> Provide CSD lock timeout diagnostics
> config: i386-randconfig-c001-20200605 (attached as .config)
> compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
> 
> All errors (new ones prefixed by >>, old ones prefixed by <<):
> 
> ld: kernel/smp.o: in function `csd_lock_wait':
> >> kernel/smp.c:122: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
> ld: kernel/smp.c:136: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
> ld: kernel/smp.c:136: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
> ld: kernel/smp.c:136: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
> >> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
> ld: kernel/smp.c:136: undefined reference to `__udivdi3'

Good catch, thank you, fixup patch applied.

Thanx, Paul
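
(The usual cure for these link errors, and presumably the shape of the
fixup, is to avoid open-coded u64 division in the quoted code below, since
on i386 that lowers to libgcc's __udivdi3/__umoddi3, which the kernel does
not link against. A sketch:)

#include <linux/math64.h>
#include <linux/sched/clock.h>
#include <linux/time64.h>

	/* sched_clock() returns u64 nanoseconds; "/ 1000 / 1000" emits
	 * __udivdi3 on 32-bit targets, while div_u64() uses do_div(). */
	u64 ts0 = div_u64(sched_clock(), NSEC_PER_MSEC);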

> vim +122 kernel/smp.c
> 
>107
>108/*
>109 * csd_lock/csd_unlock used to serialize access to per-cpu csd 
> resources
>110 *
>111 * For non-synchronous ipi calls the csd can still be in use by 
> the
>112 * previous function call. For multi-cpu calls its even more 
> interesting
>113 * as we'll have to ensure no other cpu is observing our csd.
>114 */
>115static __always_inline void csd_lock_wait(call_single_data_t 
> *csd)
>116{
>117int bug_id = 0;
>118int cpu;
>119call_single_data_t *cpu_cur_csd;
>120u64 ts0, ts1, ts2, ts_delta;
>121
>  > 122ts1 = ts0 = sched_clock() / 1000 / 1000;
>123for (;;) {
>124unsigned long flags = READ_ONCE(csd->flags);
>125
>126if (!(flags & CSD_FLAG_LOCK))
>127break;
>  > 128ts2 = sched_clock() / 1000 / 1000;
>129ts_delta = ts2 - ts1;
>130if (unlikely(ts_delta > CSD_LOCK_TIMEOUT)) {
>131bug_id = 
> atomic_inc_return(&csd_bug_count);
>132cpu = csd->cpu;
>133smp_mb(); // No stale cur_csd values!
>134cpu_cur_csd = per_cpu(cur_csd, cpu);
>135smp_mb(); // No refetching cur_csd 
> values!
>  > 136printk("csd: Detected non-responsive 
> CSD lock (#%d) on CPU#%d, waiting %Ld.%03Ld secs for CPU#%02d %pf(%ps), 
> currently %s.\n",
>137   bug_id, raw_smp_processor_id(),
>138   ts_delta/1000ULL, ts_delta % 
> 1000ULL, cpu,
>139   csd->func, csd->info,
>140   !cpu_cur_csd ? "unresponsive"
>141: csd == cpu_cur_csd
>142? "handling 
> this request"
>143: "handling 
> prior request");
>144if (!trigger_single_cpu_backtrace(cpu))
>145dump_cpu_task(cpu);
>146if (!cpu_cur_csd) {
>147printk("csd: Re-sending CSD 
> lock (#%d) IPI from CPU#%02d to CPU#%02d\n", bug_id, raw_smp_processor_id(), 
> cpu);
>148
> arch_s

Re: [PATCH 5.7 00/14] 5.7.1-rc1 review

2020-06-05 Thread Shuah Khan

On 6/5/20 8:14 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 5.7.1 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun, 07 Jun 2020 13:54:56 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.7.1-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-5.7.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah



Re: [PATCH] shmem, memcg: enable memcg aware shrinker

2020-06-05 Thread Yang Shi
On Thu, Jun 4, 2020 at 1:17 AM Greg Thelen  wrote:
>
> Yang Shi  wrote:
>
> > On Sun, May 31, 2020 at 8:22 PM Greg Thelen  wrote:
> >>
> >> Since v4.19 commit b0dedc49a2da ("mm/vmscan.c: iterate only over charged
> >> shrinkers during memcg shrink_slab()") a memcg aware shrinker is only
> >> called when the per-memcg per-node shrinker_map indicates that the
> >> shrinker may have objects to release to the memcg and node.
> >>
> >> shmem_unused_huge_count and shmem_unused_huge_scan support the per-tmpfs
> >> shrinker which advertises per memcg and numa awareness.  The shmem
> >> shrinker releases memory by splitting hugepages that extend beyond
> >> i_size.
> >>
> >> Shmem does not currently set bits in shrinker_map.  So, starting with
> >> b0dedc49a2da, memcg reclaim avoids calling the shmem shrinker under
> >> pressure.  This leads to undeserved memcg OOM kills.
> >> Example that reliably sees memcg OOM kill in unpatched kernel:
> >>   FS=/tmp/fs
> >>   CONTAINER=/cgroup/memory/tmpfs_shrinker
> >>   mkdir -p $FS
> >>   mount -t tmpfs -o huge=always nodev $FS
> >>   # Create 1000 MB container, which shouldn't suffer OOM.
> >>   mkdir $CONTAINER
> >>   echo 1000M > $CONTAINER/memory.limit_in_bytes
> >>   echo $BASHPID >> $CONTAINER/cgroup.procs
> >>   # Create 4000 files.  Ideally each file uses 4k data page + a little
> >>   # metadata.  Assume 8k total per-file, 32MB (4000*8k) should easily
> >>   # fit within container's 1000 MB.  But if data pages use 2MB
> >>   # hugepages (due to aggressive huge=always) then files consume 8GB,
> >>   # which hits memcg 1000 MB limit.
> >>   for i in {1..4000}; do
> >> echo . > $FS/$i
> >>   done
> >
> > It looks all the inodes which have tail THP beyond i_size are on one
> > single list, then the shrinker actually just splits the first
> > nr_to_scan inodes. But since the list is not memcg aware, so it seems
> > it may split the THPs which are not charged to the victim memcg and
> > the victim memcg still may suffer from pre-mature oom, right?
>
> Correct.  shmem_unused_huge_shrink() is not memcg aware.  In response to
> memcg pressure it will split the post-i_size tails of nr_to_scan tmpfs
> inodes regardless of if they're charged to the under-pressure memcg.
> do_shrink_slab() looks like it'll repeatedly call
> shmem_unused_huge_shrink().  So it will split tails of many inodes.  So
> I think it'll avoid the oom by over shrinking.  This is not ideal.  But
> it seems better than undeserved oom kill.
>
> I think the solution (as Kirill Tkhai suggested) a memcg-aware index
> would solve both:
> 1) avoid premature oom by registering shrinker to responding to memcg
>pressure
> 2) avoid shrinking/splitting inodes unrelated to the under-pressure
>memcg

I do agree with Kirill. Using list_lru sounds optimal. But it looks
like the memcg index is tricky. The index of the memcg which the
beyond-i_size THP is charged to should be used, instead of the inode's
memcg, which may be charged to a different memcg.
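
If the list_lru route is taken, the setup might look roughly like this
(hypothetical names; per the above, entries would have to be keyed by the
memcg the tail pages are charged to, not the inode's memcg):

#include <linux/list_lru.h>

static struct list_lru shmem_huge_list;

static int __init shmem_huge_lru_init(void)
{
	/* A memcg-aware list_lru keeps one internal list per (node, memcg)
	 * pair, so a shrinker walks only objects charged to the memcg that
	 * is actually under pressure. */
	return list_lru_init_memcg(&shmem_huge_list, NULL);
}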

>
> I can certainly look into that (thanks Kirill for the pointers).  In the
> short term I'm still interested in avoiding premature OOMs with the
> original thread (i.e. restore pre-4.19 behavior to shmem shrinker for
> memcg pressure).  I plan to test and repost v2.


Re: [PATCH v2] spi: bcm2835: Enable shared interrupt support

2020-06-05 Thread Florian Fainelli



On 6/5/2020 7:41 AM, Robin Murphy wrote:
> On 2020-06-05 14:46, Robin Murphy wrote:
>> On 2020-06-05 14:20, Mark Brown wrote:
>>> On Fri, Jun 05, 2020 at 12:34:36PM +0100, Robin Murphy wrote:
 On 2020-06-04 22:28, Florian Fainelli wrote:
>>>
> For the BCM2835 case which is deemed performance critical, we would
> like
> to continue using an interrupt handler which does not have the extra
> comparison on BCM2835_SPI_CS_INTR.
>>>
 FWIW, if I'm reading the patch correctly, then with sensible codegen
 that
 "overhead" should amount to a bit test on a live register plus a
 not-taken
 conditional branch - according to the 1176 TRM that should add up to a
 whopping 2 cycles. If that's really significant then I'd have to wonder
 whether you want to be at the mercy of the whole generic IRQ stack
 at all,
 and should perhaps consider using FIQ instead.
>>>
>>> Yes, and indeed the compiler does seem to manage that.  It *is* non-zero
>>> overhead though.
>>
>> True, but so's the existing level of pointer-chasing indirection that
>> with some straightforward refactoring could be taken right out of the
>> critical path and confined to just the conditional complete() call.
>> That's the kind of thing leaving me unconvinced that this is code
>> where every single cycle counts ;)
> 
> Ha, and in fact having checked a build out of curiosity, this patch
> as-is actually stands to make things considerably worse. At least with
> GCC 8.3 and bcm2835_defconfig, bcm2835_spi_interrupt_common() doesn't
> get inlined, which means bcm2835_spi_interrupt() pushes/pops a stack
> frame and makes an out-of-line call to bcm2835_spi_interrupt_common(),
> resulting in massively *more* work than the extra two instructions of
> simply inlining the test.
> 
> So yes, the overhead of inlining the test vs. the alternative is indeed
> non-zero. It's just also negative :D

Is it reliable across compiler versions if we use __always_inline?

The only other alternative that I can think of is using a static key to
eliminate the test for the single controller case. This feels highly
over engineered, but if that proves more reliable and gets everybody
their cookie, why not.

Lukas, do you have any way to test with the conditional being present
that the performance or latency does not suffer so much that it becomes
unacceptable for your use cases?
-- 
Florian


Re: [RFC] perf/core: allow ftrace for functions in kernel/event/core.c

2020-06-05 Thread Steven Rostedt
On Fri, 5 Jun 2020 21:58:48 +
Song Liu  wrote:

> 
> How does this work in your tests? 

I started it, but got distracted by other work. It did not crash with
the little testing I did do. I wanted to also look at my patch that
adds tracing to the ftrace directory too.

I'll try to remember to address this some more next week. Feel free to
ping me again then.

-- Steve



Re: [RFC][PATCH] slimbus: core: Set fwnode for a device when setting of_node

2020-06-05 Thread John Stultz
On Fri, Jun 5, 2020 at 2:44 PM Saravana Kannan  wrote:
> On Fri, Jun 5, 2020 at 2:19 PM John Stultz  wrote:
> >
> > From: Saravana Kannan 
> >
> > When setting the of_node for a newly created device, also set the
> > fwnode. This allows fw_devlink to work for slimbus devices.
> >
> > Cc: Srinivas Kandagatla 
> > Cc: alsa-de...@alsa-project.org
> > Signed-off-by: Saravana Kannan 
> > Signed-off-by: John Stultz 
>
> I thought Srinivas already picked this up and sent it to Greg.
> https://lore.kernel.org/lkml/20200511151334.362-1-srinivas.kandaga...@linaro.org/
>

Oh! I'm sorry, I missed that! That's great to hear!

thanks
-john


Re: [RFC] perf/core: allow ftrace for functions in kernel/event/core.c

2020-06-05 Thread Song Liu
Hi Steven,

> On May 26, 2020, at 3:04 PM, Steven Rostedt  wrote:
> 
> On Tue, 26 May 2020 23:54:15 +0200
> Peter Zijlstra  wrote:
> 
>> On Tue, May 26, 2020 at 09:46:29PM +, Song Liu wrote:
>>> 
>>> 
 On May 26, 2020, at 2:39 PM, Peter Zijlstra  wrote:
 
 On Tue, May 26, 2020 at 02:28:26PM -0700, Song Liu wrote:  
> It is useful to trace functions in kernel/event/core.c. Allow ftrace for
> them by removing $(CC_FLAGS_FTRACE) from Makefile.  
 
 Did you try using the ftrace event with perf with this on?  
>>> 
>>> I have tried a few things, like 
>>> 
>>>  perf stat -e probe:perf_read -I 1000
>>>  perf record -e probe:__x64_sys_perf_event_open -aR
>>> 
>>> They all work fine. 
>>> 
>>> Do you have some tricky functions that we should double check?  
>> 
>> I've no idea what probe: does. iirc there's something like
>> ftrace:function that is like regular function tracing.
>> 
>> At some point using that made the kernel really sick due to recursion
>> between ftrace and perf. Quite possibly that's been fixed, dunno.
> 
> In the early days there was a lot of issues with recursions, but I added a
> lot of recursion protection since then. I'll give this patch a spin and see
> if I can make it crash.

How does this work in your tests? 

Thanks,
Song



Re: [PATCH] iomap: Handle I/O errors gracefully in page_mkwrite

2020-06-05 Thread Dave Chinner
On Fri, Jun 05, 2020 at 05:48:26AM -0700, Matthew Wilcox wrote:
> On Fri, Jun 05, 2020 at 01:07:58PM +1000, Dave Chinner wrote:
> > On Thu, Jun 04, 2020 at 07:24:51PM -0700, Matthew Wilcox wrote:
> > > On Fri, Jun 05, 2020 at 10:31:59AM +1000, Dave Chinner wrote:
> > > > On Thu, Jun 04, 2020 at 04:50:50PM -0700, Matthew Wilcox wrote:
> > > > > > Sure, but that's not really what I was asking: why isn't this
> > > > > > !uptodate state caught before the page fault code calls
> > > > > > ->page_mkwrite? The page fault code has a reference to the page,
> > > > > > after all, and in a couple of paths it even has the page locked.
> > > > > 
> > > > > If there's already a PTE present, then the page fault code doesn't
> > > > > check the uptodate bit.  Here's the path I'm looking at:
> > > > > 
> > > > > do_wp_page()
> > > > >  -> vm_normal_page()
> > > > >  -> wp_page_shared()
> > > > >  -> do_page_mkwrite()
> > > > > 
> > > > > I don't see anything in there that checked Uptodate.
> > > > 
> > > > Yup, exactly the code I was looking at when I asked this question.
> > > > The kernel has invalidated the contents of a page, yet we still have
> > > > it mapped into userspace as containing valid contents, and we don't
> > > > check it at all when userspace generates a protection fault on the
> > > > page?
> > > 
> > > Right.  The iomap error path only clears PageUptodate.  It doesn't go
> > > to the effort of unmapping the page from userspace, so userspace has a
> > > read-only view of a !Uptodate page.
> > 
> > Hmmm - did you miss the ->discard_page() callout just before we call
> > ClearPageUptodate() on error in iomap_writepage_map()? That results
> > in XFS calling iomap_invalidatepage() on the page, which 
> 
> ... I don't think that's the interesting path.  I mean, that's
> the submission path, and usually we discover errors in the completion
> path, not the submission path.

Where in the iomap write IO completion path do we call
ClearPageUptodate()?

I mean, it ends up in iomap_finish_page_writeback(), which does:

static void
iomap_finish_page_writeback(struct inode *inode, struct page *page,
int error)
{
struct iomap_page *iop = to_iomap_page(page);

if (error) {
SetPageError(page);
mapping_set_error(inode->i_mapping, -EIO);
}

WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);
 
if (!iop || atomic_dec_and_test(&iop->write_count))
end_page_writeback(page);
}

I mean, we call SetPageError() and tag the mapping, but we most
certainly don't clear the PageUptodate state here.

So AFAICT, the -only- places that iomap clears the uptodate state on
a page is on -read- errors and on write submission failures.

If it's a read error, the page fault should already be failing. If
it's on submission, we invalidate it as we currently do and punch
out the user mappings, and then when userspace refaults it can be
killed by a read IO failure.

But I just don't see how this problem results from errors reported
at IO completion.

This comes back to my original, underlying worry about the fragility
of the page fault path: the page fault path is not even checking for
PageError during faults, and I'm betting that almost no
->page_mkwrite implementation is checking it, either

> > It's not clear to me that we can actually unmap those pages safely
> > in a race free manner from this code - can we actually do that from
> > the page writeback path?
> 
> I don't see why it can't be done from the submission path.
> unmap_mapping_range() calls i_mmap_lock_write(), which is
> down_write(i_mmap_rwsem) in drag.  There might be a lock ordering
> issue there, although lockdep should find it pretty quickly.
> 
> The bigger problem is the completion path.  We're in softirq context,
> so that will have to punt to a thread that can take mutexes.

Punt to workqueue if we aren't already in a workqueue context -
for a lot of writes on XFS we already will be running completion in
a workqueue context
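
(For concreteness, the submission-side "punch out the user mappings" being
discussed would be on the order of the following, next to the existing
->discard_page()/ClearPageUptodate() calls; a sketch only:)

	/* Zap user PTEs for this page so the next access refaults and
	 * can then be failed cleanly by the read path. */
	unmap_mapping_range(inode->i_mapping,
			    (loff_t)page->index << PAGE_SHIFT,
			    PAGE_SIZE, 1);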

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


[PATCH] virtio_net: Unregister and re-register xdp_rxq across freeze/restore

2020-06-05 Thread Sean Christopherson
Unregister each queue's xdp_rxq during freeze, and re-register the new
instance during restore.  All queues are released during free and
recreated during restore, i.e. the pre-freeze xdp_rxq will be lost.

The bug is detected by WARNs in xdp_rxq_info_unreg() and
xdp_rxq_info_unreg_mem_model() that fire after a suspend/resume cycle as
virtnet_close() attempts to unregister an uninitialized xdp_rxq object.

  [ cut here ]
  Driver BUG
  WARNING: CPU: 0 PID: 880 at net/core/xdp.c:163 xdp_rxq_info_unreg+0x48/0x50
  Modules linked in:
  CPU: 0 PID: 880 Comm: ip Not tainted 5.7.0-rc5+ #80
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:xdp_rxq_info_unreg+0x48/0x50
  Code: <0f> 0b eb ca 0f 1f 40 00 0f 1f 44 00 00 53 48 83 ec 10 8b 47 0c 83
  RSP: 0018:c91ab540 EFLAGS: 00010286
  RAX:  RBX: 88827f83ac80 RCX: 
  RDX: 000a RSI: 8253bc2a RDI: 825397ec
  RBP:  R08: 8253bc20 R09: 000a
  R10: c91ab548 R11: 0370 R12: 88817a89c000
  R13:  R14: c91abbc8 R15: 0001
  FS:  7f48b70e70c0() GS:88817bc0() knlGS:
  CS:  0010 DS:  ES:  CR0: 80050033
  CR2: 7f48b6634950 CR3: 000277f1d002 CR4: 00160eb0
  Call Trace:
   virtnet_close+0x6a/0xb0
   __dev_close_many+0x91/0x100
   __dev_change_flags+0xc1/0x1c0
   dev_change_flags+0x23/0x60
   do_setlink+0x350/0xdf0
   __rtnl_newlink+0x553/0x860
   rtnl_newlink+0x43/0x60
   rtnetlink_rcv_msg+0x289/0x340
   netlink_rcv_skb+0xd1/0x110
   netlink_unicast+0x203/0x310
   netlink_sendmsg+0x32b/0x460
   sock_sendmsg+0x5b/0x60
   sys_sendmsg+0x23e/0x260
   ___sys_sendmsg+0x88/0xd0
   __sys_sendmsg+0x63/0xa0
   do_syscall_64+0x4c/0x170
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  [ cut here ]

Cc: Jesper Dangaard Brouer 
Fixes: 754b8a21a96d5 ("virtio_net: setup xdp_rxq_info")
Signed-off-by: Sean Christopherson 
---

Disclaimer: I am not remotely confident that this patch is 100% correct
or complete, my VirtIO knowledge is poor and my networking knowledge is
downright abysmal.

 drivers/net/virtio_net.c | 37 +
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ba38765dc490..61055be3615e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1469,6 +1469,21 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
return received;
 }
 
+static int virtnet_reg_xdp(struct xdp_rxq_info *xdp_rxq,
+  struct net_device *dev, u32 queue_index)
+{
+   int err;
+
+   err = xdp_rxq_info_reg(xdp_rxq, dev, queue_index);
+   if (err < 0)
+   return err;
+
+   err = xdp_rxq_info_reg_mem_model(xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL);
+   if (err < 0)
+   xdp_rxq_info_unreg(xdp_rxq);
+   return err;
+}
+
 static int virtnet_open(struct net_device *dev)
 {
struct virtnet_info *vi = netdev_priv(dev);
@@ -1480,17 +1495,10 @@ static int virtnet_open(struct net_device *dev)
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
 
-   err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
+   err = virtnet_reg_xdp(&vi->rq[i].xdp_rxq, dev, i);
if (err < 0)
return err;
 
-   err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
-MEM_TYPE_PAGE_SHARED, NULL);
-   if (err < 0) {
-   xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
-   return err;
-   }
-
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
}
@@ -2306,6 +2314,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 
if (netif_running(vi->dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
+   xdp_rxq_info_unreg(>rq[i].xdp_rxq);
napi_disable(>rq[i].napi);
virtnet_napi_tx_disable(>sq[i].napi);
}
@@ -2313,6 +2322,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 }
 
 static int init_vqs(struct virtnet_info *vi);
+static void virtnet_del_vqs(struct virtnet_info *vi);
+static void free_receive_page_frags(struct virtnet_info *vi);
 
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
@@ -2331,6 +2342,10 @@ static int virtnet_restore_up(struct virtio_device *vdev)
schedule_delayed_work(&vi->refill, 0);
 
for (i = 0; i < vi->max_queue_pairs; i++) {
+   err = virtnet_reg_xdp(&vi->rq[i].xdp_rxq, vi->dev, i);
+  

RE: slub freelist issue / BUG: unable to handle page fault for address: 000000003ffe0018

2020-06-05 Thread Kaneda, Erik


> -Original Message-
> From: Vegard Nossum 
> Sent: Friday, June 5, 2020 7:45 AM
> To: Vlastimil Babka ; Rafael J. Wysocki
> ; Moore, Robert ; Kaneda,
> Erik 
> Cc: Kees Cook ; Wysocki, Rafael J
> ; Christoph Lameter ; Andrew
> Morton ; Marco Elver ;
> Waiman Long ; LKML <ker...@vger.kernel.org>; Linux MM ; ACPI Devel
> Maling List ; Len Brown ;
> Steven Rostedt 
> Subject: Re: slub freelist issue / BUG: unable to handle page fault for
> address: 3ffe0018
> 
> On 2020-06-05 16:08, Vlastimil Babka wrote:
> > On 6/5/20 3:12 PM, Rafael J. Wysocki wrote:
> >> On Fri, Jun 5, 2020 at 2:48 PM Vegard Nossum
>  wrote:
> >>>
> >>> On 2020-06-05 11:36, Vegard Nossum wrote:
> 
>  On 2020-06-05 11:11, Vlastimil Babka wrote:
> > On 6/4/20 8:46 PM, Vlastimil Babka wrote:
> >> On 6/4/20 7:57 PM, Kees Cook wrote:
> >>> On Thu, Jun 04, 2020 at 07:20:18PM +0200, Vegard Nossum wrote:
>  On 2020-06-04 19:18, Vlastimil Babka wrote:
> > On 6/4/20 7:14 PM, Vegard Nossum wrote:
> >>
> >> Hi all,
> >>
> >> I ran into a boot problem with latest linus/master
> >> (6929f71e46bdddbf1c4d67c2728648176c67c555) that manifests
> like this:
> >
> > Hi, what's the .config you use?
> 
>  Pretty much x86_64 defconfig minus a few options (PCI, USB,
>  ...)
> >>>
> >>> Oh yes indeed. I immediately crash in the same way with this config.
> >>> I'll
> >>> start digging...
> >>>
> >>> (defconfig finishes boot)
> >>
> >> This is funny, booting with slub_debug=F results in:
> >> I'm not sure if it's ACPI or ftrace wrong here, but looks like
> >> the changed free pointer offset merely exposes a bug in something
> >> else.
> >
> > So, with Kees' patch reverted, booting with slub_debug=F (or even
> > more specific slub_debug=F,ftrace_event_field) also hits this bug
> > below. I wanted to bisect it, but v5.7 was also bad, and also
> > v5.6. Didn't try further in history. So it's not new at all, and
> > likely very specific to your config+QEMU? (and related to the ACPI
> > error messages that precede it?).
> 
>  I see it too, but not on v5.0. I can bisect it.
> >>>
> >>> commit 67a72420a326b45514deb3f212085fb2cd1595b5
> >>> Author: Bob Moore 
> >>> Date:   Fri Aug 16 14:43:21 2019 -0700
> >>>
> >>>   ACPICA: Increase total number of possible Owner IDs
> >>>
> >>>   ACPICA commit 1f1652dad88b9d767767bc1f7eb4f7d99e6b5324
> >>>
> >>>   From 255 to 4095 possible IDs.
> >>>
> >>>   Link: https://github.com/acpica/acpica/commit/1f1652da
> >>>   Reported-by: Hedi Berriche 
> >>>   Signed-off-by: Bob Moore 
> >>>   Signed-off-by: Erik Schmauss 
> >>>   Signed-off-by: Rafael J. Wysocki 
> >>
> >> Bob, Erik, did we miss something in that patch?
> >
> > Maybe the patch just changes layout in a way that exposes the bug.
> >
> > Anyway the "ftrace_event_field" cache is not really involved, this is
> > just because of slab merging. After adding "slub_nomerge" to
> > "slub_debug=F", it starts making more sense, as the cache becomes
> > Acpi-Namespace
> >
> > [0.140408] [ cut here ]
> > [0.140837] cache_from_obj: Wrong slab cache. Acpi-Namespace but
> object is from kmalloc-64
> > [0.141406] WARNING: CPU: 0 PID: 1 at mm/slab.h:524
> kmem_cache_free+0x1d3/0x250
> > [0.142105] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.7.0+ #45
> > [0.142393] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014
> > [0.142393] RIP: 0010:kmem_cache_free+0x1d3/0x250
> > [0.142393] Code: 18 4d 85 ed 0f 84 10 ff ff ff 4c 39 ed 74 2f 49 8b 4d 
> > 58 48
> 8b 55 58 48 c7 c6 10 47 a1 ac 48 c7 c7 00 c2 b0 ac e8 b1 cc eb ff <0f> 0b 48 
> 89 de
> 4c 89 ef e8 10 d7 ff ff 48 8b 15 59 36 9b 00 4c 89
> > [0.142393] RSP: 0018:b39cc0013dc0 EFLAGS: 00010282
> > [0.142393] RAX:  RBX: 937287409e00 RCX:
> 
> > [0.142393] RDX: 0001 RSI: 0092 RDI:
> acfdd32c
> > [0.142393] RBP: 93728742ef00 R08: b39cc0013c7d R09:
> 00fc
> > [0.142393] R10: b39cc0013c78 R11: b39cc0013c7d R12:
> 937307409e00
> > [0.142393] R13: 937287401d00 R14:  R15:
> 
> > [0.142393] FS:  () GS:937287a0()
> knlGS:
> > [0.142393] CS:  0010 DS:  ES:  CR0: 80050033
> > [0.142393] CR2:  CR3: 03a0a000 CR4:
> 003406f0
> > [0.142393] Call Trace:
> > [0.142393]  acpi_os_release_object+0x5/0x10
> > [0.142393]  acpi_ns_delete_children+0x46/0x59
> > [0.142393]  acpi_ns_delete_namespace_subtree+0x5c/0x79
> > [0.142393]  ? acpi_sleep_proc_init+0x1f/0x1f
> > [0.142393]  acpi_ns_terminate+0xc/0x31
> > [

[PATCH 00/21] KVM: Cleanup and unify kvm_mmu_memory_cache usage

2020-06-05 Thread Sean Christopherson
This series resurrects Christoffer Dall's series[1] to provide a common
MMU memory cache implementation that can be shared by x86, arm64 and MIPS.

It also picks up a suggested change from Ben Gardon[2] to clear shadow
page tables during initial allocation so as to avoid clearing entire
pages while holding mmu_lock.

The front half of the patches do house cleaning on x86's memory cache
implementation in preparation for moving it to common code, along with a
fair bit of cleanup on the usage.  The middle chunk moves the patches to
common KVM, and the last two chunks convert arm64 and MIPS to the common
implementation.

Cleanup aside, the notable difference from Christoffer and Ben's proposed
patches is to make __GFP_ZERO optional, e.g. to allow x86 to skip zeroing
for its gfns array and to provide line of sight for my
cannot-yet-be-discussed-in-detail use case for non-zero initialized shadow
page tables[3].
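
FWIW, the end state on the x86 side looks roughly like the below (field and
cache names taken from patches 10 and 11 in this series; shown purely as
illustration, not as a new hunk):

    /* Zeroing becomes a per-cache property instead of a hardcoded zalloc. */
    vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
    /* mmu_gfn_array_cache.gfp_zero is left 0, so gfn arrays skip zeroing. */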

Tested on x86 only, no testing whatsoever on arm64 or MIPS.

[1] https://lkml.kernel.org/r/20191105110357.8607-1-christoffer.d...@arm.com
[2] https://lkml.kernel.org/r/20190926231824.149014-4-bgar...@google.com
[3] https://lkml.kernel.org/r/20191127180731.gc16...@linux.intel.com

Sean Christopherson (21):
  KVM: x86/mmu: Track the associated kmem_cache in the MMU caches
  KVM: x86/mmu: Consolidate "page" variant of memory cache helpers
  KVM: x86/mmu: Use consistent "mc" name for kvm_mmu_memory_cache locals
  KVM: x86/mmu: Remove superfluous gotos from mmu_topup_memory_caches()
  KVM: x86/mmu: Try to avoid crashing KVM if a MMU memory cache is empty
  KVM: x86/mmu: Move fast_page_fault() call above
mmu_topup_memory_caches()
  KVM: x86/mmu: Topup memory caches after walking GVA->GPA
  KVM: x86/mmu: Clean up the gorilla math in mmu_topup_memory_caches()
  KVM: x86/mmu: Separate the memory caches for shadow pages and gfn
arrays
  KVM: x86/mmu: Make __GFP_ZERO a property of the memory cache
  KVM: x86/mmu: Zero allocate shadow pages (outside of mmu_lock)
  KVM: x86/mmu: Skip filling the gfn cache for guaranteed direct MMU
topups
  KVM: x86/mmu: Prepend "kvm_" to memory cache helpers that will be
global
  KVM: Move x86's version of struct kvm_mmu_memory_cache to common code
  KVM: Move x86's MMU memory cache helpers to common KVM code
  KVM: arm64: Drop @max param from mmu_topup_memory_cache()
  KVM: arm64: Use common code's approach for __GFP_ZERO with memory
caches
  KVM: arm64: Use common KVM implementation of MMU memory caches
  KVM: MIPS: Drop @max param from mmu_topup_memory_cache()
  KVM: MIPS: Account pages used for GPA page tables
  KVM: MIPS: Use common KVM implementation of MMU memory caches

 arch/arm64/include/asm/kvm_host.h|  11 ---
 arch/arm64/include/asm/kvm_types.h   |   8 ++
 arch/arm64/kvm/arm.c |   2 +
 arch/arm64/kvm/mmu.c |  54 +++
 arch/mips/include/asm/kvm_host.h |  11 ---
 arch/mips/include/asm/kvm_types.h|   7 ++
 arch/mips/kvm/mmu.c  |  44 ++---
 arch/powerpc/include/asm/kvm_types.h |   5 ++
 arch/s390/include/asm/kvm_types.h|   5 ++
 arch/x86/include/asm/kvm_host.h  |  14 +--
 arch/x86/include/asm/kvm_types.h |   7 ++
 arch/x86/kvm/mmu/mmu.c   | 129 +--
 arch/x86/kvm/mmu/paging_tmpl.h   |  10 +--
 include/linux/kvm_host.h |   7 ++
 include/linux/kvm_types.h|  19 
 virt/kvm/kvm_main.c  |  55 
 16 files changed, 178 insertions(+), 210 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_types.h
 create mode 100644 arch/mips/include/asm/kvm_types.h
 create mode 100644 arch/powerpc/include/asm/kvm_types.h
 create mode 100644 arch/s390/include/asm/kvm_types.h
 create mode 100644 arch/x86/include/asm/kvm_types.h

-- 
2.26.0



[PATCH 15/21] KVM: Move x86's MMU memory cache helpers to common KVM code

2020-06-05 Thread Sean Christopherson
Move x86's memory cache helpers to common KVM code so that they can be
reused by arm64 and MIPS in future patches.

Suggested-by: Christoffer Dall 
Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c   | 53 --
 include/linux/kvm_host.h |  7 +
 virt/kvm/kvm_main.c  | 55 
 3 files changed, 62 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b85d3e8e8403..a627437f73fd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1060,47 +1060,6 @@ static void walk_shadow_page_lockless_end(struct 
kvm_vcpu *vcpu)
local_irq_enable();
 }
 
-static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
-  gfp_t gfp_flags)
-{
-   gfp_flags |= mc->gfp_zero;
-
-   if (mc->kmem_cache)
-   return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
-   else
-   return (void *)__get_free_page(gfp_flags);
-}
-
-static int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
-{
-   void *obj;
-
-   if (mc->nobjs >= min)
-   return 0;
-   while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
-   obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
-   if (!obj)
-   return mc->nobjs >= min ? 0 : -ENOMEM;
-   mc->objects[mc->nobjs++] = obj;
-   }
-   return 0;
-}
-
-static int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache 
*mc)
-{
-   return mc->nobjs;
-}
-
-static void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
-   while (mc->nobjs) {
-   if (mc->kmem_cache)
-   kmem_cache_free(mc->kmem_cache, 
mc->objects[--mc->nobjs]);
-   else
-   free_page((unsigned long)mc->objects[--mc->nobjs]);
-   }
-}
-
 static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 {
int r;
@@ -1132,18 +1091,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
-static void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
-{
-   void *p;
-
-   if (WARN_ON(!mc->nobjs))
-   p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
-   else
-   p = mc->objects[--mc->nobjs];
-   BUG_ON(!p);
-   return p;
-}
-
 static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
 {
return kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d38d6b9c24be..802b9e2306f0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -815,6 +815,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool 
usermode_vcpu_not_eligible);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 
+#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
+int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
+int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
+void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
+void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
+#endif
+
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 struct kvm_vcpu *except,
 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4db151f6101e..fead5f1d5594 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -342,6 +342,61 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
+#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
+static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
+  gfp_t gfp_flags)
+{
+   gfp_flags |= mc->gfp_zero;
+
+   if (mc->kmem_cache)
+   return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
+   else
+   return (void *)__get_free_page(gfp_flags);
+}
+
+int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+{
+   void *obj;
+
+   if (mc->nobjs >= min)
+   return 0;
+   while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
+   obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
+   if (!obj)
+   return mc->nobjs >= min ? 0 : -ENOMEM;
+   mc->objects[mc->nobjs++] = obj;
+   }
+   return 0;
+}
+
+int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc)
+{
+   return mc->nobjs;
+}
+
+void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+   while (mc->nobjs) {
+   if (mc->kmem_cache)
+   kmem_cache_free(mc->kmem_cache,
+   mc->objects[--mc->nobjs]);
+   else
+   free_page((unsigned long)mc->objects[--mc->nobjs]);
+   }
+}

Re: [RFC][PATCH] slimbus: core: Set fwnode for a device when setting of_node

2020-06-05 Thread Saravana Kannan
Hi John,

On Fri, Jun 5, 2020 at 2:19 PM John Stultz  wrote:
>
> From: Saravana Kannan 
>
> When setting the of_node for a newly created device, also set the
> fwnode. This allows fw_devlink to work for slimbus devices.
>
> Cc: Srinivas Kandagatla 
> Cc: alsa-de...@alsa-project.org
> Signed-off-by: Saravana Kannan 
> Signed-off-by: John Stultz 

I thought Srinivas already picked this up and sent it to Greg.
https://lore.kernel.org/lkml/20200511151334.362-1-srinivas.kandaga...@linaro.org/

Am I missing something?

-Saravana


[PATCH 01/21] KVM: x86/mmu: Track the associated kmem_cache in the MMU caches

2020-06-05 Thread Sean Christopherson
Track the kmem_cache used for non-page KVM MMU memory caches instead of
passing in the associated kmem_cache when filling the cache.  This will
allow consolidating code and other cleanups.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c  | 24 +++-
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1da5858501ca..16347b050754 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -251,6 +251,7 @@ struct kvm_kernel_irq_routing_entry;
  */
 struct kvm_mmu_memory_cache {
int nobjs;
+   struct kmem_cache *kmem_cache;
void *objects[KVM_NR_MEM_OBJS];
 };
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fdd05c233308..0830c195c9ed 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1060,15 +1060,14 @@ static void walk_shadow_page_lockless_end(struct 
kvm_vcpu *vcpu)
local_irq_enable();
 }
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- struct kmem_cache *base_cache, int min)
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
 {
void *obj;
 
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
+   obj = kmem_cache_zalloc(cache->kmem_cache, GFP_KERNEL_ACCOUNT);
if (!obj)
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@@ -1081,11 +1080,10 @@ static int mmu_memory_cache_free_objects(struct 
kvm_mmu_memory_cache *cache)
return cache->nobjs;
 }
 
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
- struct kmem_cache *cache)
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 {
while (mc->nobjs)
-   kmem_cache_free(cache, mc->objects[--mc->nobjs]);
+   kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]);
 }
 
 static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
@@ -1115,25 +1113,22 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu)
int r;
 
r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
-  pte_list_desc_cache, 8 + PTE_PREFETCH_NUM);
+  8 + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
if (r)
goto out;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
-  mmu_page_header_cache, 4);
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 4);
 out:
return r;
 }
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-   mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
-   pte_list_desc_cache);
+   mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
	mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
-   mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
-   mmu_page_header_cache);
+   mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
@@ -5684,6 +5679,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
uint i;
int ret;
 
+   vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
+   vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
+
vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 
-- 
2.26.0



[PATCH 10/21] KVM: x86/mmu: Make __GFP_ZERO a property of the memory cache

2020-06-05 Thread Sean Christopherson
Add a gfp_zero flag to 'struct kvm_mmu_memory_cache' and use it to
control __GFP_ZERO instead of hardcoding a call to kmem_cache_zalloc().
A future patch needs such a flag for the __get_free_page() path, as
gfn arrays do not need/want the allocator to zero the memory.  Convert
the kmem_cache paths to __GFP_ZERO now so as to avoid a weird and
inconsistent API in the future.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/mmu/mmu.c  | 7 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e7a427547557..fb99e6776e27 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -251,6 +251,7 @@ struct kvm_kernel_irq_routing_entry;
  */
 struct kvm_mmu_memory_cache {
int nobjs;
+   gfp_t gfp_zero;
struct kmem_cache *kmem_cache;
void *objects[KVM_NR_MEM_OBJS];
 };
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d245acece3cd..6b0ec9060786 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1063,8 +1063,10 @@ static void walk_shadow_page_lockless_end(struct 
kvm_vcpu *vcpu)
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
   gfp_t gfp_flags)
 {
+   gfp_flags |= mc->gfp_zero;
+
if (mc->kmem_cache)
-   return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
+   return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
else
return (void *)__get_free_page(gfp_flags);
 }
@@ -5680,7 +5682,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
int ret;
 
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
+   vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;
+
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
+   vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
 
vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
-- 
2.26.0



[PATCH 16/21] KVM: arm64: Drop @max param from mmu_topup_memory_cache()

2020-06-05 Thread Sean Christopherson
Replace the @max param in mmu_topup_memory_cache() and instead use
ARRAY_SIZE() to terminate the loop to fill the cache.  This removes a
BUG_ON() and sets the stage for moving arm64 to the common memory cache
implementation.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/arm64/kvm/mmu.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index a1f6bc70c4e4..9398b66f8a87 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -124,15 +124,13 @@ static void stage2_dissolve_pud(struct kvm *kvm, 
phys_addr_t addr, pud_t *pudp)
put_page(virt_to_page(pudp));
 }
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- int min, int max)
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
 {
void *page;
 
-   BUG_ON(max > KVM_NR_MEM_OBJS);
if (cache->nobjs >= min)
return 0;
-   while (cache->nobjs < max) {
+   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = (void *)__get_free_page(GFP_PGTABLE_USER);
if (!page)
return -ENOMEM;
@@ -1356,8 +1354,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
guest_ipa,
pte = kvm_s2pte_mkwrite(pte);
 
ret = mmu_topup_memory_cache(&cache,
-kvm_mmu_cache_min_pages(kvm),
-KVM_NR_MEM_OBJS);
+kvm_mmu_cache_min_pages(kvm));
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
@@ -1737,8 +1734,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
up_read(&current->mm->mmap_sem);
 
/* We need minimum second+third level pages */
-   ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
-KVM_NR_MEM_OBJS);
+   ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm));
if (ret)
return ret;
 
-- 
2.26.0



[PATCH 08/21] KVM: x86/mmu: Clean up the gorilla math in mmu_topup_memory_caches()

2020-06-05 Thread Sean Christopherson
Clean up the minimums in mmu_topup_memory_caches() to document the
driving mechanisms behind the minimums.  Now that encountering an empty
cache is unlikely to trigger BUG_ON(), it is less dangerous to be more
precise when defining the minimums.

For rmaps, the logic is 1 parent PTE per level, plus a single rmap, and
prefetched rmaps.  The extra objects in the current '8 + PREFETCH'
minimum came about due to an abundance of paranoia in commit
c41ef344de212 ("KVM: MMU: increase per-vcpu rmap cache alloc size"),
i.e. it could have increased the minimum to 2 rmaps.  Furthermore, the
unexpected extra rmap case was killed off entirely by commits
f759e2b4c728c ("KVM: MMU: avoid pte_list_desc running out in
kvm_mmu_pte_write") and f5a1e9f89504f ("KVM: MMU: remove call to
kvm_mmu_pte_write from walk_addr").

For the so called page cache, replace '8' with 2*PT64_ROOT_MAX_LEVEL.
The 2x multiplier is needed because the cache is used for both shadow
pages and gfn arrays for indirect MMUs.

And finally, for page headers, replace '4' with PT64_ROOT_MAX_LEVEL.

Note, KVM now supports 5-level paging, i.e. the old minimums that used a
baseline derived from 4-level paging were technically wrong.  But, KVM
always allocates roots in a separate flow, e.g. it's impossible in the
current implementation to actually need 5 new shadow pages in a single
flow.  Use PT64_ROOT_MAX_LEVEL unmodified instead of subtracting 1, as
the direct usage is likely more intuitive to uninformed readers, and the
inflated minimum is unlikely to affect functionality in practice.
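
For reference, plugging in the current x86 constants (PT64_ROOT_MAX_LEVEL ==
5, PTE_PREFETCH_NUM == 8) gives the following minimums (illustrative
arithmetic only):

    pte_list_desc cache: 1 + 5 + 8 = 14 objects (was 8 + 8 = 16)
    page cache:              2 * 5 = 10 objects (was 8)
    page header cache:           5 =  5 objects (was 4)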

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4b4c3234d623..451e0365e5dd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1103,14 +1103,17 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu)
 {
int r;
 
+   /* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
-  8 + PTE_PREFETCH_NUM);
+  1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
if (r)
return r;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, 8);
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
+  2 * PT64_ROOT_MAX_LEVEL);
if (r)
return r;
-   return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 4);
+   return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
+ PT64_ROOT_MAX_LEVEL);
 }
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
-- 
2.26.0



[PATCH 17/21] KVM: arm64: Use common code's approach for __GFP_ZERO with memory caches

2020-06-05 Thread Sean Christopherson
Add a "gfp_zero" member to arm64's 'struct kvm_mmu_memory_cache' to make
the struct and its usage compatible with the common 'struct
kvm_mmu_memory_cache' in linux/kvm_host.h.  This will minimize code
churn when arm64 moves to the common implementation in a future patch, at
the cost of temporarily having somewhat silly code.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/arm64/include/asm/kvm_host.h | 1 +
 arch/arm64/kvm/arm.c  | 2 ++
 arch/arm64/kvm/mmu.c  | 5 +++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index abbdf9703e20..2385dede96e0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -105,6 +105,7 @@ struct kvm_arch {
  */
 struct kvm_mmu_memory_cache {
int nobjs;
+   gfp_t gfp_zero;
void *objects[KVM_NR_MEM_OBJS];
 };
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 45276ed50dd6..4c98c6b4d850 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -270,6 +270,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
+   vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
+
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 9398b66f8a87..688213ef34f0 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -131,7 +131,8 @@ static int mmu_topup_memory_cache(struct 
kvm_mmu_memory_cache *cache, int min)
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   page = (void *)__get_free_page(GFP_PGTABLE_USER);
+   page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT |
+  cache->gfp_zero);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
@@ -1342,7 +1343,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
guest_ipa,
phys_addr_t addr, end;
int ret = 0;
unsigned long pfn;
-   struct kvm_mmu_memory_cache cache = { 0, };
+   struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, };
 
end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(pa);
-- 
2.26.0



[PATCH 11/21] KVM: x86/mmu: Zero allocate shadow pages (outside of mmu_lock)

2020-06-05 Thread Sean Christopherson
Set __GFP_ZERO for the shadow page memory cache and drop the explicit
clear_page() from kvm_mmu_get_page().  This moves the cost of zeroing a
page to the allocation time of the physical page, i.e. when topping up
the memory caches, and thus avoids having to zero out an entire page
while holding mmu_lock.

Cc: Peter Feiner 
Cc: Peter Shier 
Cc: Junaid Shahid 
Cc: Jim Mattson 
Suggested-by: Ben Gardon 
Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6b0ec9060786..a8f8eebf67df 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2545,7 +2545,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
if (level > PG_LEVEL_4K && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
-   clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
 
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
@@ -5687,6 +5686,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
 
+   vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
+
vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 
-- 
2.26.0



[PATCH 03/21] KVM: x86/mmu: Use consistent "mc" name for kvm_mmu_memory_cache locals

2020-06-05 Thread Sean Christopherson
Use "mc" for local variables to shorten line lengths and provide
consistent names, which will be especially helpful when some of the
helpers are moved to common KVM code in future patches.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cbc101663a89..36c90f004ef4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1060,27 +1060,27 @@ static void walk_shadow_page_lockless_end(struct 
kvm_vcpu *vcpu)
local_irq_enable();
 }
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 {
void *obj;
 
-   if (cache->nobjs >= min)
+   if (mc->nobjs >= min)
return 0;
-   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   if (cache->kmem_cache)
-   obj = kmem_cache_zalloc(cache->kmem_cache, 
GFP_KERNEL_ACCOUNT);
+   while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
+   if (mc->kmem_cache)
+   obj = kmem_cache_zalloc(mc->kmem_cache, 
GFP_KERNEL_ACCOUNT);
else
obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!obj)
-   return cache->nobjs >= min ? 0 : -ENOMEM;
-   cache->objects[cache->nobjs++] = obj;
+   return mc->nobjs >= min ? 0 : -ENOMEM;
+   mc->objects[mc->nobjs++] = obj;
}
return 0;
 }
 
-static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache)
+static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *mc)
 {
-   return cache->nobjs;
+   return mc->nobjs;
 }
 
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
@@ -1395,10 +1395,10 @@ static struct kvm_rmap_head *gfn_to_rmap(struct kvm 
*kvm, gfn_t gfn,
 
 static bool rmap_can_add(struct kvm_vcpu *vcpu)
 {
-   struct kvm_mmu_memory_cache *cache;
+   struct kvm_mmu_memory_cache *mc;
 
-   cache = &vcpu->arch.mmu_pte_list_desc_cache;
-   return mmu_memory_cache_free_objects(cache);
+   mc = &vcpu->arch.mmu_pte_list_desc_cache;
+   return mmu_memory_cache_free_objects(mc);
 }
 
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
-- 
2.26.0



[PATCH 07/21] KVM: x86/mmu: Topup memory caches after walking GVA->GPA

2020-06-05 Thread Sean Christopherson
Topup memory caches after walking the GVA->GPA translation during a
shadow page fault, there is no need to ensure the caches are full when
walking the GVA.  As of commit f5a1e9f89504f ("KVM: MMU: remove call
to kvm_mmu_pte_write from walk_addr"), the FNAME(walk_addr) flow no
longer adds rmaps via kvm_mmu_pte_write().

This avoids allocating memory in the case that the GVA is unmapped in
the guest, and also provides a paper trail of why/when the memory caches
need to be filled.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/paging_tmpl.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 38c576495048..3de32122f601 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -791,10 +791,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 error_code,
 
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
-   r = mmu_topup_memory_caches(vcpu);
-   if (r)
-   return r;
-
/*
 * If PFEC.RSVD is set, this is a shadow page fault.
 * The bit needs to be cleared before walking guest page tables.
@@ -822,6 +818,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 error_code,
return RET_PF_EMULATE;
}
 
+   r = mmu_topup_memory_caches(vcpu);
+   if (r)
+   return r;
+
vcpu->arch.write_fault_to_shadow_pgtable = false;
 
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
-- 
2.26.0



[PATCH v8 16/16] s390/vfio-ap: handle probe/remove not due to host AP config changes

2020-06-05 Thread Tony Krowiak
AP queue devices are probed or removed for reasons other than changes
to the host AP configuration:

* Each queue device associated with a card device will get created and
  probed when the state of the AP adapter represented by the card device
  dynamically changes from standby to online.

* Each queue device associated with a card device will get removed
  when the state of the AP adapter to which the queue represented by the
  queue device is connected dynamically changes from online to standby.

* Each queue device associated with a card device will get removed
  when the type of the AP adapter to which the queue represented by the
  queue device is connected dynamically changes.

* Each queue device associated with a card device will get removed
  when the status of the queue represented by the queue device changes
  from operating to check stop.

* AP queue devices can be manually bound to or unbound from the vfio_ap
  device driver by a root user via the sysfs bind/unbind attributes of the
  driver.

In response to a queue device probe or remove that is not the result of a
change to the host's AP configuration, if a KVM guest is using the matrix
mdev to which the APQN of the queue device is assigned, the vfio_ap device
driver must respond accordingly. In an ideal world, the queue corresponding
to the queue device being probed would be hot plugged into the guest.
Likewise, the queue corresponding to the queue device being removed would
be hot unplugged from the guest. Unfortunately, the AP architecture
precludes plugging or unplugging individual queues, so let's handle
the probe or remove of an AP queue device as follows:

Handling Probe
--
There are two requirements that must be met in order to give a
guest access to the queue corresponding to the queue device being probed:

* Each APQN derived from the APID of the queue device and the APQIs of the
  domains already assigned to the guest's AP configuration must reference
  a queue device bound to the vfio_ap device driver.

* Each APQN derived from the APQI of the queue device and the APIDs of the
  adapters assigned to the guest's AP configuration must reference a queue
  device bound to the vfio_ap device driver.

If the above conditions are met, the APQN will be assigned to the guest's
AP configuration and the guest will be given access to the queue.
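
Expressed as code, the two conditions amount to a row/column scan of the
guest's APCB (a rough sketch only; the helper below is hypothetical, and the
"bound to the vfio_ap driver" test is simplified to a queue lookup):

    static bool vfio_ap_apqn_fully_backed(struct ap_matrix_mdev *m,
                                          unsigned long apid,
                                          unsigned long apqi)
    {
            unsigned long id;

            /* Every APQN derived from the new APID needs a bound queue. */
            for_each_set_bit_inv(id, m->shadow_apcb.aqm,
                                 m->shadow_apcb.aqm_max + 1)
                    if (!vfio_ap_get_queue(AP_MKQID(apid, id)))
                            return false;

            /* Likewise for every APQN derived from the new APQI. */
            for_each_set_bit_inv(id, m->shadow_apcb.apm,
                                 m->shadow_apcb.apm_max + 1)
                    if (!vfio_ap_get_queue(AP_MKQID(id, apqi)))
                            return false;

            return true;
    }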

Handling Remove
---
Since the AP architecture precludes us from taking access to an individual
queue from a guest, we are left with the choice of taking access away from
either the adapter or the domain to which the queue is connected. Access to
the adapter will be taken away because it is likely that most of the time,
the remove callback will be invoked because the adapter state has
transitioned from online to standby. In such a case, no queue connected
to the adapter will be available to access.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 38 +++
 1 file changed, 38 insertions(+)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index cfe93ff9cc8c..5ee60dac7ad1 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1681,6 +1681,15 @@ static void vfio_ap_queue_link_mdev(struct vfio_ap_queue 
*q)
}
 }
 
+void vfio_ap_mdev_hot_plug_queue(struct vfio_ap_queue *q)
+{
+   if ((q->matrix_mdev == NULL) || !vfio_ap_mdev_has_crycb(q->matrix_mdev))
+   return;
+
+   if (vfio_ap_mdev_config_shadow_apcb(q->matrix_mdev))
+   vfio_ap_mdev_commit_shadow_apcb(q->matrix_mdev);
+}
+
 int vfio_ap_mdev_probe_queue(struct ap_queue *queue)
 {
struct vfio_ap_queue *q;
@@ -1694,11 +1703,35 @@ int vfio_ap_mdev_probe_queue(struct ap_queue *queue)
q->apqn = queue->qid;
q->saved_isc = VFIO_AP_ISC_INVALID;
vfio_ap_queue_link_mdev(q);
+   /* Make sure we're not in the middle of an AP configuration change. */
+   if (!(matrix_dev->flags & AP_MATRIX_CFG_CHG))
+   vfio_ap_mdev_hot_plug_queue(q);
mutex_unlock(&matrix_dev->lock);
 
return 0;
 }
 
+void vfio_ap_mdev_hot_unplug_queue(struct vfio_ap_queue *q)
+{
+   unsigned long apid = AP_QID_CARD(q->apqn);
+   unsigned long apqi = AP_QID_QUEUE(q->apqn);
+
+   if ((q->matrix_mdev == NULL) || !vfio_ap_mdev_has_crycb(q->matrix_mdev))
+   return;
+
+   /*
+* If the APQN is assigned to the guest, then let's
+* go ahead and unplug the adapter since the
+* architecture does not provide a means to unplug
+* an individual queue.
+*/
+   if (test_bit_inv(apid, q->matrix_mdev->shadow_apcb.apm) &&
+   test_bit_inv(apqi, q->matrix_mdev->shadow_apcb.aqm)) {
+   if (vfio_ap_mdev_unassign_guest_apid(q->matrix_mdev, apid))
+   vfio_ap_mdev_commit_shadow_apcb(q->matrix_mdev);
+   }
+}
+
 void vfio_ap_mdev_remove_queue(struct ap_queue *queue)
 {
struct 

[PATCH v8 00/16] s390/vfio-ap: dynamic configuration support

2020-06-05 Thread Tony Krowiak
Note: Patch 1 - s390/ap: introduce new ap function ap_get_qdev() - is not
  a part of this series. It is a forthcoming patch that is a
  prerequisite to this series and is being provided so this series
  will compile.

The current design for AP pass-through does not support making dynamic
changes to the AP matrix of a running guest resulting in a few 
deficiencies this patch series is intended to mitigate:

1. Adapters, domains and control domains can not be added to or removed
   from a running guest. In order to modify a guest's AP configuration,
   the guest must be terminated; only then can AP resources be assigned
   to or unassigned from the guest's matrix mdev. The new AP 
   configuration becomes available to the guest when it is subsequently
   restarted.

2. The AP bus's /sys/bus/ap/apmask and /sys/bus/ap/aqmask interfaces can
   be modified by a root user without any restrictions. A change to
   either mask can result in AP queue devices being unbound from the
   vfio_ap device driver and bound to a zcrypt device driver even if a
   guest is using the queues, thus giving the host access to the guest's
   private crypto data and vice versa.

3. The APQNs derived from the Cartesian product of the APIDs of the
   adapters and APQIs of the domains assigned to a matrix mdev must
   reference an AP queue device bound to the vfio_ap device driver. The
   AP architecture allows assignment of AP resources that are not
   available to the system, so this artificial restriction is not 
   compliant with the architecture.

4. The AP configuration profile can be dynamically changed for the linux
   host after a KVM guest is started. For example, a new domain can be
   dynamically added to the configuration profile via the SE or an HMC
   connected to a DPM enabled lpar. Likewise, AP adapters can be 
   dynamically configured (online state) and deconfigured (standby state)
   using the SE, an SCLP command or an HMC connected to a DPM enabled
   lpar. This can result in inadvertent sharing of AP queues between the
   guest and host.

5. A root user can manually unbind an AP queue device representing a 
   queue in use by a KVM guest via the vfio_ap device driver's sysfs 
   unbind attribute. In this case, the guest will be using a queue that
   is not bound to the driver which violates the device model.

This patch series introduces the following changes to the current design
to alleviate the shortcomings described above as well as to implement
more of the AP architecture:

1. A root user will be prevented from making changes to the AP bus's
   /sys/bus/ap/apmask or /sys/bus/ap/aqmask if the ownership of an APQN
   changes from the vfio_ap device driver to a zcrypt driver when the
   APQN is assigned to a matrix mdev.

2. Allow a root user to hot plug/unplug AP adapters, domains and control
   domains using the matrix mdev's assign/unassign attributes.

3. Allow assignment of an AP adapter or domain to a matrix mdev even if
   it results in assignment of an APQN that does not reference an AP
   queue device bound to the vfio_ap device driver, as long as the APQN
   is not reserved for use by the default zcrypt drivers (also known as
   over-provisioning of AP resources). Allowing over-provisioning of AP
   resources better models the architecture which does not preclude
   assigning AP resources that are not yet available in the system. Such
   APQNs, however, will not be assigned to the guest using the matrix
   mdev; only APQNs referencing AP queue devices bound to the vfio_ap
   device driver will actually get assigned to the guest.

4. Handle dynamic changes to the AP device model.

1. Rationale for changes to AP bus's apmask/aqmask interfaces:
--
Due to the extremely sensitive nature of cryptographic data, it is
imperative that great care be taken to ensure that such data is secured.
Allowing a root user, either inadvertently or maliciously, to configure
these masks such that a queue is shared between the host and a guest is
not only avoidable, but avoiding it is advisable. It was suggested that this scenario
is better handled in user space with management software, but that does
not preclude a malicious administrator from using the sysfs interfaces
to gain access to a guest's crypto data. It was also suggested that this
scenario could be avoided by taking access to the adapter away from the
guest and zeroing out the queues prior to the vfio_ap driver releasing the
device; however, stealing an adapter in use from a guest as a by-product
of an operation is bad and will likely cause problems for the guest
unnecessarily. It was decided that the most effective solution with the
least number of negative side effects is to prevent the situation at the
source.

2. Rationale for hot plug/unplug using matrix mdev sysfs interfaces:

Allowing a user to hot plug/unplug AP resources using the matrix 

[PATCH 13/21] KVM: x86/mmu: Prepend "kvm_" to memory cache helpers that will be global

2020-06-05 Thread Sean Christopherson
Rename the memory helpers that will soon be moved to common code and be
made globally available via linux/kvm_host.h.  "mmu" alone is not a
sufficient namespace for globally available KVM symbols.

Opportunistically add "nr_" in mmu_memory_cache_free_objects() to make
it clear the function returns the number of free objects, as opposed to
freeing existing objects.

Suggested-by: Christoffer Dall 
Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8d66cf558f1b..b85d3e8e8403 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1071,7 +1071,7 @@ static inline void *mmu_memory_cache_alloc_obj(struct 
kvm_mmu_memory_cache *mc,
return (void *)__get_free_page(gfp_flags);
 }
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+static int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 {
void *obj;
 
@@ -1086,12 +1086,12 @@ static int mmu_topup_memory_cache(struct 
kvm_mmu_memory_cache *mc, int min)
return 0;
 }
 
-static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *mc)
+static int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache 
*mc)
 {
return mc->nobjs;
 }
 
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+static void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 {
while (mc->nobjs) {
if (mc->kmem_cache)
@@ -1106,33 +1106,33 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu, bool maybe_indirect)
int r;
 
/* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
-  1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
+   r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
+  1 + PT64_ROOT_MAX_LEVEL + 
PTE_PREFETCH_NUM);
if (r)
return r;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
-  PT64_ROOT_MAX_LEVEL);
+   r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+  PT64_ROOT_MAX_LEVEL);
if (r)
return r;
if (maybe_indirect) {
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
-  PT64_ROOT_MAX_LEVEL);
+   r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
+  PT64_ROOT_MAX_LEVEL);
if (r)
return r;
}
-   return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
- PT64_ROOT_MAX_LEVEL);
+   return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
+ PT64_ROOT_MAX_LEVEL);
 }
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-   mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
-   mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
-   mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
-   mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
+   kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
+   kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
+   kvm_mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
+   kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
+static void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 {
void *p;
 
@@ -1146,7 +1146,7 @@ static void *mmu_memory_cache_alloc(struct 
kvm_mmu_memory_cache *mc)
 
 static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
 {
-   return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
+   return kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
 }
 
 static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
@@ -1417,7 +1417,7 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu)
struct kvm_mmu_memory_cache *mc;
 
mc = &vcpu->arch.mmu_pte_list_desc_cache;
-   return mmu_memory_cache_free_objects(mc);
+   return kvm_mmu_memory_cache_nr_free_objects(mc);
 }
 
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@@ -2104,10 +2104,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
kvm_vcpu *vcpu, int direct
 {
struct kvm_mmu_page *sp;
 
-   sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
-   sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
+   sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
+   sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
if (!direct)
-   sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
+   sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);

[PATCH v8 01/16] s390/ap: introduce new ap function ap_get_qdev()

2020-06-05 Thread Tony Krowiak
From: Harald Freudenberger 

Provide a new interface function to be used by the ap drivers:
  struct ap_queue *ap_get_qdev(ap_qid_t qid);
Returns ptr to the struct ap_queue device or NULL if there
was no ap_queue device with this qid found. When something is
found, the reference count of the embedded device is increased.
So the caller has to decrease the reference count after use
with a call to put_device(&aq->ap_dev.device).
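
A minimal caller-side sketch (error handling trimmed; qid is assumed to be
a valid ap_qid_t):

    struct ap_queue *aq;

    aq = ap_get_qdev(qid);
    if (!aq)
            return;  /* no ap_queue device with this qid */
    /* ... use aq ... */
    put_device(&aq->ap_dev.device);  /* drop the reference taken above */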

With this patch also the ap_card_list is removed from the
ap core code and a new hashtable is introduced which stores
hnodes of all the ap queues known to the ap bus.

The hashtable approach and a first implementation of this
interface come from a previous patch from
Anthony Krowiak and an idea from Halil Pasic.

Signed-off-by: Harald Freudenberger 
Suggested-by: Tony Krowiak 
Suggested-by: Halil Pasic 
Reviewed-by: Tony Krowiak 
Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/ap_bus.c   | 94 +++---
 drivers/s390/crypto/ap_bus.h   | 25 +
 drivers/s390/crypto/ap_card.c  | 47 +
 drivers/s390/crypto/ap_queue.c | 10 ++--
 4 files changed, 95 insertions(+), 81 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 35064443e748..e71ca4a719a5 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -62,8 +62,10 @@ MODULE_PARM_DESC(aqmask, "AP bus domain mask.");
 
 static struct device *ap_root_device;
 
-DEFINE_SPINLOCK(ap_list_lock);
-LIST_HEAD(ap_card_list);
+/* Hashtable of all queue devices on the AP bus */
+DEFINE_HASHTABLE(ap_queues, 8);
+/* lock used for the ap_queues hashtable */
+DEFINE_SPINLOCK(ap_queues_lock);
 
 /* Default permissions (ioctl, card and domain masking) */
 struct ap_perms ap_perms;
@@ -414,7 +416,7 @@ static void ap_interrupt_handler(struct airq_struct *airq, 
bool floating)
  */
 static void ap_tasklet_fn(unsigned long dummy)
 {
-   struct ap_card *ac;
+   int bkt;
struct ap_queue *aq;
enum ap_wait wait = AP_WAIT_NONE;
 
@@ -425,34 +427,30 @@ static void ap_tasklet_fn(unsigned long dummy)
if (ap_using_interrupts())
xchg(ap_airq.lsi_ptr, 0);
 
-   spin_lock_bh(&ap_list_lock);
-   for_each_ap_card(ac) {
-   for_each_ap_queue(aq, ac) {
-   spin_lock_bh(&aq->lock);
-   wait = min(wait, ap_sm_event_loop(aq, AP_EVENT_POLL));
-   spin_unlock_bh(&aq->lock);
-   }
+   spin_lock_bh(&ap_queues_lock);
+   hash_for_each(ap_queues, bkt, aq, hnode) {
+   spin_lock_bh(&aq->lock);
+   wait = min(wait, ap_sm_event_loop(aq, AP_EVENT_POLL));
+   spin_unlock_bh(&aq->lock);
	}
-   spin_unlock_bh(&ap_list_lock);
+   spin_unlock_bh(&ap_queues_lock);
 
ap_wait(wait);
 }
 
 static int ap_pending_requests(void)
 {
-   struct ap_card *ac;
+   int bkt;
struct ap_queue *aq;
 
-   spin_lock_bh(&ap_list_lock);
-   for_each_ap_card(ac) {
-   for_each_ap_queue(aq, ac) {
-   if (aq->queue_count == 0)
-   continue;
-   spin_unlock_bh(&ap_list_lock);
-   return 1;
-   }
+   spin_lock_bh(&ap_queues_lock);
+   hash_for_each(ap_queues, bkt, aq, hnode) {
+   if (aq->queue_count == 0)
+   continue;
+   spin_unlock_bh(&ap_queues_lock);
+   return 1;
	}
-   spin_unlock_bh(&ap_list_lock);
+   spin_unlock_bh(&ap_queues_lock);
return 0;
 }
 
@@ -683,24 +681,20 @@ static int ap_device_probe(struct device *dev)
}
 
/* Add queue/card to list of active queues/cards */
-   spin_lock_bh(&ap_list_lock);
-   if (is_card_dev(dev))
-   list_add(&to_ap_card(dev)->list, &ap_card_list);
-   else
-   list_add(&to_ap_queue(dev)->list,
-&to_ap_queue(dev)->card->queues);
-   spin_unlock_bh(&ap_list_lock);
+   spin_lock_bh(&ap_queues_lock);
+   if (is_queue_dev(dev))
+   hash_add(ap_queues, &to_ap_queue(dev)->hnode,
+to_ap_queue(dev)->qid);
+   spin_unlock_bh(&ap_queues_lock);
 
ap_dev->drv = ap_drv;
rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
 
if (rc) {
-   spin_lock_bh(&ap_list_lock);
-   if (is_card_dev(dev))
-   list_del_init(&to_ap_card(dev)->list);
-   else
-   list_del_init(&to_ap_queue(dev)->list);
-   spin_unlock_bh(&ap_list_lock);
+   spin_lock_bh(&ap_queues_lock);
+   if (is_queue_dev(dev))
+   hash_del(&to_ap_queue(dev)->hnode);
+   spin_unlock_bh(&ap_queues_lock);
ap_dev->drv = NULL;
}
 
@@ -725,16 +719,33 @@ static int ap_device_remove(struct device *dev)
ap_queue_remove(to_ap_queue(dev));
 
/* Remove queue/card from list of active queues/cards */

[PATCH 18/21] KVM: arm64: Use common KVM implementation of MMU memory caches

2020-06-05 Thread Sean Christopherson
Move to the common MMU memory cache implementation now that the common
code and arm64's existing code are semantically compatible.

No functional change intended.

Suggested-by: Christoffer Dall 
Signed-off-by: Sean Christopherson 
---
 arch/arm64/include/asm/kvm_host.h  | 12 ---
 arch/arm64/include/asm/kvm_types.h |  2 ++
 arch/arm64/kvm/mmu.c   | 51 ++
 3 files changed, 12 insertions(+), 53 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 2385dede96e0..d221b6b129fd 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -97,18 +97,6 @@ struct kvm_arch {
bool return_nisv_io_abort_to_user;
 };
 
-#define KVM_NR_MEM_OBJS 40
-
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-   int nobjs;
-   gfp_t gfp_zero;
-   void *objects[KVM_NR_MEM_OBJS];
-};
-
 struct kvm_vcpu_fault_info {
u32 esr_el2;/* Hyp Syndrom Register */
u64 far_el2;/* Hyp Fault Address Register */
diff --git a/arch/arm64/include/asm/kvm_types.h 
b/arch/arm64/include/asm/kvm_types.h
index d0987007d581..9a126b9e2d7c 100644
--- a/arch/arm64/include/asm/kvm_types.h
+++ b/arch/arm64/include/asm/kvm_types.h
@@ -2,5 +2,7 @@
 #ifndef _ASM_ARM64_KVM_TYPES_H
 #define _ASM_ARM64_KVM_TYPES_H
 
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
 #endif /* _ASM_ARM64_KVM_TYPES_H */
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 688213ef34f0..976405e2fbb2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -124,37 +124,6 @@ static void stage2_dissolve_pud(struct kvm *kvm, 
phys_addr_t addr, pud_t *pudp)
put_page(virt_to_page(pudp));
 }
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
-{
-   void *page;
-
-   if (cache->nobjs >= min)
-   return 0;
-   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT |
-  cache->gfp_zero);
-   if (!page)
-   return -ENOMEM;
-   cache->objects[cache->nobjs++] = page;
-   }
-   return 0;
-}
-
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
-   while (mc->nobjs)
-   free_page((unsigned long)mc->objects[--mc->nobjs]);
-}
-
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
-{
-   void *p;
-
-   BUG_ON(!mc || !mc->nobjs);
-   p = mc->objects[--mc->nobjs];
-   return p;
-}
-
 static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t 
addr)
 {
pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL);
@@ -1024,7 +993,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
-   pud = mmu_memory_cache_alloc(cache);
+   pud = kvm_mmu_memory_cache_alloc(cache);
stage2_pgd_populate(kvm, pgd, pud);
get_page(virt_to_page(pgd));
}
@@ -1045,7 +1014,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return NULL;
-   pmd = mmu_memory_cache_alloc(cache);
+   pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@@ -1251,7 +1220,7 @@ static int stage2_set_pte(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache,
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
-   pmd = mmu_memory_cache_alloc(cache);
+   pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@@ -1276,7 +1245,7 @@ static int stage2_set_pte(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache,
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
-   pte = mmu_memory_cache_alloc(cache);
+   pte = kvm_mmu_memory_cache_alloc(cache);
kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
}
@@ -1343,7 +1312,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
guest_ipa,
phys_addr_t addr, end;
int ret = 0;
unsigned long pfn;
-   struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, };
+   struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
 
end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;

[PATCH v8 03/16] s390/vfio-ap: manage link between queue struct and matrix mdev

2020-06-05 Thread Tony Krowiak
A vfio_ap_queue structure is created for each queue device probed. To
ensure that the matrix mdev to which a queue's APQN is assigned is linked
to the queue structure as long as the queue device is bound to the vfio_ap
device driver, let's go ahead and manage these links when the queue device
is probed and removed as well as whenever an adapter or domain is assigned
to or unassigned from the matrix mdev.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 93 +--
 1 file changed, 88 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 7c96b6fd9f70..21b98a392f36 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -160,7 +160,6 @@ struct ap_queue_status vfio_ap_irq_disable(struct 
vfio_ap_queue *q)
  status.response_code);
 end_free:
vfio_ap_free_aqic_resources(q);
-   q->matrix_mdev = NULL;
return status;
 }
 
@@ -262,7 +261,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
struct vfio_ap_queue *q;
struct ap_queue_status qstatus = {
   .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
-   struct ap_matrix_mdev *matrix_mdev;
 
/* If we do not use the AIV facility just go to userland */
if (!(vcpu->arch.sie_block->eca & ECA_AIV))
@@ -273,14 +271,11 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
 
if (!vcpu->kvm->arch.crypto.pqap_hook)
goto out_unlock;
-   matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
-  struct ap_matrix_mdev, pqap_hook);
 
q = vfio_ap_get_queue(apqn);
if (!q)
goto out_unlock;
 
-   q->matrix_mdev = matrix_mdev;
status = vcpu->run->s.regs.gprs[1];
 
/* If IR bit(16) is set we enable the interrupt */
@@ -548,6 +543,67 @@ static int vfio_ap_mdev_verify_no_sharing(struct 
ap_matrix_mdev *matrix_mdev)
return 0;
 }
 
+enum qlink_type {
+   LINK_APID,
+   LINK_APQI,
+   UNLINK_APID,
+   UNLINK_APQI,
+};
+
+/**
+ * vfio_ap_mdev_link_queues
+ *
+ * @matrix_mdev: The matrix mdev to link.
+ * @type:   The type of link.
+ * @qlink_id:   The APID or APQI of the queues to link.
+ *
+ * Sets the link from the queues with the specified @qlink_id (i.e., APID or
+ * APQI) to @matrix_mdev:
+ * type == LINK_APID: Link @matrix_mdev to the queues with the
+ * specified APID.
+ * type == UNLINK_APID: Unlink @matrix_mdev from the queues with the
+ * specified APID.
+ * type == LINK_APQI: Link @matrix_mdev to the queues with the
+ * specified APQI.
+ * type == UNLINK_APQI: Unlink @matrix_mdev from the queues with the
+ * specified APQI.
+ */
+static void vfio_ap_mdev_link_queues(struct ap_matrix_mdev *matrix_mdev,
+enum qlink_type type,
+unsigned long qlink_id)
+{
+   unsigned long id;
+   struct vfio_ap_queue *q;
+
+   switch (type) {
+   case LINK_APID:
+   case UNLINK_APID:
+   for_each_set_bit_inv(id, matrix_mdev->matrix.aqm,
+matrix_mdev->matrix.aqm_max + 1) {
+   q = vfio_ap_get_queue(AP_MKQID(qlink_id, id));
+   if (q) {
+   if (type == LINK_APID)
+   q->matrix_mdev = matrix_mdev;
+   else
+   q->matrix_mdev = NULL;
+   }
+   }
+   break;
+   default:
+   for_each_set_bit_inv(id, matrix_mdev->matrix.apm,
+matrix_mdev->matrix.apm_max + 1) {
+   q = vfio_ap_get_queue(AP_MKQID(id, qlink_id));
+   if (q) {
+   if (type == LINK_APQI)
+   q->matrix_mdev = matrix_mdev;
+   else
+   q->matrix_mdev = NULL;
+   }
+   }
+   break;
+   }
+}
+
 /**
  * assign_adapter_store
  *
@@ -617,6 +673,7 @@ static ssize_t assign_adapter_store(struct device *dev,
if (ret)
goto share_err;
 
+   vfio_ap_mdev_link_queues(matrix_mdev, LINK_APID, apid);
ret = count;
goto done;
 
@@ -668,6 +725,7 @@ static ssize_t unassign_adapter_store(struct device *dev,
 
mutex_lock(&matrix_dev->lock);
	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
+   vfio_ap_mdev_link_queues(matrix_mdev, UNLINK_APID, apid);
	mutex_unlock(&matrix_dev->lock);
 
return count;
@@ -758,6 +816,7 @@ static ssize_t assign_domain_store(struct device *dev,
if (ret)
goto share_err;

[PATCH v8 06/16] s390/vfio-ap: introduce shadow APCB

2020-06-05 Thread Tony Krowiak
The APCB is a field within the CRYCB that provides the AP configuration
to a KVM guest. Let's introduce a shadow copy of the KVM guest's APCB and
maintain it for the lifespan of the guest.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 33 +++
 drivers/s390/crypto/vfio_ap_private.h |  2 ++
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 2eebb2b6d2d4..b5ed36e2c948 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -292,14 +292,35 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
return 0;
 }
 
+static void vfio_ap_matrix_clear_masks(struct ap_matrix *matrix)
+{
+   bitmap_clear(matrix->apm, 0, AP_DEVICES);
+   bitmap_clear(matrix->aqm, 0, AP_DOMAINS);
+   bitmap_clear(matrix->adm, 0, AP_DOMAINS);
+}
+
 static void vfio_ap_matrix_init(struct ap_config_info *info,
struct ap_matrix *matrix)
 {
+   vfio_ap_matrix_clear_masks(matrix);
matrix->apm_max = info->apxa ? info->Na : 63;
matrix->aqm_max = info->apxa ? info->Nd : 15;
matrix->adm_max = info->apxa ? info->Nd : 15;
 }
 
+static bool vfio_ap_mdev_has_crycb(struct ap_matrix_mdev *matrix_mdev)
+{
+   return (matrix_mdev->kvm && matrix_mdev->kvm->arch.crypto.crycbd);
+}
+
+static void vfio_ap_mdev_commit_crycb(struct ap_matrix_mdev *matrix_mdev)
+{
+   kvm_arch_crypto_set_masks(matrix_mdev->kvm,
+ matrix_mdev->shadow_apcb.apm,
+ matrix_mdev->shadow_apcb.aqm,
+ matrix_mdev->shadow_apcb.adm);
+}
+
 static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
 {
struct ap_matrix_mdev *matrix_mdev;
@@ -315,6 +336,7 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct 
mdev_device *mdev)
 
matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
+   vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook.hook = handle_pqap;
matrix_mdev->pqap_hook.owner = THIS_MODULE;
@@ -1168,13 +1190,12 @@ static int vfio_ap_mdev_group_notifier(struct 
notifier_block *nb,
if (ret)
return NOTIFY_DONE;
 
-   /* If there is no CRYCB pointer, then we can't copy the masks */
-   if (!matrix_mdev->kvm->arch.crypto.crycbd)
+   if (!vfio_ap_mdev_has_crycb(matrix_mdev))
return NOTIFY_DONE;
 
-   kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
- matrix_mdev->matrix.aqm,
- matrix_mdev->matrix.adm);
+   memcpy(&matrix_mdev->shadow_apcb, &matrix_mdev->matrix,
+  sizeof(matrix_mdev->shadow_apcb));
+   vfio_ap_mdev_commit_crycb(matrix_mdev);
 
return NOTIFY_OK;
 }
@@ -1289,6 +1310,8 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
kvm_put_kvm(matrix_mdev->kvm);
matrix_mdev->kvm = NULL;
}
+
+   vfio_ap_matrix_clear_masks(&matrix_mdev->shadow_apcb);
	mutex_unlock(&matrix_dev->lock);
 
vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
				 &matrix_mdev->iommu_notifier);
diff --git a/drivers/s390/crypto/vfio_ap_private.h 
b/drivers/s390/crypto/vfio_ap_private.h
index ad2d5b6a2851..8e24a073166b 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -75,6 +75,7 @@ struct ap_matrix {
  * @list:  allows the ap_matrix_mdev struct to be added to a list
  * @matrix:the adapters, usage domains and control domains assigned to the
  * mediated matrix device.
+ * @shadow_apcb:the shadow copy of the APCB field of the KVM guest's CRYCB
  * @group_notifier: notifier block used for specifying callback function for
  * handling the VFIO_GROUP_NOTIFY_SET_KVM event
  * @kvm:   the struct holding guest's state
@@ -82,6 +83,7 @@ struct ap_matrix {
 struct ap_matrix_mdev {
struct list_head node;
struct ap_matrix matrix;
+   struct ap_matrix shadow_apcb;
struct notifier_block group_notifier;
struct notifier_block iommu_notifier;
struct kvm *kvm;
-- 
2.21.1



Re: [PATCH v4 05/12] PCI: brcmstb: Add suspend and resume pm_ops

2020-06-05 Thread Florian Fainelli



On 6/5/2020 2:26 PM, Jim Quinlan wrote:
> From: Jim Quinlan 
> 
> Broadcom Set-top (BrcmSTB) boards typically support S2, S3, and S5 suspend
> and resume.  Now the PCIe driver may do so as well.
> 
> Signed-off-by: Jim Quinlan 

Acked-by: Florian Fainelli 
-- 
Florian


[PATCH v8 07/16] s390/vfio-ap: sysfs attribute to display the guest's matrix

2020-06-05 Thread Tony Krowiak
The matrix of adapters and domains configured in a guest's CRYCB may
differ from the matrix of adapters and domains assigned to the matrix mdev,
so this patch introduces a sysfs attribute to display the matrix of a guest
using the matrix mdev. For a matrix mdev denoted by $uuid, the CRYCB for a
guest using the matrix mdev can be displayed as follows:

   cat /sys/devices/vfio_ap/matrix/$uuid/guest_matrix

If a guest is not using the matrix mdev at the time the CRYCB is displayed,
an error (ENODEV) will be returned.
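
For illustration, assuming a guest whose shadow APCB contains adapters 04
and 05 and domains 0047 and 0054 (hypothetical values), the output would
look like this:

   $ cat /sys/devices/vfio_ap/matrix/$uuid/guest_matrix
   04.0047
   04.0054
   05.0047
   05.0054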

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 58 +++
 1 file changed, 58 insertions(+)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index b5ed36e2c948..779659074776 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1086,6 +1086,63 @@ static ssize_t matrix_show(struct device *dev, struct 
device_attribute *attr,
 }
 static DEVICE_ATTR_RO(matrix);
 
+static ssize_t guest_matrix_show(struct device *dev,
+struct device_attribute *attr, char *buf)
+{
+   struct mdev_device *mdev = mdev_from_dev(dev);
+   struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+   char *bufpos = buf;
+   unsigned long apid;
+   unsigned long apqi;
+   unsigned long apid1;
+   unsigned long apqi1;
+   unsigned long napm_bits = matrix_mdev->shadow_apcb.apm_max + 1;
+   unsigned long naqm_bits = matrix_mdev->shadow_apcb.aqm_max + 1;
+   int nchars = 0;
+   int n;
+
+   if (!vfio_ap_mdev_has_crycb(matrix_mdev))
+   return -ENODEV;
+
+   apid1 = find_first_bit_inv(matrix_mdev->shadow_apcb.apm, napm_bits);
+   apqi1 = find_first_bit_inv(matrix_mdev->shadow_apcb.aqm, naqm_bits);
+
+	mutex_lock(&matrix_dev->lock);
+
+   if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
+   for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm,
+napm_bits) {
+   for_each_set_bit_inv(apqi,
+matrix_mdev->shadow_apcb.aqm,
+naqm_bits) {
+   n = sprintf(bufpos, "%02lx.%04lx\n", apid,
+   apqi);
+   bufpos += n;
+   nchars += n;
+   }
+   }
+   } else if (apid1 < napm_bits) {
+   for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm,
+napm_bits) {
+   n = sprintf(bufpos, "%02lx.\n", apid);
+   bufpos += n;
+   nchars += n;
+   }
+   } else if (apqi1 < naqm_bits) {
+   for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
+naqm_bits) {
+   n = sprintf(bufpos, ".%04lx\n", apqi);
+   bufpos += n;
+   nchars += n;
+   }
+   }
+
+	mutex_unlock(&matrix_dev->lock);
+
+   return nchars;
+}
+static DEVICE_ATTR_RO(guest_matrix);
+
 static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
@@ -1095,6 +1152,7 @@ static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
+	&dev_attr_guest_matrix.attr,
NULL,
 };
 
-- 
2.21.1



Re: [GIT PULL] first round of SCSI updates for the 5.6+ merge window

2020-06-05 Thread James Bottomley
On Fri, 2020-06-05 at 14:25 -0700, Linus Torvalds wrote:
> On Fri, Jun 5, 2020 at 2:18 PM James Bottomley
>  wrote:
> > 
> > Um, no, shuffles feet ... I actually tagged the wrong branch:
> 
> Ok, now I see the changes, but I see more than you reported.
> 
> These seem to be new compared to your pull request:
> 
> Al Viro (4):
>   scsi: hpsa: Lift {BIG_,}IOCTL_Command_struct copy{in,out} into
> hpsa_ioctl()
>   scsi: hpsa: Don't bother with vmalloc for
> BIG_IOCTL_Command_struct
>   scsi: hpsa: Get rid of compat_alloc_user_space()
>   scsi: hpsa: hpsa_ioctl(): Tidy up a bit
> 
> Can Guo (1):
>   scsi: ufs: Don't update urgent bkops level when toggling auto
> bkops
> 
> Stanley Chu (1):
>   scsi: ufs: Remove redundant urgent_bkop_lvl initialization
> 
> They don't look alarming, but I don't like how I don't see what you
> _claim_ I should see.
> 
> Hmm?

Ah right, my MO is to do the first push and then start gathering for
the second.  Pushing the tag again picked up the new stuff I've been
gathering.  Let me rewind the tag back to where it was for the original
push and then try again.

Done.  You should now see no stray additional patches on the scsi-misc
tag.

Sorry again, I believe I've actually fully verified the diffstat
matches this time ... (famous last words ..)

James




[PATCH v8 11/16] s390/vfio-ap: allow configuration of matrix mdev in use by a KVM guest

2020-06-05 Thread Tony Krowiak
The current support for pass-through crypto adapters does not allow
configuration of a matrix mdev when it is in use by a KVM guest. Let's
allow AP resources - i.e., adapters, domains and control domains - to be
assigned to or unassigned from a matrix mdev while it is in use by a guest.
This is in preparation for the introduction of support for dynamic
configuration of the AP matrix for a running KVM guest.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 24 
 1 file changed, 24 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 68bdf80807c6..4f59f471b4d3 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -773,10 +773,6 @@ static ssize_t assign_adapter_store(struct device *dev,
struct mdev_device *mdev = mdev_from_dev(dev);
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
-   /* If the guest is running, disallow assignment of adapter */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
	ret = kstrtoul(buf, 0, &apid);
if (ret)
return ret;
@@ -828,10 +824,6 @@ static ssize_t unassign_adapter_store(struct device *dev,
struct mdev_device *mdev = mdev_from_dev(dev);
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
-   /* If the guest is running, disallow un-assignment of adapter */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
ret = kstrtoul(buf, 0, );
if (ret)
return ret;
@@ -891,10 +883,6 @@ static ssize_t assign_domain_store(struct device *dev,
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
 
-   /* If the guest is running, disallow assignment of domain */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
	ret = kstrtoul(buf, 0, &apqi);
if (ret)
return ret;
@@ -946,10 +934,6 @@ static ssize_t unassign_domain_store(struct device *dev,
struct mdev_device *mdev = mdev_from_dev(dev);
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
-   /* If the guest is running, disallow un-assignment of domain */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
	ret = kstrtoul(buf, 0, &apqi);
if (ret)
return ret;
@@ -991,10 +975,6 @@ static ssize_t assign_control_domain_store(struct device 
*dev,
struct mdev_device *mdev = mdev_from_dev(dev);
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
-   /* If the guest is running, disallow assignment of control domain */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
	ret = kstrtoul(buf, 0, &id);
if (ret)
return ret;
@@ -1036,10 +1016,6 @@ static ssize_t unassign_control_domain_store(struct 
device *dev,
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
unsigned long max_domid =  matrix_mdev->matrix.adm_max;
 
-   /* If the guest is running, disallow un-assignment of control domain */
-   if (matrix_mdev->kvm)
-   return -EBUSY;
-
	ret = kstrtoul(buf, 0, &domid);
if (ret)
return ret;
-- 
2.21.1



[PATCH v8 08/16] s390/vfio-ap: filter matrix for unavailable queue devices

2020-06-05 Thread Tony Krowiak
Even though APQNs for queues that are not in the host's AP configuration
may be assigned to a matrix mdev, we do not want to set bits in the guest's
APCB for APQNs that do not reference AP queue devices bound to the vfio_ap
device driver. Ideally, it would be great if such APQNs could be filtered
out before setting the bits in the guest's APCB; however, the architecture
precludes filtering individual APQNs. Consequently, either the APID or APQI
must be filtered.

This patch introduces code to filter the APIDs or APQIs assigned to the
matrix mdev's AP configuration before assigning them to the guest's AP
configuration (i.e., APCB). We'll start by filtering the APIDs:

   If an APQN assigned to the matrix mdev's AP configuration does not
   reference a queue device bound to the vfio_ap device driver, the APID
   will be filtered out (i.e., not assigned to the guest's APCB).

If every APID assigned to the matrix mdev is filtered out, then we'll try
filtering the APQIs:

   If an APQN assigned to the matrix mdev's AP configuration does not
   reference a queue device bound to the vfio_ap device driver, the APQI
   will be filtered out (i.e., not assigned to the guest's APCB).

In any case, if after filtering either the APIDs or APQIs there are any
APQNs that can be assigned to the guest's APCB, they will be assigned and
the CRYCB will be hot plugged into the guest.

Example
===

APQNs bound to vfio_ap device driver:
   04.0004
   04.0047
   04.0054

   05.0005
   05.0047
   05.0054

Assignments to matrix mdev:
   APIDs  APQIs  -> APQNs
   04     0004      04.0004
   05     0005      04.0005
          0047      04.0047
          0054      04.0054
                    05.0004
                    05.0005
                    05.0047
                    05.0054

Filter APIDs:
   APID 04 will be filtered because APQN 04.0005 is not bound.
   APID 05 will be filtered because APQN 05.0004 is not bound.
   APQNs remaining: None

Filter APQIs:
   APQI 0004 will be filtered because APQN 05.0004 is not bound.
   APQI 0005 will be filtered because APQN 04.0005 is not bound.
   APQNs remaining: 04.0047, 04.0054, 05.0047, 05.0054

APQNs 04.0047, 04.0054, 05.0047, 05.0054 will be assigned to the CRYCB and
hot plugged into the KVM guest.
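
The decision logic is easy to see in miniature. Below is a small,
self-contained C sketch of the APID filtering pass described above; the
bound[][] table, matrix sizes, and function names are illustrative
assumptions, not the driver's actual data structures:

   #include <stdbool.h>
   #include <stdio.h>

   #define NR_APIDS 8
   #define NR_APQIS 8

   /* toy stand-in for "APQN apid.apqi is bound to the vfio_ap driver" */
   static bool bound[NR_APIDS][NR_APQIS];

   /*
    * Filter by APID: an APID survives only if every APQN formed with the
    * assigned APQIs is bound.  Returns the number of surviving APQNs.
    */
   static int filter_apids(const bool apid_in[NR_APIDS],
                           const bool apqi_in[NR_APQIS],
                           bool apid_out[NR_APIDS])
   {
           int apid, apqi, napqns = 0;

           for (apid = 0; apid < NR_APIDS; apid++) {
                   apid_out[apid] = apid_in[apid];
                   for (apqi = 0; apid_out[apid] && apqi < NR_APQIS; apqi++)
                           if (apqi_in[apqi] && !bound[apid][apqi])
                                   apid_out[apid] = false; /* one miss filters the APID */
                   if (apid_out[apid])
                           for (apqi = 0; apqi < NR_APQIS; apqi++)
                                   napqns += apqi_in[apqi];
           }
           return napqns;
   }

   int main(void)
   {
           bool apid_in[NR_APIDS] = { [4] = true, [5] = true };
           bool apqi_in[NR_APQIS] = { [4] = true, [5] = true };
           bool apid_out[NR_APIDS];

           bound[4][4] = bound[5][5] = true; /* 4.5 and 5.4 stay unbound */
           /* prints 0: both APIDs are filtered, as in the example above */
           printf("APQNs after APID filter: %d\n",
                  filter_apids(apid_in, apqi_in, apid_out));
           return 0;
   }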

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 159 +-
 1 file changed, 155 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 779659074776..add442977b9a 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -313,7 +313,7 @@ static bool vfio_ap_mdev_has_crycb(struct ap_matrix_mdev 
*matrix_mdev)
return (matrix_mdev->kvm && matrix_mdev->kvm->arch.crypto.crycbd);
 }
 
-static void vfio_ap_mdev_commit_crycb(struct ap_matrix_mdev *matrix_mdev)
+static void vfio_ap_mdev_commit_shadow_apcb(struct ap_matrix_mdev *matrix_mdev)
 {
kvm_arch_crypto_set_masks(matrix_mdev->kvm,
  matrix_mdev->shadow_apcb.apm,
@@ -584,6 +584,157 @@ static int vfio_ap_mdev_verify_no_sharing(struct 
ap_matrix_mdev *matrix_mdev,
return 0;
 }
 
+/**
+ * vfio_ap_mdev_filter_matrix
+ *
+ * Filter APQNs assigned to the matrix mdev that do not reference an AP queue
+ * device bound to the vfio_ap device driver.
+ *
+ * @matrix_mdev:  the matrix mdev whose AP configuration is to be filtered
+ * @shadow_apcb:  the shadow of the KVM guest's APCB (contains AP configuration
+ *   for guest)
+ * @filter_apids: boolean value indicating whether the APQNs shall be filtered
+ *   by APID (true) or by APQI (false).
+ *
+ * Returns the number of APQNs remaining after filtering is complete.
+ */
+static int vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
+ struct ap_matrix *shadow_apcb,
+ bool filter_apids)
+{
+   unsigned long apid, apqi, apqn;
+
+	memcpy(shadow_apcb, &matrix_mdev->matrix, sizeof(*shadow_apcb));
+
+   for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
+   /*
+* If the APID is not assigned to the host AP configuration,
+* we can not assign it to the guest's AP configuration
+*/
+   if (!test_bit_inv(apid,
+ (unsigned long *)matrix_dev->info.apm)) {
+   clear_bit_inv(apid, shadow_apcb->apm);
+   continue;
+   }
+
+   for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+AP_DOMAINS) {
+   /*
+* If the APQI is not assigned to the host AP
+* configuration, then it can not be assigned to the
+* guest's AP configuration
+*/
+   if (!test_bit_inv(apqi, (unsigned 

[PATCH v8 15/16] s390/vfio-ap: handle AP bus scan completed notification

2020-06-05 Thread Tony Krowiak
Implements the driver callback invoked by the AP bus when the AP bus
scan has completed. Since this callback is invoked after binding the newly
added devices to their respective device drivers, the vfio_ap driver will
attempt to plug the adapters, domains and control domains into each guest
using a matrix mdev to which they are assigned. Keep in mind that an
adapter or domain can be plugged in only if each APQN with the APID of the
adapter or the APQI of the domain references a queue device bound to the
vfio_ap device driver. Consequently, not all newly added adapters and
domains will necessarily get hot plugged.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_drv.c |   1 +
 drivers/s390/crypto/vfio_ap_ops.c | 110 +-
 drivers/s390/crypto/vfio_ap_private.h |   2 +
 3 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_drv.c 
b/drivers/s390/crypto/vfio_ap_drv.c
index f0f83c1b8983..badc99ee863d 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -178,6 +178,7 @@ static int __init vfio_ap_init(void)
vfio_ap_drv.in_use = vfio_ap_mdev_resource_in_use;
vfio_ap_drv.ids = ap_queue_ids;
vfio_ap_drv.on_config_changed = vfio_ap_on_cfg_changed;
+   vfio_ap_drv.on_scan_complete = vfio_ap_on_scan_complete;
 
	ret = ap_driver_register(&vfio_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME);
if (ret) {
diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index e3c4b2d73072..cfe93ff9cc8c 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -616,14 +616,13 @@ static bool vfio_ap_mdev_config_shadow_apcb(struct 
ap_matrix_mdev *matrix_mdev)
 * CRYCB after filtering, then try filtering the APQIs.
 */
if (napm == 0) {
-   naqm = vfio_ap_mdev_filter_matrix(matrix_mdev,
-						  &shadow_apcb, false);
-
/*
 * If there are no APQNs that can be assigned to the
 * matrix mdev after filtering the APQIs, then no APQNs
 * shall be assigned to the guest's CRYCB.
 */
+   naqm = vfio_ap_mdev_filter_matrix(matrix_mdev,
+						  &shadow_apcb, false);
if (naqm == 0) {
bitmap_clear(shadow_apcb.apm, 0, AP_DEVICES);
bitmap_clear(shadow_apcb.aqm, 0, AP_DOMAINS);
@@ -1759,6 +1758,16 @@ bool vfio_ap_mdev_unassign_apids(struct ap_matrix_mdev 
*matrix_mdev,
for_each_set_bit_inv(apid, apm_unassign, AP_DEVICES) {
unassigned |= vfio_ap_mdev_unassign_guest_apid(matrix_mdev,
   apid);
+   /*
+* If the APID is not assigned to the matrix mdev's shadow
+* CRYCB, continue with the next APID.
+*/
+   if (!test_bit_inv(apid, matrix_mdev->shadow_apcb.apm))
+   continue;
+
+   /* Unassign the APID from the matrix mdev's shadow CRYCB */
+   clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+   unassigned = true;
}
 
return unassigned;
@@ -1792,6 +1801,17 @@ bool vfio_ap_mdev_unassign_apqis(struct ap_matrix_mdev 
*matrix_mdev,
for_each_set_bit_inv(apqi, aqm_unassign, AP_DOMAINS) {
unassigned |= vfio_ap_mdev_unassign_guest_apqi(matrix_mdev,
   apqi);
+
+   /*
+* If the APQI is not assigned to the matrix mdev's shadow
+* CRYCB, continue with the next APQI
+*/
+   if (!test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
+   continue;
+
+   /* Unassign the APQI from the matrix mdev's shadow CRYCB */
+   clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
+   unassigned = true;
}
 
return unassigned;
@@ -1853,3 +1873,87 @@ void vfio_ap_on_cfg_changed(struct ap_config_info 
*new_config_info,
}
mutex_unlock(_dev->lock);
 }
+
+bool vfio_ap_mdev_assign_apids(struct ap_matrix_mdev *matrix_mdev,
+  unsigned long *apm_assign)
+{
+   unsigned long apid;
+   bool assigned = false;
+
+   for_each_set_bit_inv(apid, apm_assign, AP_DEVICES)
+   if (test_bit_inv(apid, matrix_mdev->matrix.apm))
+   if (vfio_ap_mdev_assign_guest_apid(matrix_mdev, apid))
+   assigned = true;
+
+   return assigned;
+}
+
+bool vfio_ap_mdev_assign_apqis(struct ap_matrix_mdev *matrix_mdev,
+  unsigned long *aqm_assign)
+{
+   

[PATCH v8 12/16] s390/vfio-ap: allow hot plug/unplug of AP resources using mdev device

2020-06-05 Thread Tony Krowiak
Let's hot plug/unplug adapters, domains and control domains assigned to or
unassigned from an AP matrix mdev device while it is in use by a guest per
the following:

* When the APID of an adapter is assigned to a matrix mdev in use by a KVM
  guest, the adapter will be hot plugged into the KVM guest as long as each
  APQN derived from the Cartesian product of the APID being assigned and
  the APQIs already assigned to the guest's CRYCB references a queue device
  bound to the vfio_ap device driver.

* When the APID of an adapter is unassigned from a matrix mdev in use by a
  KVM guest, the adapter will be hot unplugged from the KVM guest.

* When the APQI of a domain is assigned to a matrix mdev in use by a KVM
  guest, the domain will be hot plugged into the KVM guest as long as each
  APQN derived from the Cartesian product of the APQI being assigned and
  the APIDs already assigned to the guest's CRYCB references a queue device
  bound to the vfio_ap device driver.

* When the APQI of a domain is unassigned from a matrix mdev in use by a
  KVM guest, the domain will be hot unplugged from the KVM guest

* When the domain number of a control domain is assigned to a matrix mdev
  in use by a KVM guest, the control domain will be hot plugged into the
  KVM guest.

* When the domain number of a control domain is unassigned from a matrix
  mdev in use by a KVM guest, the control domain will be hot unplugged
  from the KVM guest.
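
As a rough kernel-style sketch of the first rule above (queue_bound() is
an assumed helper standing in for the driver's real queue lookup, not an
existing function):

   /*
    * Can the adapter with this APID be hot plugged into the guest?  True
    * only if every APQN in {apid} x {APQIs in the guest's CRYCB} references
    * a queue bound to the vfio_ap driver.
    */
   static bool can_hot_plug_apid(struct ap_matrix_mdev *matrix_mdev,
                                 unsigned long apid)
   {
           unsigned long apqi;

           for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS)
                   if (!queue_bound(AP_MKQID(apid, apqi)))
                           return false;

           return true;
   }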

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 196 ++
 1 file changed, 196 insertions(+)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 4f59f471b4d3..3df050eae112 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -731,6 +731,56 @@ static void vfio_ap_mdev_link_queues(struct ap_matrix_mdev 
*matrix_mdev,
}
 }
 
+static bool vfio_ap_mdev_assign_apqis_4_apid(struct ap_matrix_mdev 
*matrix_mdev,
+unsigned long apid)
+{
+   DECLARE_BITMAP(aqm, AP_DOMAINS);
+   unsigned long apqi, apqn;
+
+   bitmap_copy(aqm, matrix_mdev->matrix.aqm, AP_DOMAINS);
+
+   for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
+   if (!test_bit_inv(apqi,
+ (unsigned long *) matrix_dev->info.aqm))
+   clear_bit_inv(apqi, aqm);
+
+   apqn = AP_MKQID(apid, apqi);
+   if (!vfio_ap_get_mdev_queue(matrix_mdev, apqn))
+   clear_bit_inv(apqi, aqm);
+   }
+
+   if (bitmap_empty(aqm, AP_DOMAINS))
+   return false;
+
+   set_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+   bitmap_copy(matrix_mdev->shadow_apcb.aqm, aqm, AP_DOMAINS);
+
+   return true;
+}
+
+static bool vfio_ap_mdev_assign_guest_apid(struct ap_matrix_mdev *matrix_mdev,
+  unsigned long apid)
+{
+   unsigned long apqi, apqn;
+
+   if (!vfio_ap_mdev_has_crycb(matrix_mdev) ||
+   !test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm))
+   return false;
+
+   if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS))
+   return vfio_ap_mdev_assign_apqis_4_apid(matrix_mdev, apid);
+
+   for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) {
+   apqn = AP_MKQID(apid, apqi);
+   if (!vfio_ap_get_mdev_queue(matrix_mdev, apqn))
+   return false;
+   }
+
+   set_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+
+   return true;
+}
+
 /**
  * assign_adapter_store
  *
@@ -792,12 +842,42 @@ static ssize_t assign_adapter_store(struct device *dev,
}
set_bit_inv(apid, matrix_mdev->matrix.apm);
vfio_ap_mdev_link_queues(matrix_mdev, LINK_APID, apid);
+   if (vfio_ap_mdev_assign_guest_apid(matrix_mdev, apid))
+   vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
mutex_unlock(_dev->lock);
 
return count;
 }
 static DEVICE_ATTR_WO(assign_adapter);
 
+static bool vfio_ap_mdev_unassign_guest_apid(struct ap_matrix_mdev 
*matrix_mdev,
+unsigned long apid)
+{
+   if (vfio_ap_mdev_has_crycb(matrix_mdev)) {
+   if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
+   clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+
+   /*
+* If there are no APIDs assigned to the guest, then
+* the guest will not have access to any queues, so
+* let's also go ahead and unassign the APQIs. Keeping
+* them around may yield unpredictable results during
+* a probe that is not related to a host AP
+* configuration change (i.e., an AP adapter is
+* configured online).
+   

[PATCH v8 10/16] s390/vfio-ap: allow assignment of unavailable AP queues to mdev device

2020-06-05 Thread Tony Krowiak
The current implementation does not allow assignment of an AP adapter or
domain to an mdev device if the APQNs resulting from the assignment
do not reference AP queue devices that are bound to the vfio_ap device
driver. This patch allows assignment of AP resources to the matrix mdev as
long as the APQNs resulting from the assignment:
   1. Are not reserved by the AP BUS for use by the zcrypt device drivers.
   2. Are not assigned to another matrix mdev.

The rationale behind this is twofold:
   1. The AP architecture does not preclude assignment of APQNs to an AP
  configuration that are not available to the system.
   2. APQNs that do not reference a queue device bound to the vfio_ap
  device driver will not be assigned to the guest's CRYCB, so the
  guest will not get access to queues not bound to the vfio_ap driver.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 212 +-
 1 file changed, 35 insertions(+), 177 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 9a019b2b86f8..68bdf80807c6 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0+
 /*
  * Adjunct processor matrix VFIO device driver callbacks.
  *
@@ -421,122 +420,6 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] 
= {
NULL,
 };
 
-struct vfio_ap_queue_reserved {
-   unsigned long *apid;
-   unsigned long *apqi;
-   bool reserved;
-};
-
-/**
- * vfio_ap_has_queue
- *
- * @dev: an AP queue device
- * @data: a struct vfio_ap_queue_reserved reference
- *
- * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
- * apid or apqi specified in @data:
- *
- * - If @data contains both an apid and apqi value, then @data will be flagged
- *   as reserved if the APID and APQI fields for the AP queue device matches
- *
- * - If @data contains only an apid value, @data will be flagged as
- *   reserved if the APID field in the AP queue device matches
- *
- * - If @data contains only an apqi value, @data will be flagged as
- *   reserved if the APQI field in the AP queue device matches
- *
- * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
- * @data does not contain either an apid or apqi.
- */
-static int vfio_ap_has_queue(struct device *dev, void *data)
-{
-   struct vfio_ap_queue_reserved *qres = data;
-   struct ap_queue *ap_queue = to_ap_queue(dev);
-   ap_qid_t qid;
-   unsigned long id;
-
-   if (qres->apid && qres->apqi) {
-   qid = AP_MKQID(*qres->apid, *qres->apqi);
-   if (qid == ap_queue->qid)
-   qres->reserved = true;
-   } else if (qres->apid && !qres->apqi) {
-   id = AP_QID_CARD(ap_queue->qid);
-   if (id == *qres->apid)
-   qres->reserved = true;
-   } else if (!qres->apid && qres->apqi) {
-   id = AP_QID_QUEUE(ap_queue->qid);
-   if (id == *qres->apqi)
-   qres->reserved = true;
-   } else {
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
-/**
- * vfio_ap_verify_queue_reserved
- *
- * @matrix_dev: a mediated matrix device
- * @apid: an AP adapter ID
- * @apqi: an AP queue index
- *
- * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP 
device
- * driver according to the following rules:
- *
- * - If both @apid and @apqi are not NULL, then there must be an AP queue
- *   device bound to the vfio_ap driver with the APQN identified by @apid and
- *   @apqi
- *
- * - If only @apid is not NULL, then there must be an AP queue device bound
- *   to the vfio_ap driver with an APQN containing @apid
- *
- * - If only @apqi is not NULL, then there must be an AP queue device bound
- *   to the vfio_ap driver with an APQN containing @apqi
- *
- * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
- */
-static int vfio_ap_verify_queue_reserved(unsigned long *apid,
-unsigned long *apqi)
-{
-   int ret;
-   struct vfio_ap_queue_reserved qres;
-
-   qres.apid = apid;
-   qres.apqi = apqi;
-   qres.reserved = false;
-
-	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
-				     &qres, vfio_ap_has_queue);
-   if (ret)
-   return ret;
-
-   if (qres.reserved)
-   return 0;
-
-   return -EADDRNOTAVAIL;
-}
-
-static int
-vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev 
*matrix_mdev,
-unsigned long apid)
-{
-   int ret;
-   unsigned long apqi;
-   unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
-
-   if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
-		return vfio_ap_verify_queue_reserved(&apid, NULL);
-
-   

[PATCH v8 14/16] s390/vfio-ap: handle host AP config change notification

2020-06-05 Thread Tony Krowiak
Implements the driver callback invoked by the AP bus when the host
AP configuration has changed. Since this callback is invoked prior to
unbinding a device from its device driver, the vfio_ap driver will
respond by unplugging the AP adapters, domains and control domains
removed from the host's AP configuration from the guests using them.
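
A minimal sketch of how the unplugged resources can be derived from the
old and new QCI masks (the variable names here are illustrative, not the
patch's exact code):

   DECLARE_BITMAP(apm_removed, AP_DEVICES);
   DECLARE_BITMAP(aqm_removed, AP_DOMAINS);

   /* a bit set in the old mask but clear in the new one was removed */
   bitmap_andnot(apm_removed, old_apm, new_apm, AP_DEVICES);
   bitmap_andnot(aqm_removed, old_aqm, new_aqm, AP_DOMAINS);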

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_drv.c |   5 +-
 drivers/s390/crypto/vfio_ap_ops.c | 148 +++---
 drivers/s390/crypto/vfio_ap_private.h |   7 +-
 3 files changed, 146 insertions(+), 14 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_drv.c 
b/drivers/s390/crypto/vfio_ap_drv.c
index 86fc83701e05..f0f83c1b8983 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -113,9 +113,11 @@ static int vfio_ap_matrix_dev_create(void)
 
/* Fill in config info via PQAP(QCI), if available */
if (test_facility(12)) {
-		ret = ap_qci(&matrix_dev->info);
+		ret = ap_qci(&matrix_dev->config_info);
		if (ret)
			goto matrix_alloc_err;
+		memcpy(&matrix_dev->config_info_prev, &matrix_dev->config_info,
+		       sizeof(struct ap_config_info));
}
 
	mutex_init(&matrix_dev->lock);
@@ -175,6 +177,7 @@ static int __init vfio_ap_init(void)
vfio_ap_drv.remove = vfio_ap_queue_dev_remove;
vfio_ap_drv.in_use = vfio_ap_mdev_resource_in_use;
vfio_ap_drv.ids = ap_queue_ids;
+   vfio_ap_drv.on_config_changed = vfio_ap_on_cfg_changed;
 
ret = ap_driver_register(_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME);
if (ret) {
diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 3df050eae112..e3c4b2d73072 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -347,8 +347,9 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct 
mdev_device *mdev)
}
 
matrix_mdev->mdev = mdev;
-	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
-	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+	vfio_ap_matrix_init(&matrix_dev->config_info, &matrix_mdev->matrix);
+	vfio_ap_matrix_init(&matrix_dev->config_info,
+			    &matrix_mdev->shadow_apcb);
hash_init(matrix_mdev->qtable);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook.hook = handle_pqap;
@@ -527,8 +528,8 @@ static int vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev 
*matrix_mdev,
 * If the APID is not assigned to the host AP configuration,
 * we can not assign it to the guest's AP configuration
 */
-   if (!test_bit_inv(apid,
- (unsigned long *)matrix_dev->info.apm)) {
+   if (!test_bit_inv(apid, (unsigned long *)
+ matrix_dev->config_info.apm)) {
clear_bit_inv(apid, shadow_apcb->apm);
continue;
}
@@ -541,7 +542,7 @@ static int vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev 
*matrix_mdev,
 * guest's AP configuration
 */
if (!test_bit_inv(apqi, (unsigned long *)
- matrix_dev->info.aqm)) {
+ matrix_dev->config_info.aqm)) {
clear_bit_inv(apqi, shadow_apcb->aqm);
continue;
}
@@ -595,7 +596,7 @@ static bool vfio_ap_mdev_config_shadow_apcb(struct 
ap_matrix_mdev *matrix_mdev)
int napm, naqm;
struct ap_matrix shadow_apcb;
 
-	vfio_ap_matrix_init(&matrix_dev->info, &shadow_apcb);
+	vfio_ap_matrix_init(&matrix_dev->config_info, &shadow_apcb);
napm = bitmap_weight(matrix_mdev->matrix.apm, AP_DEVICES);
naqm = bitmap_weight(matrix_mdev->matrix.aqm, AP_DOMAINS);
/*
@@ -741,7 +742,7 @@ static bool vfio_ap_mdev_assign_apqis_4_apid(struct 
ap_matrix_mdev *matrix_mdev,
 
for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
if (!test_bit_inv(apqi,
- (unsigned long *) matrix_dev->info.aqm))
+ (unsigned long *)matrix_dev->config_info.aqm))
clear_bit_inv(apqi, aqm);
 
apqn = AP_MKQID(apid, apqi);
@@ -764,7 +765,7 @@ static bool vfio_ap_mdev_assign_guest_apid(struct 
ap_matrix_mdev *matrix_mdev,
unsigned long apqi, apqn;
 
if (!vfio_ap_mdev_has_crycb(matrix_mdev) ||
-   !test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm))
+   !test_bit_inv(apid, (unsigned long *)matrix_dev->config_info.apm))
return false;
 
if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS))
@@ -931,8 +932,8 @@ static bool vfio_ap_mdev_assign_apids_4_apqi(struct 
ap_matrix_mdev *matrix_mdev,

[PATCH v8 09/16] s390/vfio_ap: add qlink from ap_matrix_mdev struct to vfio_ap_queue struct

2020-06-05 Thread Tony Krowiak
In order to make retrieval of a vfio_ap_queue struct more
efficient when we already have a pointer to the ap_matrix_mdev to which the
queue's APQN is assigned, let's go ahead and add a link from the
ap_matrix_mdev struct to the vfio_ap_queue struct.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_ops.c | 102 ++
 drivers/s390/crypto/vfio_ap_private.h |   2 +
 2 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index add442977b9a..9a019b2b86f8 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -50,6 +50,19 @@ static struct vfio_ap_queue *vfio_ap_get_queue(unsigned long 
apqn)
return q;
 }
 
+struct vfio_ap_queue *vfio_ap_get_mdev_queue(struct ap_matrix_mdev 
*matrix_mdev,
+unsigned long apqn)
+{
+   struct vfio_ap_queue *q;
+
+   hash_for_each_possible(matrix_mdev->qtable, q, mdev_qnode, apqn) {
+   if (q && (q->apqn == apqn))
+   return q;
+   }
+
+   return NULL;
+}
+
 /**
  * vfio_ap_wait_for_irqclear
  * @apqn: The AP Queue number
@@ -337,6 +350,7 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct 
mdev_device *mdev)
matrix_mdev->mdev = mdev;
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+   hash_init(matrix_mdev->qtable);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook.hook = handle_pqap;
matrix_mdev->pqap_hook.owner = THIS_MODULE;
@@ -639,7 +653,7 @@ static int vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev 
*matrix_mdev,
 * filter the APQI.
 */
apqn = AP_MKQID(apid, apqi);
-   if (!vfio_ap_get_queue(apqn)) {
+   if (!vfio_ap_get_mdev_queue(matrix_mdev, apqn)) {
if (filter_apids)
clear_bit_inv(apid, shadow_apcb->apm);
else
@@ -682,7 +696,6 @@ static bool vfio_ap_mdev_config_shadow_apcb(struct 
ap_matrix_mdev *matrix_mdev)
	vfio_ap_matrix_init(&matrix_dev->info, &shadow_apcb);
napm = bitmap_weight(matrix_mdev->matrix.apm, AP_DEVICES);
naqm = bitmap_weight(matrix_mdev->matrix.aqm, AP_DOMAINS);
-
/*
 * If there are no APIDs or no APQIs assigned to the matrix mdev,
 * then no APQNs shall be assigned to the guest CRYCB.
@@ -694,6 +707,7 @@ static bool vfio_ap_mdev_config_shadow_apcb(struct 
ap_matrix_mdev *matrix_mdev)
 */
	napm = vfio_ap_mdev_filter_matrix(matrix_mdev, &shadow_apcb,
  true);
+
/*
 * If there are no APQNs that can be assigned to the guest's
 * CRYCB after filtering, then try filtering the APQIs.
@@ -742,56 +756,75 @@ enum qlink_type {
UNLINK_APQI,
 };
 
+static void vfio_ap_mdev_link_queue(struct ap_matrix_mdev *matrix_mdev,
+   unsigned long apid, unsigned long apqi)
+{
+   struct vfio_ap_queue *q;
+
+   q = vfio_ap_get_queue(AP_MKQID(apid, apqi));
+   if (q) {
+   q->matrix_mdev = matrix_mdev;
+   hash_add(matrix_mdev->qtable,
+		 &q->mdev_qnode, q->apqn);
+   }
+}
+
+static void vfio_ap_mdev_unlink_queue(unsigned long apid, unsigned long apqi)
+{
+   struct vfio_ap_queue *q;
+
+   q = vfio_ap_get_queue(AP_MKQID(apid, apqi));
+   if (q) {
+   q->matrix_mdev = NULL;
+		hash_del(&q->mdev_qnode);
+   }
+}
+
 /**
  * vfio_ap_mdev_link_queues
  *
  * @matrix_mdev: The matrix mdev to link.
- * @type:   The type of link.
+ * @type:   The type of @qlink_id.
  * @qlink_id:   The APID or APQI of the queues to link.
  *
- * Sets the link from the queues with the specified @qlink_id (i.e., APID or
- * APQI) to @matrix_mdev:
- * qlink_id == LINK_APID: Link @matrix_mdev to the queues with the
- * specified APID>
- * qlink_id == UNLINK_APID: Unlink @matrix_mdev from the queues with the
- * specified APID>
- * qlink_id == LINK_APQI: Link @matrix_mdev to the queues with the
- * specified APQI>
- * qlink_id == UNLINK_APQI: Unlink @matrix_mdev from the queues with the
- * specified APQI>
+ * Sets or clears the links between the queues with the specified @qlink_id
+ * and the @matrix_mdev:
+ * @type == LINK_APID: Set the links between the @matrix_mdev and the
+ * queues with the specified @qlink_id (APID)
+ * @type == LINK_APQI: Set the links between the @matrix_mdev and the
+ * queues with the specified @qlink_id (APQI)
+ * @type == UNLINK_APID: Clear the links between the 

Re: [PATCH 03/12] x86/xen: Introduce new function to map HYPERVISOR_shared_info on Resume

2020-06-05 Thread Boris Ostrovsky
On 6/4/20 7:03 PM, Anchal Agarwal wrote:
> On Sat, May 30, 2020 at 07:02:01PM -0400, Boris Ostrovsky wrote:
>>
>>
>>
>> On 5/19/20 7:25 PM, Anchal Agarwal wrote:
>>> Introduce a small function which re-uses shared page's PA allocated
>>> during guest initialization time in reserve_shared_info() and not
>>> allocate new page during resume flow.
>>> It also  does the mapping of shared_info_page by calling
>>> xen_hvm_init_shared_info() to use the function.
>>>
>>> Signed-off-by: Anchal Agarwal 
>>> ---
>>>  arch/x86/xen/enlighten_hvm.c | 7 +++
>>>  arch/x86/xen/xen-ops.h   | 1 +
>>>  2 files changed, 8 insertions(+)
>>>
>>> diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
>>> index e138f7de52d2..75b1ec7a0fcd 100644
>>> --- a/arch/x86/xen/enlighten_hvm.c
>>> +++ b/arch/x86/xen/enlighten_hvm.c
>>> @@ -27,6 +27,13 @@
>>>
>>>  static unsigned long shared_info_pfn;
>>>
>>> +void xen_hvm_map_shared_info(void)
>>> +{
>>> + xen_hvm_init_shared_info();
>>> + if (shared_info_pfn)
>>> + HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
>>> +}
>>> +
>>
>> AFAICT it is only called once so I don't see a need for new routine.
>>
>>
> HYPERVISOR_shared_info can only be mapped in this scope without refactoring
> much of the code.


Refactoring what? All I am suggesting is

--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -124,7 +124,9 @@ static void xen_syscore_resume(void)
    return;
 
    /* No need to setup vcpu_info as it's already moved off */
-   xen_hvm_map_shared_info();
+   xen_hvm_init_shared_info();
+   if (shared_info_pfn)
+   HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
 
    pvclock_resume();

>> And is it possible for shared_info_pfn to be NULL in resume path (which
>> is where this is called)?
>>
>>
> I don't think it should be; still a sanity check, but I don't think it's
> needed there because hibernation will fail in any case if that's the case.


If shared_info_pfn is NULL you'd have problems long before hibernation
started. We set it in xen_hvm_guest_init() and never touch again.


In fact, I'd argue that it should be __ro_after_init.


> However, HYPERVISOR_shared_info does need to be re-mapped on resume as it's
> been marked to a dummy address on suspend. It's also safe in case the va
> changes. Does that answer your question?


I wasn't arguing whether HYPERVISOR_shared_info needs to be set, I was
only saying that shared_info_pfn doesn't need to be tested.


-boris




[PATCH v8 02/16] s390/vfio-ap: use new AP bus interface to search for queue devices

2020-06-05 Thread Tony Krowiak
This patch refactors the vfio_ap device driver to use the AP bus's
ap_get_qdev() function to retrieve the vfio_ap_queue struct containing
information about a queue that is bound to the vfio_ap device driver.
The bus's ap_get_qdev() function retrieves the queue device from a
hashtable keyed by APQN, which is faster than looping over the list of
devices attached to the AP bus by several orders of magnitude.
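
A sketch of the hashtable lookup this describes (the table name, node
member, and omitted locking/reference counting are assumptions, not the
bus's literal implementation):

   static DEFINE_HASHTABLE(ap_queues, 8);      /* keyed by APQN */

   struct ap_queue *ap_get_qdev(ap_qid_t qid)
   {
           struct ap_queue *aq;

           hash_for_each_possible(ap_queues, aq, hnode, qid)
                   if (aq->qid == qid)     /* expected O(1) vs. O(n) list walk */
                           return aq;

           return NULL;
   }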

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_drv.c | 27 ++---
 drivers/s390/crypto/vfio_ap_ops.c | 82 +++
 drivers/s390/crypto/vfio_ap_private.h |  8 ++-
 3 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_drv.c 
b/drivers/s390/crypto/vfio_ap_drv.c
index be2520cc010b..59233cf7419d 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -51,15 +51,9 @@ MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
  */
 static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
 {
-   struct vfio_ap_queue *q;
-
-   q = kzalloc(sizeof(*q), GFP_KERNEL);
-   if (!q)
-   return -ENOMEM;
-	dev_set_drvdata(&apdev->device, q);
-	q->apqn = to_ap_queue(&apdev->device)->qid;
-   q->saved_isc = VFIO_AP_ISC_INVALID;
-   return 0;
+	struct ap_queue *queue = to_ap_queue(&apdev->device);
+
+   return vfio_ap_mdev_probe_queue(queue);
 }
 
 /**
@@ -70,18 +64,9 @@ static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
  */
 static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
 {
-   struct vfio_ap_queue *q;
-   int apid, apqi;
-
-	mutex_lock(&matrix_dev->lock);
-	q = dev_get_drvdata(&apdev->device);
-	dev_set_drvdata(&apdev->device, NULL);
-	apid = AP_QID_CARD(q->apqn);
-	apqi = AP_QID_QUEUE(q->apqn);
-	vfio_ap_mdev_reset_queue(apid, apqi, 1);
-	vfio_ap_irq_disable(q);
-	kfree(q);
-	mutex_unlock(&matrix_dev->lock);
+	struct ap_queue *queue = to_ap_queue(&apdev->device);
+
+   vfio_ap_mdev_remove_queue(queue);
 }
 
 static void vfio_ap_matrix_dev_release(struct device *dev)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index e0bde8518745..7c96b6fd9f70 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -26,43 +26,26 @@
 
 static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
 
-static int match_apqn(struct device *dev, const void *data)
-{
-   struct vfio_ap_queue *q = dev_get_drvdata(dev);
-
-   return (q->apqn == *(int *)(data)) ? 1 : 0;
-}
-
 /**
- * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
- * @matrix_mdev: the associated mediated matrix
+ * vfio_ap_get_queue: Retrieve a queue with a specific APQN.
  * @apqn: The queue APQN
  *
- * Retrieve a queue with a specific APQN from the list of the
- * devices of the vfio_ap_drv.
- * Verify that the APID and the APQI are set in the matrix.
+ * Retrieve a queue with a specific APQN from the AP queue devices attached to
+ * the AP bus.
  *
- * Returns the pointer to the associated vfio_ap_queue
+ * Returns the pointer to the vfio_ap_queue with the specified APQN, or NULL.
  */
-static struct vfio_ap_queue *vfio_ap_get_queue(
-   struct ap_matrix_mdev *matrix_mdev,
-   int apqn)
+static struct vfio_ap_queue *vfio_ap_get_queue(unsigned long apqn)
 {
+   struct ap_queue *queue;
struct vfio_ap_queue *q;
-   struct device *dev;
 
-   if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
-   return NULL;
-   if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
+   queue = ap_get_qdev(apqn);
+   if (!queue)
return NULL;
 
-	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
-				 &apqn, match_apqn);
-   if (!dev)
-   return NULL;
-   q = dev_get_drvdata(dev);
-   q->matrix_mdev = matrix_mdev;
-   put_device(dev);
+	q = dev_get_drvdata(&queue->ap_dev.device);
+	put_device(&queue->ap_dev.device);
 
return q;
 }
@@ -293,10 +276,11 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
   struct ap_matrix_mdev, pqap_hook);
 
-   q = vfio_ap_get_queue(matrix_mdev, apqn);
+   q = vfio_ap_get_queue(apqn);
if (!q)
goto out_unlock;
 
+   q->matrix_mdev = matrix_mdev;
status = vcpu->run->s.regs.gprs[1];
 
/* If IR bit(16) is set we enable the interrupt */
@@ -1116,16 +1100,11 @@ static int vfio_ap_mdev_group_notifier(struct 
notifier_block *nb,
 
 static void vfio_ap_irq_disable_apqn(int apqn)
 {
-   struct device *dev;
struct vfio_ap_queue *q;
 
-	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
-				 &apqn, match_apqn);
-   if (dev) {
-   q = 

[PATCH v8 13/16] s390/zcrypt: Notify driver on config changed and scan complete callbacks

2020-06-05 Thread Tony Krowiak
From: Harald Freudenberger 

This patch introduces an extension to the AP bus to notify drivers
of crypto config changed and bus scan complete events.
Two new callbacks are introduced for ap_drivers:

  void (*on_config_changed)(struct ap_config_info *new_config_info,
struct ap_config_info *old_config_info);
  void (*on_scan_complete)(struct ap_config_info *new_config_info,
struct ap_config_info *old_config_info);

Both callbacks are optional. Both callbacks are only triggered
when QCI information is available (facility bit 12):

* The on_config_changed callback is invoked at the start of the AP bus scan
  function when it determines that the host AP configuration information
  has changed since the previous scan. This is done by storing
  an old and current QCI info struct and comparing them. If there is any
  difference, the callback is invoked.

  Note that when the AP bus scan detects that AP adapters or domains have
  been removed from the host's AP configuration, it will remove the
  associated devices from the AP bus subsystem's device model. This
  callback gives the device driver a chance to respond to the removal
  of the AP devices in bulk rather than one at a time as its remove
  callback is invoked. It will also allow the device driver to do any
  cleanup prior to giving control back to the bus piecemeal. This is
  particularly important for the vfio_ap driver because there may be
  guests using the queues at the time.

* The on_scan_complete callback is invoked after the ap bus scan is
  complete if the host AP configuration data has changed.

  Note that when the AP bus scan detects that adapters or domains have
  been added to the host's configuration, it will create new devices in
  the AP bus subsystem's device model. This callback also allows the driver
  to process all of the new devices in bulk.

Please note that changes to the apmask and aqmask do not trigger
these two callbacks since the bus scan function is not invoked by changes
to those masks.
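
Conceptually, each trigger reduces to comparing the stored QCI snapshots
and invoking the optional callback, roughly as follows (a sketch under the
names introduced below; the real bus code also iterates over all
registered drivers):

   static void ap_notify_config_changed(struct ap_driver *ap_drv)
   {
           /* fire only when the fresh QCI info differs from the snapshot */
           if (memcmp(ap_config_info, ap_old_config_info,
                      sizeof(*ap_config_info)) == 0)
                   return;

           if (ap_drv->on_config_changed)
                   ap_drv->on_config_changed(ap_config_info,
                                             ap_old_config_info);
   }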

Signed-off-by: Harald Freudenberger 
Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/ap_bus.c | 175 ++-
 drivers/s390/crypto/ap_bus.h |  12 +++
 2 files changed, 142 insertions(+), 45 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 40cb5861dad3..0b4a67051138 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -73,8 +73,12 @@ struct ap_perms ap_perms;
 EXPORT_SYMBOL(ap_perms);
 DEFINE_MUTEX(ap_perms_mutex);
 EXPORT_SYMBOL(ap_perms_mutex);
+DEFINE_MUTEX(ap_config_lock);
+
+/* current and old qci info structs */
+static struct ap_config_info *ap_config_info;
+static struct ap_config_info *ap_old_config_info;
 
-static struct ap_config_info *ap_configuration;
 static bool initialised;
 
 /*
@@ -183,8 +187,8 @@ static int ap_apft_available(void)
  */
 static inline int ap_qact_available(void)
 {
-   if (ap_configuration)
-   return ap_configuration->qact;
+   if (ap_config_info)
+   return ap_config_info->qact;
return 0;
 }
 
@@ -213,13 +217,15 @@ static void ap_init_configuration(void)
if (!ap_configuration_available())
return;
 
-   ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL);
-   if (!ap_configuration)
-   return;
-   if (ap_query_configuration(ap_configuration) != 0) {
-   kfree(ap_configuration);
-   ap_configuration = NULL;
+   /* allocate current qci info struct */
+   ap_config_info = kzalloc(sizeof(*ap_config_info), GFP_KERNEL);
+   if (!ap_config_info)
return;
+
+   /* fetch qci info into the current qci info struct */
+   if (ap_query_configuration(ap_config_info)) {
+   kfree(ap_config_info);
+   ap_config_info = NULL;
}
 }
 
@@ -242,10 +248,10 @@ static inline int ap_test_config(unsigned int *field, 
unsigned int nr)
  */
 static inline int ap_test_config_card_id(unsigned int id)
 {
-   if (!ap_configuration)  /* QCI not supported */
-   /* only ids 0...3F may be probed */
+   if (!ap_config_info)
+   /* QCI not available, only ids 0...3F may be probed */
return id < 0x40 ? 1 : 0;
-   return ap_test_config(ap_configuration->apm, id);
+   return ap_test_config(ap_config_info->apm, id);
 }
 
 /*
@@ -259,9 +265,9 @@ static inline int ap_test_config_card_id(unsigned int id)
  */
 int ap_test_config_usage_domain(unsigned int domain)
 {
-   if (!ap_configuration)  /* QCI not supported */
+   if (!ap_config_info)  /* QCI not supported */
return domain < 16;
-   return ap_test_config(ap_configuration->aqm, domain);
+   return ap_test_config(ap_config_info->aqm, domain);
 }
 EXPORT_SYMBOL(ap_test_config_usage_domain);
 
@@ -275,9 +281,9 @@ EXPORT_SYMBOL(ap_test_config_usage_domain);
  */
 

[PATCH v8 05/16] s390/vfio-ap: implement in-use callback for vfio_ap driver

2020-06-05 Thread Tony Krowiak
Let's implement the callback to indicate when an APQN
is in use by the vfio_ap device driver. The callback is
invoked whenever a change to the apmask or aqmask would
result in one or more queue devices being removed from the driver. The
vfio_ap device driver will indicate a resource is in use
if the APQN of any of the queue devices to be removed is assigned to
any of the matrix mdevs under the driver's control.

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/vfio_ap_drv.c |  1 +
 drivers/s390/crypto/vfio_ap_ops.c | 68 ---
 drivers/s390/crypto/vfio_ap_private.h |  2 +
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_drv.c 
b/drivers/s390/crypto/vfio_ap_drv.c
index 59233cf7419d..86fc83701e05 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -173,6 +173,7 @@ static int __init vfio_ap_init(void)
	memset(&vfio_ap_drv, 0, sizeof(vfio_ap_drv));
vfio_ap_drv.probe = vfio_ap_queue_dev_probe;
vfio_ap_drv.remove = vfio_ap_queue_dev_remove;
+   vfio_ap_drv.in_use = vfio_ap_mdev_resource_in_use;
vfio_ap_drv.ids = ap_queue_ids;
 
	ret = ap_driver_register(&vfio_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 21b98a392f36..2eebb2b6d2d4 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -501,18 +501,36 @@ vfio_ap_mdev_verify_queues_reserved_for_apid(struct 
ap_matrix_mdev *matrix_mdev,
return 0;
 }
 
+#define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \
+"already assigned to %s"
+
+static void vfio_ap_mdev_log_sharing_err(const char *mdev_name,
+unsigned long *apm,
+unsigned long *aqm)
+{
+   unsigned long apid, apqi;
+
+   for_each_set_bit_inv(apid, apm, AP_DEVICES)
+   for_each_set_bit_inv(apqi, aqm, AP_DOMAINS)
+   pr_err(MDEV_SHARING_ERR, apid, apqi, mdev_name);
+}
+
 /**
  * vfio_ap_mdev_verify_no_sharing
  *
  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
- * and AP queue indexes comprising the AP matrix are not configured for another
+ * and AP queue indexes comprising an AP matrix are not assigned to another
  * mediated device. AP queue sharing is not allowed.
  *
  * @matrix_mdev: the mediated matrix device
+ * @mdev_apm: mask indicating the APIDs of the APQNs to be verified
+ * @mdev_aqm: mask indicating the APQIs of the APQNs to be verified
  *
  * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
  */
-static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
+static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *mdev_apm,
+ unsigned long *mdev_aqm)
 {
struct ap_matrix_mdev *lstdev;
DECLARE_BITMAP(apm, AP_DEVICES);
@@ -529,14 +547,15 @@ static int vfio_ap_mdev_verify_no_sharing(struct 
ap_matrix_mdev *matrix_mdev)
 * We work on full longs, as we can only exclude the leftover
 * bits in non-inverse order. The leftover is all zeros.
 */
-   if (!bitmap_and(apm, matrix_mdev->matrix.apm,
-   lstdev->matrix.apm, AP_DEVICES))
+   if (!bitmap_and(apm, mdev_apm, lstdev->matrix.apm, AP_DEVICES))
continue;
 
-   if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
-   lstdev->matrix.aqm, AP_DOMAINS))
+   if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
continue;
 
+   vfio_ap_mdev_log_sharing_err(dev_name(mdev_dev(lstdev->mdev)),
+apm, aqm);
+
return -EADDRINUSE;
}
 
@@ -642,6 +661,7 @@ static ssize_t assign_adapter_store(struct device *dev,
 {
int ret;
unsigned long apid;
+   DECLARE_BITMAP(apm, AP_DEVICES);
struct mdev_device *mdev = mdev_from_dev(dev);
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
@@ -667,18 +687,18 @@ static ssize_t assign_adapter_store(struct device *dev,
if (ret)
goto done;
 
-   set_bit_inv(apid, matrix_mdev->matrix.apm);
+   memset(apm, 0, sizeof(apm));
+   set_bit_inv(apid, apm);
 
-   ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
+   ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev, apm,
+matrix_mdev->matrix.aqm);
if (ret)
-   goto share_err;
+   goto done;
 
+   set_bit_inv(apid, matrix_mdev->matrix.apm);
vfio_ap_mdev_link_queues(matrix_mdev, LINK_APID, apid);
ret 

[PATCH v8 04/16] s390/zcrypt: driver callback to indicate resource in use

2020-06-05 Thread Tony Krowiak
Introduces a new driver callback to prevent a root user from unbinding
an AP queue from its device driver if the queue is in use. The intent of
this callback is to provide a driver with the means to prevent a root user
from inadvertently taking a queue away from a matrix mdev and giving it to
the host while it is assigned to the matrix mdev. The callback will
be invoked whenever a change to the AP bus's sysfs apmask or aqmask
attributes would result in one or more AP queues being removed from its
driver. If the callback responds in the affirmative for any driver
queried, the change to the apmask or aqmask will be rejected with a device
in use error.

For this patch, only non-default drivers will be queried. Currently,
there is only one non-default driver, the vfio_ap device driver. The
vfio_ap device driver facilitates pass-through of an AP queue to a
guest. The idea here is that a guest may be administered by a different
sysadmin than the host and we don't want AP resources to unexpectedly
disappear from a guest's AP configuration (i.e., adapters, domains and
control domains assigned to the matrix mdev). This will enforce the proper
procedure for removing AP resources intended for guest usage which is to
first unassign them from the matrix mdev, then unbind them from the
vfio_ap device driver.
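
The shape of such an in_use callback, as a hedged sketch (the driver-side
bookkeeping masks my_apm/my_aqm are assumptions for illustration):

   static DECLARE_BITMAP(my_apm, AP_DEVICES);  /* APIDs held by this driver */
   static DECLARE_BITMAP(my_aqm, AP_DOMAINS);  /* APQIs held by this driver */

   /*
    * Return nonzero if removing the masked APIDs/APQIs would take away an
    * APQN assigned to one of this driver's mediated devices.
    */
   static int my_drv_in_use(unsigned long *apm, unsigned long *aqm)
   {
           DECLARE_BITMAP(apm_hit, AP_DEVICES);
           DECLARE_BITMAP(aqm_hit, AP_DOMAINS);

           return bitmap_and(apm_hit, apm, my_apm, AP_DEVICES) &&
                  bitmap_and(aqm_hit, aqm, my_aqm, AP_DOMAINS);
   }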

Signed-off-by: Tony Krowiak 
---
 drivers/s390/crypto/ap_bus.c | 148 ---
 drivers/s390/crypto/ap_bus.h |   4 +
 2 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index e71ca4a719a5..40cb5861dad3 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ap_bus.h"
 #include "ap_debug.h"
@@ -876,6 +877,23 @@ static int modify_bitmap(const char *str, unsigned long 
*bitmap, int bits)
return 0;
 }
 
+static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int 
bits,
+  unsigned long *newmap)
+{
+   unsigned long size;
+   int rc;
+
+   size = BITS_TO_LONGS(bits)*sizeof(unsigned long);
+   if (*str == '+' || *str == '-') {
+   memcpy(newmap, bitmap, size);
+   rc = modify_bitmap(str, newmap, bits);
+   } else {
+   memset(newmap, 0, size);
+   rc = hex2bitmap(str, newmap, bits);
+   }
+   return rc;
+}
+
 int ap_parse_mask_str(const char *str,
  unsigned long *bitmap, int bits,
  struct mutex *lock)
@@ -895,14 +913,7 @@ int ap_parse_mask_str(const char *str,
kfree(newmap);
return -ERESTARTSYS;
}
-
-   if (*str == '+' || *str == '-') {
-   memcpy(newmap, bitmap, size);
-   rc = modify_bitmap(str, newmap, bits);
-   } else {
-   memset(newmap, 0, size);
-   rc = hex2bitmap(str, newmap, bits);
-   }
+   rc = ap_parse_bitmap_str(str, bitmap, bits, newmap);
if (rc == 0)
memcpy(bitmap, newmap, size);
mutex_unlock(lock);
@@ -1092,12 +1103,70 @@ static ssize_t apmask_show(struct bus_type *bus, char 
*buf)
return rc;
 }
 
+int __verify_card_reservations(struct device_driver *drv, void *data)
+{
+   int rc = 0;
+   struct ap_driver *ap_drv = to_ap_drv(drv);
+   unsigned long *newapm = (unsigned long *)data;
+
+   /*
+* No need to verify whether the driver is using the queues if it is the
+* default driver.
+*/
+   if (ap_drv->flags & AP_DRIVER_FLAG_DEFAULT)
+   return 0;
+
+   /* The non-default driver's module must be loaded */
+   if (!try_module_get(drv->owner))
+   return 0;
+
+   if (ap_drv->in_use)
+   if (ap_drv->in_use(newapm, ap_perms.aqm))
+   rc = -EADDRINUSE;
+
+   module_put(drv->owner);
+
+   return rc;
+}
+
+static int apmask_commit(unsigned long *newapm)
+{
+   int rc;
+   unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)];
+
+   /*
+* Check if any bits in the apmask have been set which will
+* result in queues being removed from non-default drivers
+*/
+   if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) {
+		rc = bus_for_each_drv(&ap_bus_type, NULL, reserved,
+ __verify_card_reservations);
+   if (rc)
+   return rc;
+   }
+
+   memcpy(ap_perms.apm, newapm, APMASKSIZE);
+
+   return 0;
+}
+
 static ssize_t apmask_store(struct bus_type *bus, const char *buf,
size_t count)
 {
int rc;
+   DECLARE_BITMAP(newapm, AP_DEVICES);
+
+	if (mutex_lock_interruptible(&ap_perms_mutex))
+   return -ERESTARTSYS;
+
+   rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm);
+   if (rc)
+ 

[PATCH 14/21] KVM: Move x86's version of struct kvm_mmu_memory_cache to common code

2020-06-05 Thread Sean Christopherson
Move x86's 'struct kvm_mmu_memory_cache' to common code in anticipation
of moving the entire x86 implementation code to common KVM and reusing
it for arm64 and MIPS.  Add a new architecture-specific asm/kvm_types.h
to control the existence and parameters of the struct.  The new header
is needed to avoid a chicken-and-egg problem with asm/kvm_host.h as all
architectures define instances of the struct in their vCPU structs.

Suggested-by: Christoffer Dall 
Signed-off-by: Sean Christopherson 
---
 arch/arm64/include/asm/kvm_types.h   |  6 ++
 arch/mips/include/asm/kvm_types.h|  5 +
 arch/powerpc/include/asm/kvm_types.h |  5 +
 arch/s390/include/asm/kvm_types.h|  5 +
 arch/x86/include/asm/kvm_host.h  | 13 -
 arch/x86/include/asm/kvm_types.h |  7 +++
 include/linux/kvm_types.h| 19 +++
 7 files changed, 47 insertions(+), 13 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_types.h
 create mode 100644 arch/mips/include/asm/kvm_types.h
 create mode 100644 arch/powerpc/include/asm/kvm_types.h
 create mode 100644 arch/s390/include/asm/kvm_types.h
 create mode 100644 arch/x86/include/asm/kvm_types.h

diff --git a/arch/arm64/include/asm/kvm_types.h 
b/arch/arm64/include/asm/kvm_types.h
new file mode 100644
index ..d0987007d581
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_types.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_KVM_TYPES_H
+#define _ASM_ARM64_KVM_TYPES_H
+
+#endif /* _ASM_ARM64_KVM_TYPES_H */
+
diff --git a/arch/mips/include/asm/kvm_types.h 
b/arch/mips/include/asm/kvm_types.h
new file mode 100644
index ..5efeb32a5926
--- /dev/null
+++ b/arch/mips/include/asm/kvm_types.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_MIPS_KVM_TYPES_H
+#define _ASM_MIPS_KVM_TYPES_H
+
+#endif /* _ASM_MIPS_KVM_TYPES_H */
diff --git a/arch/powerpc/include/asm/kvm_types.h 
b/arch/powerpc/include/asm/kvm_types.h
new file mode 100644
index ..f627eceaa314
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_types.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KVM_TYPES_H
+#define _ASM_POWERPC_KVM_TYPES_H
+
+#endif /* _ASM_POWERPC_KVM_TYPES_H */
diff --git a/arch/s390/include/asm/kvm_types.h 
b/arch/s390/include/asm/kvm_types.h
new file mode 100644
index ..b66a81f8a354
--- /dev/null
+++ b/arch/s390/include/asm/kvm_types.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_KVM_TYPES_H
+#define _ASM_S390_KVM_TYPES_H
+
+#endif /* _ASM_S390_KVM_TYPES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fb99e6776e27..8e8fea13b6c7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -193,8 +193,6 @@ struct x86_exception;
 enum x86_intercept;
 enum x86_intercept_stage;
 
-#define KVM_NR_MEM_OBJS 40
-
 #define KVM_NR_DB_REGS 4
 
 #define DR6_BD (1 << 13)
@@ -245,17 +243,6 @@ enum x86_intercept_stage;
 
 struct kvm_kernel_irq_routing_entry;
 
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-   int nobjs;
-   gfp_t gfp_zero;
-   struct kmem_cache *kmem_cache;
-   void *objects[KVM_NR_MEM_OBJS];
-};
-
 /*
  * the pages used as guest page table on soft mmu are tracked by
  * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h
new file mode 100644
index ..08f1b57d3b62
--- /dev/null
+++ b/arch/x86/include/asm/kvm_types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVM_TYPES_H
+#define _ASM_X86_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_X86_KVM_TYPES_H */
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 68e84cf42a3f..a7580f69dda0 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -20,6 +20,8 @@ enum kvm_mr_change;
 
 #include <linux/types.h>
 
+#include <asm/kvm_types.h>
+
 /*
  * Address types:
  *
@@ -58,4 +60,21 @@ struct gfn_to_pfn_cache {
bool dirty;
 };
 
+#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
+/*
+ * Memory caches are used to preallocate memory ahead of various MMU flows,
+ * e.g. page fault handlers.  Gracefully handling allocation failures deep in
+ * MMU flows is problematic, as is triggering reclaim, I/O, etc... while
+ * holding MMU locks.  Note, these caches act more like prefetch buffers than
+ * classical caches, i.e. objects are not returned to the cache on being freed.
+ */
+struct kvm_mmu_memory_cache {
+   int nobjs;
+   gfp_t gfp_zero;
+   struct kmem_cache *kmem_cache;
+   void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+};
+#endif
+
+
 #endif /* __KVM_TYPES_H__ */
-- 
2.26.0
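
A minimal sketch of the header layering this patch sets up (paths abbreviated,
capacity value illustrative, not a drop-in implementation): the capacity macro
lives in a tiny standalone asm/kvm_types.h so common code can define the
struct without including asm/kvm_host.h, which itself embeds the struct in
each vCPU.

	/* arch/<arch>/include/asm/kvm_types.h: needs no other KVM headers. */
	#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40

	/* include/linux/kvm_types.h: common definition, gated per arch. */
	#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
	struct kvm_mmu_memory_cache {
		int nobjs;
		gfp_t gfp_zero;
		struct kmem_cache *kmem_cache;
		void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
	};
	#endif

	/* arch/<arch>/include/asm/kvm_host.h can then embed the cache: */
	struct kvm_vcpu_arch {
		struct kvm_mmu_memory_cache mmu_page_header_cache;
		/* ... */
	};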



[PATCH 02/21] KVM: x86/mmu: Consolidate "page" variant of memory cache helpers

2020-06-05 Thread Sean Christopherson
Drop the "page" variants of the topup/free memory cache helpers, using
the existence of an associated kmem_cache to select the correct alloc
or free routine.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 37 +++--
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0830c195c9ed..cbc101663a89 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1067,7 +1067,10 @@ static int mmu_topup_memory_cache(struct 
kvm_mmu_memory_cache *cache, int min)
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   obj = kmem_cache_zalloc(cache->kmem_cache, GFP_KERNEL_ACCOUNT);
+   if (cache->kmem_cache)
+   obj = kmem_cache_zalloc(cache->kmem_cache, GFP_KERNEL_ACCOUNT);
+   else
+   obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!obj)
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@@ -1082,30 +1085,12 @@ static int mmu_memory_cache_free_objects(struct 
kvm_mmu_memory_cache *cache)
 
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 {
-   while (mc->nobjs)
-   kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]);
-}
-
-static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
-  int min)
-{
-   void *page;
-
-   if (cache->nobjs >= min)
-   return 0;
-   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
-   if (!page)
-   return cache->nobjs >= min ? 0 : -ENOMEM;
-   cache->objects[cache->nobjs++] = page;
+   while (mc->nobjs) {
+   if (mc->kmem_cache)
+   kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]);
+   else
+   free_page((unsigned long)mc->objects[--mc->nobjs]);
}
-   return 0;
-}
-
-static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
-{
-   while (mc->nobjs)
-   free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
 static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
@@ -1116,7 +1101,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
   8 + PTE_PREFETCH_NUM);
if (r)
goto out;
-   r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, 8);
if (r)
goto out;
	r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 4);
@@ -1127,7 +1112,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
	mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
-   mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
+   mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
-- 
2.26.0



[PATCH 04/21] KVM: x86/mmu: Remove superfluous gotos from mmu_topup_memory_caches()

2020-06-05 Thread Sean Christopherson
Return errors directly from mmu_topup_memory_caches() instead of
branching to a label that does the same.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 36c90f004ef4..ba70de24a5b0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1100,13 +1100,11 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu)
	r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
   8 + PTE_PREFETCH_NUM);
if (r)
-   goto out;
+   return r;
	r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, 8);
if (r)
-   goto out;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 4);
-out:
-   return r;
+   return r;
+   return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 4);
 }
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
-- 
2.26.0



[PATCH 05/21] KVM: x86/mmu: Try to avoid crashing KVM if a MMU memory cache is empty

2020-06-05 Thread Sean Christopherson
Attempt to allocate a new object instead of crashing KVM (and likely the
kernel) if a memory cache is unexpectedly empty.  Use GFP_ATOMIC for the
allocation as the caches are used while holding mmu_lock.  The immediate
BUG_ON() makes the code unnecessarily explosive and led to confusing
minimums being used in the past, e.g. allocating 4 objects where 1 would
suffice.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ba70de24a5b0..5e773564ab20 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1060,6 +1060,15 @@ static void walk_shadow_page_lockless_end(struct 
kvm_vcpu *vcpu)
local_irq_enable();
 }
 
+static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
+  gfp_t gfp_flags)
+{
+   if (mc->kmem_cache)
+   return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
+   else
+   return (void *)__get_free_page(gfp_flags);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 {
void *obj;
@@ -1067,10 +1076,7 @@ static int mmu_topup_memory_cache(struct 
kvm_mmu_memory_cache *mc, int min)
if (mc->nobjs >= min)
return 0;
while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
-   if (mc->kmem_cache)
-   obj = kmem_cache_zalloc(mc->kmem_cache, GFP_KERNEL_ACCOUNT);
-   else
-   obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+   obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
if (!obj)
return mc->nobjs >= min ? 0 : -ENOMEM;
mc->objects[mc->nobjs++] = obj;
@@ -1118,8 +1124,11 @@ static void *mmu_memory_cache_alloc(struct 
kvm_mmu_memory_cache *mc)
 {
void *p;
 
-   BUG_ON(!mc->nobjs);
-   p = mc->objects[--mc->nobjs];
+   if (WARN_ON(!mc->nobjs))
+   p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
+   else
+   p = mc->objects[--mc->nobjs];
+   BUG_ON(!p);
return p;
 }
 
-- 
2.26.0



[rcu:dev.2020.06.02a 85/90] kernel/smp.c:122: undefined reference to `__udivdi3'

2020-06-05 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 
dev.2020.06.02a
head:   5216948905dd07a84cef8a7dc72c2ec076802efd
commit: 92ebbb71443dced2019cd24b737ce60b03a29e10 [85/90] EXP kernel/smp: 
Provide CSD lock timeout diagnostics
config: i386-randconfig-c001-20200605 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>, old ones prefixed by <<):

ld: kernel/smp.o: in function `csd_lock_wait':
>> kernel/smp.c:122: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
ld: kernel/smp.c:136: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
ld: kernel/smp.c:136: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
ld: kernel/smp.c:136: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:122: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:128: undefined reference to `__udivdi3'
>> ld: kernel/smp.c:136: undefined reference to `__umoddi3'
ld: kernel/smp.c:136: undefined reference to `__udivdi3'

vim +122 kernel/smp.c

   107  
   108  /*
   109   * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
   110   *
   111   * For non-synchronous ipi calls the csd can still be in use by the
   112   * previous function call. For multi-cpu calls its even more interesting
   113   * as we'll have to ensure no other cpu is observing our csd.
   114   */
   115  static __always_inline void csd_lock_wait(call_single_data_t *csd)
   116  {
   117  int bug_id = 0;
   118  int cpu;
   119  call_single_data_t *cpu_cur_csd;
   120  u64 ts0, ts1, ts2, ts_delta;
   121  
 > 122  ts1 = ts0 = sched_clock() / 1000 / 1000;
   123  for (;;) {
   124  unsigned long flags = READ_ONCE(csd->flags);
   125  
   126  if (!(flags & CSD_FLAG_LOCK))
   127  break;
 > 128  ts2 = sched_clock() / 1000 / 1000;
   129  ts_delta = ts2 - ts1;
   130  if (unlikely(ts_delta > CSD_LOCK_TIMEOUT)) {
   131  bug_id = atomic_inc_return(&csd_bug_count);
   132  cpu = csd->cpu;
   133  smp_mb(); // No stale cur_csd values!
   134  cpu_cur_csd = per_cpu(cur_csd, cpu);
   135  smp_mb(); // No refetching cur_csd values!
 > 136  printk("csd: Detected non-responsive CSD lock (#%d) on CPU#%d, waiting %Ld.%03Ld secs for CPU#%02d %pf(%ps), currently %s.\n",
   137 bug_id, raw_smp_processor_id(),
   138 ts_delta/1000ULL, ts_delta % 1000ULL, cpu,
   139 csd->func, csd->info,
   140 !cpu_cur_csd ? "unresponsive"
   141  : csd == cpu_cur_csd
   142  ? "handling this request"
   143  : "handling prior request");
   144  if (!trigger_single_cpu_backtrace(cpu))
   145  dump_cpu_task(cpu);
   146  if (!cpu_cur_csd) {
   147  printk("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", bug_id, raw_smp_processor_id(), cpu);
   148  arch_send_call_function_single_ipi(cpu);
   149  }
   150  dump_stack();
   151  ts1 = ts2;
   152  }
   153  cpu_relax();
   154  }
   155  smp_acquire__after_ctrl_dep();
   156  if (unlikely(bug_id))
   157  printk("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock after all. Phew!\n", bug_id, raw_smp_processor_id(), cpu);
   158  }
   159  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
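
These link failures are the classic 32-bit division pitfall: gcc lowers the
open-coded u64 divisions at lines 122, 128 and 136 to libgcc's __udivdi3 and
__umoddi3 helpers, which the i386 kernel does not provide. A sketch of the
conventional fix using the kernel's 64-bit math helpers (illustrative only;
the actual repair belongs in the EXP patch above):

	#include <linux/math64.h>

	u64 ts0, ts1, ts2, ts_delta;
	u32 rem;

	/* div_u64() does u64 / u32 without pulling in libgcc helpers. */
	ts1 = ts0 = div_u64(sched_clock(), 1000 * 1000);

	/* div_u64_rem() yields quotient and remainder in a single call,
	 * replacing the open-coded "/ 1000" and "% 1000" pair. */
	printk("... waiting %llu.%03u secs ...\n",
	       div_u64_rem(ts_delta, 1000, &rem), rem);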




[PATCH 06/21] KVM: x86/mmu: Move fast_page_fault() call above mmu_topup_memory_caches()

2020-06-05 Thread Sean Christopherson
Avoid refilling the memory caches and potentially slow reclaim/swap when
handling a fast page fault, which does not need to allocate any new
objects.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5e773564ab20..4b4c3234d623 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4095,6 +4095,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
 
+   if (fast_page_fault(vcpu, gpa, error_code))
+   return RET_PF_RETRY;
+
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
@@ -4102,9 +4105,6 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
if (lpage_disallowed)
max_level = PG_LEVEL_4K;
 
-   if (fast_page_fault(vcpu, gpa, error_code))
-   return RET_PF_RETRY;
-
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
 
-- 
2.26.0



[PATCH 20/21] KVM: MIPS: Account pages used for GPA page tables

2020-06-05 Thread Sean Christopherson
Use GFP_KERNEL_ACCOUNT instead of GFP_KERNEL when allocating pages for
the GPA page tables.  The primary motivation for accounting the
allocations is to align with the common KVM memory cache helpers in
preparation for moving to the common implementation in a future patch.
The actual accounting is a bonus side effect.

Signed-off-by: Sean Christopherson 
---
 arch/mips/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 94562c54b930..41a4a063a730 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -32,7 +32,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache 
*cache, int min)
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   page = (void *)__get_free_page(GFP_KERNEL);
+   page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
-- 
2.26.0



[PATCH 12/21] KVM: x86/mmu: Skip filling the gfn cache for guaranteed direct MMU topups

2020-06-05 Thread Sean Christopherson
Don't bother filling the gfn array cache when the caller is a fully
direct MMU, i.e. won't need a gfn array for shadow pages.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 18 ++
 arch/x86/kvm/mmu/paging_tmpl.h |  4 ++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a8f8eebf67df..8d66cf558f1b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1101,7 +1101,7 @@ static void mmu_free_memory_cache(struct 
kvm_mmu_memory_cache *mc)
}
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 {
int r;
 
@@ -1114,10 +1114,12 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu)
   PT64_ROOT_MAX_LEVEL);
if (r)
return r;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
-  PT64_ROOT_MAX_LEVEL);
-   if (r)
-   return r;
+   if (maybe_indirect) {
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
+  PT64_ROOT_MAX_LEVEL);
+   if (r)
+   return r;
+   }
	return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
  PT64_ROOT_MAX_LEVEL);
 }
@@ -4107,7 +4109,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
if (fast_page_fault(vcpu, gpa, error_code))
return RET_PF_RETRY;
 
-   r = mmu_topup_memory_caches(vcpu);
+   r = mmu_topup_memory_caches(vcpu, false);
if (r)
return r;
 
@@ -5147,7 +5149,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
int r;
 
-   r = mmu_topup_memory_caches(vcpu);
+   r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->direct_map);
if (r)
goto out;
r = mmu_alloc_roots(vcpu);
@@ -5341,7 +5343,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, 
gpa_t gpa,
 * or not since pte prefetch is skipped if it does not have
 * enough objects in the cache.
 */
-   mmu_topup_memory_caches(vcpu);
+   mmu_topup_memory_caches(vcpu, true);
 
	spin_lock(&vcpu->kvm->mmu_lock);
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 3de32122f601..ac39710d0594 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -818,7 +818,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 error_code,
return RET_PF_EMULATE;
}
 
-   r = mmu_topup_memory_caches(vcpu);
+   r = mmu_topup_memory_caches(vcpu, true);
if (r)
return r;
 
@@ -905,7 +905,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, 
hpa_t root_hpa)
 * No need to check return value here, rmap_can_add() can
 * help us to skip pte prefetch later.
 */
-   mmu_topup_memory_caches(vcpu);
+   mmu_topup_memory_caches(vcpu, true);
 
if (!VALID_PAGE(root_hpa)) {
WARN_ON(1);
-- 
2.26.0



[PATCH 21/21] KVM: MIPS: Use common KVM implementation of MMU memory caches

2020-06-05 Thread Sean Christopherson
Move to the common MMU memory cache implementation now that the common
code and MIPS's existing code are semantically compatible.

No functional change intended.

Suggested-by: Christoffer Dall 
Signed-off-by: Sean Christopherson 
---
 arch/mips/include/asm/kvm_host.h  | 11 -
 arch/mips/include/asm/kvm_types.h |  2 ++
 arch/mips/kvm/mmu.c   | 40 ---
 3 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 363e7a89d173..f49617175f60 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -335,17 +335,6 @@ struct kvm_mips_tlb {
long tlb_lo[2];
 };
 
-#define KVM_NR_MEM_OBJS 4
-
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-   int nobjs;
-   void *objects[KVM_NR_MEM_OBJS];
-};
-
 #define KVM_MIPS_AUX_FPU   0x1
 #define KVM_MIPS_AUX_MSA   0x2
 
diff --git a/arch/mips/include/asm/kvm_types.h 
b/arch/mips/include/asm/kvm_types.h
index 5efeb32a5926..213754d9ef6b 100644
--- a/arch/mips/include/asm/kvm_types.h
+++ b/arch/mips/include/asm/kvm_types.h
@@ -2,4 +2,6 @@
 #ifndef _ASM_MIPS_KVM_TYPES_H
 #define _ASM_MIPS_KVM_TYPES_H
 
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 4
+
 #endif /* _ASM_MIPS_KVM_TYPES_H */
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 41a4a063a730..d6acd88c0c46 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -25,39 +25,9 @@
 #define KVM_MMU_CACHE_MIN_PAGES 2
 #endif
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
-{
-   void *page;
-
-   if (cache->nobjs >= min)
-   return 0;
-   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-   page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
-   if (!page)
-   return -ENOMEM;
-   cache->objects[cache->nobjs++] = page;
-   }
-   return 0;
-}
-
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
-   while (mc->nobjs)
-   free_page((unsigned long)mc->objects[--mc->nobjs]);
-}
-
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
-{
-   void *p;
-
-   BUG_ON(!mc || !mc->nobjs);
-   p = mc->objects[--mc->nobjs];
-   return p;
-}
-
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-   mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+   kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
 }
 
 /**
@@ -151,7 +121,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct 
kvm_mmu_memory_cache *cache,
 
if (!cache)
return NULL;
-   new_pmd = mmu_memory_cache_alloc(cache);
+   new_pmd = kvm_mmu_memory_cache_alloc(cache);
pmd_init((unsigned long)new_pmd,
 (unsigned long)invalid_pte_table);
pud_populate(NULL, pud, new_pmd);
@@ -162,7 +132,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct 
kvm_mmu_memory_cache *cache,
 
if (!cache)
return NULL;
-   new_pte = mmu_memory_cache_alloc(cache);
+   new_pte = kvm_mmu_memory_cache_alloc(cache);
clear_page(new_pte);
pmd_populate_kernel(NULL, pmd, new_pte);
}
@@ -709,7 +679,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, 
unsigned long gpa,
goto out;
 
/* We need a minimum of cached pages ready for page table creation */
-   err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
+   err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (err)
goto out;
 
@@ -793,7 +763,7 @@ static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu 
*vcpu,
int ret;
 
/* We need a minimum of cached pages ready for page table creation */
-   ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
+   ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (ret)
return NULL;
 
-- 
2.26.0



[PATCH 19/21] KVM: MIPS: Drop @max param from mmu_topup_memory_cache()

2020-06-05 Thread Sean Christopherson
Drop the @max param from mmu_topup_memory_cache() and instead use
ARRAY_SIZE() to terminate the loop that fills the cache.  This removes a
BUG_ON() and sets the stage for moving MIPS to the common memory cache
implementation.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/mips/kvm/mmu.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 7dad7a293eae..94562c54b930 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -25,15 +25,13 @@
 #define KVM_MMU_CACHE_MIN_PAGES 2
 #endif
 
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- int min, int max)
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
 {
void *page;
 
-   BUG_ON(max > KVM_NR_MEM_OBJS);
if (cache->nobjs >= min)
return 0;
-   while (cache->nobjs < max) {
+   while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = (void *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
@@ -711,8 +709,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, 
unsigned long gpa,
goto out;
 
/* We need a minimum of cached pages ready for page table creation */
-   err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
-KVM_NR_MEM_OBJS);
+   err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (err)
goto out;
 
@@ -796,8 +793,7 @@ static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu 
*vcpu,
int ret;
 
/* We need a minimum of cached pages ready for page table creation */
-   ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
-KVM_NR_MEM_OBJS);
+   ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (ret)
return NULL;
 
-- 
2.26.0



[PATCH 09/21] KVM: x86/mmu: Separate the memory caches for shadow pages and gfn arrays

2020-06-05 Thread Sean Christopherson
Use separate caches for allocating shadow pages versus gfn arrays.  This
sets the stage for specifying __GFP_ZERO when allocating shadow pages
without incurring extra cost for gfn arrays.

No functional change intended.

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu/mmu.c  | 15 ++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16347b050754..e7a427547557 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -636,7 +636,8 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
 
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
-   struct kvm_mmu_memory_cache mmu_page_cache;
+   struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+   struct kvm_mmu_memory_cache mmu_gfn_array_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
 
/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 451e0365e5dd..d245acece3cd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1108,8 +1108,12 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
   1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
if (r)
return r;
-   r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
-  2 * PT64_ROOT_MAX_LEVEL);
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+  PT64_ROOT_MAX_LEVEL);
+   if (r)
+   return r;
+   r = mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
+  PT64_ROOT_MAX_LEVEL);
if (r)
return r;
	return mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
@@ -1119,7 +1123,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
	mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
-   mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+   mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
+   mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
 }
 
@@ -2096,9 +2101,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
kvm_vcpu *vcpu, int direct
struct kvm_mmu_page *sp;
 
	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
-   sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
+   sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	if (!direct)
-   sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
+   sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
/*
-- 
2.26.0



Re: [PATCH] IMA: Add log statements for failure conditions

2020-06-05 Thread Lakshmi Ramasubramanian

On 6/5/20 2:34 PM, Mimi Zohar wrote:
>> Maybe I can use the audit_msgno "AUDIT_INTEGRITY_PCR" with appropriate
>> strings for "op" and "cause".
>>
>> Mimi - please let me know if you think this audit_msgno would be ok to
>> use. I see this code used, for instance, for boot aggregate measurement.
>>
>> integrity_audit_msg(AUDIT_INTEGRITY_PCR, NULL, boot_aggregate_name, op,
>>  audit_cause, result, 0);
>
> Yes, AUDIT_INTEGRITY_PCR is also used for failures to add to the
> measurement list.


thanks - i'll post an updated patch shortly.

 -lakshmi



Re: [PATCH] IMA: Add log statements for failure conditions

2020-06-05 Thread Mimi Zohar
On Fri, 2020-06-05 at 14:09 -0700, Lakshmi Ramasubramanian wrote:
> On 6/5/20 1:49 PM, Paul Moore wrote:
> 
> > 
> >> Since a pr_xyz() call was already present, I just wanted to change the
> >> log level to keep the code change to the minimum. But if audit log is
> >> the right approach for this case, I'll update.
> > 
> > Generally we reserve audit for things that are required for various
> > security certifications and/or "security relevant".  From what you
> > mentioned above, it seems like this would fall into the second
> > category if not the first.
> > 
> > Looking at your patch it doesn't look like you are trying to record
> > anything special so you may be able to use the existing
> > integrity_audit_msg(...) helper.  Of course then the question comes
> > down to the audit record type (the audit_msgno argument), the
> > operation (op), and the comm/cause (cause).
> > 
> > Do you feel that any of the existing audit record types are a good fit for 
> > this?
> > 
> 
> Maybe I can use the audit_msgno "AUDIT_INTEGRITY_PCR" with appropriate 
> strings for "op" and "cause".
> 
> Mimi - please let me know if you think this audit_msgno would be ok to 
> use. I see this code used, for instance, for boot aggregate measurement.
> 
> integrity_audit_msg(AUDIT_INTEGRITY_PCR, NULL, boot_aggregate_name, op,
>   audit_cause, result, 0);

Yes, AUDIT_INTEGRITY_PCR is also used for failures to add to the
measurement list.

thanks,

Mimi
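
For illustration, a sketch of the kind of call the thread converges on; the
op and cause strings here are hypothetical placeholders, not the posted patch:

	/* Hypothetical failure path: "measure" and "hashing_error" are
	 * illustrative op/cause values for a failed measurement-list add. */
	integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
			    "measure", "hashing_error", result, 0);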


[PATCH v6 5/5] cpufreq: qcom: Disable fast switch when scaling DDR/L3

2020-06-05 Thread Sibi Sankar
Disable fast switch when the opp-tables required for scaling DDR/L3
are populated.

Signed-off-by: Sibi Sankar 
---

v6:
 * No change

v5:
 * Drop dev_pm_opp_get_path_count [Saravana]

 drivers/cpufreq/qcom-cpufreq-hw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
b/drivers/cpufreq/qcom-cpufreq-hw.c
index 8fa6ab6e0e4b6..56f01049fd3a3 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -158,6 +158,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
} else if (ret != -ENODEV) {
dev_err(cpu_dev, "Invalid opp table in device tree\n");
return ret;
+   } else {
+   policy->fast_switch_possible = true;
}
 
for (i = 0; i < LUT_MAX_ENTRIES; i++) {
@@ -307,8 +309,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy 
*policy)
 
dev_pm_opp_of_register_em(policy->cpus);
 
-   policy->fast_switch_possible = true;
-
return 0;
 error:
devm_iounmap(dev, base);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 1/5] cpufreq: blacklist SDM845 in cpufreq-dt-platdev

2020-06-05 Thread Sibi Sankar
Add SDM845 to cpufreq-dt-platdev blacklist since the actual scaling is
handled by the 'qcom-cpufreq-hw' driver.

Reviewed-by: Amit Kucheria 
Reviewed-by: Matthias Kaehlcke 
Signed-off-by: Sibi Sankar 
---

v6:
 * No change

v5:
 * Picked up R-b from Amit

v4:
 * Updated commit message [Matthias]
 * Picked up R-b from Matthias

 drivers/cpufreq/cpufreq-dt-platdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c 
b/drivers/cpufreq/cpufreq-dt-platdev.c
index e8e20fef400b0..be85eb494a6b3 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -132,6 +132,7 @@ static const struct of_device_id blacklist[] __initconst = {
{ .compatible = "qcom,apq8096", },
{ .compatible = "qcom,msm8996", },
{ .compatible = "qcom,qcs404", },
+   { .compatible = "qcom,sdm845", },
 
{ .compatible = "st,stih407", },
{ .compatible = "st,stih410", },
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 4/5] cpufreq: qcom: Update the bandwidth levels on frequency change

2020-06-05 Thread Sibi Sankar
Add support to parse optional OPP table attached to the cpu node when
the OPP bandwidth values are populated. This allows for scaling of
DDR/L3 bandwidth levels with frequency change.

Signed-off-by: Sibi Sankar 
---

v6:
 * Add global flag to distinguish between voltage update and opp add.
   Use the same flag before trying to scale ddr/l3 bw [Viresh]
 * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh] 
 * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]

v5:
 * Use dev_pm_opp_adjust_voltage instead [Viresh]
 * Misc cleanup

v4:
 * Split fast switch disable into another patch [Lukasz]

 drivers/cpufreq/qcom-cpufreq-hw.c | 82 ++-
 1 file changed, 80 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
b/drivers/cpufreq/qcom-cpufreq-hw.c
index fc92a8842e252..8fa6ab6e0e4b6 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,48 @@
 
 static unsigned long cpu_hw_rate, xo_rate;
 static struct platform_device *global_pdev;
+static bool icc_scaling_enabled;
+
+static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
+  unsigned long freq_khz)
+{
+   unsigned long freq_hz = freq_khz * 1000;
+   struct dev_pm_opp *opp;
+   struct device *dev;
+   int ret;
+
+   dev = get_cpu_device(policy->cpu);
+   if (!dev)
+   return -ENODEV;
+
+   opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
+   if (IS_ERR(opp))
+   return PTR_ERR(opp);
+
+   ret = dev_pm_opp_set_bw(dev, opp);
+   dev_pm_opp_put(opp);
+   return ret;
+}
+
+static int qcom_cpufreq_update_opp(struct device *cpu_dev,
+  unsigned long freq_khz,
+  unsigned long volt)
+{
+   unsigned long freq_hz = freq_khz * 1000;
+   int ret;
+
+   /* Skip voltage update if the opp table is not available */
+   if (!icc_scaling_enabled)
+   return dev_pm_opp_add(cpu_dev, freq_hz, volt);
+
+   ret = dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt);
+   if (ret) {
+   dev_err(cpu_dev, "Voltage update failed freq=%ld\n", freq_khz);
+   return ret;
+   }
+
+   return dev_pm_opp_enable(cpu_dev, freq_hz);
+}
 
 static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
unsigned int index)
@@ -39,6 +82,9 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy 
*policy,
 
writel_relaxed(index, perf_state_reg);
 
+   if (icc_scaling_enabled)
+   qcom_cpufreq_set_bw(policy, freq);
+
arch_set_freq_scale(policy->related_cpus, freq,
policy->cpuinfo.max_freq);
return 0;
@@ -89,11 +135,31 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
u32 data, src, lval, i, core_count, prev_freq = 0, freq;
u32 volt;
struct cpufreq_frequency_table  *table;
+   struct dev_pm_opp *opp;
+   unsigned long rate;
+   int ret;
 
table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
 
+   ret = dev_pm_opp_of_add_table(cpu_dev);
+   if (!ret) {
+   /* Disable all opps and cross-validate against LUT */
+   icc_scaling_enabled = true;
+   for (rate = 0; ; rate++) {
+   opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+   if (IS_ERR(opp))
+   break;
+
+   dev_pm_opp_put(opp);
+   dev_pm_opp_disable(cpu_dev, rate);
+   }
+   } else if (ret != -ENODEV) {
+   dev_err(cpu_dev, "Invalid opp table in device tree\n");
+   return ret;
+   }
+
for (i = 0; i < LUT_MAX_ENTRIES; i++) {
data = readl_relaxed(base + REG_FREQ_LUT +
  i * LUT_ROW_SIZE);
@@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 
if (freq != prev_freq && core_count != LUT_TURBO_IND) {
table[i].frequency = freq;
-   dev_pm_opp_add(cpu_dev, freq * 1000, volt);
+   qcom_cpufreq_update_opp(cpu_dev, freq, volt);
dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
freq, core_count);
} else if (core_count == LUT_TURBO_IND) {
@@ -133,7 +199,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
prev->frequency = prev_freq;
prev->flags = CPUFREQ_BOOST_FREQ;
-   

[PATCH v6 3/5] OPP: Add and export helper to set bandwidth

2020-06-05 Thread Sibi Sankar
Add and export 'dev_pm_opp_set_bw' to set the bandwidth
levels associated with an OPP.

Signed-off-by: Sibi Sankar 
---

v6:
 * Pass NULL to _set_opp_bw 

v5:
https://lkml.org/lkml/2020/5/27/7
 * Rework the patch based on ^^

v4:
https://patchwork.kernel.org/patch/11019737/
 * Pass device opp to set bw levels [Bjorn]

 drivers/opp/core.c | 31 +++
 include/linux/pm_opp.h |  6 ++
 2 files changed, 37 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index dfbd3d10410ca..6937bf45f497f 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -831,6 +831,37 @@ static int _set_required_opps(struct device *dev,
return ret;
 }
 
+/**
+ * dev_pm_opp_set_bw() - sets bandwidth levels corresponding to an opp
+ * @dev:   device for which we do this operation
+ * @opp:   opp based on which the bandwidth levels are to be configured
+ *
+ * This configures the bandwidth to the levels specified by the OPP. However
+ * if the OPP specified is NULL the bandwidth levels are cleared out.
+ *
+ * Return: 0 on success or a negative error value.
+ */
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   struct opp_table *opp_table;
+   int ret;
+
+   opp_table = _find_opp_table(dev);
+   if (IS_ERR(opp_table)) {
+   dev_err(dev, "%s: device opp table doesn't exist\n", __func__);
+   return PTR_ERR(opp_table);
+   }
+
+   if (opp)
+   ret = _set_opp_bw(opp_table, opp, dev, false);
+   else
+   ret = _set_opp_bw(opp_table, NULL, dev, true);
+
+   dev_pm_opp_put_opp_table(opp_table);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw);
+
 /**
  * dev_pm_opp_set_rate() - Configure new OPP based on frequency
  * @dev:device for which we do this operation
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index d5c4a329321dd..ae68417c0ae00 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -151,6 +151,7 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device 
*dev, const char **names
 void dev_pm_opp_detach_genpd(struct opp_table *opp_table);
 int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct 
opp_table *dst_table, unsigned int pstate);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask 
*cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask 
*cpumask);
 void dev_pm_opp_remove_table(struct device *dev);
@@ -342,6 +343,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, 
unsigned long target_f
return -ENOTSUPP;
 }
 
+static inline int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   return -EOPNOTSUPP;
+}
+
 static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const 
struct cpumask *cpumask)
 {
return -ENOTSUPP;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
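
A minimal usage sketch pairing the new helper with an OPP lookup, mirroring
the qcom-cpufreq-hw patch earlier in this digest (error paths elided):

	struct dev_pm_opp *opp;
	int ret;

	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	ret = dev_pm_opp_set_bw(dev, opp);	/* vote bandwidth for this OPP */
	dev_pm_opp_put(opp);			/* drop the lookup reference */

	/* Passing NULL clears the bandwidth vote entirely. */
	dev_pm_opp_set_bw(dev, NULL);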



[PATCH v6 0/5] DDR/L3 Scaling support on SDM845 and SC7180 SoCs

2020-06-05 Thread Sibi Sankar
This patch series aims to extend cpu based scaling support to L3/DDR on
SDM845 and SC7180 SoCs.

Patches [1-2] - Blacklist SDM845 and SC7180 in cpufreq-dt-platdev
Patches [3-5] - Update bw levels based on cpu frequency change

Based on Viresh's opp-next:
https://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git/log/?h=opp/linux-next

V6:
 * Add global flag to distinguish between voltage update and opp add.
   Use the same flag before trying to scale ddr/l3 bw [Viresh]
 * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh] 
 * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]

V5:
 * Pick up R-bs from Amit
 * Drop icc tag support/dt changes till a consensus is achieved
 * Use dev_pm_opp_adjust_voltage instead [Viresh]
 * Drop dev_pm_opp_get_path_count [Saravana]
 * Rework dev_pm_opp_set_bw

V4:
 * Migrate to using Georgi's new bindings
 * Misc fixups based on Matthias comments
 * API fixups based on Bjorn's comments on v2
 * Picked up a few R-bs from Matthias

v3:
 * Migrated to using Saravana's opp-kBps bindings [1]
 * Fixed some misc comments from Rajendra
 * Added support for SC7180

v2:
 * Incorporated Viresh's comments from:
 https://lore.kernel.org/lkml/20190410102429.r6j6brm5kspmqxc3@vireshk-i7/
 https://lore.kernel.org/lkml/20190410112516.gnh77jcwawvld6et@vireshk-i7/
 * Dropped cpufreq-map passive governor

Sibi Sankar (5):
  cpufreq: blacklist SDM845 in cpufreq-dt-platdev
  cpufreq: blacklist SC7180 in cpufreq-dt-platdev
  OPP: Add and export helper to set bandwidth
  cpufreq: qcom: Update the bandwidth levels on frequency change
  cpufreq: qcom: Disable fast switch when scaling DDR/L3

 drivers/cpufreq/cpufreq-dt-platdev.c |  2 +
 drivers/cpufreq/qcom-cpufreq-hw.c| 86 ++--
 drivers/opp/core.c   | 31 ++
 include/linux/pm_opp.h   |  6 ++
 4 files changed, 121 insertions(+), 4 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 2/5] cpufreq: blacklist SC7180 in cpufreq-dt-platdev

2020-06-05 Thread Sibi Sankar
Add SC7180 to cpufreq-dt-platdev blacklist since the actual scaling is
handled by the 'qcom-cpufreq-hw' driver.

Reviewed-by: Amit Kucheria 
Reviewed-by: Matthias Kaehlcke 
Signed-off-by: Sibi Sankar 
---

v6:
 * No change

v5:
 * Picked up R-b from Amit

v4:
 * Updated commit message [Matthias]
 * Picked up R-b from Matthias

 drivers/cpufreq/cpufreq-dt-platdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c 
b/drivers/cpufreq/cpufreq-dt-platdev.c
index be85eb494a6b3..7d01df7bfa6cd 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -132,6 +132,7 @@ static const struct of_device_id blacklist[] __initconst = {
{ .compatible = "qcom,apq8096", },
{ .compatible = "qcom,msm8996", },
{ .compatible = "qcom,qcs404", },
+   { .compatible = "qcom,sc7180", },
{ .compatible = "qcom,sdm845", },
 
{ .compatible = "st,stih407", },
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH v2] docs: deprecated.rst: Add zero-length and one-element arrays

2020-06-05 Thread Gustavo A. R. Silva



On 6/5/20 14:30, Kees Cook wrote:
> On Fri, Jun 05, 2020 at 11:21:42AM -0500, Gustavo A. R. Silva wrote:
>> Add zero-length and one-element arrays to the list.
>>
>> While I continue replacing zero-length and one-element arrays with
>> flexible-array members, I need a reference to point people to, so
>> they don't introduce more instances of such arrays. And while here,
>> add a note to the "open-coded arithmetic in allocator arguments"
>> section, on the use of struct_size() and the arrays-to-deprecate
>> mentioned here.
>>
>> Signed-off-by: Gustavo A. R. Silva 
>> ---
>> Changes in v2:
>>  - Adjust some markup links for readability.
>>
>>  Documentation/process/deprecated.rst | 83 
>>  1 file changed, 83 insertions(+)
>>
>> diff --git a/Documentation/process/deprecated.rst 
>> b/Documentation/process/deprecated.rst
>> index 652e2aa02a66c..042c21c968e19 100644
>> --- a/Documentation/process/deprecated.rst
>> +++ b/Documentation/process/deprecated.rst
>> @@ -85,6 +85,11 @@ Instead, use the helper::
>>  
>>  header = kzalloc(struct_size(header, item, count), GFP_KERNEL);
>>  
>> +NOTE: If you are using struct_size() on a structure containing a zero-length
> 
> Please use:
> 
> .. note::
> 

OK.

> for this kind of "NOTE:"
> 
>> +or a one-element array as a trailing array member, stop using such arrays
> 
> And I think it was likely my language suggestion to say "stop using", but
> probably this should be more friendly. How about "please refactor such
> arrays ..."
> 
>> +and switch to `flexible arrays <#zero-length-and-one-element-arrays>`_
> 
> ... to a `flexible array member <#...
> 
>> +instead.
>> +
> 
>>  See array_size(), array3_size(), and struct_size(),
>>  for more details as well as the related check_add_overflow() and
>>  check_mul_overflow() family of functions.
>> @@ -200,3 +205,81 @@ All switch/case blocks must end in one of:
>>  * continue;
>>  * goto ;
>>  * return [expression];
>> +
>> +Zero-length and one-element arrays
>> +--
>> +Old code in the kernel uses the zero-length and one-element array extensions
>> +to the C90 standard, but the `preferred mechanism to declare variable-length
> 
> I'd like to reword this to make an immediate statement about what _should_
> be done, and then move into the details on an as accurate as possible
> review of the history of these work-arounds. How about this, which I
> mixed some of your earlier paragraphs into:
> 
> 
> 
> There is a regular need in the kernel to provide a way to declare having
> a dynamically sized set of trailing elements in a structure. Kernel code
> should always use `"flexible array members" 
> `_
> for these cases. The older style of one-element or zero-length arrays should
> no longer be used.
> 
> In older C code, dynamically sized trailing elements were done by specifying
> a one-element array at the end of a structure::
> 
> struct something {
> int count;
> struct foo items[1];
> };
> 
> This led to fragile size calculations via sizeof() (which would need to
> remove the size of the single trailing element to get a correct size of
> the "header"). A `GNU C extension 
> `_
> was introduced to allow for zero-length arrays, to avoid these kinds of
> size problems::
> 
> struct something {
> int count;
> struct foo items[0];
> };
> 
> But this led to other problems, and didn't solve some problems shared by
> both styles, like not being able to detect when such an array is
> accidentally being used _not_ at the end of a structure (which could happen
> directly, or when such a struct was in unions, structs of structs, etc).
> 
> C99 introduced "flexible array members", which lacks a numeric size for
> the array declaration entirely::
> 
> struct something {
> int count;
> struct foo items[];
> };
> 
> This is the way the kernel expects dynamically sized trailing elements
> to be declared. It allows the compiler to generate errors when the
> flexible array does not occur last in the structure, which helps to prevent
> some kinds of `undefined behavior
> `_
> bugs from being inadvertently introduced to the codebase. It also allows
> the compiler to correctly analyze array sizes (via sizeof(),
> `CONFIG_FORTIFY_SOURCE`, and `CONFIG_UBSAN_BOUNDS`). For instance,
> there is no mechanism that warns us that the following application of the
> sizeof() operator to a zero-length array always results in zero::
> 
> struct something {
> int count;
> struct foo items[0];
> };
> 
> struct something *instance;
> 
> instance = kmalloc(struct_size(instance, items, size), GFP_KERNEL);
> instance->length = 
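
A compact, standalone illustration of the sizeof() behavior the draft
describes (user-space C; struct and member names are made up for the example):

	#include <stdio.h>

	struct zero_len { int count; int items[0]; };	/* GNU extension */
	struct flex     { int count; int items[]; };	/* C99 flexible array */

	int main(void)
	{
		/* Neither trailing array contributes to sizeof(), so both
		 * structs are the same size; the flexible-array form is
		 * preferred because the compiler can reject misuse, e.g.
		 * declaring the array anywhere but last in the struct. */
		printf("%zu %zu\n", sizeof(struct zero_len), sizeof(struct flex));
		return 0;
	}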

[PATCH v4 01/12] PCI: brcmstb: PCIE_BRCMSTB depends on ARCH_BRCMSTB

2020-06-05 Thread Jim Quinlan
From: Jim Quinlan 

Have PCIE_BRCMSTB depend on ARCH_BRCMSTB.  Also set the default value to
ARCH_BRCMSTB.

Signed-off-by: Jim Quinlan 
Acked-by: Florian Fainelli 
Reviewed-by: Rob Herring 
---
 drivers/pci/controller/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 91bfdb784829..c0f3d4d10047 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -244,9 +244,10 @@ config VMD
 
 config PCIE_BRCMSTB
tristate "Broadcom Brcmstb PCIe host controller"
-   depends on ARCH_BCM2835 || COMPILE_TEST
+   depends on ARCH_BRCMSTB || ARCH_BCM2835 || COMPILE_TEST
depends on OF
depends on PCI_MSI_IRQ_DOMAIN
+   default ARCH_BRCMSTB
help
  Say Y here to enable PCIe host controller support for
  Broadcom STB based SoCs, like the Raspberry Pi 4.
-- 
2.17.1


