[PATCH RFC 1/1] powerpc/eeh: PE info tree via debugfs and syslog

2020-06-23 Thread Sam Bobroff
Provide an ASCII art tree display of the PEs affected by recovery,
with as much state as possible, at the start and end of recovery.

Some platform specific information is provided via a new eeh_ops
member.

The same information is made available from debugfs at:

/sys/kernel/debug/powerpc/PCI/eeh_pe_tree
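
For illustration, a small tree renders roughly like this (hypothetical PEs
and values; the fields are the ones printed by eeh_tree_state_dump() below):

  \---* [PE#0x0] type=PHB  config_addr=0x0 pass_count=0
      |...  check_count=0 freeze_count=0 false_positives=0 first_freeze=0
      |...  kernel state=0x0
      +---* [PE#0x1] type=BUS  config_addr=0x100 pass_count=0
      |    ...  check_count=1 freeze_count=1 false_positives=0 first_freeze=0
      \---* [PE#0x2] type=DEVICE  config_addr=0x200 pass_count=0
           ...  kernel state=0x1 ISOLATED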

Signed-off-by: Sam Bobroff 
---
Here's some debug code I've been using for a long time while working on EEH. I
haven't posted it before because it wasn't possible to make the code safe
enough (to avoid either NULL or LIST_POISON), but with the recent safety work
done it's become possible.

It should be applied on top of:

"powerpc/eeh: Synchronization for safety"
"powerpc/eeh: Provide a unique ID for each EEH recovery"
"powerpc/eeh: Asynchronous recovery"

 arch/powerpc/include/asm/eeh.h   |  3 +
 arch/powerpc/kernel/eeh.c| 90 
 arch/powerpc/kernel/eeh_driver.c | 28 ++
 arch/powerpc/platforms/powernv/eeh-powernv.c | 63 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c | 21 -
 5 files changed, 203 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index dd55d1bf1cfd..46dec5b2482e 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -230,6 +230,7 @@ struct eeh_ops {
int (*next_error)(struct eeh_pe **pe);
int (*restore_config)(struct pci_dn *pdn);
int (*notify_resume)(struct pci_dn *pdn);
+   void (*pe_plat_state_dump)(char *buf, size_t len, struct eeh_pe *pe);
 };
 
 extern int eeh_subsystem_flags;
@@ -324,6 +325,8 @@ int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
  unsigned long addr, unsigned long mask);
 int eeh_restore_vf_config(struct pci_dn *pdn);
+void eeh_tree_state_dump(void (*pfn)(void *, const char *, ...),
+void *s, struct eeh_pe *pe, int level, int xlevel);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 54f921ff7621..6f675f277d26 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -839,6 +839,96 @@ int eeh_restore_vf_config(struct pci_dn *pdn)
return 0;
 }
 
+static void eeh_tree_state_indent(void (*pfn)(void *, const char *, ...),
+ void *s, int level, int xlevel, bool node)
+{
+   int i;
+
+   for (i = 0; i < level; i++)
+   pfn(s, "%c   ", ((xlevel & (1 << i)) ? '|' : ' '));
+   if (node)
+   pfn(s, "%c---", ((xlevel & (1 << level)) ? '+' : '\\'));
+   else
+   pfn(s, "%c...", ((xlevel & (1 << level)) ? '|' : ' '));
+}
+
+void eeh_tree_state_dump(void (*pfn)(void *, const char *, ...),
+void *s, struct eeh_pe *pe, int level, int xlevel)
+{
+   struct eeh_dev *edev;
+   struct eeh_pe *child_pe;
+   int slevel, sxlevel;
+   char buf[1024];
+
+   eeh_recovery_must_be_locked();
+   eeh_tree_state_indent(pfn, s, level, xlevel, true);
+   pfn(s, "* [PE#0x%x] type=%s%s%s%s%s config_addr=0x%x pass_count=%d\n",
+   pe->addr,
+   ((pe->type & EEH_PE_INVALID) ? "INVALID " : ""),
+   ((pe->type & EEH_PE_PHB) ? "PHB " : ""),
+   ((pe->type & EEH_PE_DEVICE) ? "DEVICE " : ""),
+   ((pe->type & EEH_PE_BUS) ? "BUS " : ""),
+   ((pe->type & EEH_PE_VF) ? "VF " : ""),
+   pe->config_addr,
+   atomic_read(&pe->pass_dev_cnt));
+
+   slevel = level + 1;
+   sxlevel = xlevel;
+   if (!list_empty(&pe->edevs) || !list_empty(&pe->child_list))
+   sxlevel |= (1 << slevel);
+   eeh_tree_state_indent(pfn, s, slevel, sxlevel, false);
+   pfn(s, "  check_count=%d freeze_count=%d false_positives=%d 
first_freeze=%llu\n",
+  pe->check_count, pe->freeze_count, pe->false_positives,
+  pe->tstamp);
+   eeh_tree_state_indent(pfn, s, slevel, sxlevel, false);
+   pfn(s, "  kernel state=0x%x %s%s%s%s%s%s%s%s\n", pe->state,
+   ((pe->state & EEH_PE_ISOLATED) ? "ISOLATED " : ""),
+   ((pe->state & EEH_PE_RECOVERING) ? "RECOVERING " : ""),
+   ((pe->state & EEH_PE_CFG_BLOCKED) ? "CFG_BLOCKED " : ""),
+   ((pe->state & EEH_PE_RESET) ? "RESET " : ""),
+   ((pe->state & EEH_PE_KEEP) ? "KEEP " : ""),
+   ((pe->state & EEH_PE_CFG_RESTRICTED) ? "CFG_RESTRICTED " : ""),
+   ((pe->state & EEH_PE_REMOVED) ? "REMOVED" : ""),
+   ((pe->state & EEH_PE_PRI_BUS) ? "PRI_BUS" : ""));
+   if (eeh_ops->pe_plat_state_dump) {
+   eeh_tree_state_indent(pfn, s, slevel, sxlevel, false);
+   eeh_ops->pe_plat_state_dump(buf, sizeof(buf), pe);
+   pfn(s, "  %.*s\n", sizeof(buf), buf);
+   }
+
+   slevel = level + 1;
+   list_for_each_entry(edev, &pe->edevs, entry) {
+ 

[PATCH RFC 1/1] powerpc/eeh: Asynchronous recovery

2020-06-23 Thread Sam Bobroff
Currently, EEH recovery is entirely serialized and takes place within
a single kernel thread. This can cause recovery to take a long time
when there are many devices.

To shorten recovery time, this change allows recovery to proceed in
parallel in two ways:
- Each PHB is given its own recovery event queue and can be recovered
independently from other PHBs.
- Driver handlers are called in parallel, but with the constraint that
handlers higher up (closer to the PHB) in the PE hierarchy must be
called before those lower down.

To maintain the constraint above, the driver handlers are called by
traversing the tree of affected PEs from the top, stopping to call
handlers (in parallel) when a PE with devices is discovered. When the
calls for that PE are complete, traversal continues at each child PE.
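
In pseudocode, the constrained traversal looks roughly like this (a sketch
only; eeh_recover_pe_tree() and run_handlers_in_parallel() are hypothetical
names, the real logic is in the eeh_driver.c hunks below):

static void eeh_recover_pe_tree(struct eeh_pe *pe)
{
	struct eeh_pe *child;

	/* Run the handlers for every device on this PE in parallel and
	 * wait for all of them to complete ... */
	if (!list_empty(&pe->edevs))
		run_handlers_in_parallel(pe);

	/* ... only then descend, so handlers nearer the PHB always run
	 * before those below them. Sibling subtrees may again proceed
	 * in parallel with each other. */
	list_for_each_entry(child, &pe->child_list, child)
		eeh_recover_pe_tree(child);
}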

Signed-off-by: Sam Bobroff 
---
This patch should be applied on top of both:
"powerpc/eeh: Synchronization for safety"
"powerpc/eeh: Provide a unique ID for each EEH recovery"

 arch/powerpc/include/asm/eeh.h|   1 +
 arch/powerpc/include/asm/eeh_event.h  |   7 +
 arch/powerpc/include/asm/pci-bridge.h |   2 +
 arch/powerpc/kernel/eeh_dev.c |   2 +
 arch/powerpc/kernel/eeh_driver.c  | 313 ++
 arch/powerpc/kernel/eeh_event.c   |  65 +++---
 6 files changed, 276 insertions(+), 114 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 1d4c0b19a63c..dd55d1bf1cfd 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -130,6 +130,7 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
 #define EEH_DEV_NO_HANDLER (1 << 8)/* No error handler */
 #define EEH_DEV_SYSFS  (1 << 9)/* Sysfs created*/
 #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
+#define EEH_DEV_RECOVERING (1 << 11)   /* Recovering   */
 
 struct eeh_dev {
int mode;   /* EEH mode */
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index a1fe736bc4cf..b21f49e87b7b 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -8,6 +8,8 @@
 #define ASM_POWERPC_EEH_EVENT_H
 #ifdef __KERNEL__
 
+#include 
+
 /*
  * structure holding pci controller data that describes a
  * change in the isolation status of a PCI slot.  A pointer
@@ -15,16 +17,21 @@
  * callback.
  */
 struct eeh_event {
+   struct work_struct  work;
struct list_headlist;   /* to form event queue  */
struct eeh_pe   *pe;/* EEH PE   */
unsigned intid; /* Event ID */
 };
 
+extern spinlock_t eeh_eventlist_lock;
+
 int eeh_event_init(void);
+int eeh_phb_event(struct eeh_pe *pe);
 int eeh_send_failure_event(struct eeh_pe *pe);
 int __eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
 void eeh_handle_normal_event(unsigned int event_id, struct eeh_pe *pe);
+void eeh_handle_normal_event_work(struct work_struct *work);
 void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 69f4cb3b7c56..2a9d639b18d1 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -127,6 +127,8 @@ struct pci_controller {
 
void *private_data;
struct npu *npu;
+   bool eeh_in_progress;
+   struct list_head eeh_eventlist;
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index 7370185c7a05..2e48a1e142a9 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -62,6 +62,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
  */
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 {
+   phb->eeh_in_progress = false; /* TODO: Necessary? */
+   INIT_LIST_HEAD(&phb->eeh_eventlist);
/* EEH PE for PHB */
eeh_phb_pe_create(phb);
 }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0dbc218597e3..9d03292f66a7 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -12,6 +12,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -19,6 +22,8 @@
 #include 
 #include 
 
+static atomic_t eeh_wu_id = ATOMIC_INIT(0);
+
 struct eeh_rmv_data {
struct list_head removed_vf_list;
int removed_dev_count;
@@ -249,73 +254,58 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
 }
 
 typedef enum pci_ers_result (*eeh_report_fn)(unsigned int event_id,
+unsigned int id,
 struct pci_dev *,
 struct 

Re: [PATCH v2 2/2] cpufreq: Specify default governor on command line

2020-06-23 Thread Viresh Kumar
On 23-06-20, 15:21, Quentin Perret wrote:
> Currently, the only way to specify the default CPUfreq governor is via
> Kconfig options, which suits users who can build the kernel themselves
> perfectly.
> 
> However, for those who use a distro-like kernel (such as Android, with
> the Generic Kernel Image project), the only way to use a different
> default is to boot to userspace, and to then switch using the sysfs
> interface. Being able to specify the default governor on the command
> line, like is the case for cpuidle, would enable those users to specify
> their governor of choice earlier on, and to simplify slighlty the
> userspace boot procedure.
> 
> To support this use-case, add a kernel command line parameter enabling
> to specify a default governor for CPUfreq, which takes precedence over
> the builtin default.
> 
> This implementation has one notable limitation: the default governor
> must be registered before the driver. This is solved for builtin
> governors and drivers using appropriate *_initcall() functions. And in
> the modular case, this must be reflected as a constraint on the module
> loading order.
> 
> Signed-off-by: Quentin Perret 
> ---
>  .../admin-guide/kernel-parameters.txt |  5 
>  Documentation/admin-guide/pm/cpufreq.rst  |  6 ++---
>  drivers/cpufreq/cpufreq.c | 23 +++
>  3 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index fb95fad81c79..5fd3c9f187eb 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -703,6 +703,11 @@
>   cpufreq.off=1   [CPU_FREQ]
>   disable the cpufreq sub-system
>  
> + cpufreq.default_governor=
> + [CPU_FREQ] Name of the default cpufreq governor to use.
> + This governor must be registered in the kernel before
> + the cpufreq driver probes.
> +
>   cpu_init_udelay=N
>   [X86] Delay for N microsec between assert and de-assert
>   of APIC INIT to start processors.  This delay occurs
> diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
> index 0c74a7784964..368e612145d2 100644
> --- a/Documentation/admin-guide/pm/cpufreq.rst
> +++ b/Documentation/admin-guide/pm/cpufreq.rst
> @@ -147,9 +147,9 @@ CPUs in it.
>  
>  The next major initialization step for a new policy object is to attach a
>  scaling governor to it (to begin with, that is the default scaling governor
> -determined by the kernel configuration, but it may be changed later
> -via ``sysfs``).  First, a pointer to the new policy object is passed to the
> -governor's ``->init()`` callback which is expected to initialize all of the
> +determined by the kernel command line or configuration, but it may be changed
> +later via ``sysfs``).  First, a pointer to the new policy object is passed to
> +the governor's ``->init()`` callback which is expected to initialize all of the
>  data structures necessary to handle the given policy and, possibly, to add
>  a governor ``sysfs`` interface to it.  Next, the governor is started by
>  invoking its ``->start()`` callback.
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 0128de3603df..4b1a5c0173cf 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);
>  #define for_each_governor(__governor)\
>   list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
>  
> +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
> +static struct cpufreq_governor *default_governor;
> +
>  /**
>   * The "cpufreq driver" - the arch- or hardware-dependent low
>   * level driver of CPUFreq support, and its spinlock. This lock
> @@ -1055,7 +1058,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
>  
>  static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  {
> - struct cpufreq_governor *def_gov = cpufreq_default_governor();
>   struct cpufreq_governor *gov = NULL;
>   unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
>  
> @@ -1065,8 +1067,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>   if (gov) {
>   pr_debug("Restoring governor %s for cpu %d\n",
>policy->governor->name, policy->cpu);
> - } else if (def_gov) {
> - gov = def_gov;
> + } else if (default_governor) {
> + gov = default_governor;
>   } else {
>   return -ENODATA;
>   }
> @@ -1074,8 +1076,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>   /* Use the default policy if there is no last_policy. 

[PATCH RFC 1/1] powerpc/eeh: Provide a unique ID for each EEH recovery

2020-06-23 Thread Sam Bobroff
Give a unique ID to each recovery event, to ease log parsing and
prepare for parallel recovery.

Also add some new messages with a very simple format that may be
useful to log-parsers.
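
For illustration, a recovery pass then reads along these lines (hypothetical
device and ID; the "EEH(%u):" prefix is what this patch adds):

  EEH(1): Recovering PHB#0-PE#fd
  EEH(1): PCI device/vendor: 168c002b
  EEH(1): PCI cmd/status register: 00100146
  EEH(1): Recovery complete.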

Signed-off-by: Sam Bobroff 
---
This patch should be applied on top of my recent(ish) set:
"powerpc/eeh: Synchronization for safety".

 arch/powerpc/include/asm/eeh_event.h |   3 +-
 arch/powerpc/include/asm/ppc-pci.h   |   2 +-
 arch/powerpc/kernel/eeh.c|  45 ---
 arch/powerpc/kernel/eeh_driver.c | 189 +++
 arch/powerpc/kernel/eeh_event.c  |  12 +-
 5 files changed, 148 insertions(+), 103 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index dadde7d52f46..a1fe736bc4cf 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -17,13 +17,14 @@
 struct eeh_event {
struct list_headlist;   /* to form event queue  */
struct eeh_pe   *pe;/* EEH PE   */
+   unsigned intid; /* Event ID */
 };
 
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
 int __eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(unsigned int event_id, struct eeh_pe *pe);
 void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 7f4be5a05eb3..ec62b491ff97 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -47,7 +47,7 @@ extern int rtas_setup_phb(struct pci_controller *phb);
 void eeh_addr_cache_insert_dev(struct pci_dev *dev);
 void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
 struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
-void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
+void eeh_slot_error_detail(unsigned int event_id, struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
 int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
 void eeh_save_bars(struct eeh_dev *edev);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 68e6dfa526a5..54f921ff7621 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(eeh_recovery_must_be_locked);
  * for the indicated PCI device, and puts them into a buffer
  * for RTAS error logging.
  */
-static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
+static size_t eeh_dump_dev_log(unsigned int event_id, struct eeh_dev *edev,
+  char *buf, size_t len)
 {
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
u32 cfg;
@@ -206,34 +207,37 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
char buffer[128];
 
if (!pdn) {
-   pr_warn("EEH: Note: No error log for absent device.\n");
+   pr_warn("EEH(%u): Note: No error log for absent device.\n",
+   event_id);
return 0;
}
 
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
   pdn->phb->global_number, pdn->busno,
   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
-   pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
+   pr_warn("EEH(%u): of node=%04x:%02x:%02x.%01x\n",
+   event_id,
pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
-   pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
+   pr_warn("EEH(%u): PCI device/vendor: %08x\n", event_id, cfg);
 
	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
-   pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
+   pr_warn("EEH(%u): PCI cmd/status register: %08x\n", event_id, cfg);
 
/* Gather bridge-specific registers */
if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
-   pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
+   pr_warn("EEH(%u): Bridge secondary status: %04x\n",
+   event_id, cfg);
 
		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
-   pr_warn("EEH: Bridge control: %04x\n", cfg);
+   pr_warn("EEH(%u): Bridge control: %04x\n", event_id, cfg);
}
 
/* Dump out the PCI-X command and status regs */
@@ -241,18 +245,19 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 

[PATCH] powerpc/boot: Use address-of operator on section symbols

2020-06-23 Thread Nathan Chancellor
Clang warns:

arch/powerpc/boot/main.c:107:18: warning: array comparison always
evaluates to a constant [-Wtautological-compare]
if (_initrd_end > _initrd_start) {
^
arch/powerpc/boot/main.c:155:20: warning: array comparison always
evaluates to a constant [-Wtautological-compare]
if (_esm_blob_end <= _esm_blob_start)
  ^
2 warnings generated.

These are not true arrays, they are linker-defined symbols, which are
just addresses.  Using the address-of operator silences the warning
and does not change the resulting assembly with either clang/ld.lld
or gcc/ld (tested with diff + objdump -Dr).
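
A standalone illustration of the warning and the fix (not from the patch;
symbol names chosen to match the boot wrapper):

/* Linker-provided symbols are conventionally declared as arrays, so
 * they denote addresses fixed at link time, not real objects: */
extern char _initrd_start[], _initrd_end[];

static int initrd_present(void)
{
	/* Clang warns here: two distinct arrays can never compare equal,
	 * so "_initrd_end > _initrd_start" looks constant to the
	 * frontend. Taking the addresses explicitly compares the
	 * link-time addresses instead, and generates identical code: */
	return &_initrd_end > &_initrd_start;
}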

Link: https://github.com/ClangBuiltLinux/linux/issues/212
Reported-by: Joel Stanley 
Signed-off-by: Nathan Chancellor 
---
 arch/powerpc/boot/main.c | 4 ++--
 arch/powerpc/boot/ps3.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index a9d209135975..cae31a6e8f02 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -104,7 +104,7 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
 {
/* If we have an image attached to us, it overrides anything
 * supplied by the loader. */
-   if (_initrd_end > _initrd_start) {
+   if (&_initrd_end > &_initrd_start) {
printf("Attached initrd image at 0x%p-0x%p\n\r",
   _initrd_start, _initrd_end);
initrd_addr = (unsigned long)_initrd_start;
@@ -152,7 +152,7 @@ static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
unsigned long esm_blob_addr, esm_blob_size;
 
/* Do we have an ESM (Enter Secure Mode) blob? */
-   if (_esm_blob_end <= _esm_blob_start)
+   if (&_esm_blob_end <= &_esm_blob_start)
return;
 
printf("Attached ESM blob at 0x%p-0x%p\n\r",
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index c52552a681c5..6e4efbdb6b7c 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -127,7 +127,7 @@ void platform_init(void)
	ps3_repository_read_rm_size(&rm_size);
dt_fixup_memory(0, rm_size);
 
-   if (_initrd_end > _initrd_start) {
+   if (&_initrd_end > &_initrd_start) {
setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start));
setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end));
}

base-commit: 3e08a95294a4fb3702bb3d35ed08028433c37fe6
-- 
2.27.0



Re: [PATCH V3 0/4] mm/debug_vm_pgtable: Add some more tests

2020-06-23 Thread Anshuman Khandual



On 06/15/2020 09:07 AM, Anshuman Khandual wrote:
> This series adds some more arch page table helper validation tests which
> are related to core and advanced memory functions. This also creates a
> documentation, enlisting expected semantics for all page table helpers as
> suggested by Mike Rapoport previously (https://lkml.org/lkml/2020/1/30/40).
> 
> There are many TRANSPARENT_HUGEPAGE and ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD
> ifdefs scattered across the test. But consolidating all the fallback stubs
> is not very straight forward because ARCH_HAS_TRANSPARENT_HUGEPAGE_PUD is
> not explicitly dependent on ARCH_HAS_TRANSPARENT_HUGEPAGE.
> 
> Tested on arm64, x86 platforms but only build tested on all other enabled
> platforms through ARCH_HAS_DEBUG_VM_PGTABLE i.e powerpc, arc, s390. The
> following failure on arm64 still exists which was mentioned previously. It
> will be fixed with the upcoming THP migration on arm64 enablement series.
> 
> WARNING  mm/debug_vm_pgtable.c:860 debug_vm_pgtable+0x940/0xa54
> WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd
> 
> This series is based on v5.8-rc1.
> 
> Changes in V3:
> 
> - Replaced HAVE_ARCH_SOFT_DIRTY with MEM_SOFT_DIRTY
> - Added HAVE_ARCH_HUGE_VMAP checks in pxx_huge_tests() per Gerald
> - Updated documentation for pmd_thp_tests() per Zi Yan
> - Replaced READ_ONCE() with huge_ptep_get() per Gerald
> - Added pte_mkhuge() and masking with PMD_MASK per Gerald
> - Replaced pte_same() with holding pfn check in pxx_swap_tests()
> - Added documentation for all (#ifdef #else #endif) per Gerald
> - Updated pmd_protnone_tests() per Gerald
> - Updated HugeTLB PTE creation in hugetlb_advanced_tests() per Gerald
> - Replaced [pmd|pud]_mknotpresent() with [pmd|pud]_mkinvalid()
> - Added has_transparent_hugepage() check for PMD and PUD tests
> - Added a patch which debug prints all individual tests being executed
> - Updated documentation for renamed [pmd|pud]_mkinvalid() helpers

Hello Gerald/Christophe/Vineet,

It would be really great if you could give this series a quick test
on s390/ppc/arc platforms respectively. Thank you.

- Anshuman


Re: [PATCH v6 1/5] KVM: s390: clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang




On 2020/6/23 23:31, Christian Borntraeger wrote:



On 23.06.20 15:14, Tianjia Zhang wrote:

In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
Reviewed-by: Vitaly Kuznetsov 
---
  arch/s390/kvm/kvm-s390.c | 23 +++
  1 file changed, 15 insertions(+), 8 deletions(-)
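
The shape of the cleanup, in the abstract (a sketch, not one of the s390
hunks; handle_foo() is a made-up name): since kvm_run is reachable from the
vcpu, callees take one pointer and derive the other locally:

/* before: both pointers threaded through the call chain */
static int handle_foo(struct kvm_vcpu *vcpu, struct kvm_run *run);

/* after: */
static int handle_foo(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	/* ... use run exactly as before ... */
	return 0;
}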


Tianjia,

I have trouble seeing value in this particular patch. We add LOCs
without providing any noticeable benefit. All other patches in this series at
least reduce the amount of code. So I would defer this to Paolo if he prefers
to have it this way across all architectures.


Yes, this is a cleanup across all architectures. Some of the per-architecture
cleanups have already been merged into the mainline. I think it is necessary
to unify this cleanup; this is also what Paolo meant.

You can refer to the email of the previous version:
https://lkml.org/lkml/2020/4/27/16

Thanks,
Tianjia


RE: [PATCH v2 0/2] cpufreq: Specify the default governor on command line

2020-06-23 Thread Doug Smythies
Hi Quentin,
Thanks for your quick reply.

On 2020.06.23 11:05 Quentin Perret wrote: 

> Hi Doug,
> 
> On Tuesday 23 Jun 2020 at 10:54:33 (-0700), Doug Smythies wrote:
> > Hi Quentin,
> >
> > Because I am lazy and sometimes do not want to recompile
> > the distro source, I have a need/desire for this.
> 
> Good to know I'm not the only one ;-)
> 
> > Tested these two grub command lines:
> >
> > GRUB_CMDLINE_LINUX_DEFAULT="ipv6.disable=1 consoleblank=300 
> > intel_pstate=disable
> cpufreq.default_governor=schedutil cpuidle_sysfs_switch cpuidle.governor=teo"
> >
> > And
> >
> > #GRUB_CMDLINE_LINUX_DEFAULT="ipv6.disable=1 consoleblank=450 
> > intel_pstate=passive
> cpufreq.default_governor=schedutil cpuidle_sysfs_switch cpuidle.governor=teo"
> >
> > And all worked as expected. I use Ubuntu as my distro, and also had to 
> > disable a startup script that
> switches to "ondemand", or similar, after 1 minute.
> 
> Good, thanks for giving it a try.
> 
> > As a side note (separate subject, but is one reason I tried it):
> > My i5-9600K based computer seems to hit a power limit during boot approximately 3 seconds after kernel selection on grub.
> > This had no effect on that issue (even when selecting powersave governor).
> 
> Interesting ... Could you confirm that compiling with powersave as
> default doesn't fix the issue either?

No, it doesn't (good idea for a test though).
However, the big mains spike is also gone. So, I no longer know why those power
limit log bits are always set after boot.

> 
> Other question, when does the intel_pstate driver start on your device?
> Before or after that 3 seconds boot time?

Before, if I understand correctly (from dmesg):

[0.468969] intel_pstate: Intel P-state driver initializing

I'll attach a couple of annotated mains power graphs.
(which will likely get stripped from the on-list version of this e-mail).

Currently, I am drowning in stuff that doesn't work, and will put
this aside for now. I'll revive this as a new thread or a bugzilla
eventually.

I also tried booting with turbo disabled, no difference.

Thanks for this patch set.

... Doug



[PATCH 3/3] powerpc: re-initialise lazy FPU/VEC counters on every fault

2020-06-23 Thread Nicholas Piggin
When an FP/VEC/VSX unavailable fault loads registers and enables the
facility in the MSR, re-set the lazy restore counters to 1 rather
than incrementing them, so every fault gets the same number of
restores before the next fault.

This probably isn't a change in practical behaviour: if a lazy counter
was non-zero then the state should already have been restored, and so
would not cause a fault when userspace tries to access it. However the
code and comment imply otherwise, which is misleading and unnecessary.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/fpu.S| 4 +---
 arch/powerpc/kernel/vector.S | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index cac22cb97a8c..4ae39db70044 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -107,9 +107,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
or  r12,r12,r4
std r12,_MSR(r1)
 #endif
-   /* Don't care if r4 overflows, this is desired behaviour */
-   lbz r4,THREAD_LOAD_FP(r5)
-   addi	r4,r4,1
+   li  r4,1
stb r4,THREAD_LOAD_FP(r5)
	addi	r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index efc5b52f95d2..801dc28fdcca 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -76,9 +76,7 @@ _GLOBAL(load_up_altivec)
	oris	r12,r12,MSR_VEC@h
std r12,_MSR(r1)
 #endif
-   /* Don't care if r4 overflows, this is desired behaviour */
-   lbz r4,THREAD_LOAD_VEC(r5)
-   addi	r4,r4,1
+   li  r4,1
stb r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
li  r4,1
-- 
2.23.0



[PATCH 1/3] powerpc/64s: restore_math remove TM test

2020-06-23 Thread Nicholas Piggin
The TM test in restore_math added by commit dc16b553c949e ("powerpc:
Always restore FPU/VEC/VSX if hardware transactional memory in use") is
no longer necessary after commit a8318c13e79ba ("powerpc/tm: Fix
restoring FP/VMX facility incorrectly on interrupts"), which removed
the cases where restore_math has to restore if TM is active.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/process.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7bb7faf84490..c6c1add91bf3 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -530,8 +530,7 @@ void notrace restore_math(struct pt_regs *regs)
 {
unsigned long msr;
 
-   if (!MSR_TM_ACTIVE(regs->msr) &&
-   !current->thread.load_fp && !loadvec(current->thread))
+   if (!current->thread.load_fp && !loadvec(current->thread))
return;
 
msr = regs->msr;
-- 
2.23.0



Re: [PATCH 2/2] ASoC: bindings: fsl-asoc-card: Add compatible string for wm8524

2020-06-23 Thread Nicolin Chen
On Tue, Jun 23, 2020 at 02:52:47PM +0800, Shengjiu Wang wrote:
> In order to support wm8524 codec with fsl-asoc-card machine
> driver, add compatible string "fsl,imx-audio-wm8524".
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


Re: [PATCH 1/2] ASoC: fsl-asoc-card: Add WM8524 support

2020-06-23 Thread Nicolin Chen
On Tue, Jun 23, 2020 at 02:52:46PM +0800, Shengjiu Wang wrote:
> WM8524 only supports playback mode, and only works at
> slave mode.
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


Re: [PATCH] powerpc/boot/dts: Fix dtc "pciex" warnings

2020-06-23 Thread Stephen Rothwell
Hi Michael,

On Tue, 23 Jun 2020 23:03:20 +1000 Michael Ellerman  wrote:
>
> With CONFIG_OF_ALL_DTBS=y, as set by eg. allmodconfig, we see lots of
> warnings about our dts files, such as:
> 
>   arch/powerpc/boot/dts/glacier.dts:492.26-532.5:
>   Warning (pci_bridge): /plb/pciex@d: node name is not "pci"
>   or "pcie"
> 
> The node name should not particularly matter, it's just a name, and
> AFAICS there's no kernel code that cares whether nodes are *named*
> "pciex" or "pcie". So shutup these warnings by converting to the name
> dtc wants.
> 
> As always there's some risk this could break something obscure that
> does rely on the name, in which case we can revert.
> 
> Signed-off-by: Michael Ellerman 

Thanks for that.  I have applied it to my "fixes" tree until it turns
up elsewhere.

-- 
Cheers,
Stephen Rothwell




Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Ahmed S. Darwish
On Tue, Jun 23, 2020 at 05:24:50PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 05:00:31PM +0200, Ahmed S. Darwish wrote:
> > On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
> > ...
> > > -#define lockdep_assert_irqs_disabled()   do {   \
> > > - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> > > -   current->hardirqs_enabled,\
> > > -   "IRQs not disabled as expected\n");   \
> > > - } while (0)
> > > +#define lockdep_assert_irqs_enabled()\
> > > +do { \
> > > + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
> > > +} while (0)
> > >
> >
> > Can we add a small comment on top of lockdep_off(), stating that lockdep
> > IRQ tracking will still be kept after a lockdep_off call?
>
> That would only legitimize lockdep_off(). The only comment I want to put
> on that is: "if you use this, you're doing it wrong'.
>

Well, freshly merged code is using it. For example, KCSAN:

=> f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
=> kernel/kcsan/report.c:

void kcsan_report(...)
{
...
/*
 * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
 * we do not turn off lockdep here; this could happen due to recursion
 * into lockdep via KCSAN if we detect a race in utilities used by
 * lockdep.
 */
lockdep_off();
...
}

thanks,

--
Ahmed S. Darwish
Linutronix GmbH


Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Ahmed S. Darwish
On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
...
> -#define lockdep_assert_irqs_disabled()   do {   \
> - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> -   current->hardirqs_enabled,\
> -   "IRQs not disabled as expected\n");   \
> - } while (0)
> +#define lockdep_assert_irqs_enabled()\
> +do { \
> + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
> +} while (0)
>

Can we add a small comment on top of lockdep_off(), stating that lockdep
IRQ tracking will still be kept after a lockdep_off call?

thanks,

--
Ahmed S. Darwish
Linutronix GmbH


Re: [PATCH v4 3/8] sparc64: Fix asm/percpu.h build error

2020-06-23 Thread David Miller
From: Peter Zijlstra 
Date: Tue, 23 Jun 2020 10:36:48 +0200

> In order to break a header dependency between lockdep and task_struct,
> I need per-cpu stuff from lockdep.
> 
> Signed-off-by: Peter Zijlstra (Intel) 

Acked-by: David S. Miller 


Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 10:24:04PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > anything happens.
> 
> OK, so the below patch doesn't seem to have any nasty recursion issues
> here. The only 'problem' is that lockdep now sees report_lock can cause
> deadlocks.
> 
> It is completely right about it too, but I don't suspect there's much we
> can do about it, it's pretty much the standard printk() with scheduler
> locks held report.

Just for giggles I added the below and that works fine too. Right until
the report_lock deadlock splat of course, thereafter lockdep is
disabled.

diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..a011cf0a1611 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -459,6 +459,8 @@ static void set_other_info_task_blocking(unsigned long *flags,
 */
int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);

+   lockdep_assert_held(&report_lock);
+
other_info->task = current;
do {
if (is_running) {
@@ -495,6 +497,8 @@ static void set_other_info_task_blocking(unsigned long *flags,
 other_info->task == current);
if (is_running)
set_current_state(TASK_RUNNING);
+
+   lockdep_assert_held(&report_lock);
 }

 /* Populate @other_info; requires that the provided @other_info not in use. */


Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> anything happens.

OK, so the below patch doesn't seem to have any nasty recursion issues
here. The only 'problem' is that lockdep now sees report_lock can cause
deadlocks.

It is completely right about it too, but I don't suspect there's much we
can do about it, it's pretty much the standard printk() with scheduler
locks held report.

---
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..732623c30359 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}
 
if (!kcsan_interrupt_watcher)
-   /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
-   raw_local_irq_save(irq_flags);
+   local_irq_save(irq_flags);
 
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
 out_unlock:
if (!kcsan_interrupt_watcher)
-   raw_local_irq_restore(irq_flags);
+   local_irq_restore(irq_flags);
 out:
user_access_restore(ua_flags);
 }
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..ef31c1d2dac3 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -605,14 +605,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
	if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
goto out;
 
-   /*
-* With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
-* we do not turn off lockdep here; this could happen due to recursion
-* into lockdep via KCSAN if we detect a race in utilities used by
-* lockdep.
-*/
-   lockdep_off();
-
if (prepare_report(, type, , other_info)) {
/*
 * Never report if value_change is FALSE, only if we it is
@@ -628,7 +620,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
release_report(, other_info);
}
 
-   lockdep_on();
 out:
kcsan_enable_current();
 }



Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 09:13:35PM +0200, Marco Elver wrote:
> I see the below report when I boot with your branch + KCSAN and
> PROVE_LOCKING. config attached. Trying to make sense of what's
> happening.

Ah, I was still playing with tip/master + PROVE_LOCKING + KCSAN and
slowly removing parts of that annotation patch to see what would come
unstuck.

I think I just hit a genuine but unavoidable lockdep report on
report_lock.

> -- >8 --
> 
> [   10.182354] [ cut here ]
> [   10.183058] WARNING: CPU: 7 PID: 136 at kernel/locking/lockdep.c:398 
> lockdep_hardirqs_on_prepare+0x1c6/0x270
> [   10.184347] Modules linked in:
> [   10.184771] CPU: 7 PID: 136 Comm: systemd-journal Not tainted 5.8.0-rc1+ #3
> [   10.185706] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.13.0-1 04/01/2014
> [   10.186821] RIP: 0010:lockdep_hardirqs_on_prepare+0x1c6/0x270
> [   10.187594] Code: 75 28 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 0f 85 b9 
> 00 00 00 48 83 c4 10 5b 41 5e 41 5f c3 65 48 ff 05 d4 24 4e 75 eb d8 <0f> 0b 
> 90 41 c7 86 c4 08 00 00 00 00 00 00 eb c8 e8 65 09 71 01 85
> [   10.190203] RSP: 0018:a7ee802b7848 EFLAGS: 00010017
> [   10.190989] RAX: 0001 RBX: 955e92a34ab0 RCX: 
> 0001
> [   10.192053] RDX: 0006 RSI: 955e92a34a88 RDI: 
> 955e92a341c0
> [   10.193117] RBP: a7ee802b7be8 R08:  R09: 
> 
> [   10.194186] R10:  R11: 8d07e268 R12: 
> 0001
> [   10.195249] R13: 8e41bb10 R14: 955e92a341c0 R15: 
> 0001
> [   10.196312] FS:  7fd6862aa8c0() GS:955e9fd8() 
> knlGS:
> [   10.197513] CS:  0010 DS:  ES:  CR0: 80050033
> [   10.198373] CR2: 7fd6837dd000 CR3: 000812acc001 CR4: 
> 00760ee0
> [   10.199436] DR0:  DR1:  DR2: 
> 
> [   10.200494] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [   10.201554] PKRU: 5554
> [   10.201967] Call Trace:
> [   10.202348]  ? _raw_spin_unlock_irqrestore+0x40/0x70
> [   10.203093]  trace_hardirqs_on+0x56/0x60   <- enter 
> IRQ flags tracing code?
> [   10.203686]  _raw_spin_unlock_irqrestore+0x40/0x70 <- 
> take report_lock
> [   10.204406]  prepare_report+0x11f/0x150
> [   10.204986]  kcsan_report+0xca/0x6c0   <- 
> generating a KCSAN report
> [   10.212669]  kcsan_found_watchpoint+0xe5/0x110

That appears to be warning about a lockdep_recursion underflow, weird.
I'll go stare at it.




Re: [PATCH 2/2] powerpc/papr_scm: Add support for fetching nvdimm 'fuel-gauge' metric

2020-06-23 Thread Ira Weiny
On Mon, Jun 22, 2020 at 09:54:51AM +0530, Vaibhav Jain wrote:
> We add support for reporting 'fuel-gauge' NVDIMM metric via
> PAPR_PDSM_HEALTH pdsm payload. 'fuel-gauge' metric indicates the usage
> life remaining of a papr-scm compatible NVDIMM. PHYP exposes this
> metric via the H_SCM_PERFORMANCE_STATS.
> 
> The metric value is returned from the pdsm by extending the return
> payload 'struct nd_papr_pdsm_health' without breaking the ABI. A new
> field 'dimm_fuel_gauge' to hold the metric value is introduced at the
> end of the payload struct and its presence is indicated by
> extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID.
> 
> The patch introduces a new function papr_pdsm_fuel_gauge() that is
> called from papr_pdsm_health(). If fetching NVDIMM performance stats
> is supported then 'papr_pdsm_fuel_gauge()' allocates an output buffer
> large enough to hold the performance stat and passes it to
> drc_pmem_query_stats() that issues the HCALL to PHYP. The return value
> of the stat is then populated in the 'struct
> nd_papr_pdsm_health.dimm_fuel_gauge' field with extension flag
> 'PDSM_DIMM_HEALTH_RUN_GAUGE_VALID' set in 'struct
> nd_papr_pdsm_health.extension_flags'
> 
> Signed-off-by: Vaibhav Jain 
> ---
>  arch/powerpc/include/uapi/asm/papr_pdsm.h |  9 +
>  arch/powerpc/platforms/pseries/papr_scm.c | 47 +++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> index 9ccecc1d6840..50ef95e2f5b1 100644
> --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> @@ -72,6 +72,11 @@
>  #define PAPR_PDSM_DIMM_CRITICAL  2
>  #define PAPR_PDSM_DIMM_FATAL 3
>  
> +/* struct nd_papr_pdsm_health.extension_flags field flags */
> +
> +/* Indicate that the 'dimm_fuel_gauge' field is valid */
> +#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
> +
>  /*
>   * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
>   * Various flags indicate the health status of the dimm.
> @@ -84,6 +89,7 @@
>   * dimm_locked   : Contents of the dimm cant be modified until CEC reboot
>   * dimm_encrypted: Contents of dimm are encrypted.
>   * dimm_health   : Dimm health indicator. One of PAPR_PDSM_DIMM_
> + * dimm_fuel_gauge   : Life remaining of DIMM as a percentage from 0-100
>   */
>  struct nd_papr_pdsm_health {
>   union {
> @@ -96,6 +102,9 @@ struct nd_papr_pdsm_health {
>   __u8 dimm_locked;
>   __u8 dimm_encrypted;
>   __u16 dimm_health;
> +
> + /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
> + __u16 dimm_fuel_gauge;
>   };
>   __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>   };
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index cb3f9acc325b..39527cd38d9c 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -506,6 +506,45 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
>   return 0;
>  }
>  
> +static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p,
> + union nd_pdsm_payload *payload)
> +{
> + int rc, size;
> + struct papr_scm_perf_stat *stat;
> + struct papr_scm_perf_stats *stats;
> +
> + /* Silently fail if fetching performance metrics isn't  supported */
> + if (!p->len_stat_buffer)
> + return 0;
> +
> + /* Allocate request buffer enough to hold single performance stat */
> + size = sizeof(struct papr_scm_perf_stats) +
> + sizeof(struct papr_scm_perf_stat);
> +
> + stats = kzalloc(size, GFP_KERNEL);
> + if (!stats)
> + return -ENOMEM;
> +
> + stat = &stats->scm_statistic[0];
> + memcpy(>statistic_id, "MemLife ", sizeof(stat->statistic_id));
> + stat->statistic_value = 0;
> +
> + /* Fetch the fuel gauge and populate it in payload */
> + rc = drc_pmem_query_stats(p, stats, size, 1, NULL);
> + if (!rc) {

Always best to handle the error case first...

if (rc) {
... print debugging from below...
goto free_stats;
}

> + dev_dbg(&p->pdev->dev,
> + "Fetched fuel-gauge %llu", stat->statistic_value);
> + payload->health.extension_flags |=
> + PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
> + payload->health.dimm_fuel_gauge = stat->statistic_value;
> +
> + rc = sizeof(struct nd_papr_pdsm_health);
> + }
> +

free_stats:

> + kfree(stats);
> + return rc;
> +}
> +
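
Stitched together, the suggested shape would be (a sketch reusing the
patch's own identifiers, untested):

	rc = drc_pmem_query_stats(p, stats, size, 1, NULL);
	if (rc)
		goto free_stats;

	dev_dbg(&p->pdev->dev,
		"Fetched fuel-gauge %llu", stat->statistic_value);
	payload->health.extension_flags |= PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
	payload->health.dimm_fuel_gauge = stat->statistic_value;
	rc = sizeof(struct nd_papr_pdsm_health);

free_stats:
	kfree(stats);
	return rc;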
>  /* Fetch the DIMM health info and populate it in provided package. */
>  static int papr_pdsm_health(struct papr_scm_priv *p,
>   union nd_pdsm_payload *payload)
> @@ -546,6 +585,14 @@ static int 

Re: [PATCH 1/2] powerpc/papr_scm: Fetch nvdimm performance stats from PHYP

2020-06-23 Thread Ira Weiny
On Mon, Jun 22, 2020 at 09:54:50AM +0530, Vaibhav Jain wrote:
> Update papr_scm.c to query dimm performance statistics from PHYP via
> H_SCM_PERFORMANCE_STATS hcall and export them to user-space as PAPR
> specific NVDIMM attribute 'perf_stats' in sysfs. The patch also
> provide a sysfs ABI documentation for the stats being reported and
> their meanings.
> 
> During NVDIMM probe time in papr_scm_nvdimm_init() a special variant
> of H_SCM_PERFORMANCE_STATS hcall is issued to check if collection of
> performance statistics is supported or not. If successful then a PHYP
> returns a maximum possible buffer length needed to read all
> performance stats. This returned value is stored in a per-nvdimm
> attribute 'len_stat_buffer'.
> 
> The layout of request buffer for reading NVDIMM performance stats from
> PHYP is defined in 'struct papr_scm_perf_stats' and 'struct
> papr_scm_perf_stat'. These structs are used in newly introduced
> drc_pmem_query_stats() that issues the H_SCM_PERFORMANCE_STATS hcall.
> 
> The sysfs access function perf_stats_show() uses value
> 'len_stat_buffer' to allocate a buffer large enough to hold all
> possible NVDIMM performance stats and passes it to
> drc_pmem_query_stats() to populate. Finally statistics reported in the
> buffer are formatted into the sysfs access function output buffer.
> 
> Signed-off-by: Vaibhav Jain 
> ---
>  Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 
>  arch/powerpc/platforms/pseries/papr_scm.c | 139 ++
>  2 files changed, 166 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem 
> b/Documentation/ABI/testing/sysfs-bus-papr-pmem
> index 5b10d036a8d4..c1a67275c43f 100644
> --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
> +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
> @@ -25,3 +25,30 @@ Description:
> NVDIMM have been scrubbed.
>   * "locked"  : Indicating that NVDIMM contents cant
> be modified until next power cycle.
> +
> +What:/sys/bus/nd/devices/nmemX/papr/perf_stats
> +Date:May, 2020
> +KernelVersion:   v5.9
> +Contact: linuxppc-dev , 
> linux-nvd...@lists.01.org,
> +Description:
> + (RO) Report various performance stats related to papr-scm NVDIMM
> + device.  Each stat is reported on a new line with each line
> + composed of a stat-identifier followed by its value. Below are
> + currently known dimm performance stats which are reported:
> +
> + * "CtlResCt" : Controller Reset Count
> + * "CtlResTm" : Controller Reset Elapsed Time
> + * "PonSecs " : Power-on Seconds
> + * "MemLife " : Life Remaining
> + * "CritRscU" : Critical Resource Utilization
> + * "HostLCnt" : Host Load Count
> + * "HostSCnt" : Host Store Count
> + * "HostSDur" : Host Store Duration
> + * "HostLDur" : Host Load Duration
> + * "MedRCnt " : Media Read Count
> + * "MedWCnt " : Media Write Count
> + * "MedRDur " : Media Read Duration
> + * "MedWDur " : Media Write Duration
> + * "CchRHCnt" : Cache Read Hit Count
> + * "CchWHCnt" : Cache Write Hit Count
> + * "FastWCnt" : Fast Write Count
> \ No newline at end of file
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 9c569078a09f..cb3f9acc325b 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -62,6 +62,24 @@
>   PAPR_PMEM_HEALTH_FATAL |\
>   PAPR_PMEM_HEALTH_UNHEALTHY)
>  
> +#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
> +#define PAPR_SCM_PERF_STATS_VERSION 0x1
> +
> +/* Struct holding a single performance metric */
> +struct papr_scm_perf_stat {
> + u8 statistic_id[8];
> + u64 statistic_value;
> +};
> +
> +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
> +struct papr_scm_perf_stats {
> + u8 eye_catcher[8];
> + u32 stats_version;  /* Should be 0x01 */
 
 PAPR_SCM_PERF_STATS_VERSION?

> + u32 num_statistics; /* Number of stats following */
> + /* zero or more performance matrics */
> + struct papr_scm_perf_stat scm_statistic[];
> +} __packed;
> +
>  /* private struct associated with each region */
>  struct papr_scm_priv {
>   struct platform_device *pdev;
> @@ -89,6 +107,9 @@ struct papr_scm_priv {
>  
>   /* Health information for the dimm */
>   u64 health_bitmap;
> +
> + /* length of the stat buffer as expected by phyp */
> + size_t len_stat_buffer;
>  };
>  
>  static int drc_pmem_bind(struct papr_scm_priv *p)
> @@ -194,6 
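
For illustration, reading the new attribute would yield something like the
following (hypothetical values; identifiers as documented above; exact
spacing is whatever perf_stats_show() emits):

  # cat /sys/bus/nd/devices/nmem0/papr/perf_stats
  CtlResCt : 0
  PonSecs  : 84370
  MemLife  : 99
  MedRCnt  : 2190714
  MedWCnt  : 91622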

Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > Well, freshly merged code is using it. For example, KCSAN:
> > > 
> > > => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > > => kernel/kcsan/report.c:
> > > 
> > > void kcsan_report(...)
> > > {
> > >   ...
> > > /*
> > >  * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes 
> > > corrupted if
> > >  * we do not turn off lockdep here; this could happen due to 
> > > recursion
> > >  * into lockdep via KCSAN if we detect a race in utilities used by
> > >  * lockdep.
> > >  */
> > > lockdep_off();
> > >   ...
> > > }
> > 
> > Marco, do you remember what exactly happened there? Because I'm about to
> > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > lockdep_off().
> 
> Yeah, I was trying to squash any kind of recursion:
> 
>   lockdep -> other libs ->
>   -> KCSAN
>   -> print report
>   -> dump stack, printk and friends
>   -> lockdep -> other libs
>   -> KCSAN ...
> 
> Some history:
> 
> * Initial patch to fix:
>   https://lore.kernel.org/lkml/20200115162512.70807-1-el...@google.com/

That patch is weird; just :=n on lockdep.c should've cured that, the
rest is massive overkill.

> * KCSAN+lockdep+ftrace:
>   https://lore.kernel.org/lkml/20200214211035.209972-1-el...@google.com/

That doesn't really have anything useful..

> lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> there are no paths out of lockdep, or the IRQ flags tracing code, that
> might lead through other libs, through KCSAN, libs used to generate a
> report, and back to lockdep.
> 
> I never quite figured out the exact trace that led to corruption, but
> avoiding any kind of potential for recursion was the only thing that
> would avoid the check_flags() warnings.

Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
anything happens.


[PATCH AUTOSEL 4.19 07/15] ibmvnic: Harden device login requests

2020-06-23 Thread Sasha Levin
From: Thomas Falcon 

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 645298628b6f7..5e9e45befc875 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -792,12 +792,13 @@ static int ibmvnic_login(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	unsigned long timeout = msecs_to_jiffies(30000);
int retry_count = 0;
+   int retries = 10;
bool retry;
int rc;
 
do {
retry = false;
-   if (retry_count > IBMVNIC_MAX_QUEUES) {
+   if (retry_count > retries) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
}
@@ -812,11 +813,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
		if (!wait_for_completion_timeout(&adapter->init_done,
 timeout)) {
-   netdev_warn(netdev, "Login timed out\n");
-   return -1;
+   netdev_warn(netdev, "Login timed out, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   continue;
}
 
-   if (adapter->init_done_rc == PARTIALSUCCESS) {
+   if (adapter->init_done_rc == ABORTED) {
+   netdev_warn(netdev, "Login aborted, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   /* FW or device may be busy, so
+* wait a bit before retrying login
+*/
+   msleep(500);
+   } else if (adapter->init_done_rc == PARTIALSUCCESS) {
retry_count++;
release_sub_crqs(adapter, 1);
 
-- 
2.25.1



[PATCH AUTOSEL 5.4 07/24] ibmvnic: Harden device login requests

2020-06-23 Thread Sasha Levin
From: Thomas Falcon 

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5a42ddeecfe50..4f503b9a674c4 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -779,12 +779,13 @@ static int ibmvnic_login(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	unsigned long timeout = msecs_to_jiffies(30000);
int retry_count = 0;
+   int retries = 10;
bool retry;
int rc;
 
do {
retry = false;
-   if (retry_count > IBMVNIC_MAX_QUEUES) {
+   if (retry_count > retries) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
}
@@ -799,11 +800,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
		if (!wait_for_completion_timeout(&adapter->init_done,
 timeout)) {
-   netdev_warn(netdev, "Login timed out\n");
-   return -1;
+   netdev_warn(netdev, "Login timed out, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   continue;
}
 
-   if (adapter->init_done_rc == PARTIALSUCCESS) {
+   if (adapter->init_done_rc == ABORTED) {
+   netdev_warn(netdev, "Login aborted, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   /* FW or device may be busy, so
+* wait a bit before retrying login
+*/
+   msleep(500);
+   } else if (adapter->init_done_rc == PARTIALSUCCESS) {
retry_count++;
release_sub_crqs(adapter, 1);
 
-- 
2.25.1



[PATCH AUTOSEL 5.7 08/28] ibmvnic: Harden device login requests

2020-06-23 Thread Sasha Levin
From: Thomas Falcon 

[ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ]

The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 197dc5b2c0905..c265917487e84 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -842,12 +842,13 @@ static int ibmvnic_login(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	unsigned long timeout = msecs_to_jiffies(30000);
int retry_count = 0;
+   int retries = 10;
bool retry;
int rc;
 
do {
retry = false;
-   if (retry_count > IBMVNIC_MAX_QUEUES) {
+   if (retry_count > retries) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
}
@@ -862,11 +863,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
		if (!wait_for_completion_timeout(&adapter->init_done,
 timeout)) {
-   netdev_warn(netdev, "Login timed out\n");
-   return -1;
+   netdev_warn(netdev, "Login timed out, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   continue;
}
 
-   if (adapter->init_done_rc == PARTIALSUCCESS) {
+   if (adapter->init_done_rc == ABORTED) {
+   netdev_warn(netdev, "Login aborted, retrying...\n");
+   retry = true;
+   adapter->init_done_rc = 0;
+   retry_count++;
+   /* FW or device may be busy, so
+* wait a bit before retrying login
+*/
+   msleep(500);
+   } else if (adapter->init_done_rc == PARTIALSUCCESS) {
retry_count++;
release_sub_crqs(adapter, 1);
 
-- 
2.25.1



[PATCH] KVM: PPC: Book3S HV: Use feature flag CPU_FTR_P9_TIDR when accessing TIDR

2020-06-23 Thread Cédric Le Goater
The TIDR register is only available on POWER9 systems and code
accessing this register is not always protected by the CPU_FTR_P9_TIDR
flag. Fix that to make sure POWER10 systems won't use it as TIDR has
been removed.

Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/kvm/book3s_hv.c| 23 +--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d64a2dc1ccca..3e5410f27a2a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1755,7 +1755,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
*val = get_reg_val(id, vcpu->arch.wort);
break;
case KVM_REG_PPC_TIDR:
-   *val = get_reg_val(id, vcpu->arch.tid);
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   *val = get_reg_val(id, vcpu->arch.tid);
+   else
+   r = -ENXIO;
break;
case KVM_REG_PPC_PSSCR:
*val = get_reg_val(id, vcpu->arch.psscr);
@@ -1972,7 +1975,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
vcpu->arch.wort = set_reg_val(id, *val);
break;
case KVM_REG_PPC_TIDR:
-   vcpu->arch.tid = set_reg_val(id, *val);
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   vcpu->arch.tid = set_reg_val(id, *val);
+   else
+   r = -ENXIO;
break;
case KVM_REG_PPC_PSSCR:
vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
@@ -3526,13 +3532,15 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_dscr = mfspr(SPRN_DSCR);
-   unsigned long host_tidr = mfspr(SPRN_TIDR);
+   unsigned long host_tidr;
unsigned long host_iamr = mfspr(SPRN_IAMR);
unsigned long host_amr = mfspr(SPRN_AMR);
s64 dec;
u64 tb;
int trap, save_pmu;
 
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   host_tidr = mfspr(SPRN_TIDR);
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 512)
@@ -3579,7 +3587,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_WORT, vcpu->arch.wort);
-   mtspr(SPRN_TIDR, vcpu->arch.tid);
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_AMR, vcpu->arch.amr);
@@ -3653,7 +3662,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
vcpu->arch.wort = mfspr(SPRN_WORT);
-   vcpu->arch.tid = mfspr(SPRN_TIDR);
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
@@ -3662,7 +3672,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
mtspr(SPRN_WORT, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_dscr);
-   mtspr(SPRN_TIDR, host_tidr);
+   if (cpu_has_feature(CPU_FTR_P9_TIDR))
+   mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
mtspr(SPRN_PSPB, 0);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 71943892c81c..64e454656749 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -697,9 +697,11 @@ kvmppc_got_guest:
/* Save host values of some registers */
 BEGIN_FTR_SECTION
mfspr   r5, SPRN_TIDR
+   std r5, STACK_SLOT_TID(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TIDR)
+BEGIN_FTR_SECTION
mfspr   r6, SPRN_PSSCR
mfspr   r7, SPRN_PID
-   std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
mfspr   r5, SPRN_HFSCR
@@ -835,13 +837,15 @@ BEGIN_FTR_SECTION
nop
 FTR_SECTION_ELSE
/* POWER9-only registers */
+BEGIN_FTR_SECTION_NESTED(96);
ld  r5, VCPU_TID(r4)
+   mtspr   SPRN_TIDR, r5
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_P9_TIDR, 96)
ld  r6, VCPU_PSSCR(r4)
lbz r8, HSTATE_FAKE_SUSPEND(r13)
	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
rldimi  r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
ld  r7, VCPU_HFSCR(r4)
-   mtspr   SPRN_TIDR, r5
mtspr   SPRN_PSSCR, r6
mtspr   SPRN_HFSCR, r7
 

Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> Well, freshly merged code is using it. For example, KCSAN:
> 
> => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> => kernel/kcsan/report.c:
> 
> void kcsan_report(...)
> {
>   ...
> /*
>  * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
>  * we do not turn off lockdep here; this could happen due to recursion
>  * into lockdep via KCSAN if we detect a race in utilities used by
>  * lockdep.
>  */
> lockdep_off();
>   ...
> }

Marco, do you remember what exactly happened there? Because I'm about to
wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
lockdep_off().


Re: [PATCH 14/18] powerpc/numa: remove arch_update_cpu_topology

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:34]:

> Since arch_update_cpu_topology() doesn't do anything on powerpc now,
> remove it and associated dead code.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/include/asm/topology.h |  6 --
>  arch/powerpc/mm/numa.c  | 10 --
>  2 files changed, 16 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 13/18] powerpc/numa: remove prrn_is_enabled()

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:33]:

> All users of this prrn_is_enabled() are gone; remove it.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/include/asm/topology.h | 5 -
>  arch/powerpc/mm/numa.c  | 5 -
>  2 files changed, 10 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 12/18] powerpc/rtasd: simplify handle_rtas_event(), emit message on events

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:32]:

> prrn_is_enabled() always returns false/0, so handle_rtas_event() can
> be simplified and some dead code can be removed. Use machine_is()
> instead of #ifdef to run this code only on pseries, and add an
> informational ratelimited message that we are ignoring the
> events. PRRN events are relatively rare in normal operation and
> usually arise from operator-initiated actions such as a DPO (Dynamic
> Platform Optimizer) run.
> 
> Eventually we do want to consume these events and update the device
> tree, but that needs more care to be safe vs LPM and DLPAR.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/kernel/rtasd.c | 28 +++-
>  1 file changed, 3 insertions(+), 25 deletions(-)
> 
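
The hunk itself is not quoted above; the shape of the change described in
the commit message is roughly the following sketch (names other than
machine_is(), rtas_error_type(), RTAS_TYPE_PRRN and pr_info_ratelimited()
are assumptions, not the actual patch):

	static void handle_prrn_event(const struct rtas_error_log *log)
	{
		/* Run only on pseries, without a CONFIG_PPC_PSERIES #ifdef */
		if (!machine_is(pseries))
			return;

		if (rtas_error_type(log) == RTAS_TYPE_PRRN)
			pr_info_ratelimited("ignoring PRRN event\n");
	}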

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 11/18] powerpc/numa: remove start/stop_topology_update()

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:31]:

> These APIs have become no-ops, so remove them and all call sites.
> 
> Signed-off-by: Nathan Lynch 
> ---

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 10/18] powerpc/numa: remove timed_topology_update()

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:30]:

> timed_topology_update is a no-op now, so remove it and all call sites.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/include/asm/topology.h  | 5 -
>  arch/powerpc/mm/numa.c   | 9 -
>  arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 --
>  3 files changed, 16 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 09/18] powerpc/numa: stub out numa_update_cpu_topology()

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:29]:

> Previous changes have removed the code which sets bits in
> cpu_associativity_changes_mask and thus it is never modified at
> runtime. From this we can reason that numa_update_cpu_topology()
> always returns 0 without doing anything. Remove the body of
> numa_update_cpu_topology() and remove all code which becomes
> unreachable as a result.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/mm/numa.c | 193 +
>  1 file changed, 1 insertion(+), 192 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 08/18] powerpc/numa: remove vphn_enabled and prrn_enabled internal flags

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:28]:

> These flags are always zero now; remove them and suitably adjust the
> remaining references to them.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/mm/numa.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 07/18] powerpc/numa: remove unreachable topology workqueue code

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:27]:

> Since vphn_enabled is always 0, we can remove the call to
> topology_schedule_update() and remove the code which becomes
> unreachable as a result.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/mm/numa.c | 14 --
>  1 file changed, 14 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 06/18] powerpc/numa: remove unreachable topology timer code

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:26]:

> Since vphn_enabled is always 0, we can stub out
> timed_topology_update() and remove the code which becomes unreachable.
> 
> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/mm/numa.c | 21 -
>  1 file changed, 21 deletions(-)
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 
-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 05/18] powerpc/numa: make vphn_enabled, prrn_enabled flags const

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:25]:

> Previous changes have made it so these flags are never changed;
> enforce this by making them const.
> 
> Signed-off-by: Nathan Lynch 
> ---

Looks good to me.

Reviewed-by: Srikar Dronamraju 

>  arch/powerpc/mm/numa.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 9e20f12e6caf..1b89bacb8975 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1132,8 +1132,8 @@ struct topology_update_data {
>  #define TOPOLOGY_DEF_TIMER_SECS  60
> 
>  static cpumask_t cpu_associativity_changes_mask;
> -static int vphn_enabled;
> -static int prrn_enabled;
> +static const int vphn_enabled;
> +static const int prrn_enabled;
>  static void reset_topology_timer(void);
>  static int topology_timer_secs = 1;
>  static int topology_inited;
> -- 
> 2.25.4
> 

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 04/18] powerpc/numa: remove unreachable topology update code

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:24]:

> Since the topology_updates_enabled flag is now always false, remove it
> and the code which has become unreachable. This is the minimum change
> that prevents 'defined but unused' warnings emitted by the compiler
> after stubbing out the start/stop_topology_updates() functions.
> 
> Signed-off-by: Nathan Lynch 

Looks good to me.

Reviewed-by: Srikar Dronamraju 

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 03/18] powerpc/numa: remove ability to enable topology updates

2020-06-23 Thread Srikar Dronamraju
* Nathan Lynch  [2020-06-12 00:12:23]:

> Remove the /proc/powerpc/topology_updates interface and the
> topology_updates=on/off command line argument. The internal
> topology_updates_enabled flag remains for now, but always false.
> 

Looks good to me.

Reviewed-by: Srikar Dronamraju 

> Signed-off-by: Nathan Lynch 
> ---
>  arch/powerpc/mm/numa.c | 71 +-
>  1 file changed, 1 insertion(+), 70 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 9fcf2d195830..34d95de77bdd 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -984,27 +984,7 @@ static int __init early_numa(char *p)
>  }
>  early_param("numa", early_numa);
> 
> -/*
> - * The platform can inform us through one of several mechanisms
> - * (post-migration device tree updates, PRRN or VPHN) that the NUMA
> - * assignment of a resource has changed. This controls whether we act
> - * on that. Disabled by default.
> - */
> -static bool topology_updates_enabled;
> -
> -static int __init early_topology_updates(char *p)
> -{
> - if (!p)
> - return 0;
> -
> - if (!strcmp(p, "on")) {
> - pr_warn("Caution: enabling topology updates\n");
> - topology_updates_enabled = true;
> - }
> -
> - return 0;
> -}
> -early_param("topology_updates", early_topology_updates);
> +static const bool topology_updates_enabled;
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  /*
> @@ -1632,52 +1612,6 @@ int prrn_is_enabled(void)
>   return prrn_enabled;
>  }
> 
> -static int topology_read(struct seq_file *file, void *v)
> -{
> - if (vphn_enabled || prrn_enabled)
> - seq_puts(file, "on\n");
> - else
> - seq_puts(file, "off\n");
> -
> - return 0;
> -}
> -
> -static int topology_open(struct inode *inode, struct file *file)
> -{
> - return single_open(file, topology_read, NULL);
> -}
> -
> -static ssize_t topology_write(struct file *file, const char __user *buf,
> -   size_t count, loff_t *off)
> -{
> - char kbuf[4]; /* "on" or "off" plus null. */
> - int read_len;
> -
> - read_len = count < 3 ? count : 3;
> - if (copy_from_user(kbuf, buf, read_len))
> - return -EINVAL;
> -
> - kbuf[read_len] = '\0';
> -
> - if (!strncmp(kbuf, "on", 2)) {
> - topology_updates_enabled = true;
> - start_topology_update();
> - } else if (!strncmp(kbuf, "off", 3)) {
> - stop_topology_update();
> - topology_updates_enabled = false;
> - } else
> - return -EINVAL;
> -
> - return count;
> -}
> -
> -static const struct proc_ops topology_proc_ops = {
> - .proc_read  = seq_read,
> - .proc_write = topology_write,
> - .proc_open  = topology_open,
> - .proc_release   = single_release,
> -};
> -
>  static int topology_update_init(void)
>  {
>   start_topology_update();
> @@ -1685,9 +1619,6 @@ static int topology_update_init(void)
>   if (vphn_enabled)
>   topology_schedule_update();
> 
> - if (!proc_create("powerpc/topology_updates", 0644, NULL, 
> -		 &topology_proc_ops))
> - return -ENOMEM;
> -
>   topology_inited = 1;
>   return 0;
>  }
> -- 
> 2.25.4
> 

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH v6 1/5] KVM: s390: clean up redundant 'kvm_run' parameters

2020-06-23 Thread Christian Borntraeger



On 23.06.20 15:14, Tianjia Zhang wrote:
> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
> structure. For historical reasons, many kvm-related function parameters
> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
> patch does a unified cleanup of these remaining redundant parameters.
> 
> Signed-off-by: Tianjia Zhang 
> Reviewed-by: Vitaly Kuznetsov 
> ---
>  arch/s390/kvm/kvm-s390.c | 23 +++
>  1 file changed, 15 insertions(+), 8 deletions(-)

Tianjia,

I have trouble seeing value in this particular patch. We add LOCs
without providing any noticeable benefit. All other patches in this series at
least reduce the amount of code. So I would defer this to Paolo if he prefers
to have it this way across all architectures.
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index d47c19718615..f5f96dc33712 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -4175,8 +4175,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>   return rc;
>  }
>  
> -static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_run *kvm_run = vcpu->run;
>   struct runtime_instr_cb *riccb;
>   struct gs_cb *gscb;
>  
> @@ -4242,8 +4243,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, 
> struct kvm_run *kvm_run)
>   /* SIE will load etoken directly from SDNX and therefore kvm_run */
>  }
>  
> -static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void sync_regs(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_run *kvm_run = vcpu->run;
> +
>   if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
>   kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
>   if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
> @@ -4272,7 +4275,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct 
> kvm_run *kvm_run)
>  
>   /* Sync fmt2 only data */
>   if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
> - sync_regs_fmt2(vcpu, kvm_run);
> + sync_regs_fmt2(vcpu);
>   } else {
>   /*
>* In several places we have to modify our internal view to
> @@ -4291,8 +4294,10 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct 
> kvm_run *kvm_run)
>   kvm_run->kvm_dirty_regs = 0;
>  }
>  
> -static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void store_regs_fmt2(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_run *kvm_run = vcpu->run;
> +
>   kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
>   kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
>   kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
> @@ -4312,8 +4317,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, 
> struct kvm_run *kvm_run)
>   /* SIE will save etoken directly into SDNX and therefore kvm_run */
>  }
>  
> -static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void store_regs(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_run *kvm_run = vcpu->run;
> +
>   kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
>   kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
>   kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
> @@ -4332,7 +4339,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct 
> kvm_run *kvm_run)
>   current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
>   current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
>   if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
> - store_regs_fmt2(vcpu, kvm_run);
> + store_regs_fmt2(vcpu);
>  }
>  
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
> @@ -4370,7 +4377,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>   goto out;
>   }
>  
> - sync_regs(vcpu, kvm_run);
> + sync_regs(vcpu);
>   enable_cpu_timer_accounting(vcpu);
>  
>   might_fault();
> @@ -4392,7 +4399,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>   }
>  
>   disable_cpu_timer_accounting(vcpu);
> - store_regs(vcpu, kvm_run);
> + store_regs(vcpu);
>  
>   kvm_sigset_deactivate(vcpu);
>  
> 


Re: [PATCH] KVM: PPC: Book3S HV: increase KVMPPC_NR_LPIDS on POWER8 and POWER9

2020-06-23 Thread Cédric Le Goater
On 6/8/20 1:57 PM, Cédric Le Goater wrote:
> POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to
> (4096 - 2) guests on these processors. POWER7 is kept the same with a
> limitation of (1024 - 2), but it might be time to drop KVM support for
> POWER7.
> 
> Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G
> RAM and a bit of swap.

For the record, it is possible to run 4094 guests * 4 vCPUs on a POWER9 
system with 1TB. It takes ~5m to boot them all.

CONFIG_NR_IRQS needs to be increased to support 4094 * 4 escalation 
interrupts.
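
(That is 4094 * 4 = 16376 escalation interrupts, far above the powerpc
default of 512 for CONFIG_NR_IRQS.)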

Cheers,

C.


> 
> Signed-off-by: Cédric Le Goater 
> ---
>  arch/powerpc/include/asm/reg.h  | 3 ++-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 8 ++--
>  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 88e6c78100d9..b70bbfb0ea3c 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -473,7 +473,8 @@
>  #ifndef SPRN_LPID
>  #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
>  #endif
> -#define   LPID_RSVD  0x3ff   /* Reserved LPID for partn switching */
> +#define   LPID_RSVD_POWER7   0x3ff   /* Reserved LPID for partn switching */
> +#define   LPID_RSVD  0xfff   /* Reserved LPID for partn switching */
>  #define  SPRN_HMER   0x150   /* Hypervisor maintenance exception reg 
> */
>  #define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
>  #define  SPRN_HMEER  0x151   /* Hyp maintenance exception enable reg 
> */
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 18aed9775a3c..23035ab2ec50 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void)
>   if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>   return -EINVAL;
>  
> - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
>   host_lpid = 0;
>   if (cpu_has_feature(CPU_FTR_HVMODE))
>   host_lpid = mfspr(SPRN_LPID);
> - rsvd_lpid = LPID_RSVD;
> +
> + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
> + if (cpu_has_feature(CPU_FTR_ARCH_207S))
> + rsvd_lpid = LPID_RSVD;
> + else
> + rsvd_lpid = LPID_RSVD_POWER7;
>  
>   kvmppc_init_lpid(rsvd_lpid + 1);
>  
> 



Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
On Tue, Jun 23, 2020 at 05:00:31PM +0200, Ahmed S. Darwish wrote:
> On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
> ...
> > -#define lockdep_assert_irqs_disabled() do {
> > \
> > -   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> > - current->hardirqs_enabled,\
> > - "IRQs not disabled as expected\n");   \
> > -   } while (0)
> > +#define lockdep_assert_irqs_enabled()  
> > \
> > +do {   
> > \
> > +   WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
> > +} while (0)
> >
> 
> Can we add a small comment on top of lockdep_off(), stating that lockdep
> IRQ tracking will still be kept after a lockdep_off call?

That would only legitimize lockdep_off(). The only comment I want to put
on that is: "if you use this, you're doing it wrong".


Re: [PATCH v1 2/3] powerpc/mm/radix: Fix PTE/PMD fragment count for early page table mappings

2020-06-23 Thread Bharata B Rao
On Tue, Jun 23, 2020 at 04:07:34PM +0530, Aneesh Kumar K.V wrote:
> Bharata B Rao  writes:
> 
> > We can hit the following BUG_ON during memory unplug:
> >
> > kernel BUG at arch/powerpc/mm/book3s64/pgtable.c:342!
> > Oops: Exception in kernel mode, sig: 5 [#1]
> > LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> > NIP [c0093308] pmd_fragment_free+0x48/0xc0
> > LR [c147bfec] remove_pagetable+0x578/0x60c
> > Call Trace:
> > 0xc0805000 (unreliable)
> > remove_pagetable+0x384/0x60c
> > radix__remove_section_mapping+0x18/0x2c
> > remove_section_mapping+0x1c/0x3c
> > arch_remove_memory+0x11c/0x180
> > try_remove_memory+0x120/0x1b0
> > __remove_memory+0x20/0x40
> > dlpar_remove_lmb+0xc0/0x114
> > dlpar_memory+0x8b0/0xb20
> > handle_dlpar_errorlog+0xc0/0x190
> > pseries_hp_work_fn+0x2c/0x60
> > process_one_work+0x30c/0x810
> > worker_thread+0x98/0x540
> > kthread+0x1c4/0x1d0
> > ret_from_kernel_thread+0x5c/0x74
> >
> > This occurs when unplug is attempted for such memory which has
> > been mapped using memblock pages as part of early kernel page
> > table setup. We wouldn't have initialized the PMD or PTE fragment
> > count for those PMD or PTE pages.
> >
> > Fixing this includes 3 parts:
> >
> > - Re-walk the init_mm page tables from mem_init() and initialize
> >   the PMD and PTE fragment count to 1.
> > - When freeing PUD, PMD and PTE page table pages, check explicitly
> >   if they come from memblock and if so free then appropriately.
> > - When we do early memblock based allocation of PMD and PUD pages,
> >   allocate in PAGE_SIZE granularity so that we are sure the
> >   complete page is used as pagetable page.
> >
> > Since we now do PAGE_SIZE allocations for both PUD table and
> > PMD table (Note that PTE table allocation is already of PAGE_SIZE),
> > we end up allocating more memory for the same amount of system RAM.
> > Here is a comparison of how much more we need for a 64T and 2G
> > system after this patch:
> >
> > 1. 64T system
> > -
> > 64T RAM would need 64G for vmemmap with struct page size being 64B.
> >
> > 128 PUD tables for 64T memory (1G mappings)
> > 1 PUD table and 64 PMD tables for 64G vmemmap (2M mappings)
> >
> > With default PUD[PMD]_TABLE_SIZE(4K), (128+1+64)*4K=772K
> > With PAGE_SIZE(64K) table allocations, (128+1+64)*64K=12352K
> >
> > 2. 2G system
> > 
> > 2G RAM would need 2M for vmemmap with struct page size being 64B.
> >
> > 1 PUD table for 2G memory (1G mapping)
> > 1 PUD table and 1 PMD table for 2M vmemmap (2M mappings)
> >
> > With default PUD[PMD]_TABLE_SIZE(4K), (1+1+1)*4K=12K
> > With new PAGE_SIZE(64K) table allocations, (1+1+1)*64K=192K
> 
> How about we just do
> 
> void pmd_fragment_free(unsigned long *pmd)
> {
>   struct page *page = virt_to_page(pmd);
> 
>   /*
>* Early pmd pages allocated via memblock
>* allocator need to be freed differently
>*/
>   if (PageReserved(page))
>   return free_reserved_page(page);
> 
>   BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
>   if (atomic_dec_and_test(&page->pt_frag_refcount)) {
>   pgtable_pmd_page_dtor(page);
>   __free_page(page);
>   }
> }
> 
> That way we could avoid the fixup_pgtable_fragments completely?

Yes we could, by doing the same for pte_fragment_free() too.

However, right from the early versions we have aimed to keep the
handling and behaviour of early page tables and later page tables as
similar to each other as possible. Hence we started with "fixing up"
the early page tables.

If that's not a significant consideration, we can do away with fixup
and retain the other parts (PAGE_SIZE allocations and conditional
freeing) and still fix the bug.
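
For reference, the analogous treatment of PTE fragments would look
something like the sketch below (the PageReserved() test for
memblock-allocated early page table pages is the assumption here,
mirroring the pmd_fragment_free() proposal above):

	void pte_fragment_free(unsigned long *table, int kernel)
	{
		struct page *page = virt_to_page(table);

		/*
		 * Early PTE pages allocated via the memblock allocator
		 * need to be freed differently (assumed PageReserved).
		 */
		if (PageReserved(page))
			return free_reserved_page(page);

		BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
		if (atomic_dec_and_test(&page->pt_frag_refcount)) {
			if (!kernel)
				pgtable_pte_page_dtor(page);
			__free_page(page);
		}
	}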

Regards,
Bharata.


[PATCH v6 3/5] KVM: PPC: clean up redundant kvm_run parameters in assembly

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
---
 arch/powerpc/include/asm/kvm_ppc.h|  2 +-
 arch/powerpc/kvm/book3s_interrupts.S  | 22 ++
 arch/powerpc/kvm/book3s_pr.c  |  9 -
 arch/powerpc/kvm/booke.c  |  9 -
 arch/powerpc/kvm/booke_interrupts.S   |  9 -
 arch/powerpc/kvm/bookehv_interrupts.S | 10 +-
 6 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index ccf66b3a4c1d..0a056c64c317 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -59,7 +59,7 @@ enum xlate_readwrite {
 };
 
 extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
-extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
 extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_interrupts.S 
b/arch/powerpc/kvm/book3s_interrupts.S
index f7ad99d972ce..a3674f6b8d3d 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -55,8 +55,7 @@
  /
 
 /* Registers:
- *  r3: kvm_run pointer
- *  r4: vcpu pointer
+ *  r3: vcpu pointer
  */
 _GLOBAL(__kvmppc_vcpu_run)
 
@@ -68,8 +67,8 @@ kvm_start_entry:
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
 
-   /* Save r3 (kvm_run) and r4 (vcpu) */
-   SAVE_2GPRS(3, r1)
+   /* Save r3 (vcpu) */
+   SAVE_GPR(3, r1)
 
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
@@ -82,14 +81,13 @@ kvm_start_entry:
PPC_STL r0, _LINK(r1)
 
/* Load non-volatile guest state from the vcpu */
-   VCPU_LOAD_NVGPRS(r4)
+   VCPU_LOAD_NVGPRS(r3)
 
 kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
-   mr  r3, r4
bl  FUNC(kvmppc_copy_to_svcpu)
nop
-   REST_GPR(4, r1)
+   REST_GPR(3, r1)
 
 #ifdef CONFIG_PPC_BOOK3S_64
/* Get the dcbz32 flag */
@@ -146,7 +144,7 @@ after_sprg3_load:
 *
 */
 
-   PPC_LL  r3, GPR4(r1)	/* vcpu pointer */
+   PPC_LL  r3, GPR3(r1)	/* vcpu pointer */
 
/*
 * kvmppc_copy_from_svcpu can clobber volatile registers, save
@@ -190,11 +188,11 @@ after_sprg3_load:
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
 
-   /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
-   lwz r5, VCPU_TRAP(r7)
+   /* Pass the exit number as 2nd argument to kvmppc_handle_exit */
+   lwz r4, VCPU_TRAP(r7)
 
-   /* Restore r3 (kvm_run) and r4 (vcpu) */
-   REST_2GPRS(3, r1)
+   /* Restore r3 (vcpu) */
+   REST_GPR(3, r1)
bl  FUNC(kvmppc_handle_exit_pr)
 
/* If RESUME_GUEST, get back in the loop */
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ef54f917bdaf..01c8fe5abe0d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, 
unsigned int exit_nr)
return r;
 }
 
-int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
 {
+   struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
 
@@ -1826,7 +1826,6 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu 
*vcpu)
 
 static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
 {
-   struct kvm_run *run = vcpu->run;
int ret;
 #ifdef CONFIG_ALTIVEC
unsigned long uninitialized_var(vrsave);
@@ -1834,7 +1833,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
 
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
-   run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = -EINVAL;
goto out;
}
@@ -1861,7 +1860,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
 
kvmppc_fix_ee_before_entry();
 
-   ret = __kvmppc_vcpu_run(run, vcpu);
+   ret = __kvmppc_vcpu_run(vcpu);
 
kvmppc_clear_debug(vcpu);
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c0d62a917e20..3e1c9f08e302 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct 

[PATCH v6 0/5] clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

This series of patches has completely cleaned up the arm64, mips, ppc,
and s390 architectures (there is no such redundant code on x86). Due to
the large amount of modified code, a separate patch is made for each
platform. On the ppc platform, there is also a redundant structure
pointer of 'kvm_run' in 'vcpu_arch', which has also been cleaned
separately.
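
The transformation itself is mechanical; in miniature, using the s390
sync_regs() from patch 1 as the shape:

	/* Before: kvm_run threaded through alongside vcpu */
	static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);

	/* After: derive it locally, since vcpu->run always points to it */
	static void sync_regs(struct kvm_vcpu *vcpu)
	{
		struct kvm_run *kvm_run = vcpu->run;
		/* ... body unchanged ... */
	}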

---
v6 changes:
  Rearrange patch sets, only keep the unmerged patch.
  rebase on mainline.

v5 change:
  ppc: fix for review.

v4 change:
  mips: fixes two errors in entry.c.

v3 change:
  Keep the existing `vcpu->run` in the function body unchanged.

v2 change:
  s390 retains the original variable name and minimizes modification.

Tianjia Zhang (5):
  KVM: s390: clean up redundant 'kvm_run' parameters
  KVM: arm64: clean up redundant 'kvm_run' parameters
  KVM: PPC: clean up redundant kvm_run parameters in assembly
  KVM: MIPS: clean up redundant 'kvm_run' parameters
  KVM: MIPS: clean up redundant kvm_run parameters in assembly

 arch/arm64/include/asm/kvm_coproc.h   |  12 +--
 arch/arm64/include/asm/kvm_host.h |  11 +--
 arch/arm64/include/asm/kvm_mmu.h  |   2 +-
 arch/arm64/kvm/arm.c  |   6 +-
 arch/arm64/kvm/handle_exit.c  |  36 
 arch/arm64/kvm/mmio.c |  11 +--
 arch/arm64/kvm/mmu.c  |   5 +-
 arch/arm64/kvm/sys_regs.c |  13 ++-
 arch/mips/include/asm/kvm_host.h  |  32 ++--
 arch/mips/kvm/emulate.c   |  59 +
 arch/mips/kvm/entry.c |  21 ++---
 arch/mips/kvm/mips.c  |  14 ++--
 arch/mips/kvm/trap_emul.c | 114 +++---
 arch/mips/kvm/vz.c|  26 +++---
 arch/powerpc/include/asm/kvm_ppc.h|   2 +-
 arch/powerpc/kvm/book3s_interrupts.S  |  22 +++--
 arch/powerpc/kvm/book3s_pr.c  |   9 +-
 arch/powerpc/kvm/booke.c  |   9 +-
 arch/powerpc/kvm/booke_interrupts.S   |   9 +-
 arch/powerpc/kvm/bookehv_interrupts.S |  10 +--
 arch/s390/kvm/kvm-s390.c  |  23 --
 21 files changed, 188 insertions(+), 258 deletions(-)

-- 
2.17.1



[PATCH v6 1/5] KVM: s390: clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
Reviewed-by: Vitaly Kuznetsov 
---
 arch/s390/kvm/kvm-s390.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d47c19718615..f5f96dc33712 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4175,8 +4175,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
 }
 
-static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
 {
+   struct kvm_run *kvm_run = vcpu->run;
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
 
@@ -4242,8 +4243,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
/* SIE will load etoken directly from SDNX and therefore kvm_run */
 }
 
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs(struct kvm_vcpu *vcpu)
 {
+   struct kvm_run *kvm_run = vcpu->run;
+
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
@@ -4272,7 +4275,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
-   sync_regs_fmt2(vcpu, kvm_run);
+   sync_regs_fmt2(vcpu);
} else {
/*
 * In several places we have to modify our internal view to
@@ -4291,8 +4294,10 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
kvm_run->kvm_dirty_regs = 0;
 }
 
-static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs_fmt2(struct kvm_vcpu *vcpu)
 {
+   struct kvm_run *kvm_run = vcpu->run;
+
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
@@ -4312,8 +4317,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
/* SIE will save etoken directly into SDNX and therefore kvm_run */
 }
 
-static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs(struct kvm_vcpu *vcpu)
 {
+   struct kvm_run *kvm_run = vcpu->run;
+
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
@@ -4332,7 +4339,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
-   store_regs_fmt2(vcpu, kvm_run);
+   store_regs_fmt2(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
@@ -4370,7 +4377,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
 
-   sync_regs(vcpu, kvm_run);
+   sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
 
might_fault();
@@ -4392,7 +4399,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
 
disable_cpu_timer_accounting(vcpu);
-   store_regs(vcpu, kvm_run);
+   store_regs(vcpu);
 
kvm_sigset_deactivate(vcpu);
 
-- 
2.17.1



[PATCH v6 5/5] KVM: MIPS: clean up redundant kvm_run parameters in assembly

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
Reviewed-by: Huacai Chen 
---
 arch/mips/include/asm/kvm_host.h |  4 ++--
 arch/mips/kvm/entry.c| 21 -
 arch/mips/kvm/mips.c |  3 ++-
 arch/mips/kvm/trap_emul.c|  2 +-
 arch/mips/kvm/vz.c   |  2 +-
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 157fc876feca..01efa635fa73 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -352,7 +352,7 @@ struct kvm_mmu_memory_cache {
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
void *guest_ebase;
-   int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+   int (*vcpu_run)(struct kvm_vcpu *vcpu);
 
/* Host registers preserved across guest mode execution */
unsigned long host_stack;
@@ -863,7 +863,7 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks 
**install_callbacks);
 /* Debug: dump vcpu state */
 int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 
-extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvm_mips_handle_exit(struct kvm_vcpu *vcpu);
 
 /* Building of entry/exception code */
 int kvm_mips_entry_setup(void);
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
index fd716942e302..832475bf2055 100644
--- a/arch/mips/kvm/entry.c
+++ b/arch/mips/kvm/entry.c
@@ -205,7 +205,7 @@ static inline void build_set_exc_base(u32 **p, unsigned int 
reg)
  * Assemble the start of the vcpu_run function to run a guest VCPU. The 
function
  * conforms to the following prototype:
  *
- * int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ * int vcpu_run(struct kvm_vcpu *vcpu);
  *
  * The exit from the guest and return to the caller is handled by the code
  * generated by kvm_mips_build_ret_to_host().
@@ -218,8 +218,7 @@ void *kvm_mips_build_vcpu_run(void *addr)
unsigned int i;
 
/*
-* A0: run
-* A1: vcpu
+* A0: vcpu
 */
 
/* k0/k1 not being used in host kernel context */
@@ -238,10 +237,10 @@ void *kvm_mips_build_vcpu_run(void *addr)
	kvm_mips_build_save_scratch(&p, V1, K1);
 
/* VCPU scratch register has pointer to vcpu */
-   UASM_i_MTC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
+   UASM_i_MTC0(&p, A0, scratch_vcpu[0], scratch_vcpu[1]);
 
/* Offset into vcpu->arch */
-   UASM_i_ADDIU(&p, K1, A1, offsetof(struct kvm_vcpu, arch));
+   UASM_i_ADDIU(&p, K1, A0, offsetof(struct kvm_vcpu, arch));
 
/*
 * Save the host stack to VCPU, used for exception processing
@@ -645,10 +644,7 @@ void *kvm_mips_build_exit(void *addr)
/* Now that context has been saved, we can use other registers */
 
/* Restore vcpu */
-   UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
-
-   /* Restore run (vcpu->run) */
-   UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1);
+   UASM_i_MFC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
 
/*
 * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
@@ -810,7 +806,6 @@ void *kvm_mips_build_exit(void *addr)
 * with this in the kernel
 */
	uasm_i_move(&p, A0, S0);
-   uasm_i_move(&p, A1, S1);
	UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
	uasm_i_jalr(&p, RA, T9);
	 UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
@@ -852,7 +847,7 @@ static void *kvm_mips_build_ret_from_exit(void *addr)
 * guest, reload k1
 */
 
-   uasm_i_move(&p, K1, S1);
+   uasm_i_move(&p, K1, S0);
	UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
 
/*
@@ -886,8 +881,8 @@ static void *kvm_mips_build_ret_to_guest(void *addr)
 {
u32 *p = addr;
 
-   /* Put the saved pointer to vcpu (s1) back into the scratch register */
-   UASM_i_MTC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
+   /* Put the saved pointer to vcpu (s0) back into the scratch register */
+   UASM_i_MTC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
 
/* Load up the Guest EBASE to minimize the window where BEV is set */
	UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index f5ba393472e3..21bfbf414d2c 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1195,8 +1195,9 @@ static void kvm_mips_set_c0_status(void)
 /*
  * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
  */
-int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
 {
+   struct kvm_run *run = 

[PATCH v6 2/5] KVM: arm64: clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
Reviewed-by: Vitaly Kuznetsov 
---
 arch/arm64/include/asm/kvm_coproc.h | 12 +-
 arch/arm64/include/asm/kvm_host.h   | 11 -
 arch/arm64/include/asm/kvm_mmu.h|  2 +-
 arch/arm64/kvm/arm.c|  6 ++---
 arch/arm64/kvm/handle_exit.c| 36 ++---
 arch/arm64/kvm/mmio.c   | 11 +
 arch/arm64/kvm/mmu.c|  5 ++--
 arch/arm64/kvm/sys_regs.c   | 13 +--
 8 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_coproc.h 
b/arch/arm64/include/asm/kvm_coproc.h
index 0185ee8b8b5e..454373704b8a 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -27,12 +27,12 @@ struct kvm_sys_reg_target_table {
 void kvm_register_target_sys_reg_table(unsigned int target,
   struct kvm_sys_reg_target_table *table);
 
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
 
 #define kvm_coproc_table_init kvm_sys_reg_table_init
 void kvm_sys_reg_table_init(void);
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index c3e6fcc664b1..5c9db5767ba4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -481,18 +481,15 @@ u64 __kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
-   int exception_index);
-void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
-  int exception_index);
+int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
+void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
 /* MMIO helpers */
 void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
 unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
 
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
-phys_addr_t fault_ipa);
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
+int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
 
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b12bfc1f051a..40be8f6c7351 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -139,7 +139,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
  phys_addr_t pa, unsigned long size, bool writable);
 
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 90cb90561446..985ede7bcca0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -658,7 +658,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
 
if (run->exit_reason == KVM_EXIT_MMIO) {
-   ret = kvm_handle_mmio_return(vcpu, run);
+   ret = kvm_handle_mmio_return(vcpu);
if (ret)
return ret;
}
@@ -810,11 +810,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), 
*vcpu_pc(vcpu));
 
/* Exit types that need handling before we can be preempted */
-   handle_exit_early(vcpu, run, ret);
+   handle_exit_early(vcpu, ret);
 
preempt_enable();
 
-   ret = handle_exit(vcpu, run, ret);
+   ret = handle_exit(vcpu, ret);
}
 
/* Tell userspace about in-kernel device output levels */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 

[PATCH v6 4/5] KVM: MIPS: clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang
In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
structure. For historical reasons, many kvm-related function parameters
retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
patch does a unified cleanup of these remaining redundant parameters.

Signed-off-by: Tianjia Zhang 
Reviewed-by: Huacai Chen 
---
 arch/mips/include/asm/kvm_host.h |  28 +---
 arch/mips/kvm/emulate.c  |  59 ++--
 arch/mips/kvm/mips.c |  11 ++-
 arch/mips/kvm/trap_emul.c| 114 ++-
 arch/mips/kvm/vz.c   |  26 +++
 5 files changed, 87 insertions(+), 151 deletions(-)

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 363e7a89d173..157fc876feca 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -854,8 +854,8 @@ struct kvm_mips_callbacks {
   const struct kvm_one_reg *reg, s64 v);
int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu);
-   int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
-   void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+   int (*vcpu_run)(struct kvm_vcpu *vcpu);
+   void (*vcpu_reenter)(struct kvm_vcpu *vcpu);
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -910,7 +910,6 @@ extern int kvm_mips_handle_mapped_seg_tlb_fault(struct 
kvm_vcpu *vcpu,
 
 extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
 u32 *opc,
-struct kvm_run *run,
 struct kvm_vcpu *vcpu,
 bool write_fault);
 
@@ -1021,83 +1020,67 @@ static inline bool kvm_is_ifetch_fault(struct 
kvm_vcpu_arch *vcpu)
 
 extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
   u32 *opc,
-  struct kvm_run *run,
   struct kvm_vcpu *vcpu);
 
 long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
  u32 *opc,
- struct kvm_run *run,
  struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
 u32 *opc,
-struct kvm_run *run,
 struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
u32 *opc,
-   struct kvm_run *run,
struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
 u32 *opc,
-struct kvm_run *run,
 struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
u32 *opc,
-   struct kvm_run *run,
struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
 u32 *opc,
-struct kvm_run *run,
 struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
  u32 *opc,
- struct kvm_run *run,
  struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_handle_ri(u32 cause,
u32 *opc,
-   struct kvm_run *run,
struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
 u32 *opc,
-struct kvm_run *run,
 struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
   

[PATCH] powerpc/boot/dts: Fix dtc "pciex" warnings

2020-06-23 Thread Michael Ellerman
With CONFIG_OF_ALL_DTBS=y, as set by eg. allmodconfig, we see lots of
warnings about our dts files, such as:

  arch/powerpc/boot/dts/glacier.dts:492.26-532.5:
  Warning (pci_bridge): /plb/pciex@d: node name is not "pci"
  or "pcie"

The node name should not particularly matter, it's just a name, and
AFAICS there's no kernel code that cares whether nodes are *named*
"pciex" or "pcie". So shutup these warnings by converting to the name
dtc wants.

As always there's some risk this could break something obscure that
does rely on the name, in which case we can revert.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/boot/dts/akebono.dts | 8 
 arch/powerpc/boot/dts/bluestone.dts   | 2 +-
 arch/powerpc/boot/dts/canyonlands.dts | 4 ++--
 arch/powerpc/boot/dts/currituck.dts   | 6 +++---
 arch/powerpc/boot/dts/glacier.dts | 4 ++--
 arch/powerpc/boot/dts/haleakala.dts   | 2 +-
 arch/powerpc/boot/dts/icon.dts| 4 ++--
 arch/powerpc/boot/dts/katmai.dts  | 6 +++---
 arch/powerpc/boot/dts/kilauea.dts | 4 ++--
 arch/powerpc/boot/dts/makalu.dts  | 4 ++--
 arch/powerpc/boot/dts/redwood.dts | 6 +++---
 11 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/boot/dts/akebono.dts 
b/arch/powerpc/boot/dts/akebono.dts
index cd9d66041a3f..df18f8dc4642 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -248,7 +248,7 @@ FPGA0: fpga@ebc0 {
};
};
 
-   PCIE0: pciex@101 {
+   PCIE0: pcie@101 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -288,7 +288,7 @@ PCIE0: pciex@101 {
				0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>;
};
 
-   PCIE1: pciex@201 {
+   PCIE1: pcie@201 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -328,7 +328,7 @@ PCIE1: pciex@201 {
				0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>;
};
 
-   PCIE2: pciex@181 {
+   PCIE2: pcie@181 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -368,7 +368,7 @@ PCIE2: pciex@181 {
				0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>;
};
 
-   PCIE3: pciex@281 {
+   PCIE3: pcie@281 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/bluestone.dts 
b/arch/powerpc/boot/dts/bluestone.dts
index cc965a1816b6..aa1ae94cd776 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -325,7 +325,7 @@ EMAC0: ethernet@ef600c00 {
};
};
 
-   PCIE0: pciex@d {
+   PCIE0: pcie@d {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/canyonlands.dts 
b/arch/powerpc/boot/dts/canyonlands.dts
index 0d6ac92d0f5e..c5fbb08e0a6e 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -461,7 +461,7 @@ PCIX0: pci@c0ec0 {
				interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
};
 
-   PCIE0: pciex@d {
+   PCIE0: pcie@d {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -503,7 +503,7 @@ PCIE0: pciex@d {
				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D 
*/>;
};
 
-   PCIE1: pciex@d2000 {
+   PCIE1: pcie@d2000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/currituck.dts 
b/arch/powerpc/boot/dts/currituck.dts
index b6d87b9c2cef..aea8af810106 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -122,7 +122,7 @@ rtc@68 {
};
};
 
-   PCIE0: pciex@101 {  // 4xGBIF1
+   PCIE0: pcie@101 {   // 4xGBIF1
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -160,7 +160,7 @@ PCIE0: pciex@101 {  // 4xGBIF1
				0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>;
};
 
-   PCIE1: 

Re: [v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Markus Elfring
>>>   if the parameter
>>> is NULL, clk_prepare_enable and clk_disable_unprepare will
>>> return immediately.
>>
>> The interpretation of these function implementations seems to be reasonable.
>> Would you like to achieve any improvements for the corresponding software 
>> documentation?
>
> Which document do you mean?

Example:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/clk.h?id=dd0d718152e4c65b173070d48ea9dfc06894c3e5#n905
https://elixir.bootlin.com/linux/v5.7.2/source/include/linux/clk.h#L905
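
Both links document that a NULL clk is treated as a valid dummy clock and
that the prepare/enable calls return immediately for it, so the pattern
under discussion reduces to (hypothetical call site; the field name is
illustrative):

	/* Before: redundant guard */
	if (priv->mclk)
		clk_disable_unprepare(priv->mclk);

	/* After: clk_disable_unprepare(NULL) is a no-op */
	clk_disable_unprepare(priv->mclk);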

Regards,
Markus


Re: [PATCH] ASoC: fsl_easrc: Fix uninitialized scalar variable in fsl_easrc_set_ctx_format

2020-06-23 Thread Mark Brown
On Mon, 22 Jun 2020 17:03:31 +0800, Shengjiu Wang wrote:
> The "ret" in fsl_easrc_set_ctx_format is not initialized, then
> the unknown value maybe returned by this function.

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl_easrc: Fix uninitialized scalar variable in 
fsl_easrc_set_ctx_format
  commit: 5748f4eb01a4df7a42024fe8bc7855f05febb7c5

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH v2 0/2] Fix unchecked return value for clk_prepare_enable

2020-06-23 Thread Mark Brown
On Tue, 23 Jun 2020 14:01:10 +0800, Shengjiu Wang wrote:
> First patch is to remove the check of clock pointer before calling
> clk API.
> 
> Second patch is to fix the issue that the return value of
> clk_prepare_enable is not checked.
> 
> changes in v2:
> - split the patch to separate patches
> 
> [...]

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API
  commit: adf46113a608d9515801997fc96cbfe8ffa89ed3
[2/2] ASoC: fsl_mqs: Fix unchecked return value for clk_prepare_enable
  commit: 15217d170a4461c1d4c1ea7c497e1fc1122e42a9

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: linux-next: manual merge of the pidfd tree with the powerpc-fixes tree

2020-06-23 Thread Michael Ellerman
Christian Brauner  writes:
> On Fri, Jun 19, 2020 at 09:17:30PM +1000, Michael Ellerman wrote:
>> Stephen Rothwell  writes:
>> > Hi all,
>> >
>> > Today's linux-next merge of the pidfd tree got a conflict in:
>> >
>> >   arch/powerpc/kernel/syscalls/syscall.tbl
>> >
>> > between commit:
>> >
>> >   35e32a6cb5f6 ("powerpc/syscalls: Split SPU-ness out of ABI")
>> >
>> > from the powerpc-fixes tree and commit:
>> >
>> >   9b4feb630e8e ("arch: wire-up close_range()")
>> >
>> > from the pidfd tree.
>> >
>> > I fixed it up (see below) and can carry the fix as necessary. This
>> > is now fixed as far as linux-next is concerned, but any non trivial
>> > conflicts should be mentioned to your upstream maintainer when your tree
>> > is submitted for merging.  You may also want to consider cooperating
>> > with the maintainer of the conflicting tree to minimise any particularly
>> > complex conflicts.
...
>> 
>> I'm planning to send those changes to Linus for rc2, so the conflict
>> will then be vs mainline. But I guess it's pretty trivial so it doesn't
>> really matter.
>
> close_range() is targeted for the v5.9 merge window. I always do
> test-merges with mainline at the time I'm creating a pr and I'll just
> mention to Linus that there's conflict with ppc. :)

I ended up dropping the patch, so there shouldn't be a conflict anymore.

cheers


Re: [PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Shengjiu Wang
On Tue, Jun 23, 2020 at 4:55 PM Markus Elfring  wrote:
>
> > clk_prepare_enable and clk_disable_unprepare check the input
> > clock parameter in the beginning of the function,
>
> These functions call further functions which perform null pointer checks.
>
>
> >   if the parameter
> > is NULL, clk_prepare_enable and clk_disable_unprepare will
> > return immediately.
>
> The interpretation of these function implementations seems to be reasonable.
> Would you like to achieve any improvements for the corresponding software 
> documentation?

Which document do you mean?

>
>
> > So Don't need to check input clock parameters before calling clk API.
>
> What do you find imperative in this wording?
>
> Another wording alternative:
>Thus omit extra null pointer checks before four function calls.
>
> Regards,
> Markus


Re: [PATCH v1 3/3] powerpc/mm/radix: Free PUD table when freeing pagetable

2020-06-23 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> remove_pagetable() isn't freeing the PUD table. This causes a memory
> leak during memory unplug. Fix this.
>

Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
> b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 58e42393d5e8..8ec2110eaa1a 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -782,6 +782,21 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
>   pud_clear(pud);
>  }
>  
> +static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
> +{
> + pud_t *pud;
> + int i;
> +
> + for (i = 0; i < PTRS_PER_PUD; i++) {
> + pud = pud_start + i;
> + if (!pud_none(*pud))
> + return;

Should we do a VM_WARN() here?

> + }
> +
> + pud_free(&init_mm, pud_start);
> + p4d_clear(p4d);
> +}
> +
>  struct change_mapping_params {
>   pte_t *pte;
>   unsigned long start;
> @@ -956,6 +971,7 @@ static void __meminit remove_pagetable(unsigned long 
> start, unsigned long end)
>  
>   pud_base = (pud_t *)p4d_page_vaddr(*p4d);
>   remove_pud_table(pud_base, addr, next);
> + free_pud_table(pud_base, p4d);
>   }
>  
>   spin_unlock(&init_mm.page_table_lock);
> -- 
> 2.21.3


Re: [PATCH v1 2/3] powerpc/mm/radix: Fix PTE/PMD fragment count for early page table mappings

2020-06-23 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> We can hit the following BUG_ON during memory unplug:
>
> kernel BUG at arch/powerpc/mm/book3s64/pgtable.c:342!
> Oops: Exception in kernel mode, sig: 5 [#1]
> LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> NIP [c0093308] pmd_fragment_free+0x48/0xc0
> LR [c147bfec] remove_pagetable+0x578/0x60c
> Call Trace:
> 0xc0805000 (unreliable)
> remove_pagetable+0x384/0x60c
> radix__remove_section_mapping+0x18/0x2c
> remove_section_mapping+0x1c/0x3c
> arch_remove_memory+0x11c/0x180
> try_remove_memory+0x120/0x1b0
> __remove_memory+0x20/0x40
> dlpar_remove_lmb+0xc0/0x114
> dlpar_memory+0x8b0/0xb20
> handle_dlpar_errorlog+0xc0/0x190
> pseries_hp_work_fn+0x2c/0x60
> process_one_work+0x30c/0x810
> worker_thread+0x98/0x540
> kthread+0x1c4/0x1d0
> ret_from_kernel_thread+0x5c/0x74
>
> This occurs when unplug is attempted for memory that was
> mapped using memblock pages as part of early kernel page
> table setup. We wouldn't have initialized the PMD or PTE fragment
> count for those PMD or PTE pages.
>
> Fixing this includes 3 parts:
>
> - Re-walk the init_mm page tables from mem_init() and initialize
>   the PMD and PTE fragment count to 1.
> - When freeing PUD, PMD and PTE page table pages, check explicitly
>   if they come from memblock and if so free them appropriately.
> - When we do early memblock based allocation of PMD and PUD pages,
>   allocate in PAGE_SIZE granularity so that we are sure the
>   complete page is used as pagetable page.
>
> Since we now do PAGE_SIZE allocations for both PUD table and
> PMD table (Note that PTE table allocation is already of PAGE_SIZE),
> we end up allocating more memory for the same amount of system RAM.
> Here is a comparison of how much more we need for a 64T and a 2G
> system after this patch:
>
> 1. 64T system
> -
> 64T RAM would need 64G for vmemmap with struct page size being 64B.
>
> 128 PUD tables for 64T memory (1G mappings)
> 1 PUD table and 64 PMD tables for 64G vmemmap (2M mappings)
>
> With default PUD[PMD]_TABLE_SIZE(4K), (128+1+64)*4K=772K
> With PAGE_SIZE(64K) table allocations, (128+1+64)*64K=12352K
>
> 2. 2G system
> 
> 2G RAM would need 2M for vmemmap with struct page size being 64B.
>
> 1 PUD table for 2G memory (1G mapping)
> 1 PUD table and 1 PMD table for 2M vmemmap (2M mappings)
>
> With default PUD[PMD]_TABLE_SIZE(4K), (1+1+1)*4K=12K
> With new PAGE_SIZE(64K) table allocations, (1+1+1)*64K=192K

How about we just do

void pmd_fragment_free(unsigned long *pmd)
{
	struct page *page = virt_to_page(pmd);

	/*
	 * Early pmd pages allocated via memblock
	 * allocator need to be freed differently
	 */
	if (PageReserved(page))
		return free_reserved_page(page);

	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

That way we could avoid the fixup_pgtable_fragments completely?

>
> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/include/asm/book3s/64/pgalloc.h | 11 ++-
>  arch/powerpc/include/asm/book3s/64/radix.h   |  1 +
>  arch/powerpc/include/asm/sparsemem.h |  1 +
>  arch/powerpc/mm/book3s64/pgtable.c   | 31 +++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 80 +++-
>  arch/powerpc/mm/mem.c|  5 ++
>  arch/powerpc/mm/pgtable-frag.c   |  9 ++-
>  7 files changed, 129 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
> b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> index 69c5b051734f..56d695f0095c 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
> @@ -109,7 +109,16 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
> unsigned long addr)
>  
>  static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>  {
> - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
> + struct page *page = virt_to_page(pud);
> +
> + /*
> +  * Early pud pages allocated via memblock allocator
> +  * can't be directly freed to slab
> +  */
> + if (PageReserved(page))
> + free_reserved_page(page);
> + else
> + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
>  }
>  
>  static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> b/arch/powerpc/include/asm/book3s/64/radix.h
> index 0cba794c4fb8..90f05d52f46d 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -297,6 +297,7 @@ static inline unsigned long radix__get_tree_size(void)
>  int radix__create_section_mapping(unsigned long start, unsigned long end,
> int nid, pgprot_t prot);
>  int 

Re: [PATCH v1 1/3] powerpc/mm/radix: Create separate mappings for hot-plugged memory

2020-06-23 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> Memory that gets hot-plugged _during_ boot (and not the memory
> that gets plugged in after boot), is mapped with 1G mappings
> and will undergo splitting when it is unplugged. The splitting
> code has a few issues:
>
> 1. Recursive locking
> 
> Memory unplug path takes cpu_hotplug_lock and calls stop_machine()
> for splitting the mappings. However stop_machine() takes
> cpu_hotplug_lock again causing deadlock.
>
> 2. BUG: sleeping function called from in_atomic() context
> -
> Memory unplug path (remove_pagetable) takes init_mm.page_table_lock
> spinlock and later calls stop_machine() which does wait_for_completion()
>
> 3. Bad unlock unbalance
> ---
> Memory unplug path takes init_mm.page_table_lock spinlock and calls
> stop_machine(). The stop_machine thread function runs in a different
> thread context (migration thread) which tries to release and reacquire
> ptl. Releasing ptl from a different thread than which acquired it
> causes bad unlock unbalance.
>
> These problems can be avoided if we avoid mapping hot-plugged memory
> with 1G mapping, thereby removing the need for splitting them during
> unplug. Hence, during radix init, identify the hot-plugged memory region
> and create separate mappings for each LMB so that they don't get mapped
> with 1G mappings. The identification of hot-plugged memory has become
> possible after the commit b6eca183e23e ("powerpc/kernel: Enables memory
> hot-remove after reboot on pseries guests").
>
> To create separate mappings for every LMB in the hot-plugged
> region, we need lmb-size for which we use memory_block_size_bytes().
> Since this is early init time code, the machine type isn't probed yet
> and hence memory_block_size_bytes() would return the default LMB size
> as 16MB. Hence we end up issuing a larger number of mapping
> requests than before.

Considering we can split 1G pages correctly, we can avoid doing this?



>
> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
> b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 8acb96de0e48..ffccfe00ca2a 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -320,6 +321,8 @@ static void __init radix_init_pgtable(void)
>  {
>   unsigned long rts_field;
>   struct memblock_region *reg;
> + phys_addr_t addr;
> + u64 lmb_size = memory_block_size_bytes();
>  
>   /* We don't support slb for radix */
>   mmu_slb_size = 0;
> @@ -338,9 +341,15 @@ static void __init radix_init_pgtable(void)
>   continue;
>   }
>  
> - WARN_ON(create_physical_mapping(reg->base,
> - reg->base + reg->size,
> - -1, PAGE_KERNEL));
> + if (memblock_is_hotpluggable(reg)) {
> + for (addr = reg->base; addr < (reg->base + reg->size);
> +  addr += lmb_size)
> + WARN_ON(create_physical_mapping(addr,
> + addr + lmb_size, -1, PAGE_KERNEL));
> + } else
> + WARN_ON(create_physical_mapping(reg->base,
> + reg->base + reg->size,
> + -1, PAGE_KERNEL));
>   }
>  
>   /* Find out how many PID bits are supported */
> -- 
> 2.21.3


[PATCH v2] hmi: Move hmi irq stat from percpu variable to paca.

2020-06-23 Thread Mahesh Salgaonkar
With the proposed change in the percpu bootmem allocator to use page
mapping [1], the percpu first-chunk memory area can come from the vmalloc
range. This makes the HMI handler crash the kernel whenever a percpu
variable is accessed in real mode. Fix this by moving the HMI irq stat
into the paca, where it can be accessed safely in real mode.

[1] 
https://lore.kernel.org/linuxppc-dev/20200608070904.387440-1-aneesh.ku...@linux.ibm.com/
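
To make the failure mode concrete (an illustrative sketch, not part of the
patch):

/*
 * __this_cpu_inc(irq_stat.hmi_exceptions) dereferences a percpu address;
 * if the percpu first chunk lives in the vmalloc range, that address is
 * not mapped while the MMU is off (real mode), and the access faults.
 *
 * local_paca (kept in r13) is always accessible in real mode, so a
 * counter placed in the paca can be bumped safely from the HMI handler:
 */
local_paca->hmi_irqs++;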

Suggested-by: Aneesh Kumar K.V 
Signed-off-by: Mahesh Salgaonkar 
---
Machine check handling also touches percpu variables in real mode. Will
address that in a separate patchset.

Change in v2:
- Fix the build failures for pmac32 and ppc64e configs.
---
 arch/powerpc/include/asm/hardirq.h |1 -
 arch/powerpc/include/asm/paca.h|1 +
 arch/powerpc/kernel/irq.c  |8 ++--
 arch/powerpc/kernel/mce.c  |2 +-
 arch/powerpc/kvm/book3s_hv_ras.c   |2 +-
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/hardirq.h 
b/arch/powerpc/include/asm/hardirq.h
index f1e9067bd5ac..f133b5930ae1 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -13,7 +13,6 @@ typedef struct {
unsigned int pmu_irqs;
unsigned int mce_exceptions;
unsigned int spurious_irqs;
-   unsigned int hmi_exceptions;
unsigned int sreset_irqs;
 #ifdef CONFIG_PPC_WATCHDOG
unsigned int soft_nmi_irqs;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 45a839a7c6cf..cc07c399306e 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -225,6 +225,7 @@ struct paca_struct {
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
u8 hmi_p9_special_emu;  /* HMI P9 special emulation */
+   u32 hmi_irqs;   /* HMI irq stat */
 #endif
u8 ftrace_enabled;  /* Hard disable ftrace */
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 112d150354b2..a05f9ce05459 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -621,13 +621,15 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
seq_printf(p, "  Machine check exceptions\n");
 
+#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_HVMODE)) {
seq_printf(p, "%*s: ", prec, "HMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
-   per_cpu(irq_stat, j).hmi_exceptions);
+   paca_ptrs[j]->hmi_irqs);
seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
}
+#endif
 
seq_printf(p, "%*s: ", prec, "NMI");
for_each_online_cpu(j)
@@ -665,7 +667,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += per_cpu(irq_stat, cpu).mce_exceptions;
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
-   sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+#ifdef CONFIG_PPC_BOOK3S_64
+   sum += paca_ptrs[cpu]->hmi_irqs;
+#endif
sum += per_cpu(irq_stat, cpu).sreset_irqs;
 #ifdef CONFIG_PPC_WATCHDOG
sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd90c0eda229..dc11fc16750f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -711,7 +711,7 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {  
int ret;
 
-   __this_cpu_inc(irq_stat.hmi_exceptions);
+   local_paca->hmi_irqs++;
 
ret = hmi_handle_debugtrig(regs);
if (ret >= 0)
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 79f7d07ef674..6028628ea3ac 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -244,7 +244,7 @@ long kvmppc_realmode_hmi_handler(void)
 {
bool resync_req;
 
-   __this_cpu_inc(irq_stat.hmi_exceptions);
+   local_paca->hmi_irqs++;
 
if (hmi_handle_debugtrig(NULL) >= 0)
return 1;




Re: [PATCH v4 0/7] clean up redundant 'kvm_run' parameters

2020-06-23 Thread Paolo Bonzini
On 23/06/20 12:00, Tianjia Zhang wrote:
> 
> 
> On 2020/6/23 17:42, Paolo Bonzini wrote:
>> On 27/04/20 06:35, Tianjia Zhang wrote:
>>> In the current kvm version, 'kvm_run' has been included in the
>>> 'kvm_vcpu'
>>> structure. For historical reasons, many kvm-related function parameters
>>> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
>>> patch does a unified cleanup of these remaining redundant parameters.
>>>
>>> This series of patches has completely cleaned the architecture of
>>> arm64, mips, ppc, and s390 (no such redundant code on x86). Due to
>>> the large number of modified codes, a separate patch is made for each
>>> platform. On the ppc platform, there is also a redundant structure
>>> pointer of 'kvm_run' in 'vcpu_arch', which has also been cleaned
>>> separately.
>>
>> Tianjia, can you please refresh the patches so that each architecture
>> maintainer can pick them up?  Thanks very much for this work!
>>
>> Paolo
>>
> 
> No problem, this is what I should do.
> After I update, do I submit separately for each architecture or submit
> them together in a patchset?

You can send them together.

Paolo



Re: [PATCH v4 0/7] clean up redundant 'kvm_run' parameters

2020-06-23 Thread Tianjia Zhang




On 2020/6/23 17:42, Paolo Bonzini wrote:
> On 27/04/20 06:35, Tianjia Zhang wrote:
>> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
>> structure. For historical reasons, many kvm-related function parameters
>> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
>> patch does a unified cleanup of these remaining redundant parameters.
>>
>> This series of patches has completely cleaned the architecture of
>> arm64, mips, ppc, and s390 (no such redundant code on x86). Due to
>> the large number of modified codes, a separate patch is made for each
>> platform. On the ppc platform, there is also a redundant structure
>> pointer of 'kvm_run' in 'vcpu_arch', which has also been cleaned
>> separately.
>
> Tianjia, can you please refresh the patches so that each architecture
> maintainer can pick them up?  Thanks very much for this work!
>
> Paolo



No problem, this is what I should do.
After I update, do I submit separately for each architecture or submit 
them together in a patchset?


Thanks,
Tianjia


Re: [PATCH V3 (RESEND) 0/3] arm64: Enable vmemmap mapping from device memory

2020-06-23 Thread Jia He

Hi

I also tested the following additional cases on arm64:

1, 4k page size + devdax + --map=mem

2, 64k page size + devdax + --map=mem

3, 4k page size + devdax + --map=dev

4, 64k page size + devdax + --map=dev

Case 4 is the important one for verifying Anshuman's series.

Host kernel: 5.7-rc3

guest kernel: 5.7-rc5 with this series

ndctl: https://github.com/pmem/ndctl/tree/c7767834871 



On the guest:

1. ./ndctl/.libs/ndctl create-namespace -e namespace0.0 --mode=devdax --map=dev 
-s 1g -f -v -a 64K


 echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
 echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id

The 1g block was added

2. echo 0 > /sys/devices/system/memory/memory10/online

modprobe -r dax_pmem

The 1g block was removed


Some minor fixes that are not relevant to this series itself still need
to be applied, e.g. for the NUMA node id.


---
Cheers,
Justin (Jia He)

On 2020/6/18 9:15, Anshuman Khandual wrote:
> This series enables vmemmap backing memory allocation from device memory
> ranges on arm64. But before that, it enables vmemmap_populate_basepages()
> and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based
> allocation requests.
>
> This series applies on 5.8-rc1.
>
> Pending Question:
>
> altmap_alloc_block_buf() does not have any other remaining users in the
> tree after this change. Should it be converted into a static function and
> its declaration be dropped from the header (include/linux/mm.h)? Avoided
> doing so because I was not sure if there are any off-tree users or not.
>
> Changes in V3:
>
> - Dropped comment from free_hotplug_page_range() per Robin
> - Modified comment in unmap_hotplug_range() per Robin
> - Enabled altmap support in vmemmap_alloc_block_buf() per Robin
>
> Changes in V2: (https://lkml.org/lkml/2020/3/4/475)
>
> - Rebased on latest hot-remove series (v14) adding P4D page table support
>
> Changes in V1: (https://lkml.org/lkml/2020/1/23/12)
>
> - Added a WARN_ON() in unmap_hotplug_range() when altmap is
>   provided without the page table backing memory being freed
>
> Changes in RFC V2: (https://lkml.org/lkml/2019/10/21/11)
>
> - Changed the commit message on 1/2 patch per Will
> - Changed the commit message on 2/2 patch as well
> - Rebased on arm64 memory hot remove series (v10)
>
> RFC V1: (https://lkml.org/lkml/2019/6/28/32)
>
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Mark Rutland 
> Cc: Paul Walmsley 
> Cc: Palmer Dabbelt 
> Cc: Tony Luck 
> Cc: Fenghua Yu 
> Cc: Dave Hansen 
> Cc: Andy Lutomirski 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: David Hildenbrand 
> Cc: Mike Rapoport 
> Cc: Michal Hocko 
> Cc: "Matthew Wilcox (Oracle)" 
> Cc: "Kirill A. Shutemov" 
> Cc: Andrew Morton 
> Cc: Dan Williams 
> Cc: Pavel Tatashin 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-i...@vger.kernel.org
> Cc: linux-ri...@lists.infradead.org
> Cc: x...@kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux...@kvack.org
> Cc: linux-ker...@vger.kernel.org
>
> Anshuman Khandual (3):
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_alloc_block_buf()
>   arm64/mm: Enable vmem_altmap support for vmemmap mappings
>
>  arch/arm64/mm/mmu.c   | 59 ++-
>  arch/ia64/mm/discontig.c  |  2 +-
>  arch/powerpc/mm/init_64.c | 10 +++
>  arch/riscv/mm/init.c  |  2 +-
>  arch/x86/mm/init_64.c | 12 
>  include/linux/mm.h|  8 --
>  mm/sparse-vmemmap.c   | 38 -
>  7 files changed, 87 insertions(+), 44 deletions(-)


--



Re: [PATCH 17/17] arch: rename copy_thread_tls() back to copy_thread()

2020-06-23 Thread Geert Uytterhoeven
On Tue, Jun 23, 2020 at 1:47 AM Christian Brauner
 wrote:
> Now that HAVE_COPY_THREAD_TLS has been removed, rename copy_thread_tls()
> back to simply copy_thread(). It's a simpler name, and doesn't imply that only
> tls is copied here. This finishes an outstanding chunk of internal process
> creation work since we've added clone3().

> Signed-off-by: Christian Brauner 

>  arch/m68k/kernel/process.c   | 2 +-

Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH 16/17] arch: remove HAVE_COPY_THREAD_TLS

2020-06-23 Thread Geert Uytterhoeven
On Tue, Jun 23, 2020 at 1:47 AM Christian Brauner
 wrote:
> All architectures support copy_thread_tls() now, so remove the legacy
> copy_thread() function and the HAVE_COPY_THREAD_TLS config option. Everyone
> uses the same process creation calling convention based on
> copy_thread_tls() and struct kernel_clone_args. This will make it easier to
> maintain the core process creation code under kernel/, simplifies the
> callpaths and makes the identical for all architectures.

> Signed-off-by: Christian Brauner 

>  arch/m68k/Kconfig  |  1 -

Acked-by: Geert Uytterhoeven 

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v4 0/7] clean up redundant 'kvm_run' parameters

2020-06-23 Thread Paolo Bonzini
On 27/04/20 06:35, Tianjia Zhang wrote:
> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
> structure. For historical reasons, many kvm-related function parameters
> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
> patch does a unified cleanup of these remaining redundant parameters.
> 
> This series of patches has completely cleaned the architecture of
> arm64, mips, ppc, and s390 (no such redundant code on x86). Due to
> the large number of modified codes, a separate patch is made for each
> platform. On the ppc platform, there is also a redundant structure
> pointer of 'kvm_run' in 'vcpu_arch', which has also been cleaned
> separately.

Tianjia, can you please refresh the patches so that each architecture
maintainer can pick them up?  Thanks very much for this work!

Paolo

> 
> ---
> v4 change:
>   mips: fixes two errors in entry.c.
> 
> v3 change:
>   Keep the existing `vcpu->run` in the function body unchanged.
> 
> v2 change:
>   s390 retains the original variable name and minimizes modification.
> 
> Tianjia Zhang (7):
>   KVM: s390: clean up redundant 'kvm_run' parameters
>   KVM: arm64: clean up redundant 'kvm_run' parameters
>   KVM: PPC: Remove redundant kvm_run from vcpu_arch
>   KVM: PPC: clean up redundant 'kvm_run' parameters
>   KVM: PPC: clean up redundant kvm_run parameters in assembly
>   KVM: MIPS: clean up redundant 'kvm_run' parameters
>   KVM: MIPS: clean up redundant kvm_run parameters in assembly
> 
>  arch/arm64/include/asm/kvm_coproc.h  |  12 +--
>  arch/arm64/include/asm/kvm_host.h|  11 +--
>  arch/arm64/include/asm/kvm_mmu.h |   2 +-
>  arch/arm64/kvm/handle_exit.c |  36 +++
>  arch/arm64/kvm/sys_regs.c|  13 ++-
>  arch/mips/include/asm/kvm_host.h |  32 +--
>  arch/mips/kvm/emulate.c  |  59 
>  arch/mips/kvm/entry.c|  21 ++---
>  arch/mips/kvm/mips.c |  14 +--
>  arch/mips/kvm/trap_emul.c| 114 ++-
>  arch/mips/kvm/vz.c   |  26 ++
>  arch/powerpc/include/asm/kvm_book3s.h|  16 ++--
>  arch/powerpc/include/asm/kvm_host.h  |   1 -
>  arch/powerpc/include/asm/kvm_ppc.h   |  27 +++---
>  arch/powerpc/kvm/book3s.c|   4 +-
>  arch/powerpc/kvm/book3s.h|   2 +-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c  |  12 +--
>  arch/powerpc/kvm/book3s_64_mmu_radix.c   |   4 +-
>  arch/powerpc/kvm/book3s_emulate.c|  10 +-
>  arch/powerpc/kvm/book3s_hv.c |  64 ++---
>  arch/powerpc/kvm/book3s_hv_nested.c  |  12 +--
>  arch/powerpc/kvm/book3s_interrupts.S |  17 ++--
>  arch/powerpc/kvm/book3s_paired_singles.c |  72 +++---
>  arch/powerpc/kvm/book3s_pr.c |  33 ---
>  arch/powerpc/kvm/booke.c |  39 
>  arch/powerpc/kvm/booke.h |   8 +-
>  arch/powerpc/kvm/booke_emulate.c |   2 +-
>  arch/powerpc/kvm/booke_interrupts.S  |   9 +-
>  arch/powerpc/kvm/bookehv_interrupts.S|  10 +-
>  arch/powerpc/kvm/e500_emulate.c  |  15 ++-
>  arch/powerpc/kvm/emulate.c   |  10 +-
>  arch/powerpc/kvm/emulate_loadstore.c |  32 +++
>  arch/powerpc/kvm/powerpc.c   |  72 +++---
>  arch/powerpc/kvm/trace_hv.h  |   6 +-
>  arch/s390/kvm/kvm-s390.c |  23 +++--
>  virt/kvm/arm/arm.c   |   6 +-
>  virt/kvm/arm/mmio.c  |  11 ++-
>  virt/kvm/arm/mmu.c   |   5 +-
>  38 files changed, 392 insertions(+), 470 deletions(-)
> 



Re: [PATCH v4 6/8] arm: Break cyclic percpu include

2020-06-23 Thread Will Deacon
On Tue, Jun 23, 2020 at 10:36:51AM +0200, Peter Zijlstra wrote:
> In order to use  in irqflags.h, we need to make sure
> asm/percpu.h does not itself depend on irqflags.h.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  arch/arm/include/asm/percpu.h |2 ++
>  1 file changed, 2 insertions(+)
> 
> --- a/arch/arm/include/asm/percpu.h
> +++ b/arch/arm/include/asm/percpu.h
> @@ -10,6 +10,8 @@
>   * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
>   */
>  #if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
> +register unsigned long current_stack_pointer asm ("sp");

If you define this unconditionally, then we can probably get rid of the
copy in asm/thread_info.h, rather than duplicate the same #define.
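
I.e. something like the below (sketch only, untested):

/* arch/arm/include/asm/percpu.h: declared unconditionally, outside the
 * CONFIG_SMP && !CONFIG_CPU_V6 guard */
register unsigned long current_stack_pointer asm ("sp");

/* arch/arm/include/asm/thread_info.h: the identical declaration there
 * can then be dropped. */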

Will


Re: [PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Markus Elfring
> clk_prepare_enable and clk_disable_unprepare check the input
> clock parameter in the beginning of the function,

These functions call further functions which perform null pointer checks.


>   if the parameter
> is NULL, clk_prepare_enable and clk_disable_unprepare will
> return immediately.

The interpretation of these function implementations seems to be reasonable.
Would you like to achieve any improvements for the corresponding software 
documentation?


> So Don't need to check input clock parameters before calling clk API.

What do you find imperative in this wording?

Another wording alternative:
   Thus omit extra null pointer checks before four function calls.

Regards,
Markus


[PATCH v4 6/8] arm: Break cyclic percpu include

2020-06-23 Thread Peter Zijlstra
In order to use  in irqflags.h, we need to make sure
asm/percpu.h does not itself depend on irqflags.h.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/arm/include/asm/percpu.h |2 ++
 1 file changed, 2 insertions(+)

--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -10,6 +10,8 @@
  * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
  */
 #if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+register unsigned long current_stack_pointer asm ("sp");
+
 static inline void set_my_cpu_offset(unsigned long off)
 {
/* Set TPIDRPRW */




[PATCH v4 5/8] s390: Break cyclic percpu include

2020-06-23 Thread Peter Zijlstra
In order to use  in irqflags.h, we need to make sure
asm/percpu.h does not itself depend on irqflags.h

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/s390/include/asm/smp.h |1 +
 arch/s390/include/asm/thread_info.h |1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include 
 
 #define raw_smp_processor_id() (S390_lowcore.cpu_nr)
 
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
 #ifndef __ASSEMBLY__
 #include 
 #include 
-#include 
 
 #define STACK_INIT_OFFSET \
(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))




[PATCH v4 4/8] powerpc64: Break asm/percpu.h vs spinlock_types.h dependency

2020-06-23 Thread Peter Zijlstra
In order to use  in lockdep.h, we need to make sure
asm/percpu.h does not itself depend on lockdep.

The below seems to make that so and builds powerpc64-defconfig +
PROVE_LOCKING.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/powerpc/include/asm/dtl.h |   52 +
 arch/powerpc/include/asm/lppaca.h  |   44 ---
 arch/powerpc/include/asm/paca.h|2 -
 arch/powerpc/kernel/time.c |2 +
 arch/powerpc/kvm/book3s_hv.c   |1 
 arch/powerpc/platforms/pseries/dtl.c   |1 
 arch/powerpc/platforms/pseries/lpar.c  |1 
 arch/powerpc/platforms/pseries/setup.c |1 
 arch/powerpc/platforms/pseries/svm.c   |1 
 9 files changed, 60 insertions(+), 45 deletions(-)

--- /dev/null
+++ b/arch/powerpc/include/asm/dtl.h
@@ -0,0 +1,52 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include 
+#include 
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+   u8  dispatch_reason;
+   u8  preempt_reason;
+   __be16  processor_id;
+   __be32  enqueue_to_dispatch_time;
+   __be32  ready_to_enqueue_time;
+   __be32  waiting_to_ready_time;
+   __be64  timebase;
+   __be64  fault_addr;
+   __be64  srr0;
+   __be64  srr1;
+};
+
+#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */
+#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ *   0x1: voluntary virtual processor waits
+ *   0x2: time-slice preempts
+ *   0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE   0x1
+#define DTL_LOG_PREEMPT0x2
+#define DTL_LOG_FAULT  0x4
+#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log.  If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif /* _ASM_POWERPC_DTL_H */
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -42,7 +42,6 @@
  */
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -146,49 +145,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
 } cacheline_aligned;
 
-/*
- * Layout of entries in the hypervisor's dispatch trace log buffer.
- */
-struct dtl_entry {
-   u8  dispatch_reason;
-   u8  preempt_reason;
-   __be16  processor_id;
-   __be32  enqueue_to_dispatch_time;
-   __be32  ready_to_enqueue_time;
-   __be32  waiting_to_ready_time;
-   __be64  timebase;
-   __be64  fault_addr;
-   __be64  srr0;
-   __be64  srr1;
-};
-
-#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */
-#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
-
-/*
- * Dispatch trace log event enable mask:
- *   0x1: voluntary virtual processor waits
- *   0x2: time-slice preempts
- *   0x4: virtual partition memory page faults
- */
-#define DTL_LOG_CEDE   0x1
-#define DTL_LOG_PREEMPT0x2
-#define DTL_LOG_FAULT  0x4
-#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
-
-extern struct kmem_cache *dtl_cache;
-extern rwlock_t dtl_access_lock;
-
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log.  If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
-extern void register_dtl_buffer(int cpu);
-extern void alloc_dtl_buffers(unsigned long *time_limit);
 extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
 
 #endif /* CONFIG_PPC_BOOK3S */
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 
@@ -53,6 +52,7 @@ extern unsigned int debug_smp_processor_
 #define get_slb_shadow()   (get_paca()->slb_shadow_ptr)
 
 struct task_struct;
+struct rtas_args;
 
 /*
  * Defines the layout of the paca.
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -183,6 +183,8 @@ static inline unsigned long read_spurr(u
 
 #ifdef CONFIG_PPC_SPLPAR
 
+#include 
+
 /*
  * Scan the dispatch trace log and count up the stolen time.
  * Should be called with interrupts disabled.
--- 

[PATCH v4 8/8] lockdep: Remove lockdep_hardirq{s_enabled, _context}() argument

2020-06-23 Thread Peter Zijlstra
Now that the macros use per-cpu data, we no longer need the argument.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/x86/entry/common.c|2 +-
 include/linux/irqflags.h   |8 
 include/linux/lockdep.h|2 +-
 kernel/locking/lockdep.c   |   30 +++---
 kernel/softirq.c   |2 +-
 tools/include/linux/irqflags.h |4 ++--
 6 files changed, 24 insertions(+), 24 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -689,7 +689,7 @@ noinstr void idtentry_exit_user(struct p
 
 noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
 {
-   bool irq_state = lockdep_hardirqs_enabled(current);
+   bool irq_state = lockdep_hardirqs_enabled();
 
__nmi_enter();
lockdep_hardirqs_off(CALLER_ADDR0);
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -40,9 +40,9 @@ DECLARE_PER_CPU(int, hardirq_context);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context))
+# define lockdep_hardirq_context() (this_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p)   (this_cpu_read(hardirqs_enabled))
+# define lockdep_hardirqs_enabled()(this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)   ((p)->softirqs_enabled)
 # define lockdep_hardirq_enter()   \
 do {   \
@@ -109,9 +109,9 @@ do {\
 # define trace_hardirqs_off_finish()   do { } while (0)
 # define trace_hardirqs_on()   do { } while (0)
 # define trace_hardirqs_off()  do { } while (0)
-# define lockdep_hardirq_context(p)0
+# define lockdep_hardirq_context() 0
 # define lockdep_softirq_context(p)0
-# define lockdep_hardirqs_enabled(p)   0
+# define lockdep_hardirqs_enabled()0
 # define lockdep_softirqs_enabled(p)   0
 # define lockdep_hardirq_enter()   do { } while (0)
 # define lockdep_hardirq_threaded()do { } while (0)
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -736,7 +736,7 @@ do {
\
 
 # define lockdep_assert_RT_in_threaded_ctx() do {  \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- lockdep_hardirq_context(current) &&   \
+ lockdep_hardirq_context() &&  \
  !(current->hardirq_threaded || current->irq_config),  
\
  "Not in threaded context on PREEMPT_RT as 
expected\n");   \
 } while (0)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_str
pr_warn("-\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
-   lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+   lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
-   lockdep_hardirqs_enabled(curr),
+   lockdep_hardirqs_enabled(),
curr->softirqs_enabled);
print_lock(next);
 
@@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr
 
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
-   lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+   lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
-   lockdep_hardirqs_enabled(curr),
+   lockdep_hardirqs_enabled(),
lockdep_softirqs_enabled(curr));
print_lock(this);
 
@@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigne
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
 
-   if (unlikely(lockdep_hardirqs_enabled(current))) {
+   if (unlikely(lockdep_hardirqs_enabled())) {
/*
 * Neither irq nor preemption are disabled here
 * so this is racy by nature but losing one hit
@@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigne
 * Can't allow enabling interrupts while in an interrupt handler,
 * that's general bad form and such. Recursion, limited stack etc..
 */
-   if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current)))
+   if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()))
return;
 

[PATCH v4 2/8] x86/entry: Fix NMI vs IRQ state tracking

2020-06-23 Thread Peter Zijlstra
While the nmi_enter() users did
trace_hardirqs_{off_prepare,on_finish}() there was no matching
lockdep_hardirqs_*() calls to complete the picture.

Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across the NMIs.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/x86/entry/common.c |   42 
 arch/x86/include/asm/idtentry.h |3 ++
 arch/x86/kernel/nmi.c   |9 +++-
 arch/x86/kernel/traps.c |   17 +---
 include/linux/hardirq.h |   28 ++
 5 files changed, 70 insertions(+), 29 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -550,7 +550,7 @@ SYSCALL_DEFINE0(ni_syscall)
  * The return value must be fed into the rcu_exit argument of
  * idtentry_exit_cond_rcu().
  */
-bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+noinstr bool idtentry_enter_cond_rcu(struct pt_regs *regs)
 {
if (user_mode(regs)) {
enter_from_user_mode();
@@ -640,7 +640,7 @@ static void idtentry_exit_cond_resched(s
  * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
  * function must be fed into the @rcu_exit argument.
  */
-void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+noinstr void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
 {
lockdep_assert_irqs_disabled();
 
@@ -684,7 +684,7 @@ void noinstr idtentry_exit_cond_rcu(stru
  * Invokes enter_from_user_mode() to establish the proper context for
  * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
  */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
 {
enter_from_user_mode();
 }
@@ -701,13 +701,47 @@ void noinstr idtentry_enter_user(struct
  *
  * Counterpart to idtentry_enter_user().
  */
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
 {
lockdep_assert_irqs_disabled();
 
prepare_exit_to_usermode(regs);
 }
 
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+   bool irq_state = lockdep_hardirqs_enabled(current);
+
+   __nmi_enter();
+   lockdep_hardirqs_off(CALLER_ADDR0);
+   lockdep_hardirq_enter();
+   rcu_nmi_enter();
+
+   instrumentation_begin();
+   trace_hardirqs_off_finish();
+   ftrace_nmi_enter();
+   instrumentation_end();
+
+   return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+   instrumentation_begin();
+   ftrace_nmi_exit();
+   if (restore) {
+   trace_hardirqs_on_prepare();
+   lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+   }
+   instrumentation_end();
+
+   rcu_nmi_exit();
+   lockdep_hardirq_exit();
+   if (restore)
+   lockdep_hardirqs_on(CALLER_ADDR0);
+   __nmi_exit();
+}
+
 #ifdef CONFIG_XEN_PV
 #ifndef CONFIG_PREEMPTION
 /*
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -16,6 +16,9 @@ void idtentry_exit_user(struct pt_regs *
 bool idtentry_enter_cond_rcu(struct pt_regs *regs);
 void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
 
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+
 /**
  * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
  *   No error code pushed by hardware
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struc
__this_cpu_write(last_nmi_rip, regs->ip);
 
instrumentation_begin();
-   trace_hardirqs_off_finish();
 
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struc
unknown_nmi_error(reason, regs);
 
 out:
-   if (regs->flags & X86_EFLAGS_IF)
-   trace_hardirqs_on_prepare();
instrumentation_end();
 }
 
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi
 
 DEFINE_IDTENTRY_RAW(exc_nmi)
 {
+   bool irq_state;
+
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
 
@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 
this_cpu_write(nmi_dr7, local_db_save());
 
-   nmi_enter();
+   irq_state = idtentry_enter_nmi(regs);
 
inc_irq_stat(__nmi_count);
 
if (!ignore_nmis)
default_do_nmi(regs);
 
-   nmi_exit();
+   idtentry_exit_nmi(regs, irq_state);
 
local_db_restore(this_cpu_read(nmi_dr7));
 
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -403,7 +403,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
}
 #endif
 
-   nmi_enter();
+   idtentry_enter_nmi(regs);
instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, 

[PATCH v4 1/8] lockdep: Prepare for NMI IRQ state tracking

2020-06-23 Thread Peter Zijlstra
There is no reason not to always, accurately, track IRQ state.

This change also makes IRQ state tracking ignore lockdep_off().

Signed-off-by: Peter Zijlstra (Intel) 
---
 kernel/locking/lockdep.c |   44 +---
 1 file changed, 41 insertions(+), 3 deletions(-)

--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3646,7 +3646,16 @@ static void __trace_hardirqs_on_caller(v
  */
 void lockdep_hardirqs_on_prepare(unsigned long ip)
 {
-   if (unlikely(!debug_locks || current->lockdep_recursion))
+   if (unlikely(!debug_locks))
+   return;
+
+   /*
+* NMIs do not (and cannot) track lock dependencies, nothing to do.
+*/
+   if (unlikely(in_nmi()))
+   return;
+
+   if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
 
if (unlikely(current->hardirqs_enabled)) {
@@ -3692,7 +3701,27 @@ void noinstr lockdep_hardirqs_on(unsigne
 {
struct task_struct *curr = current;
 
-   if (unlikely(!debug_locks || curr->lockdep_recursion))
+   if (unlikely(!debug_locks))
+   return;
+
+   /*
+* NMIs can happen in the middle of local_irq_{en,dis}able() where the
+* tracking state and hardware state are out of sync.
+*
+* NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
+* and not rely on hardware state like normal interrupts.
+*/
+   if (unlikely(in_nmi())) {
+   /*
+* Skip:
+*  - recursion check, because NMI can hit lockdep;
+*  - hardware state check, because above;
+*  - chain_key check, see lockdep_hardirqs_on_prepare().
+*/
+   goto skip_checks;
+   }
+
+   if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
 
if (curr->hardirqs_enabled) {
@@ -3720,6 +3749,7 @@ void noinstr lockdep_hardirqs_on(unsigne
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
 
+skip_checks:
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
curr->hardirq_enable_ip = ip;
@@ -3735,7 +3765,15 @@ void noinstr lockdep_hardirqs_off(unsign
 {
struct task_struct *curr = current;
 
-   if (unlikely(!debug_locks || curr->lockdep_recursion))
+   if (unlikely(!debug_locks))
+   return;
+
+   /*
+* Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
+* they will restore the software state. This ensures the software
+* state is consistent inside NMIs as well.
+*/
+   if (unlikely(!in_nmi() && (current->lockdep_recursion & 
LOCKDEP_RECURSION_MASK)))
return;
 
/*




[PATCH v4 3/8] sparc64: Fix asm/percpu.h build error

2020-06-23 Thread Peter Zijlstra
In order to break a header dependency between lockdep and task_struct,
I need per-cpu stuff from lockdep.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/sparc/include/asm/percpu_64.h  |2 ++
 arch/sparc/include/asm/trap_block.h |2 ++
 2 files changed, 4 insertions(+)

--- a/arch/sparc/include/asm/percpu_64.h
+++ b/arch/sparc/include/asm/percpu_64.h
@@ -4,7 +4,9 @@
 
 #include 
 
+#ifndef BUILD_VDSO
 register unsigned long __local_per_cpu_offset asm("g5");
+#endif
 
 #ifdef CONFIG_SMP
 
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_TRAP_BLOCK_H
 #define _SPARC_TRAP_BLOCK_H
 
+#include 
+
 #include 
 #include 
 




[PATCH v4 0/8] lockdep: Change IRQ state tracking to use per-cpu variables

2020-06-23 Thread Peter Zijlstra
Ahmed and Sebastian wanted additional lockdep_assert*() macros and ran into
header hell. I figured using per-cpu variables would cure that, and also
ran into header hell, still tractable though.

By moving the IRQ state into per-cpu variables we remove the dependency on
task_struct.
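
As a rough illustration of the payoff, a hypothetical header-only helper
(sketch; with the per-cpu state the assert no longer needs task_struct):

#include <linux/lockdep.h>
#include <linux/io.h>

static inline void poke_hw(void __iomem *reg, u32 val)
{
	lockdep_assert_irqs_disabled();	/* now reads a per-cpu variable */
	writel(val, reg);
}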

Patches go on top of anything recent I think, an actual git tree with them
in is (for now) here:

  git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git 
locking/irqstate

Which 0day blessed with 0 build fails.




[PATCH v4 7/8] lockdep: Change hardirq{s_enabled, _context} to per-cpu variables

2020-06-23 Thread Peter Zijlstra
Currently all IRQ-tracking state is in task_struct, this means that
task_struct needs to be defined before we use it.

Especially for lockdep_assert_irq*() this can lead to header-hell.

Move the hardirq state into per-cpu variables to avoid the task_struct
dependency.

Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/irqflags.h |   19 ---
 include/linux/lockdep.h  |   34 ++
 include/linux/sched.h|2 --
 kernel/fork.c|4 +---
 kernel/locking/lockdep.c |   30 +++---
 kernel/softirq.c |6 ++
 6 files changed, 52 insertions(+), 43 deletions(-)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@
 
 #include 
 #include 
+#include 
 
 /* Currently lockdep_softirqs_on/off is used only by lockdep */
 #ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,22 @@
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
   extern void trace_hardirqs_on_prepare(void);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p)((p)->hardirq_context)
+# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p)   ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled(p)   (this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)   ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter()   \
-do {   \
-   if (!current->hardirq_context++)\
-   current->hardirq_threaded = 0;  \
+# define lockdep_hardirq_enter()   \
+do {   \
+   if (this_cpu_inc_return(hardirq_context) == 1)  \
+   current->hardirq_threaded = 0;  \
 } while (0)
 # define lockdep_hardirq_threaded()\
 do {   \
@@ -50,7 +55,7 @@ do {  \
 } while (0)
 # define lockdep_hardirq_exit()\
 do {   \
-   current->hardirq_context--; \
+   this_cpu_dec(hardirq_context);  \
 } while (0)
 # define lockdep_softirq_enter()   \
 do {   \
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -20,6 +20,7 @@ extern int lock_stat;
 #define MAX_LOCKDEP_SUBCLASSES 8UL
 
 #include 
+#include 
 
 enum lockdep_wait_type {
LD_WAIT_INV = 0,/* not checked, catch all */
@@ -703,28 +704,29 @@ do {  
\
lock_release(&(lock)->dep_map, _THIS_IP_);  \
 } while (0)
 
-#define lockdep_assert_irqs_enabled()  do {\
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled,   \
- "IRQs not enabled as expected\n");\
-   } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
 
-#define lockdep_assert_irqs_disabled() do {\
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled,\
- "IRQs not disabled as expected\n");   \
-   } while (0)
+#define lockdep_assert_irqs_enabled()  \
+do {   \
+   WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
+} while (0)
 
-#define lockdep_assert_in_irq() do {   \
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context,\
- "Not in hardirq as expected\n");  \
-   } while (0)
+#define lockdep_assert_irqs_disabled() \
+do {   \
+   WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled));   \
+} while (0)
+
+#define lockdep_assert_in_irq()
\
+do {   \
+   WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context));   \
+} while (0)
 
 #else
 # define might_lock(lock) do { } while (0)
 # define might_lock_read(lock) do { } while (0)
 # define might_lock_nested(lock, subclass) do { } while (0)
+
 # define lockdep_assert_irqs_enabled() do { } while (0)
 # define 

Re: [PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Shengjiu Wang
On Tue, Jun 23, 2020 at 3:38 PM Markus Elfring  wrote:
>
> > In-Reply-To: 
>
> I guess that it should be sufficient to specify such a field once
> for the header information.

Seems it's caused by my "git format-patch" command; I will update
it and hope it is better next time.

>
>
> > Because clk_prepare_enable and clk_disable_unprepare should
> > check input clock parameter is NULL or not internally,
>
> I find this change description unclear.

clk_prepare_enable and clk_disable_unprepare check the input
clock parameter in the beginning of the function, if the parameter
is NULL, clk_prepare_enable and clk_disable_unprepare will
return immediately.

So Don't need to check input clock parameters before calling clk
API.
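
In code, the change amounts to this (a sketch; "mqs_priv->mclk" stands in
for the driver's clock handle):

/* before: open-coded NULL check */
if (mqs_priv->mclk)
	clk_disable_unprepare(mqs_priv->mclk);

/* after: clk_disable_unprepare() is a no-op for a NULL clock */
clk_disable_unprepare(mqs_priv->mclk);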

Do you think this commit message is better?

best regards
wang shengjiu


Re: [PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Markus Elfring
> In-Reply-To: 

I guess that it should be sufficient to specify such a field once
for the header information.


> Because clk_prepare_enable and clk_disable_unprepare should
> check input clock parameter is NULL or not internally,

I find this change description unclear.


> then we don't need to check them before calling the function.

Please use an imperative wording for the commit message.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=dd0d718152e4c65b173070d48ea9dfc06894c3e5#n151

Regards,
Markus


[PATCH v1 2/3] powerpc/mm/radix: Fix PTE/PMD fragment count for early page table mappings

2020-06-23 Thread Bharata B Rao
We can hit the following BUG_ON during memory unplug:

kernel BUG at arch/powerpc/mm/book3s64/pgtable.c:342!
Oops: Exception in kernel mode, sig: 5 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
NIP [c0093308] pmd_fragment_free+0x48/0xc0
LR [c147bfec] remove_pagetable+0x578/0x60c
Call Trace:
0xc0805000 (unreliable)
remove_pagetable+0x384/0x60c
radix__remove_section_mapping+0x18/0x2c
remove_section_mapping+0x1c/0x3c
arch_remove_memory+0x11c/0x180
try_remove_memory+0x120/0x1b0
__remove_memory+0x20/0x40
dlpar_remove_lmb+0xc0/0x114
dlpar_memory+0x8b0/0xb20
handle_dlpar_errorlog+0xc0/0x190
pseries_hp_work_fn+0x2c/0x60
process_one_work+0x30c/0x810
worker_thread+0x98/0x540
kthread+0x1c4/0x1d0
ret_from_kernel_thread+0x5c/0x74

This occurs when unplug is attempted for memory that was mapped
using memblock pages as part of early kernel page table setup. We
wouldn't have initialized the PMD or PTE fragment
count for those PMD or PTE pages.

Fixing this includes 3 parts:

- Re-walk the init_mm page tables from mem_init() and initialize
  the PMD and PTE fragment count to 1.
- When freeing PUD, PMD and PTE page table pages, check explicitly
  if they come from memblock and if so free them appropriately.
- When we do early memblock based allocation of PMD and PUD pages,
  allocate in PAGE_SIZE granularity so that we are sure the
  complete page is used as pagetable page.

Since we now do PAGE_SIZE allocations for both PUD table and
PMD table (Note that PTE table allocation is already of PAGE_SIZE),
we end up allocating more memory for the same amount of system RAM.
Here is a comparison of how much more we need for a 64T and a 2G
system after this patch:

1. 64T system
-
64T RAM would need 64G for vmemmap with struct page size being 64B.

128 PUD tables for 64T memory (1G mappings)
1 PUD table and 64 PMD tables for 64G vmemmap (2M mappings)

With default PUD[PMD]_TABLE_SIZE(4K), (128+1+64)*4K=772K
With PAGE_SIZE(64K) table allocations, (128+1+64)*64K=12352K

2. 2G system

2G RAM would need 2M for vmemmap with struct page size being 64B.

1 PUD table for 2G memory (1G mapping)
1 PUD table and 1 PMD table for 2M vmemmap (2M mappings)

With default PUD[PMD]_TABLE_SIZE(4K), (1+1+1)*4K=12K
With new PAGE_SIZE(64K) table allocations, (1+1+1)*64K=192K

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 11 ++-
 arch/powerpc/include/asm/book3s/64/radix.h   |  1 +
 arch/powerpc/include/asm/sparsemem.h |  1 +
 arch/powerpc/mm/book3s64/pgtable.c   | 31 +++-
 arch/powerpc/mm/book3s64/radix_pgtable.c | 80 +++-
 arch/powerpc/mm/mem.c|  5 ++
 arch/powerpc/mm/pgtable-frag.c   |  9 ++-
 7 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 69c5b051734f..56d695f0095c 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -109,7 +109,16 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
unsigned long addr)
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-   kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
+   struct page *page = virt_to_page(pud);
+
+   /*
+* Early pud pages allocated via memblock allocator
+* can't be directly freed to slab
+*/
+   if (PageReserved(page))
+   free_reserved_page(page);
+   else
+   kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
b/arch/powerpc/include/asm/book3s/64/radix.h
index 0cba794c4fb8..90f05d52f46d 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -297,6 +297,7 @@ static inline unsigned long radix__get_tree_size(void)
 int radix__create_section_mapping(unsigned long start, unsigned long end,
  int nid, pgprot_t prot);
 int radix__remove_section_mapping(unsigned long start, unsigned long end);
+void radix__fixup_pgtable_fragments(void);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/sparsemem.h 
b/arch/powerpc/include/asm/sparsemem.h
index c89b32443cff..d0b22a937a7a 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -16,6 +16,7 @@
 extern int create_section_mapping(unsigned long start, unsigned long end,
  int nid, pgprot_t prot);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
+void fixup_pgtable_fragments(void);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 

[PATCH v1 3/3] powerpc/mm/radix: Free PUD table when freeing pagetable

2020-06-23 Thread Bharata B Rao
remove_pagetable() isn't freeing the PUD table. This causes a memory
leak during memory unplug. Fix this.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 58e42393d5e8..8ec2110eaa1a 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -782,6 +782,21 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
 }
 
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+   pud_t *pud;
+   int i;
+
+   for (i = 0; i < PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (!pud_none(*pud))
+   return;
+   }
+
+   pud_free(&init_mm, pud_start);
+   p4d_clear(p4d);
+}
+
 struct change_mapping_params {
pte_t *pte;
unsigned long start;
@@ -956,6 +971,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
 
pud_base = (pud_t *)p4d_page_vaddr(*p4d);
remove_pud_table(pud_base, addr, next);
+   free_pud_table(pud_base, p4d);
}
 
	spin_unlock(&init_mm.page_table_lock);
-- 
2.21.3



[PATCH v1 1/3] powerpc/mm/radix: Create separate mappings for hot-plugged memory

2020-06-23 Thread Bharata B Rao
Memory that gets hot-plugged _during_ boot (as opposed to memory
that gets plugged in after boot) is mapped with 1G mappings and
will undergo splitting when it is unplugged. The splitting code
has a few issues:

1. Recursive locking
--------------------
The memory unplug path takes cpu_hotplug_lock and calls stop_machine()
to split the mappings. However, stop_machine() takes cpu_hotplug_lock
again, causing a deadlock.

2. BUG: sleeping function called from in_atomic() context
---------------------------------------------------------
The memory unplug path (remove_pagetable) takes the init_mm.page_table_lock
spinlock and later calls stop_machine(), which sleeps in
wait_for_completion().

3. Bad unlock unbalance
-----------------------
The memory unplug path takes the init_mm.page_table_lock spinlock and
calls stop_machine(). The stop_machine() thread function runs in a
different thread context (the migration thread), which tries to release
and reacquire the ptl. Releasing the ptl from a thread other than the
one that acquired it triggers the bad unlock unbalance warning.

These problems can be avoided if we avoid mapping hot-plugged memory
with 1G mappings in the first place, thereby removing the need to split
them during unplug. Hence, during radix init, identify the hot-plugged
memory region and create separate mappings for each LMB so that they
don't get mapped with 1G mappings. Identifying hot-plugged memory became
possible with commit b6eca183e23e ("powerpc/kernel: Enables memory
hot-remove after reboot on pseries guests").

To create separate mappings for every LMB in the hot-plugged region, we
need the LMB size, for which we use memory_block_size_bytes(). Since
this is early init code, the machine type isn't probed yet, so
memory_block_size_bytes() returns the default LMB size of 16MB. We
therefore end up issuing many more mapping requests than before; the
sketch below puts a number on that.
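
To illustrate (my arithmetic, not from the patch): with the default
16MB LMB size, a hypothetical 64G hot-pluggable region now takes 4096
create_physical_mapping() calls instead of one:

#include <stdio.h>

int main(void)
{
	unsigned long long region = 64ULL << 30; /* hypothetical 64G hot-pluggable region */
	unsigned long long lmb = 16ULL << 20;    /* default early-init LMB size: 16MB */

	/* before: one create_physical_mapping() call for the whole region;
	 * after: one call per LMB */
	printf("mapping requests: before=1, after=%llu\n", region / lmb);
	return 0;
}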

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 8acb96de0e48..ffccfe00ca2a 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -320,6 +321,8 @@ static void __init radix_init_pgtable(void)
 {
unsigned long rts_field;
struct memblock_region *reg;
+   phys_addr_t addr;
+   u64 lmb_size = memory_block_size_bytes();
 
/* We don't support slb for radix */
mmu_slb_size = 0;
@@ -338,9 +341,15 @@ static void __init radix_init_pgtable(void)
continue;
}
 
-   WARN_ON(create_physical_mapping(reg->base,
-   reg->base + reg->size,
-   -1, PAGE_KERNEL));
+   if (memblock_is_hotpluggable(reg)) {
+   for (addr = reg->base; addr < (reg->base + reg->size);
+addr += lmb_size)
+   WARN_ON(create_physical_mapping(addr,
+   addr + lmb_size, -1, PAGE_KERNEL));
+   } else
+   WARN_ON(create_physical_mapping(reg->base,
+   reg->base + reg->size,
+   -1, PAGE_KERNEL));
}
 
/* Find out how many PID bits are supported */
-- 
2.21.3



[PATCH v1 0/3] powerpc/mm/radix: Memory unplug fixes

2020-06-23 Thread Bharata B Rao
This is the next version of the fixes for memory unplug on radix.
The issues and the fixes are described in the individual patches.

Changes in v1:
==============
- Rebased to latest kernel.
- Took care of p4d changes.
- Addressed Aneesh's review feedback:
 - Added comments.
 - Indentation fixed.
- Dropped the 1st patch (setting DRCONF_MEM_HOTREMOVABLE lmb flags), as
  it is debatable whether this flag should be set in the device tree by
  the OS rather than by the platform in the hotplug case. This can be
  looked at separately. (The fixes in this patchset remain valid without
  the dropped patch.)
- Dropped the last patch that removed split_kernel_mapping(), to ensure
  that the splitting code remains available for any radix guest running
  on platforms that don't set DRCONF_MEM_HOTREMOVABLE.

v0: 
https://lore.kernel.org/linuxppc-dev/20200406034925.22586-1-bhar...@linux.ibm.com/

Bharata B Rao (3):
  powerpc/mm/radix: Create separate mappings for hot-plugged memory
  powerpc/mm/radix: Fix PTE/PMD fragment count for early page table
mappings
  powerpc/mm/radix: Free PUD table when freeing pagetable

 arch/powerpc/include/asm/book3s/64/pgalloc.h |  11 +-
 arch/powerpc/include/asm/book3s/64/radix.h   |   1 +
 arch/powerpc/include/asm/sparsemem.h |   1 +
 arch/powerpc/mm/book3s64/pgtable.c   |  31 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c | 111 +--
 arch/powerpc/mm/mem.c|   5 +
 arch/powerpc/mm/pgtable-frag.c   |   9 +-
 7 files changed, 157 insertions(+), 12 deletions(-)

-- 
2.21.3



[PATCH v2 00/15] Documentation fixes

2020-06-23 Thread Mauro Carvalho Chehab
Hi Jon,

As requested, this is a rebase of a previous series posted on Jan 15.

Since then, several patches were merged via other trees or became
obsolete. There were also two patches that fit better in the ReST
conversion patchset, so I'll send them in another series together with
the remaining ReST conversions.

I also added the reviews/acks received.

The series has thus shrunk from 29 to 15 patches.

Let's hope b4 is able to handle this one properly.

Regards,
Mauro

Mauro Carvalho Chehab (15):
  mm: vmalloc.c: remove a kernel-doc annotation from a removed parameter
  net: dev: add a missing kernel-doc annotation
  net: netdevice.h: add a description for napi_defer_hard_irqs
  scripts/kernel-doc: parse __ETHTOOL_DECLARE_LINK_MODE_MASK
  net: phylink.h: add kernel-doc descriptions for new fields at
phylink_config
  scripts/kernel-doc: handle function pointer prototypes
  fs: fs.h: fix a kernel-doc parameter description
  kcsan: fix a kernel-doc warning
  selftests/vm/keys: fix a broken reference at protection_keys.c
  docs: hugetlbpage.rst: fix some warnings
  docs: powerpc: fix some issues at vas-api.rst
  docs: driver-model: remove a duplicated markup at driver.rst
  docs: ABI: fix a typo when pointing to w1-generic.rst
  docs: fix references for DMA*.txt files
  docs: fs: proc.rst: convert a new chapter to ReST

 .../ABI/testing/sysfs-driver-w1_therm |  2 +-
 Documentation/PCI/pci.rst |  6 +--
 Documentation/admin-guide/mm/hugetlbpage.rst  | 23 +++---
 Documentation/block/biodoc.rst|  2 +-
 Documentation/bus-virt-phys-mapping.txt   |  2 +-
 Documentation/core-api/dma-api.rst|  6 +--
 Documentation/core-api/dma-isa-lpc.rst|  2 +-
 .../driver-api/driver-model/driver.rst|  2 -
 Documentation/driver-api/usb/dma.rst  |  6 +--
 Documentation/filesystems/proc.rst| 44 +--
 Documentation/powerpc/vas-api.rst | 23 +++---
 .../translations/ko_KR/memory-barriers.txt|  6 +--
 arch/ia64/hp/common/sba_iommu.c   | 12 ++---
 arch/parisc/kernel/pci-dma.c  |  2 +-
 arch/x86/include/asm/dma-mapping.h|  4 +-
 arch/x86/kernel/amd_gart_64.c |  2 +-
 drivers/parisc/sba_iommu.c| 14 +++---
 include/linux/dma-mapping.h   |  2 +-
 include/linux/fs.h|  2 +-
 include/linux/kcsan-checks.h  | 10 +++--
 include/linux/netdevice.h |  2 +
 include/linux/phylink.h   |  4 ++
 include/media/videobuf-dma-sg.h   |  2 +-
 kernel/dma/debug.c|  2 +-
 mm/vmalloc.c  |  1 -
 net/core/dev.c|  1 +
 scripts/kernel-doc|  7 +++
 tools/testing/selftests/vm/protection_keys.c  |  2 +-
 28 files changed, 114 insertions(+), 79 deletions(-)

-- 
2.26.2




[PATCH v2 11/15] docs: powerpc: fix some issues at vas-api.rst

2020-06-23 Thread Mauro Carvalho Chehab
There are a few issues in this document, seen when building it
with ``make htmldocs``:

Documentation/powerpc/vas-api.rst:116: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:116: WARNING: Inline emphasis start-string without end-string.
Documentation/powerpc/vas-api.rst:117: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:117: WARNING: Inline emphasis start-string without end-string.
Documentation/powerpc/vas-api.rst:120: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:124: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:133: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:135: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:150: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:151: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:161: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:176: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:253: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:253: WARNING: Inline emphasis start-string without end-string.
Documentation/powerpc/vas-api.rst:259: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:261: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:266: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:267: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:270: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:271: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:273: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:274: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:277: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:278: WARNING: Definition list ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:280: WARNING: Unexpected indentation.
Documentation/powerpc/vas-api.rst:287: WARNING: Block quote ends without a blank line; unexpected unindent.
Documentation/powerpc/vas-api.rst:289: WARNING: Block quote ends without a blank line; unexpected unindent.

Fixes: c12e38b1d52e ("Documentation/powerpc: VAS API")
Signed-off-by: Mauro Carvalho Chehab 
---
 Documentation/powerpc/vas-api.rst | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst
index 1217c2f1595e..b7fdbe560010 100644
--- a/Documentation/powerpc/vas-api.rst
+++ b/Documentation/powerpc/vas-api.rst
@@ -87,6 +87,7 @@ Applications may chose a specific instance of the NX co-processor using
 the vas_id field in the VAS_TX_WIN_OPEN ioctl as detailed below.
 
 A userspace library libnxz is available here but still in development:
+
 https://github.com/abalib/power-gzip
 
 Applications that use inflate / deflate calls can link with libnxz
@@ -110,6 +111,7 @@ Applications should use the VAS_TX_WIN_OPEN ioctl as follows to establish
 a connection with NX co-processor engine:
 
::
+
struct vas_tx_win_open_attr {
__u32   version;
__s16   vas_id; /* specific instance of vas or -1
@@ -119,8 +121,10 @@ a connection with NX co-processor engine:
__u64   reserved2[6];
};
 
-   version: The version field must be currently set to 1.
-   vas_id: If '-1' is passed, kernel will make a best-effort attempt
+   version:
+   The version field must be currently set to 1.
+   vas_id:
+   If '-1' is passed, kernel will make a best-effort attempt
to assign an optimal instance of NX for the process. To
select the specific VAS instance, refer
"Discovery of available VAS engines" section below.
@@ -129,7 +133,8 @@ a connection with NX co-processor engine:
and must be set to 0.
 
The attributes attr for the VAS_TX_WIN_OPEN ioctl are defined as
-   follows:
+   follows::
+
#define VAS_MAGIC 'v'
#define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 1,
struct vas_tx_win_open_attr)
@@ -141,6 +146,8 @@ a connection with NX co-processor engine:
returns -1 and sets the errno variable to indicate the error.
 
Error conditions:
+
+   

[PATCH 1/2] ASoC: fsl-asoc-card: Add WM8524 support

2020-06-23 Thread Shengjiu Wang
WM8524 supports playback only, and works only in slave mode.

Signed-off-by: Shengjiu Wang 
---
 sound/soc/fsl/fsl-asoc-card.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index d0543a53764e..57ea1b072326 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -611,6 +611,15 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
priv->dai_link[2].dpcm_capture = 0;
priv->card.dapm_routes = audio_map_tx;
priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx);
+   } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8524")) {
+   codec_dai_name = "wm8524-hifi";
+   priv->card.set_bias_level = NULL;
+   priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS;
+   priv->dai_link[1].dpcm_capture = 0;
+   priv->dai_link[2].dpcm_capture = 0;
+   priv->cpu_priv.slot_width = 32;
+   priv->card.dapm_routes = audio_map_tx;
+   priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx);
} else {
		dev_err(&pdev->dev, "unknown Device Tree compatible\n");
ret = -EINVAL;
@@ -760,6 +769,7 @@ static const struct of_device_id fsl_asoc_card_dt_ids[] = {
{ .compatible = "fsl,imx-audio-wm8962", },
{ .compatible = "fsl,imx-audio-wm8960", },
{ .compatible = "fsl,imx-audio-mqs", },
+   { .compatible = "fsl,imx-audio-wm8524", },
{}
 };
 MODULE_DEVICE_TABLE(of, fsl_asoc_card_dt_ids);
-- 
2.21.0



[PATCH 2/2] ASoC: bindings: fsl-asoc-card: Add compatible string for wm8524

2020-06-23 Thread Shengjiu Wang
In order to support the wm8524 codec with the fsl-asoc-card machine
driver, add the compatible string "fsl,imx-audio-wm8524".

Signed-off-by: Shengjiu Wang 
---
 Documentation/devicetree/bindings/sound/fsl-asoc-card.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
index ca9a3a43adfd..133d7e14a4d0 100644
--- a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
@@ -36,6 +36,8 @@ The compatible list for this generic sound card currently:
 
  "fsl,imx-audio-mqs"
 
+ "fsl,imx-audio-wm8524"
+
 Required properties:
 
   - compatible : Contains one of entries in the compatible list.
-- 
2.21.0



Re: [PATCH v2 2/2] ASoC: fsl_mqs: Fix unchecked return value for clk_prepare_enable

2020-06-23 Thread Nicolin Chen
On Tue, Jun 23, 2020 at 02:01:12PM +0800, Shengjiu Wang wrote:
> Fix the unchecked return value of clk_prepare_enable() by adding
> error handling to fsl_mqs_runtime_resume().
> 
> Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver")
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


Re: [PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Nicolin Chen
On Tue, Jun 23, 2020 at 02:01:11PM +0800, Shengjiu Wang wrote:
> Because clk_prepare_enable() and clk_disable_unprepare() internally
> check whether the input clock is NULL, we don't need to check for
> NULL before calling them.
> 
> Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver")
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


[PATCH v2 2/2] ASoC: fsl_mqs: Fix unchecked return value for clk_prepare_enable

2020-06-23 Thread Shengjiu Wang
Fix the unchecked return value of clk_prepare_enable() by adding
error handling to fsl_mqs_runtime_resume().

Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver")
Signed-off-by: Shengjiu Wang 
---
 sound/soc/fsl/fsl_mqs.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c
index b44b134390a3..69aeb0e71844 100644
--- a/sound/soc/fsl/fsl_mqs.c
+++ b/sound/soc/fsl/fsl_mqs.c
@@ -265,10 +265,20 @@ static int fsl_mqs_remove(struct platform_device *pdev)
 static int fsl_mqs_runtime_resume(struct device *dev)
 {
struct fsl_mqs *mqs_priv = dev_get_drvdata(dev);
+   int ret;
 
-   clk_prepare_enable(mqs_priv->ipg);
+   ret = clk_prepare_enable(mqs_priv->ipg);
+   if (ret) {
+   dev_err(dev, "failed to enable ipg clock\n");
+   return ret;
+   }
 
-   clk_prepare_enable(mqs_priv->mclk);
+   ret = clk_prepare_enable(mqs_priv->mclk);
+   if (ret) {
+   dev_err(dev, "failed to enable mclk clock\n");
+   clk_disable_unprepare(mqs_priv->ipg);
+   return ret;
+   }
 
if (mqs_priv->use_gpr)
regmap_write(mqs_priv->regmap, IOMUXC_GPR2,
-- 
2.21.0



[PATCH v2 1/2] ASoC: fsl_mqs: Don't check clock is NULL before calling clk API

2020-06-23 Thread Shengjiu Wang
Because clk_prepare_enable() and clk_disable_unprepare() internally
check whether the input clock is NULL, we don't need to check for
NULL before calling them.
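
For reference, the clk core itself treats a NULL clock as a no-op. A
paraphrased (not verbatim) sketch of the relevant helpers from
drivers/clk/clk.c:

/* paraphrased: a NULL clk is accepted and silently ignored */
int clk_prepare(struct clk *clk)
{
	if (!clk)
		return 0;

	return clk_core_prepare_lock(clk->core);
}

void clk_disable(struct clk *clk)
{
	if (IS_ERR_OR_NULL(clk))
		return;

	clk_core_disable_lock(clk->core);
}

clk_prepare_enable() and clk_disable_unprepare() are thin inline
wrappers around these, so the driver-side NULL checks are redundant.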

Fixes: 9e28f6532c61 ("ASoC: fsl_mqs: Add MQS component driver")
Signed-off-by: Shengjiu Wang 
---
 sound/soc/fsl/fsl_mqs.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c
index 0c813a45bba7..b44b134390a3 100644
--- a/sound/soc/fsl/fsl_mqs.c
+++ b/sound/soc/fsl/fsl_mqs.c
@@ -266,11 +266,9 @@ static int fsl_mqs_runtime_resume(struct device *dev)
 {
struct fsl_mqs *mqs_priv = dev_get_drvdata(dev);
 
-   if (mqs_priv->ipg)
-   clk_prepare_enable(mqs_priv->ipg);
+   clk_prepare_enable(mqs_priv->ipg);
 
-   if (mqs_priv->mclk)
-   clk_prepare_enable(mqs_priv->mclk);
+   clk_prepare_enable(mqs_priv->mclk);
 
if (mqs_priv->use_gpr)
regmap_write(mqs_priv->regmap, IOMUXC_GPR2,
@@ -292,11 +290,8 @@ static int fsl_mqs_runtime_suspend(struct device *dev)
regmap_read(mqs_priv->regmap, REG_MQS_CTRL,
		    &mqs_priv->reg_mqs_ctrl);
 
-   if (mqs_priv->mclk)
-   clk_disable_unprepare(mqs_priv->mclk);
-
-   if (mqs_priv->ipg)
-   clk_disable_unprepare(mqs_priv->ipg);
+   clk_disable_unprepare(mqs_priv->mclk);
+   clk_disable_unprepare(mqs_priv->ipg);
 
return 0;
 }
-- 
2.21.0



[PATCH v2 0/2] Fix unchecked return value for clk_prepare_enable

2020-06-23 Thread Shengjiu Wang
The first patch removes the NULL checks on the clock pointers before
calling the clk API.

The second patch fixes the issue that the return value of
clk_prepare_enable() is not checked.

changes in v2:
- split the patch to separate patches

Shengjiu Wang (2):
  ASoC: fsl_mqs: Don't check clock is NULL before calling clk API
  ASoC: fsl_mqs: Fix unchecked return value for clk_prepare_enable

 sound/soc/fsl/fsl_mqs.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

-- 
2.21.0