Re: [PATCH v2 9/9] powerpc/eeh: Add eeh_state_active() helper

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:49 pm, Sam Bobroff wrote:
> Checking for a "fully active" device state requires testing two flag
> bits, which is open coded in several places, so add a function to do
> it.
> 
> Signed-off-by: Sam Bobroff 


Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/include/asm/eeh.h   |  6 ++
>  arch/powerpc/kernel/eeh.c| 19 ++-
>  arch/powerpc/platforms/powernv/eeh-powernv.c |  9 ++---
>  3 files changed, 14 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index fd37cc101f4f..c2266ca61853 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long 
> flags)
>   raw_spin_unlock_irqrestore(_error_lock, flags);
>  }
>  
> +static inline bool eeh_state_active(int state)
> +{
> + return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
> + == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
> +}
> +
>  typedef void *(*eeh_traverse_func)(void *data, void *flag);
>  void eeh_set_pe_aux_size(int size);
>  int eeh_phb_pe_create(struct pci_controller *phb);
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 2b9df0040d6b..bc640e4c5ca5 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
>   /* Check PHB state */
>   ret = eeh_ops->get_state(phb_pe, NULL);
>   if ((ret < 0) ||
> - (ret == EEH_STATE_NOT_SUPPORT) ||
> - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
> - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
> + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
>   ret = 0;
>   goto out;
>   }
> @@ -433,7 +431,6 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
>  int eeh_dev_check_failure(struct eeh_dev *edev)
>  {
>   int ret;
> - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
>   unsigned long flags;
>   struct device_node *dn;
>   struct pci_dev *dev;
> @@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
>* state, PE is in good state.
>*/
>   if ((ret < 0) ||
> - (ret == EEH_STATE_NOT_SUPPORT) ||
> - ((ret & active_flags) == active_flags)) {
> + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
>   eeh_stats.false_positives++;
>   pe->false_positives++;
>   rc = 0;
> @@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
>  
>   /* Frozen parent PE ? */
>   ret = eeh_ops->get_state(parent_pe, NULL);
> - if (ret > 0 &&
> - (ret & active_flags) != active_flags)
> + if (ret > 0 && !eeh_state_active(ret))
>   pe = parent_pe;
>  
>   /* Next parent level */
> @@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
>   */
>  int eeh_pe_reset_full(struct eeh_pe *pe)
>  {
> - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
>   int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
>   int type = EEH_RESET_HOT;
>   unsigned int freset = 0;
> @@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
>  
>   /* Wait until the PE is in a functioning state */
>   state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
> - if ((state & active_flags) == active_flags)
> + if (eeh_state_active(state))
>   break;
>  
>   if (state < 0) {
> @@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
>   struct eeh_dev *edev, *tmp;
>   struct pci_dev *pdev;
>   struct pci_device_id *id;
> - int flags, ret;
> + int ret;
>  
>   /* Check PE state */
> - flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
>   ret = eeh_ops->get_state(pe, NULL);
>   if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
>   return 0;
>  
>   /* Unfrozen PE, nothing to do */
> - if ((ret & flags) == flags)
> + if (eeh_state_active(ret))
>   return 0;
>  
>   /* Frozen PE, check if it needs PE level reset */
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
> b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 33c86c1a1720..ddfc3544d285 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
>   dev_pe = dev_pe->parent;
>   while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
>   int ret;
> - int active_flags = (EEH_STATE_MMIO_ACTIVE |
> - EEH_STATE_DMA_ACTIVE);

Re: [PATCH v3 8/9] powerpc/eeh: Factor out common code eeh_reset_device()

2018-03-20 Thread Alexey Kardashevskiy
On 21/3/18 1:06 pm, Sam Bobroff wrote:
> The caller will always pass NULL for 'rmv_data' when
> 'eeh_aware_driver' is true, so the first two calls to
> eeh_pe_dev_traverse() can be combined without changing behaviour as
> can the two arms of the final 'if' block.
> 
> This should not change behaviour.
> 
> Signed-off-by: Sam Bobroff 


Reviewed-by: Alexey Kardashevskiy 


> ---
> == v2 -> v3: ==
> Corrected two missed cases of eeh_aware_driver -> driver_eeh_aware.
> 
>  arch/powerpc/kernel/eeh_driver.c | 32 ++--
>  1 file changed, 10 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 93fc22e791fa..43ceb6263cd8 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -647,16 +647,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>* into pci_hp_add_devices().
>*/
>   eeh_pe_state_mark(pe, EEH_PE_KEEP);
> - if (!driver_eeh_aware) {
> - if (pe->type & EEH_PE_VF) {
> - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> - } else {
> - pci_lock_rescan_remove();
> - pci_hp_remove_devices(bus);
> - pci_unlock_rescan_remove();
> - }
> - } else {
> + if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
>   eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
> + } else {
> + pci_lock_rescan_remove();
> + pci_hp_remove_devices(bus);
> + pci_unlock_rescan_remove();
>   }
>  
>   /*
> @@ -691,8 +687,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>* the device up before the scripts have taken it down,
>* potentially weird things happen.
>*/
> - if (!driver_eeh_aware) {
> - pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
> + if (!driver_eeh_aware || rmv_data->removed) {
> + pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
> + (driver_eeh_aware ? "partial" : "complete"));
>   ssleep(5);
>  
>   /*
> @@ -705,19 +702,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   if (pe->type & EEH_PE_VF) {
>   eeh_add_virt_device(edev, NULL);
>   } else {
> - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
> + if (!driver_eeh_aware)
> + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
>   pci_hp_add_devices(bus);
>   }
> - } else if (rmv_data->removed) {
> - pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
> - ssleep(5);
> -
> - edev = list_first_entry(>edevs, struct eeh_dev, list);
> - eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> - if (pe->type & EEH_PE_VF)
> - eeh_add_virt_device(edev, NULL);
> - else
> - pci_hp_add_devices(bus);
>   }
>   eeh_pe_state_clear(pe, EEH_PE_KEEP);
>  
> 


-- 
Alexey


Re: [PATCH v2 7/9] powerpc/eeh: Remove always-true tests in eeh_reset_device()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:49 pm, Sam Bobroff wrote:
> eeh_reset_device() tests the value of 'bus' more than once but the
> only caller, eeh_handle_normal_device() does this test itself and will
> never pass NULL.
> 
> So, remove the dead tests.
> 
> This should not change behaviour.
> 
> Signed-off-by: Sam Bobroff 


Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/kernel/eeh_driver.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 07437d765434..93fc22e791fa 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -655,7 +655,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   pci_hp_remove_devices(bus);
>   pci_unlock_rescan_remove();
>   }
> - } else if (bus) {
> + } else {
>   eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
>   }
>  
> @@ -708,7 +708,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
>   pci_hp_add_devices(bus);
>   }
> - } else if (bus && rmv_data->removed) {
> + } else if (rmv_data->removed) {
>   pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>   ssleep(5);
>  
> 


-- 
Alexey


Re: [PATCH v2 6/9] powerpc/eeh: Clarify arguments to eeh_reset_device()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:48 pm, Sam Bobroff wrote:
> It is currently difficult to understand the behaviour of
> eeh_reset_device() due to the way it's parameters are used. In
> particular, when 'bus' is NULL, it's value is still necessary so the
> same value is looked up again locally under a different name
> ('frozen_bus') but behaviour is changed.
> 
> To clarify this, add a new parameter 'driver_eeh_aware', and have the
> caller set it when it would have passed NULL for 'bus' and always pass
> a value for 'bus'. Then change any test that was on 'bus' to one on
> '!driver_eeh_aware' and replace uses of 'frozen_bus' with 'bus'.
> 
> Also update the function's comment.
> 
> This should not change behaviour.
> 
> Signed-off-by: Sam Bobroff 



Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/kernel/eeh_driver.c | 20 +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index cb584d72b0a5..07437d765434 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -619,17 +619,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
>  
>  /**
>   * eeh_reset_device - Perform actual reset of a pci slot
> + * @driver_eeh_aware: Does the device's driver provide EEH support?
>   * @pe: EEH PE
>   * @bus: PCI bus corresponding to the isolcated slot
> + * @rmv_data: Optional, list to record removed devices
>   *
>   * This routine must be called to do reset on the indicated PE.
>   * During the reset, udev might be invoked because those affected
>   * PCI devices will be removed and then added.
>   */
>  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
> - struct eeh_rmv_data *rmv_data)
> + struct eeh_rmv_data *rmv_data,
> + bool driver_eeh_aware)
>  {
> - struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>   time64_t tstamp;
>   int cnt, rc;
>   struct eeh_dev *edev;
> @@ -645,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>* into pci_hp_add_devices().
>*/
>   eeh_pe_state_mark(pe, EEH_PE_KEEP);
> - if (bus) {
> + if (!driver_eeh_aware) {
>   if (pe->type & EEH_PE_VF) {
>   eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>   } else {
> @@ -653,7 +655,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   pci_hp_remove_devices(bus);
>   pci_unlock_rescan_remove();
>   }
> - } else if (frozen_bus) {
> + } else if (bus) {
>   eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
>   }
>  
> @@ -689,7 +691,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>* the device up before the scripts have taken it down,
>* potentially weird things happen.
>*/
> - if (bus) {
> + if (!driver_eeh_aware) {
>   pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
>   ssleep(5);
>  
> @@ -706,7 +708,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
>   pci_hp_add_devices(bus);
>   }
> - } else if (frozen_bus && rmv_data->removed) {
> + } else if (bus && rmv_data->removed) {
>   pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>   ssleep(5);
>  
> @@ -715,7 +717,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   if (pe->type & EEH_PE_VF)
>   eeh_add_virt_device(edev, NULL);
>   else
> - pci_hp_add_devices(frozen_bus);
> + pci_hp_add_devices(bus);
>   }
>   eeh_pe_state_clear(pe, EEH_PE_KEEP);
>  
> @@ -820,7 +822,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>*/
>   if (result == PCI_ERS_RESULT_NONE) {
>   pr_info("EEH: Reset with hotplug activity\n");
> - rc = eeh_reset_device(pe, bus, NULL);
> + rc = eeh_reset_device(pe, bus, NULL, false);
>   if (rc) {
>   pr_warn("%s: Unable to reset, err=%d\n",
>   __func__, rc);
> @@ -872,7 +874,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>   /* If any device called out for a reset, then reset the slot */
>   if (result == PCI_ERS_RESULT_NEED_RESET) {
>   pr_info("EEH: Reset without hotplug activity\n");
> - rc = eeh_reset_device(pe, NULL, _data);
> + rc = eeh_reset_device(pe, bus, _data, true);
>   if (rc) {
>   pr_warn("%s: Cannot reset, err=%d\n",
>   __func__, rc);
> 


-- 
Alexey


Re: [PATCH v2 5/9] powerpc/eeh: Rename frozen_bus to bus in eeh_handle_normal_event()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:47 pm, Sam Bobroff wrote:
> The name "frozen_bus" is misleading: it's not necessarily frozen, it's
> just the PE's PCI bus.
> 
> Signed-off-by: Sam Bobroff 

Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/kernel/eeh_driver.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 04a5d9db5499..cb584d72b0a5 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -754,14 +754,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   */
>  void eeh_handle_normal_event(struct eeh_pe *pe)
>  {
> - struct pci_bus *frozen_bus;
> + struct pci_bus *bus;
>   struct eeh_dev *edev, *tmp;
>   int rc = 0;
>   enum pci_ers_result result = PCI_ERS_RESULT_NONE;
>   struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
>  
> - frozen_bus = eeh_pe_bus_get(pe);
> - if (!frozen_bus) {
> + bus = eeh_pe_bus_get(pe);
> + if (!bus) {
>   pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
>   __func__, pe->phb->global_number, pe->addr);
>   return;
> @@ -820,7 +820,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>*/
>   if (result == PCI_ERS_RESULT_NONE) {
>   pr_info("EEH: Reset with hotplug activity\n");
> - rc = eeh_reset_device(pe, frozen_bus, NULL);
> + rc = eeh_reset_device(pe, bus, NULL);
>   if (rc) {
>   pr_warn("%s: Unable to reset, err=%d\n",
>   __func__, rc);
> @@ -938,7 +938,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>   eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>  
>   pci_lock_rescan_remove();
> - pci_hp_remove_devices(frozen_bus);
> + pci_hp_remove_devices(bus);
>   pci_unlock_rescan_remove();
>   /* The passed PE should no longer be used */
>   return;
> 


-- 
Alexey


Re: [PATCH v2 4/9] powerpc/eeh: Remove misleading test in eeh_handle_normal_event()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:46 pm, Sam Bobroff wrote:
> Remove a test that checks if "frozen_bus" is NULL, because it cannot
> have changed since it was tested at the start of the function and so
> must be true here.
> 
> Signed-off-by: Sam Bobroff 


Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/kernel/eeh_driver.c | 24 +++-
>  1 file changed, 11 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 5b7a5ed4db4d..04a5d9db5499 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -930,20 +930,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
>* all removed devices correctly to avoid access
>* the their PCI config any more.
>*/
> - if (frozen_bus) {
> - if (pe->type & EEH_PE_VF) {
> - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> - } else {
> - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
> - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> + if (pe->type & EEH_PE_VF) {
> + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> + } else {
> + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
> + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>  
> - pci_lock_rescan_remove();
> - pci_hp_remove_devices(frozen_bus);
> - pci_unlock_rescan_remove();
> - /* The passed PE should no longer be used */
> - return;
> - }
> + pci_lock_rescan_remove();
> + pci_hp_remove_devices(frozen_bus);
> + pci_unlock_rescan_remove();
> + /* The passed PE should no longer be used */
> + return;
>   }
>  final:
>   eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
> 


-- 
Alexey


Re: [PATCH v2 3/9] powerpc/eeh: Fix misleading comment in __eeh_addr_cache_get_device()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:46 pm, Sam Bobroff wrote:
> Commit "0ba17b05 powerpc/eeh: Remove reference to PCI device"
> removed a call to pci_dev_get() from __eeh_addr_cache_get_device() but
> did not update the comment to match.
> 
> Signed-off-by: Sam Bobroff 

Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/kernel/eeh_cache.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
> index d4cc26618809..201943d54a6e 100644
> --- a/arch/powerpc/kernel/eeh_cache.c
> +++ b/arch/powerpc/kernel/eeh_cache.c
> @@ -84,8 +84,7 @@ static inline struct eeh_dev 
> *__eeh_addr_cache_get_device(unsigned long addr)
>   * @addr: mmio (PIO) phys address or i/o port number
>   *
>   * Given an mmio phys address, or a port number, find a pci device
> - * that implements this address.  Be sure to pci_dev_put the device
> - * when finished.  I/O port numbers are assumed to be offset
> + * that implements this address.  I/O port numbers are assumed to be offset
>   * from zero (that is, they do *not* have pci_io_addr added in).
>   * It is safe to call this function within an interrupt.
>   */
> 


-- 
Alexey


Re: [PATCH v2 2/9] powerpc/eeh: Manage EEH_PE_RECOVERING inside eeh_handle_normal_event()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:46 pm, Sam Bobroff wrote:
> Currently the EEH_PE_RECOVERING flag for a PE is managed by both the
> caller and callee of eeh_handle_normal_event() (among other places not
> considered here). This is complicated by the fact that the PE may
> or may not have been invalidated by the call.
> 
> So move the callee's handling into eeh_handle_normal_event(), which
> clarifies it and allows the return type to be changed to void (because
> it no longer needs to indicate at the PE has been invalidated).
> 
> This should not change behaviour except in eeh_event_handler() where
> it was previously possible to cause eeh_pe_state_clear() to be called
> on an invalid PE, which is now avoided.
> 
> Signed-off-by: Sam Bobroff 

Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/include/asm/eeh_event.h |  2 +-
>  arch/powerpc/kernel/eeh_driver.c | 29 +++--
>  arch/powerpc/kernel/eeh_event.c  |  2 --
>  3 files changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh_event.h 
> b/arch/powerpc/include/asm/eeh_event.h
> index 0a168038882d..9884e872686f 100644
> --- a/arch/powerpc/include/asm/eeh_event.h
> +++ b/arch/powerpc/include/asm/eeh_event.h
> @@ -34,7 +34,7 @@ struct eeh_event {
>  int eeh_event_init(void);
>  int eeh_send_failure_event(struct eeh_pe *pe);
>  void eeh_remove_event(struct eeh_pe *pe, bool force);
> -bool eeh_handle_normal_event(struct eeh_pe *pe);
> +void eeh_handle_normal_event(struct eeh_pe *pe);
>  void eeh_handle_special_event(void);
>  
>  #endif /* __KERNEL__ */
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 51b21c97910f..5b7a5ed4db4d 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -733,7 +733,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>  
>  /**
>   * eeh_handle_normal_event - Handle EEH events on a specific PE
> - * @pe: EEH PE
> + * @pe: EEH PE - which should not be used after we return, as it may
> + * have been invalidated.
>   *
>   * Attempts to recover the given PE.  If recovery fails or the PE has failed
>   * too many times, remove the PE.
> @@ -750,10 +751,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   * & devices under this slot, and then finally restarting the device
>   * drivers (which cause a second set of hotplug events to go out to
>   * userspace).
> - *
> - * Returns true if @pe should no longer be used, else false.
>   */
> -bool eeh_handle_normal_event(struct eeh_pe *pe)
> +void eeh_handle_normal_event(struct eeh_pe *pe)
>  {
>   struct pci_bus *frozen_bus;
>   struct eeh_dev *edev, *tmp;
> @@ -765,9 +764,11 @@ bool eeh_handle_normal_event(struct eeh_pe *pe)
>   if (!frozen_bus) {
>   pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
>   __func__, pe->phb->global_number, pe->addr);
> - return false;
> + return;
>   }
>  
> + eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
> +
>   eeh_pe_update_time_stamp(pe);
>   pe->freeze_count++;
>   if (pe->freeze_count > eeh_max_freezes) {
> @@ -904,7 +905,7 @@ bool eeh_handle_normal_event(struct eeh_pe *pe)
>   pr_info("EEH: Notify device driver to resume\n");
>   eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
>  
> - return false;
> + goto final;
>  
>  hard_fail:
>   /*
> @@ -940,12 +941,12 @@ bool eeh_handle_normal_event(struct eeh_pe *pe)
>   pci_lock_rescan_remove();
>   pci_hp_remove_devices(frozen_bus);
>   pci_unlock_rescan_remove();
> -
>   /* The passed PE should no longer be used */
> - return true;
> + return;
>   }
>   }
> - return false;
> +final:
> + eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
>  }
>  
>  /**
> @@ -1018,15 +1019,7 @@ void eeh_handle_special_event(void)
>*/
>   if (rc == EEH_NEXT_ERR_FROZEN_PE ||
>   rc == EEH_NEXT_ERR_FENCED_PHB) {
> - /*
> -  * eeh_handle_normal_event() can make the PE stale if it
> -  * determines that the PE cannot possibly be recovered.
> -  * Don't modify the PE state if that's the case.
> -  */
> - if (eeh_handle_normal_event(pe))
> - continue;
> -
> - eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
> + eeh_handle_normal_event(pe);
>   } else {
>   pci_lock_rescan_remove();
>   list_for_each_entry(hose, _list, list_node) {
> diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
> index 872bcfe8f90e..61c9356bf9c9 100644
> --- a/arch/powerpc/kernel/eeh_event.c

Re: [PATCH v2 1/9] powerpc/eeh: Remove eeh_handle_event()

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:46 pm, Sam Bobroff wrote:
> The function eeh_handle_event(pe) does nothing other than switching
> between calling eeh_handle_normal_event(pe) and
> eeh_handle_special_event(). However it is only called in two places,
> one where pe can't be NULL and the other where it must be NULL (see
> eeh_event_handler()) so it does nothing but obscure the flow of
> control.
> 
> So, remove it.
> 
> Signed-off-by: Sam Bobroff 


Reviewed-by: Alexey Kardashevskiy 


> ---
>  arch/powerpc/include/asm/eeh_event.h |  3 ++-
>  arch/powerpc/kernel/eeh_driver.c | 42 
> +---
>  arch/powerpc/kernel/eeh_event.c  |  4 ++--
>  3 files changed, 19 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh_event.h 
> b/arch/powerpc/include/asm/eeh_event.h
> index 1e551a2d6f82..0a168038882d 100644
> --- a/arch/powerpc/include/asm/eeh_event.h
> +++ b/arch/powerpc/include/asm/eeh_event.h
> @@ -34,7 +34,8 @@ struct eeh_event {
>  int eeh_event_init(void);
>  int eeh_send_failure_event(struct eeh_pe *pe);
>  void eeh_remove_event(struct eeh_pe *pe, bool force);
> -void eeh_handle_event(struct eeh_pe *pe);
> +bool eeh_handle_normal_event(struct eeh_pe *pe);
> +void eeh_handle_special_event(void);
>  
>  #endif /* __KERNEL__ */
>  #endif /* ASM_POWERPC_EEH_EVENT_H */
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index 0c0b66fc5bfb..51b21c97910f 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -738,9 +738,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
> pci_bus *bus,
>   * Attempts to recover the given PE.  If recovery fails or the PE has failed
>   * too many times, remove the PE.
>   *
> + * While PHB detects address or data parity errors on particular PCI
> + * slot, the associated PE will be frozen. Besides, DMA's occurring
> + * to wild addresses (which usually happen due to bugs in device
> + * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
> + * #PERR or other misc PCI-related errors also can trigger EEH errors.
> + *
> + * Recovery process consists of unplugging the device driver (which
> + * generated hotplug events to userspace), then issuing a PCI #RST to
> + * the device, then reconfiguring the PCI config space for all bridges
> + * & devices under this slot, and then finally restarting the device
> + * drivers (which cause a second set of hotplug events to go out to
> + * userspace).
> + *
>   * Returns true if @pe should no longer be used, else false.
>   */
> -static bool eeh_handle_normal_event(struct eeh_pe *pe)
> +bool eeh_handle_normal_event(struct eeh_pe *pe)
>  {
>   struct pci_bus *frozen_bus;
>   struct eeh_dev *edev, *tmp;
> @@ -942,7 +955,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
>   * specific PE.  Iterates through possible failures and handles them as
>   * necessary.
>   */
> -static void eeh_handle_special_event(void)
> +void eeh_handle_special_event(void)
>  {
>   struct eeh_pe *pe, *phb_pe;
>   struct pci_bus *bus;
> @@ -1049,28 +1062,3 @@ static void eeh_handle_special_event(void)
>   break;
>   } while (rc != EEH_NEXT_ERR_NONE);
>  }
> -
> -/**
> - * eeh_handle_event - Reset a PCI device after hard lockup.
> - * @pe: EEH PE
> - *
> - * While PHB detects address or data parity errors on particular PCI
> - * slot, the associated PE will be frozen. Besides, DMA's occurring
> - * to wild addresses (which usually happen due to bugs in device
> - * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
> - * #PERR or other misc PCI-related errors also can trigger EEH errors.
> - *
> - * Recovery process consists of unplugging the device driver (which
> - * generated hotplug events to userspace), then issuing a PCI #RST to
> - * the device, then reconfiguring the PCI config space for all bridges
> - * & devices under this slot, and then finally restarting the device
> - * drivers (which cause a second set of hotplug events to go out to
> - * userspace).
> - */
> -void eeh_handle_event(struct eeh_pe *pe)
> -{
> - if (pe)
> - eeh_handle_normal_event(pe);
> - else
> - eeh_handle_special_event();
> -}
> diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
> index accbf8b5fd46..872bcfe8f90e 100644
> --- a/arch/powerpc/kernel/eeh_event.c
> +++ b/arch/powerpc/kernel/eeh_event.c
> @@ -81,10 +81,10 @@ static int eeh_event_handler(void * dummy)
>   pr_info("EEH: Detected PCI bus error on "
>   "PHB#%x-PE#%x\n",
>   pe->phb->global_number, pe->addr);
> - eeh_handle_event(pe);
> + eeh_handle_normal_event(pe);
>   eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
>   } else {
> - 

Re: [PATCH v2 0/9] EEH refactoring 1

2018-03-20 Thread Alexey Kardashevskiy
On 19/3/18 1:45 pm, Sam Bobroff wrote:
> Hello everyone,
> 
> Here is a set of some small, mostly idempotent, changes to improve
> maintainability in some of the EEH code, primarily in eeh_driver.c.
> 
> I've kept them all small to aid review but perhaps they should be squashed 
> down
> before being applied.
> 
> Cheers,
> Sam.
> 
> Patch set changelog follows:
> 
> == v1 -> v2: ==
> 
> Patch 1/9: powerpc/eeh: Remove eeh_handle_event()
> 
> Patch 2/9: powerpc/eeh: Manage EEH_PE_RECOVERING inside 
> eeh_handle_normal_event()
> 
> Patch 3/9: powerpc/eeh: Fix misleading comment in 
> __eeh_addr_cache_get_device()
> 
> Patch 4/9: powerpc/eeh: Remove misleading test in eeh_handle_normal_event()
> 
> Patch 5/9: powerpc/eeh: Rename frozen_bus to bus in eeh_handle_normal_event()
> 
> Patch 6/9: powerpc/eeh: Clarify arguments to eeh_reset_device()
> * Re-ordered parameters to eeh_reset_device() to keep pe first.
> * Changed eeh_aware_driver to driver_eeh_aware.
> 
> Patch 7/9: powerpc/eeh: Remove always-true tests in eeh_reset_device()
> 
> Patch 8/9: powerpc/eeh: Factor out common code eeh_reset_device()
> * In one case, added braces to "if" to match "else".
> 
> Patch 9/9: powerpc/eeh: Add eeh_state_active() helper
> 
> == v1: ==
> 
> Patch 1/9: powerpc/eeh: Remove eeh_handle_event()
> Patch 2/9: powerpc/eeh: Manage EEH_PE_RECOVERING inside 
> eeh_handle_normal_event()
> Patch 3/9: powerpc/eeh: Fix misleading comment in 
> __eeh_addr_cache_get_device()
> Patch 4/9: powerpc/eeh: Remove misleading test in eeh_handle_normal_event()
> Patch 5/9: powerpc/eeh: Rename frozen_bus to bus in eeh_handle_normal_event()
> Patch 6/9: powerpc/eeh: Clarify arguments to eeh_reset_device()
> Patch 7/9: powerpc/eeh: Remove always-true tests in eeh_reset_device()
> Patch 8/9: powerpc/eeh: Factor out common code eeh_reset_device()
> Patch 9/9: powerpc/eeh: Add eeh_state_active() helper
> 
> Sam Bobroff (9):
>   powerpc/eeh: Remove eeh_handle_event()
>   powerpc/eeh: Manage EEH_PE_RECOVERING inside eeh_handle_normal_event()
>   powerpc/eeh: Fix misleading comment in __eeh_addr_cache_get_device()
>   powerpc/eeh: Remove misleading test in eeh_handle_normal_event()
>   powerpc/eeh: Rename frozen_bus to bus in eeh_handle_normal_event()
>   powerpc/eeh: Clarify arguments to eeh_reset_device()
>   powerpc/eeh: Remove always-true tests in eeh_reset_device()
>   powerpc/eeh: Factor out common code eeh_reset_device()
>   powerpc/eeh: Add eeh_state_active() helper
> 
>  arch/powerpc/include/asm/eeh.h   |   6 ++
>  arch/powerpc/include/asm/eeh_event.h |   3 +-
>  arch/powerpc/kernel/eeh.c|  19 ++--
>  arch/powerpc/kernel/eeh_cache.c  |   3 +-
>  arch/powerpc/kernel/eeh_driver.c | 137 
> +++
>  arch/powerpc/kernel/eeh_event.c  |   6 +-
>  arch/powerpc/platforms/powernv/eeh-powernv.c |   9 +-
>  7 files changed, 72 insertions(+), 111 deletions(-)
> 

Nice cleanup,

Reviewed-by: Alexey Kardashevskiy 


-- 
Alexey


Re: [PATCH] powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened

2018-03-20 Thread Benjamin Herrenschmidt
On Wed, 2018-03-21 at 13:16 +1000, Nicholas Piggin wrote:
> Yes Carol is working on this with distros. Backporting should be
> simple, so hoping to get this upstream soon to help the process.
> We can take that as a Reviewed-by:? :)

Ah yup.

Cheers,
Ben.



Re: RFC on writel and writel_relaxed

2018-03-20 Thread Oliver
On Wed, Mar 21, 2018 at 2:07 PM, Sinan Kaya  wrote:
> Hi PPC Maintainers,
>
> We are seeking feedback on the status of relaxed write API implementation.
> What is the motivation for not implementing the relaxed API?

Hmm, good question.

Looks like we've implemented the relaxed_* variants by aliasing them
to the normal version since the dawn of time. There's a comment in
io.h saying something about us not having the expected semantics for
the relaxed variants, but I don't see what the issue is... Ben?

> I see that network drivers are working around the issue by calling
> __raw_write() API directly but this also breaks other architectures
> like SPARC since the semantics of __raw_writel() seems to be system dependent.

Yeah that's pretty gross. Which drivers are doing this?

> This is putting drivers into a tight position and they cannot achieve true
> multi-arch enablement and are forced into calling __raw APIs flavors
> directly with #ifdef BIG_ENDIAN ugliness.
>
> Sinan
>
> --
> Sinan Kaya
> Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
> Technologies, Inc.
> Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
> Foundation Collaborative Project.


Re: [PATCH] powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened

2018-03-20 Thread Nicholas Piggin
Adding Carol...

On Wed, 21 Mar 2018 13:32:28 +1100
Benjamin Herrenschmidt  wrote:

> On Wed, 2018-03-21 at 12:22 +1000, Nicholas Piggin wrote:
> > force_external_irq_replay() can be called in the do_IRQ path with
> > interrupts hard enabled and soft disabled if may_hard_irq_enable() set
> > MSR[EE]=1. It updates local_paca->irq_happened with a load, modify,
> > store sequence. If a maskable interrupt hits during this sequence, it
> > will go to the masked handler to be marked pending in irq_happened.
> > This update will be lost when the interrupt returns and the store
> > instruction executes.  This can result in unpredictable latencies,
> > timeouts, lockups, etc.
> > 
> > Fix this by ensuring hard interrupts are disabled before modifying
> > irq_happened.
> > 
> > This could cause any maskable asynchronous interrupt to get lost, but
> > it was noticed on P9 SMP system doing RDMA NVMe target over 100GbE,
> > so very high external interrupt rate and high IPI rate. The hang was
> > bisected down to enabling doorbell interrupts for IPIs. These provided
> > an interrupt type that could run at high rates in the do_IRQ path,
> > stressing the race.
> > 
> > Fixes: 1d607bb3bd ("powerpc/irq: Add mechanism to force a replay of 
> > interrupts")
> > Reported-by: Carol L. Soto 
> > Cc: Benjamin Herrenschmidt 
> > Signed-off-by: Nicholas Piggin 
> > ---  
> 
> Nice one. We need that back into the distros asap.


Yes Carol is working on this with distros. Backporting should be
simple, so hoping to get this upstream soon to help the process.
We can take that as a Reviewed-by:? :)

Thanks,
Nick


RFC on writel and writel_relaxed

2018-03-20 Thread Sinan Kaya
Hi PPC Maintainers,

We are seeking feedback on the status of relaxed write API implementation.
What is the motivation for not implementing the relaxed API? 

I see that network drivers are working around the issue by calling
__raw_write() API directly but this also breaks other architectures
like SPARC since the semantics of __raw_writel() seems to be system dependent.

This is putting drivers into a tight position and they cannot achieve true
multi-arch enablement and are forced into calling __raw APIs flavors
directly with #ifdef BIG_ENDIAN ugliness.

Sinan

-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH] powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened

2018-03-20 Thread Benjamin Herrenschmidt
On Wed, 2018-03-21 at 12:22 +1000, Nicholas Piggin wrote:
> force_external_irq_replay() can be called in the do_IRQ path with
> interrupts hard enabled and soft disabled if may_hard_irq_enable() set
> MSR[EE]=1. It updates local_paca->irq_happened with a load, modify,
> store sequence. If a maskable interrupt hits during this sequence, it
> will go to the masked handler to be marked pending in irq_happened.
> This update will be lost when the interrupt returns and the store
> instruction executes.  This can result in unpredictable latencies,
> timeouts, lockups, etc.
> 
> Fix this by ensuring hard interrupts are disabled before modifying
> irq_happened.
> 
> This could cause any maskable asynchronous interrupt to get lost, but
> it was noticed on P9 SMP system doing RDMA NVMe target over 100GbE,
> so very high external interrupt rate and high IPI rate. The hang was
> bisected down to enabling doorbell interrupts for IPIs. These provided
> an interrupt type that could run at high rates in the do_IRQ path,
> stressing the race.
> 
> Fixes: 1d607bb3bd ("powerpc/irq: Add mechanism to force a replay of 
> interrupts")
> Reported-by: Carol L. Soto 
> Cc: Benjamin Herrenschmidt 
> Signed-off-by: Nicholas Piggin 
> ---

Nice one. We need that back into the distros asap.

> This has survived stress testing quite well so far, may need a little
> more testing but I'd like to post it now to get some more comments.
> 
> We can optimise the mtmsr a bit more (e.g., skip it if interrupts are
> already disabled or EE alrady set), but I've got some other patches
> pending which change things there slightly, so I prefer to have this
> minimal fix now, then make such changes upstream later.
> 
> ---
>  arch/powerpc/kernel/irq.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index f88038847790..061aa0f47bb1 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -476,6 +476,14 @@ void force_external_irq_replay(void)
>*/
>   WARN_ON(!arch_irqs_disabled());
>  
> + /*
> +  * Interrupts must always be hard disabled before irq_happened is
> +  * modified (to prevent lost update in case of interrupt between
> +  * load and store).
> +  */
> + __hard_irq_disable();
> + local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
> +
>   /* Indicate in the PACA that we have an interrupt to replay */
>   local_paca->irq_happened |= PACA_IRQ_EE;
>  }


[PATCH] powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened

2018-03-20 Thread Nicholas Piggin
force_external_irq_replay() can be called in the do_IRQ path with
interrupts hard enabled and soft disabled if may_hard_irq_enable() set
MSR[EE]=1. It updates local_paca->irq_happened with a load, modify,
store sequence. If a maskable interrupt hits during this sequence, it
will go to the masked handler to be marked pending in irq_happened.
This update will be lost when the interrupt returns and the store
instruction executes.  This can result in unpredictable latencies,
timeouts, lockups, etc.

Fix this by ensuring hard interrupts are disabled before modifying
irq_happened.

This could cause any maskable asynchronous interrupt to get lost, but
it was noticed on P9 SMP system doing RDMA NVMe target over 100GbE,
so very high external interrupt rate and high IPI rate. The hang was
bisected down to enabling doorbell interrupts for IPIs. These provided
an interrupt type that could run at high rates in the do_IRQ path,
stressing the race.

Fixes: 1d607bb3bd ("powerpc/irq: Add mechanism to force a replay of interrupts")
Reported-by: Carol L. Soto 
Cc: Benjamin Herrenschmidt 
Signed-off-by: Nicholas Piggin 
---

This has survived stress testing quite well so far, may need a little
more testing but I'd like to post it now to get some more comments.

We can optimise the mtmsr a bit more (e.g., skip it if interrupts are
already disabled or EE alrady set), but I've got some other patches
pending which change things there slightly, so I prefer to have this
minimal fix now, then make such changes upstream later.

---
 arch/powerpc/kernel/irq.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f88038847790..061aa0f47bb1 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -476,6 +476,14 @@ void force_external_irq_replay(void)
 */
WARN_ON(!arch_irqs_disabled());
 
+   /*
+* Interrupts must always be hard disabled before irq_happened is
+* modified (to prevent lost update in case of interrupt between
+* load and store).
+*/
+   __hard_irq_disable();
+   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
/* Indicate in the PACA that we have an interrupt to replay */
local_paca->irq_happened |= PACA_IRQ_EE;
 }
-- 
2.16.1



[PATCH v3 8/9] powerpc/eeh: Factor out common code eeh_reset_device()

2018-03-20 Thread Sam Bobroff
The caller will always pass NULL for 'rmv_data' when
'eeh_aware_driver' is true, so the first two calls to
eeh_pe_dev_traverse() can be combined without changing behaviour as
can the two arms of the final 'if' block.

This should not change behaviour.

Signed-off-by: Sam Bobroff 
---
== v2 -> v3: ==
Corrected two missed cases of eeh_aware_driver -> driver_eeh_aware.

 arch/powerpc/kernel/eeh_driver.c | 32 ++--
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 93fc22e791fa..43ceb6263cd8 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -647,16 +647,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
 * into pci_hp_add_devices().
 */
eeh_pe_state_mark(pe, EEH_PE_KEEP);
-   if (!driver_eeh_aware) {
-   if (pe->type & EEH_PE_VF) {
-   eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-   } else {
-   pci_lock_rescan_remove();
-   pci_hp_remove_devices(bus);
-   pci_unlock_rescan_remove();
-   }
-   } else {
+   if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+   } else {
+   pci_lock_rescan_remove();
+   pci_hp_remove_devices(bus);
+   pci_unlock_rescan_remove();
}
 
/*
@@ -691,8 +687,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
 * the device up before the scripts have taken it down,
 * potentially weird things happen.
 */
-   if (!driver_eeh_aware) {
-   pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+   if (!driver_eeh_aware || rmv_data->removed) {
+   pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+   (driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
 
/*
@@ -705,19 +702,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
} else {
-   eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+   if (!driver_eeh_aware)
+   eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_hp_add_devices(bus);
}
-   } else if (rmv_data->removed) {
-   pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
-   ssleep(5);
-
-   edev = list_first_entry(>edevs, struct eeh_dev, list);
-   eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-   if (pe->type & EEH_PE_VF)
-   eeh_add_virt_device(edev, NULL);
-   else
-   pci_hp_add_devices(bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
-- 
2.16.1.74.g9b0b1f47b



linux-next: manual merge of the powerpc tree with the asm-generic tree

2018-03-20 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the powerpc tree got a conflict in:

  lib/raid6/test/Makefile

between commit:

  fa523d54a7eb ("raid: remove tile specific raid6 implementation")

from the asm-generic tree and commit:

  751ba79cc552 ("lib/raid6/altivec: Add vpermxor implementation for raid6 Q 
syndrome")

from the powerpc tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc lib/raid6/test/Makefile
index fabc477b1417,5050e270c06b..
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@@ -45,12 -45,17 +45,14 @@@ else ifeq ($(HAS_NEON),yes
  CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
  else
  HAS_ALTIVEC := $(shell printf '\#include \nvector int 
a;\n' |\
-  gcc -c -x c - >&/dev/null && \
-  rm ./-.o && echo yes)
+  gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
  ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
  endif
  endif
 -ifeq ($(ARCH),tilegx)
 -OBJS += tilegx8.o
 -endif
  
  .c.o:
$(CC) $(CFLAGS) -c -o $@ $<
@@@ -117,7 -137,8 +131,7 @@@ tables.c: mktable
./mktables > tables.c
  
  clean:
-   rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c 
tables.c raid6test
+   rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c 
neon*.c tables.c raid6test
 -  rm -f tilegx*.c
  
  spotless: clean
rm -f *~


pgpdxxWKwgIrN.pgp
Description: OpenPGP digital signature


Re: [bug?] Access was denied by memory protection keys in execute-only address

2018-03-20 Thread Ram Pai
On Fri, Mar 09, 2018 at 11:43:00AM +0800, Li Wang wrote:
>On Fri, Mar 9, 2018 at 12:45 AM, Ram Pai <[1]linux...@us.ibm.com> wrote:
> 
>  On Thu, Mar 08, 2018 at 11:19:12PM +1100, Michael Ellerman wrote:
>  > Li Wang <[2]liw...@redhat.com> writes:
>  > > Hi,
>  > >
>  > > ltp/mprotect04[1] crashed by SEGV_PKUERR on ppc64(LPAR on P730,
>  Power 8
>  > > 8247-22L) with kernel-v4.16.0-rc4.
>  > >
>  > > 1000-1002 r-xp  fd:00 167223           mprotect04
>  > > 1002-1003 r--p 0001 fd:00 167223           mprotect04
>  > > 1003-1004 rw-p 0002 fd:00 167223           mprotect04
>  > > 1001a38-1001a3b rw-p  00:00 0          [heap]
>  > > 7fffa6c6-7fffa6c8 --xp  00:00 0 ​
>  > >
>  > > ​_func = 0x10030170​
>  > >
>  > > ​ = 0x7fffa6c60170​
>  > >
>  > > ​While perform ​
>  > > "(*func)();" we get the
>  > > ​segmentation fault.
>  > > ​
>  > >
>  > > ​strace log:​
>  > >
>  > > ---
>  > > ​mprotect(0x7fffaed0, 131072, PROT_EXEC) = 0
>  > > rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
>  > > --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_PKUERR,
>  si_addr=0x7fffaed00170}
>  > > ---​
>  >
>  > Looks like a bug to me.
>  >
>  > Please Cc linuxppc-dev on powerpc bugs.
>  >
>  > I also can't reproduce this failure on my machine.
>  > Not sure what's going on?
> 
>  I could reproduce it on a power7 lpar.  But not on a power8 lpar.
> 
>  The problem seems to be that the cpu generates a key exception if
>  the page with Read/Write-disable-but-execute-enable key is executed
>  on power7. If I enable read on that key, the exception disappears.
> 
>After adding read permission on that key, reproducer get PASS on my power8
>machine too.​
>​(​mprotect(..,PROT_READ | PROT_EXEC))​
> 
> 
>  BTW: the testcase executes
>  ​​mprotect(..,PROT_EXEC).
>  The mprotect(, PROT_EXEC) system call internally generates a
>  execute-only key and associates it with the pages in the address-range. 
> 
>  Now since Li Wang claims that he can reproduce it on power8 as well, i
>  am wondering if the slightly different cpu behavior is dependent on the
>  version of the firmware/microcode?
> 
>​I also run this reproducer on series ppc kvm machines, but none of them
>get the FAIL.
>If you need some more HW info, pls let me know.​

Hi Li,

   Can you try the following patch and see if it solves your problem.


diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index c269817..184a10a 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -421,7 +421,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct 
*vma, int prot,
 * The requested protection is execute-only. Hence let's use an
 * execute-only pkey.
 */
-   if (prot == PROT_EXEC) {
+   if (prot == PROT_EXEC && pkey_execute_disable_supported) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;


Thanks
RP



Re: [PATCH 4/5] powerpc/zImage: Rework serial driver probing

2018-03-20 Thread Oliver
On Tue, Mar 20, 2018 at 8:20 PM, Michael Ellerman  wrote:
> Oliver O'Halloran  writes:
>
>> Rather than checking the compatible string in serial_driver_init()
>> we call into the driver's init function and wait for a driver to
>> inidicate it bound to the device by returning zero.
>>
>> The pointers to each driver probe functions are stored in the
>> ".serialdrv" section of the zImage, similar to how initcalls and
>> whatnot are handled inside Linux. This structure allows us to
>> conditionally compile specific driver files based on the KConfig
>> settings. This is needed because we don't have access to the
>> KConfig #defines in the zImage source files (it's a giant #include
>> headache) so conditional compilation is the only way to eliminate
>> unnecessary code.
>
> Did you actually get the config.h include working? Or was it such a big
> headache that it doesn't even work?

I had a half-hearted go at it and ran into some problems with include
paths. The usual workaround for that is copying the files into the
build directory and using sed scripts to replace the <> include with a
"" include, so I said screw it and went with this instead. I'm not too
suprised about there being a few linker errors. I tested a few
different defconfigs before posting, but I figured either your CI or
the 0day bot would catch the rest ;)

> I'm pretty happy with this patch regardless, but it would be simpler and
> less bug prone if the CONFIG_ symbols were usable in the boot code.
>
> cheers


Re: [PATCH 3/5] powerpc/zImage: Check compatible at driver init

2018-03-20 Thread Oliver
On Tue, Mar 20, 2018 at 8:19 PM, Michael Ellerman  wrote:
> Oliver O'Halloran  writes:
>> diff --git a/arch/powerpc/boot/mpc52xx-psc.c 
>> b/arch/powerpc/boot/mpc52xx-psc.c
>> index c2c08633ee35..75470936e661 100644
>> --- a/arch/powerpc/boot/mpc52xx-psc.c
>> +++ b/arch/powerpc/boot/mpc52xx-psc.c
>> @@ -52,6 +52,9 @@ static unsigned char psc_getc(void)
>>
>>  int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp)
>>  {
>> + if (!dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
>> + return 1;
>
> Returning 1 raises the question of whether this is a bool-style function
> that returns 1 on success and 0 on failure - or a UNIX style function
> that returns 0 for sucesss and 1 on failure.
>
> Can we return ENODEV, or just -1 instead to make it clearer?

I figured we would want to differentiate between a mis-matched driver
and any other error type. There's no existing definition of ENODEV and
friends in the wrapper so I just used positive 1 as a half-assed
workaround. I don't have particularly strong feelings about it so
adding an explicit ENODEV would make things a bit cleaner. I'll fix it
on the respin.

>
> cheers


Re: [PATCH 2/2] powerpc/mm: Trace tlbia instruction

2018-03-20 Thread kbuild test robot
Hi Christophe,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180320]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-mm-Add-missing-tracepoint-for-tlbie/20180320-130831
config: powerpc-g5_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x0): multiple definition of 
>> `__tracepoint_tlbia'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x50): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x28): multiple definition 
of `__tracepoint_tlbie'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x78): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x50): multiple definition 
of `__tracepoint_hash_fault'
   arch/powerpc/kernel/irq.o:(__tracepoints+0xa0): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x78): multiple definition 
of `__tracepoint_timer_interrupt_exit'
   arch/powerpc/kernel/irq.o:(__tracepoints+0xc8): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0xa0): multiple definition 
of `__tracepoint_timer_interrupt_entry'
   arch/powerpc/kernel/irq.o:(__tracepoints+0xf0): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0xc8): multiple definition 
of `__tracepoint_irq_exit'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x28): first defined here
   arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0xf0): multiple definition 
of `__tracepoint_irq_entry'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x0): first defined here

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 1/2] powerpc64/ftrace: Add a field in paca to disable ftrace in unsafe code paths

2018-03-20 Thread Naveen N. Rao

Michael Ellerman wrote:

Nicholas Piggin  writes:


On Mon, 19 Mar 2018 14:43:00 +0530
"Naveen N. Rao"  wrote:


We have some C code that we call into from real mode where we cannot
take any exceptions. Though the C functions themselves are mostly safe,
if these functions are traced, there is a possibility that we may take
an exception. For instance, in certain conditions, the ftrace code uses
WARN(), which uses a 'trap' to do its job.

For such scenarios, introduce a new field in paca 'ftrace_disabled',
which is checked on ftrace entry before continuing. This field can then
be set to a non-zero value to disable/pause ftrace, and reset to zero to
resume ftrace.

Since KVM is the only user for this currently, we guard the
ftrace/mcount checks within CONFIG_KVM. This can later be removed
if/when there are other users.


Why not test HSTATE_IN_GUEST then? Add ftrace_disabled if non-KVM users
come along.


We want to use it for the kexec down path, we've already had bugs there.


I had looked at kexec and we seem to be disabling ftrace in 
machine_kexec(). Has that been problematic?


I didn't actually realize that you wanted to use this in kexec path as 
well.




So please keep the separate flag and pull it out of #ifdef KVM.


Sure.

- Naveen




Re: [01/10] selftests/powerpc: add process creation benchmark

2018-03-20 Thread Michael Ellerman
On Tue, 2018-03-06 at 13:24:58 UTC, Nicholas Piggin wrote:
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/dd40c5b4c90d84d30cdb452c2d193d

cheers


Re: powerpc: dts: replace 'linux,stdout-path' with 'stdout-path'

2018-03-20 Thread Michael Ellerman
On Wed, 2018-02-28 at 22:44:06 UTC, Rob Herring wrote:
> 'linux,stdout-path' has been deprecated for some time in favor of
> 'stdout-path'. Now dtc will warn on occurrences of 'linux,stdout-path'.
> Search and replace all the of occurrences with 'stdout-path'.
> 
> Signed-off-by: Rob Herring 
> Cc: Mark Rutland 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/78e5dfea84dc15d69940831b3981b3

cheers


Re: [3/3] powernv/pci: Improve a size determination in pnv_pci_init_ioda_phb()

2018-03-20 Thread Michael Ellerman
On Tue, 2017-10-17 at 15:40:17 UTC, SF Markus Elfring wrote:
> From: Markus Elfring 
> Date: Tue, 17 Oct 2017 17:18:10 +0200
> 
> Replace the specification of a data structure by a pointer dereference
> as the parameter for the operator "sizeof" to make the corresponding size
> determination a bit safer according to the Linux coding style convention.
> 
> This issue was detected by using the Coccinelle software.
> 
> Signed-off-by: Markus Elfring 
> Reviewed-by: Alexey Kardashevskiy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a0828cf57acce9bf941539e1f633e9

cheers


Re: [v6, 1/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2018-03-20 Thread Michael Ellerman
On Fri, 2017-08-04 at 03:42:32 UTC, Matt Brown wrote:
> This patch uses the vpermxor instruction to optimise the raid6 Q syndrome.
> This instruction was made available with POWER8, ISA version 2.07.
> It allows for both vperm and vxor instructions to be done in a single
> instruction. This has been tested for correctness on a ppc64le vm with a
> basic RAID6 setup containing 5 drives.
> 
> The performance benchmarks are from the raid6test in the /lib/raid6/test
> directory. These results are from an IBM Firestone machine with ppc64le
> architecture. The benchmark results show a 35% speed increase over the best
> existing algorithm for powerpc (altivec). The raid6test has also been run
> on a big-endian ppc64 vm to ensure it also works for big-endian
> architectures.
> 
> Performance benchmarks:
>   raid6: altivecx4 gen() 18773 MB/s
>   raid6: altivecx8 gen() 19438 MB/s
> 
>   raid6: vpermxor4 gen() 25112 MB/s
>   raid6: vpermxor8 gen() 26279 MB/s
> 
> Signed-off-by: Matt Brown 
> Reviewed-by: Daniel Axtens 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/751ba79cc552c146595cd439b21c4f

cheers


Re: powerpc/include/asm: Remove unused 64bit cacheflush function

2018-03-20 Thread Michael Ellerman
On Thu, 2017-07-20 at 06:25:14 UTC, Matt Brown wrote:
> The flush_dcache_phys_range function is no longer used in the kernel.
> This patch removes and cleans up the function.
> 
> Signed-off-by: Matt Brown 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/31513207ce72fef5978e8b284e53f2

cheers


Re: [PATCH 4/5] powerpc/zImage: Rework serial driver probing

2018-03-20 Thread Michael Ellerman
Oliver O'Halloran  writes:

> Rather than checking the compatible string in serial_driver_init()
> we call into the driver's init function and wait for a driver to
> inidicate it bound to the device by returning zero.
>
> The pointers to each driver probe functions are stored in the
> ".serialdrv" section of the zImage, similar to how initcalls and
> whatnot are handled inside Linux. This structure allows us to
> conditionally compile specific driver files based on the KConfig
> settings. This is needed because we don't have access to the
> KConfig #defines in the zImage source files (it's a giant #include
> headache) so conditional compilation is the only way to eliminate
> unnecessary code.

Did you actually get the config.h include working? Or was it such a big
headache that it doesn't even work?

I'm pretty happy with this patch regardless, but it would be simpler and
less bug prone if the CONFIG_ symbols were usable in the boot code.

cheers


Re: [PATCH 3/5] powerpc/zImage: Check compatible at driver init

2018-03-20 Thread Michael Ellerman
Oliver O'Halloran  writes:
> diff --git a/arch/powerpc/boot/mpc52xx-psc.c b/arch/powerpc/boot/mpc52xx-psc.c
> index c2c08633ee35..75470936e661 100644
> --- a/arch/powerpc/boot/mpc52xx-psc.c
> +++ b/arch/powerpc/boot/mpc52xx-psc.c
> @@ -52,6 +52,9 @@ static unsigned char psc_getc(void)
>  
>  int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp)
>  {
> + if (!dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
> + return 1;

Returning 1 raises the question of whether this is a bool-style function
that returns 1 on success and 0 on failure - or a UNIX style function
that returns 0 for sucesss and 1 on failure.

Can we return ENODEV, or just -1 instead to make it clearer?

cheers


Re: [RFC PATCH 4/6] mm: provide generic compat_sys_readahead() implementation

2018-03-20 Thread Ingo Molnar

* Al Viro  wrote:

> > For example this attempt at creating a new system call:
> > 
> >   SYSCALL_DEFINE3(moron, int, fd, loff_t, offset, size_t, count)
> > 
> > ... would translate into something like:
> > 
> > .name = "moron", .pattern = "WWW", .type = "int",.size = 4,
> > .name = NULL,  .type = "loff_t", .size = 8,
> > .name = NULL,  .type = "size_t", .size = 4,
> > .name = NULL,  .type = NULL, .size = 0, /* 
> > end of parameter list */
> > 
> > i.e. "WDW". The build-time constraint checker could then warn about:
> > 
> >   # error: System call "moron" uses invalid 'WWW' argument mapping for a 
> > 'WDW' sequence
> >   #please avoid long-long arguments or use 'SYSCALL_DEFINE3_WDW()' 
> > instead
> 
> ... if you do 32bit build.

Yeah - but the checking tool could do a 32-bit sizing of the types and thus the 
checks would work on all arches and on all bitness settings.

I don't think doing part of this in CPP is a good idea:

 - It won't be able to do the full range of checks

 - Wrappers should IMHO be trivial and open coded as much as possible - not 
hidden
   inside several layers of macros.

 - There should be a penalty for newly introduced, badly designed system call
   ABIs, while most CPP variants I can think of will just make bad but solvable 
   decisions palatable, AFAICS.

I.e. I think the way out of this would be two steps:

 1) for new system calls: hard-enforce the highest quality at the development
stage and hard-reject crap. No new 6-parameter system calls or badly ordered
arguments. The tool would also check new extensions to existing system 
calls, 
i.e. no more "add a crappy 4th argument to an existing system call that 
works 
on x86 but hurts MIPS".

 2) for old legacies: cleanly open code all our existing legacies and weird
wrappers. No new muck will be added to it so the line count does not matter.

... is there anything I'm missing?

Thanks,

Ingo


Re: [RFC PATCH 4/6] mm: provide generic compat_sys_readahead() implementation

2018-03-20 Thread Dominik Brodowski
On Mon, Mar 19, 2018 at 11:23:42PM +, Al Viro wrote:
> static inline long C_S_moron(int, loff_t, size_t);
> long compat_SyS_moron(long a0, long a1, long a2, long a3, long a4, long a5, 
> long a6)
> {
>   return C_S_moron((__force int)a0,
> (__force loff_t)(((u64)a2 << 32)|a1),
> (__force size_t)a3);
> }
> static inline long C_S_moron(int fd, loff_t offset, size_t count)
> {
>   whatever body you had for it
> }
> 
> That - from
> COMPAT_SYSCALL_DEFINE3(moron, int, fd, loff_t, offset, size_t, count)
> {
>   whatever body you had for it
> }
> 
> We can use similar machinery for SYSCALL_DEFINE itself, so that
> SyS_moron() would be defined with (long, long, long, long, long, long)
> as arguments and not (long, long long, long) as we have now.

That would be great, as it would allow to use a struct pt_regs * based
syscall calling convention on i386 as well, and not only on x86-64, right?

> It's not impossible to do.  It won't be pretty, but that use of local
> enums allows to avoid unbearably long expansions.
> 
> Benefits:
>   * all SyS... wrappers (i.e. the thing that really ought to
> go into syscall tables) have the same type.
>   * we could have SYSCALL_DEFINE produce a trivial compat
> wrapper, have explicit COMPAT_SYSCALL_DEFINE discard that thing
> and populate the compat syscall table *entirely* with compat_SyS_...,
> letting the linker sort it out.  That way we don't need to keep
> track of what can use native and what needs compat in each compat
> table on biarch.
>   * s390 compat wrappers would disappear with that approach.
>   * we could even stop generating sys_... aliases - if
> syscall table is generated by slapping SyS_... or compat_SyS_...
> on the name given there, we don't need to _have_ those sys_...
> things at all.  All SyS_... would have the same type, so the pile
> in syscalls.h would not be needed - we could generate the externs
> at the same time we generate the syscall table.
> 
> And yes, it's a high-squick approach.  I know and I'm not saying
> it's a good idea.  OTOH, to quote the motto of philosophers and
> shell game operators, "there's something in it"...

... and getting rid of all in-kernel calls to sys_*() is needed as
groundwork for that. So I'll continue to do that "mindless" conversion
first. On top of that, three things (which are mostly orthogonal to each
other) can be done:

1) ptregs system call conversion for x86-64

   Original implementation by Linus exists; needs a bit of tweaking
   but should be doable soon. Need to double-check it does the right
   thing for IA32_EMULATION, though.

2) re-work initramfs etc. code to not use in-kernel equivalents of
   syscalls, but operate on the VFS level instead.

3) re-work SYSCALL_DEFINEx() / COMPAT_SYSCALL_DEFINEx() based on
   your suggestions.

Does that sound sensible?

Thanks,
Dominik


Re: [PATCH v2 8/9] powerpc/eeh: Factor out common code eeh_reset_device()

2018-03-20 Thread kbuild test robot
Hi Sam,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Sam-Bobroff/EEH-refactoring-1/20180320-024729
config: powerpc-ppc64_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

Note: the linux-review/Sam-Bobroff/EEH-refactoring-1/20180320-024729 HEAD 
28e7cc1fa029b84221b827a6ea2908dc33b43d10 builds fine.
  It only hurts bisectibility.

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:14:0,
from include/linux/delay.h:22,
from arch/powerpc/kernel/eeh_driver.c:25:
   arch/powerpc/kernel/eeh_driver.c: In function 'eeh_reset_device':
>> arch/powerpc/kernel/eeh_driver.c:692:5: error: 'eeh_aware_driver' undeclared 
>> (first use in this function); did you mean 'device_driver'?
   (eeh_aware_driver ? "partial" : "complete"));
^
   include/linux/printk.h:308:34: note: in definition of macro 'pr_info'
 printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 ^~~
   arch/powerpc/kernel/eeh_driver.c:692:5: note: each undeclared identifier is 
reported only once for each function it appears in
   (eeh_aware_driver ? "partial" : "complete"));
^
   include/linux/printk.h:308:34: note: in definition of macro 'pr_info'
 printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 ^~~

vim +692 arch/powerpc/kernel/eeh_driver.c

   619  
   620  /**
   621   * eeh_reset_device - Perform actual reset of a pci slot
   622   * @driver_eeh_aware: Does the device's driver provide EEH support?
   623   * @pe: EEH PE
   624   * @bus: PCI bus corresponding to the isolcated slot
   625   * @rmv_data: Optional, list to record removed devices
   626   *
   627   * This routine must be called to do reset on the indicated PE.
   628   * During the reset, udev might be invoked because those affected
   629   * PCI devices will be removed and then added.
   630   */
   631  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
   632  struct eeh_rmv_data *rmv_data,
   633  bool driver_eeh_aware)
   634  {
   635  time64_t tstamp;
   636  int cnt, rc;
   637  struct eeh_dev *edev;
   638  
   639  /* pcibios will clear the counter; save the value */
   640  cnt = pe->freeze_count;
   641  tstamp = pe->tstamp;
   642  
   643  /*
   644   * We don't remove the corresponding PE instances because
   645   * we need the information afterwords. The attached EEH
   646   * devices are expected to be attached soon when calling
   647   * into pci_hp_add_devices().
   648   */
   649  eeh_pe_state_mark(pe, EEH_PE_KEEP);
   650  if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
   651  eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
   652  } else {
   653  pci_lock_rescan_remove();
   654  pci_hp_remove_devices(bus);
   655  pci_unlock_rescan_remove();
   656  }
   657  
   658  /*
   659   * Reset the pci controller. (Asserts RST#; resets config 
space).
   660   * Reconfigure bridges and devices. Don't try to bring the 
system
   661   * up if the reset failed for some reason.
   662   *
   663   * During the reset, it's very dangerous to have uncontrolled 
PCI
   664   * config accesses. So we prefer to block them. However, 
controlled
   665   * PCI config accesses initiated from EEH itself are allowed.
   666   */
   667  rc = eeh_pe_reset_full(pe);
   668  if (rc)
   669  return rc;
   670  
   671  pci_lock_rescan_remove();
   672  
   673  /* Restore PE */
   674  eeh_ops->configure_bridge(pe);
   675  eeh_pe_restore_bars(pe);
   676  
   677  /* Clear frozen state */
   678  rc = eeh_clear_pe_frozen_state(pe, false);
   679  if (rc) {
   680  pci_unlock_rescan_remove();
   681  return rc;
   682  }
   683  
   684  /* Give the system 5 seconds to finish running the user-space
   685   * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
   686   * this is a hack, but if we don't do this, and try to br

Re: [PATCH 4/5] powerpc/zImage: Rework serial driver probing

2018-03-20 Thread kbuild test robot
Hi Oliver,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Oliver-O-Halloran/powerpc-zImage-Remove-ifdef-in-opal-c/20180320-023705
config: powerpc-ppc64_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/boot/wrapper.a(serial.o):(.got2+0x1c): undefined reference to 
>> `_serial_drv_start'
>> arch/powerpc/boot/wrapper.a(serial.o):(.got2+0x24): undefined reference to 
>> `_serial_drv_end'

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 1/2] powerpc/mm: Add missing tracepoint for tlbie

2018-03-20 Thread kbuild test robot
Hi Christophe,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-mm-Add-missing-tracepoint-for-tlbie/20180320-130831
config: powerpc-maple_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x0): multiple definition of 
>> `__tracepoint_tlbie'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x50): first defined here
>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x28): multiple definition 
>> of `__tracepoint_hash_fault'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x78): first defined here
>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x50): multiple definition 
>> of `__tracepoint_timer_interrupt_exit'
   arch/powerpc/kernel/irq.o:(__tracepoints+0xa0): first defined here
>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0x78): multiple definition 
>> of `__tracepoint_timer_interrupt_entry'
   arch/powerpc/kernel/irq.o:(__tracepoints+0xc8): first defined here
>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0xa0): multiple definition 
>> of `__tracepoint_irq_exit'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x28): first defined here
>> arch/powerpc/mm/pgtable-hash64.o:(__tracepoints+0xc8): multiple definition 
>> of `__tracepoint_irq_entry'
   arch/powerpc/kernel/irq.o:(__tracepoints+0x0): first defined here

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 1/2] powerpc/mm: Add missing tracepoint for tlbie

2018-03-20 Thread Christophe LEROY



Le 19/03/2018 à 23:39, Balbir Singh a écrit :

On Mon, 19 Mar 2018 11:32:38 +0100 (CET)
Christophe Leroy  wrote:


commit 0428491cba927 ("powerpc/mm: Trace tlbie(l) instructions")
added tracepoints for tlbie calls, but _tlbil_va() was forgotten

Fixes: 0428491cba927 ("powerpc/mm: Trace tlbie(l) instructions")
Signed-off-by: Christophe Leroy 
---
  arch/powerpc/mm/mmu_decl.h | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 57fbc554c785..b9991e0c61a2 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -21,6 +21,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #ifdef CONFIG_PPC_MMU_NOHASH
  
@@ -56,6 +57,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,

 unsigned int tsize, unsigned int ind)
  {
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+   trace_tlbie(pid, 0, address, 0, 0, 0, 0);



I did not really cover a whole lot of NOHASH, any idea why its
called tlbil_va and not _tlbie_va?


Seems it comes from 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=0ba3418b8b1c85ee1771c63f1dd12041614e56ff


Kumar ?



The first field is really the lpid and should be 0 for non-virtualized
systems. Can I recommend that we rs field for pid and the rb field for
address? so effectively something like


Yes having the address in rb is expected since it is the name of the 
only field tlbie has in the 32bits instruction set.


Yes I can use rb for the pid.

Christophe


Re: [PATCH 4/5] powerpc/zImage: Rework serial driver probing

2018-03-20 Thread kbuild test robot
Hi Oliver,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180319]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Oliver-O-Halloran/powerpc-zImage-Remove-ifdef-in-opal-c/20180320-023705
config: powerpc64-alldefconfig (attached as .config)
compiler: powerpc-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc64 

All errors (new ones prefixed by >>):

   arch/powerpc/boot/wrapper.a(crt0.o): In function `_zimage_start_opd':
>> (.text+0x0): multiple definition of `_zimage_start_opd'
   arch/powerpc/boot/crt0.o:(.text+0x0): first defined here
   arch/powerpc/boot/wrapper.a(crt0.o): In function `_zimage_start':
>> (.text+0x24): multiple definition of `_zimage_start_lib'
   arch/powerpc/boot/crt0.o:(.text+0x24): first defined here

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 2/2] powerpc/mm: Trace tlbia instruction

2018-03-20 Thread Christophe LEROY



Le 19/03/2018 à 23:43, Balbir Singh a écrit :

On Mon, 19 Mar 2018 11:32:40 +0100 (CET)
Christophe Leroy  wrote:


Add a trace point for tlbia (Translation Lookaside Buffer Invalidate
All) instruction.

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/include/asm/trace.h | 15 +++
  arch/powerpc/mm/mmu_decl.h   |  2 ++
  2 files changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 33f3b479138b..d1d63b173dd7 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -202,6 +202,21 @@ TRACE_EVENT(tlbie,
__entry->r)
  );
  
+TRACE_EVENT(tlbia,

+
+   TP_PROTO(unsigned long lpid),
+   TP_ARGS(lpid),
+   TP_STRUCT__entry(
+   __field(unsigned long, lpid)
+   ),
+
+   TP_fast_assign(
+   __entry->lpid = lpid;
+   ),
+
+   TP_printk("lpid=%ld", __entry->lpid)
+);



Do we want to call this lpid?



Should we can it rs in consistence with tlbie trace ? Or just pid ?
Allthough it is not an argument used by tlbia, I think it is good to 
keep a trace of it to keep track of the reason why tlbia is called.


Christophe