Re: [PATCH v8 29/45] powerpc/pci: Export pci_traverse_device_nodes()

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This renames traverse_pci_devices() to pci_traverse_device_nodes().
The function traverses all subordinate device nodes of the specified
one. Also, the cleanups below are applied to the function. No logical
changes are introduced.

* Rename "pre" to "fn".
* Avoid assignment in if condition reported from checkpatch.pl.

Signed-off-by: Gavin Shan 
---
  arch/powerpc/include/asm/ppc-pci.h   |  6 +++---
  arch/powerpc/kernel/pci_dn.c | 15 ++-
  arch/powerpc/platforms/pseries/msi.c |  4 ++--
  3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h 
b/arch/powerpc/include/asm/ppc-pci.h
index ca0c5bf..8753e4e 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -33,9 +33,9 @@ extern struct pci_dev *isa_bridge_pcidev; /* may be NULL 
if no ISA bus */
  struct device_node;
  struct pci_dn;

-typedef void *(*traverse_func)(struct device_node *me, void *data);




Why remove this typedef? Typedefs are good.

Anyway,


Reviewed-by: Alexey Kardashevskiy 





-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
-   void *data);
+void *pci_traverse_device_nodes(struct device_node *start,
+   void *(*fn)(struct device_node *, void *),
+   void *data);
  void *traverse_pci_dn(struct pci_dn *root,
  void *(*fn)(struct pci_dn *, void *),
  void *data);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index ce10281..ecdccce 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -372,8 +372,9 @@ EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
   * one of these nodes we also assume its siblings are non-pci for
   * performance.
   */
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
-   void *data)
+void *pci_traverse_device_nodes(struct device_node *start,
+   void *(*fn)(struct device_node *, void *),
+   void *data)
  {
struct device_node *dn, *nextdn;
void *ret;
@@ -388,8 +389,11 @@ void *traverse_pci_devices(struct device_node *start, 
traverse_func pre,
if (classp)
class = of_read_number(classp, 1);

-   if (pre && ((ret = pre(dn, data)) != NULL))
-   return ret;
+   if (fn) {
+   ret = fn(dn, data);
+   if (ret)
+   return ret;
+   }

/* If we are a PCI bridge, go down */
if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
@@ -411,6 +415,7 @@ void *traverse_pci_devices(struct device_node *start, 
traverse_func pre,
}
return NULL;
  }
+EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);

  static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
  struct pci_dn *pdn)
@@ -487,7 +492,7 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
}

/* Update dn->phb ptrs for new phb and children devices */
-   traverse_pci_devices(dn, add_pdn, phb);
+   pci_traverse_device_nodes(dn, add_pdn, phb);
  }

  /**
diff --git a/arch/powerpc/platforms/pseries/msi.c 
b/arch/powerpc/platforms/pseries/msi.c
index 272e9ec..543a638 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -305,7 +305,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int 
request)
memset(&counts, 0, sizeof(struct msi_counts));

/* Work out how many devices we have below this PE */
-   traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
+   pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts);

if (counts.num_devices == 0) {
pr_err("rtas_msi: found 0 devices under PE for %s\n",
@@ -320,7 +320,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int 
request)
/* else, we have some more calculating to do */
counts.requestor = pci_device_to_OF_node(dev);
counts.request = request;
-   traverse_pci_devices(pe_dn, count_spare_msis, &counts);
+   pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts);

/* If the quota isn't an integer multiple of the total, we can
 * use the remainder as spare MSIs for anyone that wants them. */




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 28/45] powerpc/pci: Introduce pci_remove_device_node_info()

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This implements and exports pci_remove_device_node_info(). It's
used to remove the pdn (struct pci_dn) for the indicated device
node. The function is going to be used by PowerNV PCI hotplug
driver.

Signed-off-by: Gavin Shan 


Kind of strange that there is no such helper for pseries, is there?


Reviewed-by: Alexey Kardashevskiy 



---
  arch/powerpc/include/asm/pci-bridge.h |  1 +
  arch/powerpc/kernel/pci_dn.c  | 23 +++
  2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 72a9d4e..c6310e2 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -240,6 +240,7 @@ extern struct pci_dn *add_dev_pci_data(struct pci_dev 
*pdev);
  extern void remove_dev_pci_data(struct pci_dev *pdev);
  extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
   struct device_node *dn);
+extern void pci_remove_device_node_info(struct device_node *dn);

  static inline int pci_device_from_OF_node(struct device_node *np,
  u8 *bus, u8 *devfn)
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0a249ff..ce10281 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -331,6 +331,29 @@ struct pci_dn *pci_add_device_node_info(struct 
pci_controller *hose,
  }
  EXPORT_SYMBOL_GPL(pci_add_device_node_info);

+void pci_remove_device_node_info(struct device_node *dn)
+{
+   struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+#ifdef CONFIG_EEH
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+   if (edev)
+   edev->pdn = NULL;
+#endif
+
+   if (!pdn)
+   return;
+
+   WARN_ON(!list_empty(&pdn->child_list));
+   list_del(&pdn->list);
+   if (pdn->parent)
+   of_node_put(pdn->parent->node);
+
+   dn->data = NULL;
+   kfree(pdn);
+}
+EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
+
  /*
   * Traverse a device tree stopping each PCI device in the tree.
   * This is done depth first.  As each node is processed, a "pre"




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 27/45] powerpc/pci: Export pci_add_device_node_info()

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This renames update_dn_pci_info() to pci_add_device_node_info()
with corresponding adjustment on the parameter type and exports it.
The function is used to create pdn (struct pci_dn) for the indicated
device node. Another function, add_pdn(), which is almost a wrapper of
pci_add_device_node_info(), is to be used in traverse_pci_devices(). No
logical changes introduced.

Signed-off-by: Gavin Shan 




Reviewed-by: Alexey Kardashevskiy 



---
  arch/powerpc/include/asm/pci-bridge.h  |  3 ++-
  arch/powerpc/kernel/pci_dn.c   | 30 +++---
  arch/powerpc/platforms/pseries/setup.c |  2 +-
  3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 03f4ee7..72a9d4e 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -238,7 +238,8 @@ extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus 
*bus,
  extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
  extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
  extern void remove_dev_pci_data(struct pci_dev *pdev);
-extern void *update_dn_pci_info(struct device_node *dn, void *data);
+extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+  struct device_node *dn);

  static inline int pci_device_from_OF_node(struct device_node *np,
  u8 *bus, u8 *devfn)
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 38102cb..0a249ff 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -282,13 +282,9 @@ void remove_dev_pci_data(struct pci_dev *pdev)
  #endif /* CONFIG_PCI_IOV */
  }

-/*
- * Traverse_func that inits the PCI fields of the device node.
- * NOTE: this *must* be done before read/write config to the device.
- */
-void *update_dn_pci_info(struct device_node *dn, void *data)
+struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+   struct device_node *dn)
  {
-   struct pci_controller *phb = data;
const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", 
NULL);
const __be32 *regs;
struct device_node *parent;
@@ -299,7 +295,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
return NULL;
dn->data = pdn;
pdn->node = dn;
-   pdn->phb = phb;
+   pdn->phb = hose;
  #ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
  #endif
@@ -331,8 +327,9 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
if (pdn->parent)
list_add_tail(&pdn->list, &pdn->parent->child_list);

-   return NULL;
+   return pdn;
  }
+EXPORT_SYMBOL_GPL(pci_add_device_node_info);

  /*
   * Traverse a device tree stopping each PCI device in the tree.
@@ -432,6 +429,18 @@ void *traverse_pci_dn(struct pci_dn *root,
return NULL;
  }

+static void *add_pdn(struct device_node *dn, void *data)
+{
+   struct pci_controller *hose = data;
+   struct pci_dn *pdn;
+
+   pdn = pci_add_device_node_info(hose, dn);
+   if (!pdn)
+   return ERR_PTR(-ENOMEM);
+
+   return NULL;
+}
+
  /**
   * pci_devs_phb_init_dynamic - setup pci devices under this PHB
   * phb: pci-to-host bridge (top-level bridge connecting to cpu)
@@ -446,8 +455,7 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
struct pci_dn *pdn;

/* PHB nodes themselves must not match */
-   update_dn_pci_info(dn, phb);
-   pdn = dn->data;
+   pdn = pci_add_device_node_info(phb, dn);
if (pdn) {
pdn->devfn = pdn->busno = -1;
pdn->vendor_id = pdn->device_id = pdn->class_code = 0;
@@ -456,7 +464,7 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
}

/* Update dn->phb ptrs for new phb and children devices */
-   traverse_pci_devices(dn, update_dn_pci_info, phb);
+   traverse_pci_devices(dn, add_pdn, phb);
  }

  /**
diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index 36df46e..6f8d020 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -265,7 +265,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block 
*nb, unsigned long act
pdn = parent ? PCI_DN(parent) : NULL;
if (pdn) {
/* Create pdn and EEH device */
-   update_dn_pci_info(np, pdn->phb);
+   pci_add_device_node_info(pdn->phb, np);
eeh_dev_init(PCI_DN(np), pdn->phb);
}





--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 25/45] powerpc/pci: Rename pcibios_find_pci_bus()

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This renames pcibios_find_pci_bus() to pci_find_bus_by_node() to
avoid conflicts with those PCI subsystem weak function names, which
have prefix "pcibios". No logical changes introduced.

Signed-off-by: Gavin Shan 



Reviewed-by: Alexey Kardashevskiy 



---
  arch/powerpc/include/asm/pci-bridge.h  | 2 +-
  arch/powerpc/platforms/pseries/pci_dlpar.c | 5 ++---
  drivers/pci/hotplug/rpadlpar_core.c| 6 +++---
  drivers/pci/hotplug/rpaphp_pci.c   | 2 +-
  4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index c817f38..03f4ee7 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -260,7 +260,7 @@ static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn 
*pdn)
  #endif

  /** Find the bus corresponding to the indicated device node */
-extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
+extern struct pci_bus *pci_find_bus_by_node(struct device_node *dn);

  /** Remove all of the PCI devices under this bus */
  extern void pci_remove_pci_devices(struct pci_bus *bus);
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c 
b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 5d4a3df..aee22b4 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -54,8 +54,7 @@ find_bus_among_children(struct pci_bus *bus,
return child;
  }

-struct pci_bus *
-pcibios_find_pci_bus(struct device_node *dn)
+struct pci_bus *pci_find_bus_by_node(struct device_node *dn)
  {
struct pci_dn *pdn = dn->data;

@@ -64,7 +63,7 @@ pcibios_find_pci_bus(struct device_node *dn)

return find_bus_among_children(pdn->phb->bus, dn);
  }
-EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
+EXPORT_SYMBOL_GPL(pci_find_bus_by_node);

  struct pci_controller *init_phb_dynamic(struct device_node *dn)
  {
diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
b/drivers/pci/hotplug/rpadlpar_core.c
index 730982b..acbf041 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -175,7 +175,7 @@ static int dlpar_add_pci_slot(char *drc_name, struct 
device_node *dn)
struct pci_dev *dev;
struct pci_controller *phb;

-   if (pcibios_find_pci_bus(dn))
+   if (pci_find_bus_by_node(dn))
return -EINVAL;

/* Add pci bus */
@@ -212,7 +212,7 @@ static int dlpar_remove_phb(char *drc_name, struct 
device_node *dn)
struct pci_dn *pdn;
int rc = 0;

-   if (!pcibios_find_pci_bus(dn))
+   if (!pci_find_bus_by_node(dn))
return -EINVAL;

/* If pci slot is hotpluggable, use hotplug to remove it */
@@ -356,7 +356,7 @@ int dlpar_remove_pci_slot(char *drc_name, struct 
device_node *dn)

pci_lock_rescan_remove();

-   bus = pcibios_find_pci_bus(dn);
+   bus = pci_find_bus_by_node(dn);
if (!bus) {
ret = -EINVAL;
goto out;
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 1099b38..a9180bb 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -93,7 +93,7 @@ int rpaphp_enable_slot(struct slot *slot)
if (rc)
return rc;

-   bus = pcibios_find_pci_bus(slot->dn);
+   bus = pci_find_bus_by_node(slot->dn);
if (!bus) {
err("%s: no pci_bus for dn %s\n", __func__, 
slot->dn->full_name);
return -EINVAL;




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 24/45] powerpc/pci: Rename pcibios_{add,remove}_pci_devices()

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This renames pcibios_{add,remove}_pci_devices() to avoid conflicts
with names of the weak functions in PCI subsystem, which have the
prefix "pcibios". No logical changes introduced.

Signed-off-by: Gavin Shan 
---
  arch/powerpc/include/asm/pci-bridge.h |  4 ++--
  arch/powerpc/kernel/eeh_driver.c  | 12 ++--
  arch/powerpc/kernel/pci-hotplug.c | 15 +++
  drivers/pci/hotplug/rpadlpar_core.c   |  2 +-
  drivers/pci/hotplug/rpaphp_core.c |  4 ++--
  drivers/pci/hotplug/rpaphp_pci.c  |  2 +-
  6 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 4dd6ef4..c817f38 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -263,10 +263,10 @@ static inline struct eeh_dev *pdn_to_eeh_dev(struct 
pci_dn *pdn)
  extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);

  /** Remove all of the PCI devices under this bus */
-extern void pcibios_remove_pci_devices(struct pci_bus *bus);
+extern void pci_remove_pci_devices(struct pci_bus *bus);



pci_lala_pci_lala() ("pci" is used twice) looks weird, if the prefix is 
"pci", what other device types can they handle?...


Maybe pcihp_add_devices(), pcihp_remove_devices(), as these are defined in 
pci-hotplug.c?





  /** Discover new pci devices under this bus, and add them */
-extern void pcibios_add_pci_devices(struct pci_bus *bus);
+extern void pci_add_pci_devices(struct pci_bus *bus);


  extern void isa_bridge_find_early(struct pci_controller *hose);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index fb6207d..59e53fe 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -621,7 +621,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
 * We don't remove the corresponding PE instances because
 * we need the information afterwords. The attached EEH
 * devices are expected to be attached soon when calling
-* into pcibios_add_pci_devices().
+* into pci_add_pci_devices().
 */
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
@@ -630,7 +630,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
} else {
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
-   pcibios_remove_pci_devices(bus);
+   pci_remove_pci_devices(bus);
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
@@ -681,7 +681,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
-   pcibios_add_pci_devices(bus);
+   pci_add_pci_devices(bus);
} else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
@@ -691,7 +691,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
-   pcibios_add_pci_devices(frozen_bus);
+   pci_add_pci_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);

@@ -896,7 +896,7 @@ perm_error:
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

pci_lock_rescan_remove();
-   pcibios_remove_pci_devices(frozen_bus);
+   pci_remove_pci_devices(frozen_bus);
pci_unlock_rescan_remove();
}
}
@@ -981,7 +981,7 @@ static void eeh_handle_special_event(void)
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
-   pcibios_remove_pci_devices(bus);
+   pci_remove_pci_devices(bus);
}
pci_unlock_rescan_remove();
}
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 59c4361..78bf2a1 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -38,20 +38,20 @@ void pcibios_release_device(struct pci_dev *dev)
  }

  /**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * pci_remove_pci_devices - remove all devices under this bus
   * @bus: the indicated PCI bus
   *
   * Remove all of the PCI devices under this bus both from the
   * linux pci device tree, and from the powerpc EEH address cache.
   */
-void 

Re: [PATCH v8 23/45] powerpc/powernv: Dynamically release PEs

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This supports releasing PEs dynamically. Firstly, this moves
pnv_pci_ioda2_release_dma_pe() around, which is called to
release DMA resource on releasing IODA2 PE.



IMHO the move would only make sense if we could get rid of the forward 
declarations, but this is not the case.




Secondly, several
functions are implemented to release the consumed resources
on releasing the PE:

* pnv_pci_ioda1_unset_window() to unset TVEs for the PE.
* pnv_pci_ioda1_release_dma_pe() to unset TVEs for the PE and
  destroy the IOMMU table.
* pnv_ioda_release_pe_seg() releases the consumed IO/M32/M64
  segments by the PE.

Lastly, this adds a reference count of PE, representing the number
of PCI devices associated with the PE. The reference count is
increased when PCI device joins the PE. It's decreased when PCI
device leaves the PE in pnv_pci_release_device(). When the count
becomes zero, its consumed resources are released by functions
as mentioned above. Note that the count is accessed concurrently.
So a "counter" with "int" type is enough here.

Signed-off-by: Gavin Shan 
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 236 ++
  arch/powerpc/platforms/powernv/pci.h  |   1 +
  2 files changed, 209 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 077f9db..fa428a8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -119,6 +119,158 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
  }

+static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe);
+static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
+  int num);
+static void pnv_pci_ioda1_release_dma_pe(struct pnv_ioda_pe *pe)
+{
+   struct iommu_table *tbl;
+   unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
+   int64_t rc;
+
+   if (!weight)
+   return;
+
+   tbl = pe->table_group.tables[0];
+   rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
+   if (rc)
+   pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+
+   if (pe->table_group.group) {
+   iommu_group_put(pe->table_group.group);
+   WARN_ON(pe->table_group.group);
+   }
+
+   pnv_pci_ioda_table_free_pages(tbl);
+   iommu_free_table(tbl, "pnv");
+}
+
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+  int num);
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+static void pnv_pci_ioda2_release_dma_pe(struct pnv_ioda_pe *pe)



If you left this code in its old location, it would be a lot more obvious 
what you silently changed in this function (checking for weight). Please 
either do not move the code (this is preferred as I am hacking the same chunk 
in "[PATCH kernel 0/2] powerpc/powernv: Fix crash on PF unbind when VF is 
passed" and I'd like to reduce conflicts) or split it into a separate patch.




+{
+   struct iommu_table *tbl;
+   unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
+   int64_t rc;
+
+   if (!weight)
+   return;
+
+   tbl = pe->table_group.tables[0];
+   rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+   if (rc)
+   pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+
+   pnv_pci_ioda2_set_bypass(pe, false);
+   if (pe->table_group.group) {
+   iommu_group_put(pe->table_group.group);
+   WARN_ON(pe->table_group.group);
+   }
+
+   pnv_pci_ioda_table_free_pages(tbl);
+   iommu_free_table(tbl, "pnv");
+}
+
+static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
+{
+   struct pnv_phb *phb = pe->phb;
+   int win, index, *segmap = NULL;
+   int64_t rc;
+
+   for (win = OPAL_M32_WINDOW_TYPE; win <= OPAL_IO_WINDOW_TYPE; win++) {



In "Re: [PATCH v7 27/50] powerpc/powernv: Dynamically release PEs" I 
suggested a shorter and cleaner pnv_ioda_release_window(); what was wrong with it?





+   if (phb->type == PNV_PHB_IODA2 &&
+   (win == OPAL_IO_WINDOW_TYPE || win == OPAL_M64_WINDOW_TYPE))
+   continue;
+
+   switch (win) {
+   case OPAL_IO_WINDOW_TYPE:
+   segmap = phb->ioda.io_segmap;
+   break;
+   case OPAL_M32_WINDOW_TYPE:
+   segmap = phb->ioda.m32_segmap;
+   break;
+   case OPAL_M64_WINDOW_TYPE:
+   segmap = phb->ioda.m64_segmap;
+   break;
+   }
+
+   for (index = 0; index < phb->ioda.total_pe_num; index++) {
+   if 

Re: [PATCH v8 22/45] powerpc/powernv/ioda1: Support releasing IODA1 TCE table

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

pnv_pci_ioda_table_free_pages() can be reused to release the IODA1
TCE table when releasing IODA1 PE in subsequent patches.

This renames the following functions to support releasing IODA1 TCE
table: pnv_pci_ioda2_table_free_pages() to pnv_pci_ioda_table_free_pages(),
pnv_pci_ioda2_table_do_free_pages() to pnv_pci_ioda_table_do_free_pages().
No logical changes introduced.


I can only see renaming here but it seems (from 
IODA_architecture_04-14-2008.pdf) that IODA1 does not support multi-level 
TCE tables in the way IODA2 does.





Signed-off-by: Gavin Shan 
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index d360607..077f9db 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -51,7 +51,7 @@
  #define POWERNV_IOMMU_DEFAULT_LEVELS  1
  #define POWERNV_IOMMU_MAX_LEVELS  5

-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+static void pnv_pci_ioda_table_free_pages(struct iommu_table *tbl);

  static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
@@ -1352,7 +1352,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev, struct pnv_ioda_pe
iommu_group_put(pe->table_group.group);
BUG_ON(pe->table_group.group);
}
-   pnv_pci_ioda2_table_free_pages(tbl);
+   pnv_pci_ioda_table_free_pages(tbl);
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
  }

@@ -1946,7 +1946,7 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, 
long index,

  static void pnv_ioda2_table_free(struct iommu_table *tbl)
  {
-   pnv_pci_ioda2_table_free_pages(tbl);
+   pnv_pci_ioda_table_free_pages(tbl);
iommu_free_table(tbl, "pnv");
  }

@@ -2448,7 +2448,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int 
nid, unsigned shift,
return addr;
  }

-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+static void pnv_pci_ioda_table_do_free_pages(__be64 *addr,
unsigned long size, unsigned level);

  static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
@@ -2487,7 +2487,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, 
__u64 bus_offset,
 * release partially allocated table.
 */
if (offset < tce_table_size) {
-   pnv_pci_ioda2_table_do_free_pages(addr,
+   pnv_pci_ioda_table_do_free_pages(addr,
1ULL << (level_shift - 3), levels - 1);
return -ENOMEM;
}
@@ -2505,7 +2505,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, 
__u64 bus_offset,
return 0;
  }

-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+static void pnv_pci_ioda_table_do_free_pages(__be64 *addr,
unsigned long size, unsigned level)
  {
const unsigned long addr_ul = (unsigned long) addr &
@@ -2521,7 +2521,7 @@ static void pnv_pci_ioda2_table_do_free_pages(__be64 
*addr,
if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
continue;

-   pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+   pnv_pci_ioda_table_do_free_pages(__va(hpa), size,
level - 1);
}
}
@@ -2529,7 +2529,7 @@ static void pnv_pci_ioda2_table_do_free_pages(__be64 
*addr,
free_pages(addr_ul, get_order(size << 3));
  }

-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+static void pnv_pci_ioda_table_free_pages(struct iommu_table *tbl)
  {
const unsigned long size = tbl->it_indirect_levels ?
tbl->it_level_size : tbl->it_size;
@@ -2537,7 +2537,7 @@ static void pnv_pci_ioda2_table_free_pages(struct 
iommu_table *tbl)
if (!tbl->it_size)
return;

-   pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+   pnv_pci_ioda_table_do_free_pages((__be64 *)tbl->it_base, size,
tbl->it_indirect_levels);
  }





--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 1/1] powerpc/86xx: Add support for Emerson/Artesyn MVME7100

2016-04-18 Thread Scott Wood
On Mon, 2016-04-18 at 09:57 +0200, Alessio Igor Bogani wrote:
> + pci0: pcie@f1008000 {
> + reg = <0xf1008000 0x1000>;
> + ranges = <0x0200 0x0 0x8000 0x8000 0x0
> 0x5000
> +   0x0100 0x0 0x 0xf000 0x0
> 0x0080>;
> + pcie@0 {
> + ranges = <0x0200 0x0 0x8000
> +   0x0200 0x0 0x8000
> +   0x0 0x5000
> +
> +   0x0100 0x0 0x
> +   0x0100 0x0 0x
> +   0x0 0x0080>;
> + };
> + };
> +
> + pci1: pcie@f1009000 {
> + compatible = "fsl,mpc8641-pcie";
> + device_type = "pci";
> + #size-cells = <2>;
> + #address-cells = <3>;
> + reg = <0xf1009000 0x1000>;
> + bus-range = <0 0xff>;

Why are pci0 and pci1 so different?  Why does mpc8641si-post.dtsi not have
pci1?

> +asm(".globl _zimage_start\n\
> + _zimage_start:\n\
> + mfmsr   10\n\
> + rlwinm  10,10,0,~(1<<15)/* Clear MSR_EE */\n\
> + sync\n\
> + mtmsr   10\n\
> + isync\n\
> + b _zimage_start_lib\n\
> +");

Please put this in an asm file.

Is U-Boot really not clearing MSR[EE]?  How old is this U-Boot?

> diff --git a/arch/powerpc/boot/ppcboot.h b/arch/powerpc/boot/ppcboot.h
> index 6ae6f90..7b758be 100644
> --- a/arch/powerpc/boot/ppcboot.h
> +++ b/arch/powerpc/boot/ppcboot.h
> @@ -43,7 +43,7 @@ typedef struct bd_info {
>   unsigned long   bi_sramstart;   /* start of SRAM memory
> */
>   unsigned long   bi_sramsize;/* size  of SRAM
> memory */
>  #if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\
> - defined(TARGET_83xx)
> + defined(TARGET_83xx) || defined(TARGET_MVME7100)
>   unsigned long   bi_immr_base;   /* base of IMMR register
> */
>  #endif

TARGET_86xx would match the U-Boot definition better.

> +/*
> + * Called very early, device-tree isn't unflattened
> + */
> +static int __init mvme7100_probe(void)
> +{
> + unsigned long root = of_get_flat_dt_root();
> +
> + if (!of_flat_dt_is_compatible(root, "artesyn,MVME7100"))
> + return 0;
> +
> + _set_L2CR(_get_L2CR() | L2CR_L2E);
> + return 1;
> +}

U-Boot doesn't enable L2 cache?

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 21/45] powerpc/powernv: Create PEs at PCI hot plugging time

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

Currently, the PEs and their associated resources are assigned
in ppc_md.pcibios_fixup() except those used by SRIOV VFs.


But this new code does not affect IOV and VF's PEs will still be created 
somewhere else rather than pnv_pci_setup_bridge()?




The
function is called once after PCI probing and resource
assignment are completed. So it isn't hotplug friendly.

This creates PEs dynamically by ppc_md.pcibios_setup_bridge(), which
is called on the event during system bootup and PCI hotplug: updating
PCI bridge's windows after resource assignment/reassignment are done.
For partial hotplug case, where not all PCI devices belonging to the
PE are unplugged and plugged again, we just need unbinding/binding
the affected PCI devices with the corresponding PE without creating
new one.

As there is no upstream bridge for root bus that needs to be covered
by PE, we have to create PE for root bus in ppc_md.pcibios_setup_bridge()
before any other PEs can be created, as PE for root bus is the ancestor
to anyone else.


We did not need a root bus PE before? What is the other PE reserved for? 
Comments only say "reserved"...




Also, the windows of root port or the upstream port of PCIe switch behind
root port are extended to be PHB's apertures to accommodate the additional
resources needed by newly plugged devices based on the fact: hotpluggable
slot is behind root port or downstream port of the PCIe switch behind
root port. The extension for those PCI bridges' windows is done in
ppc_md.pcibios_setup_bridge() as well.



This patch seems to be doing way too many things, hard to follow.

Could you please split the patch into smaller chunks? For example (you can 
do it totally differently):

- move pnv_pci_ioda_setup_opal_tce_kill()
- move PE creation from pnv_pci_ioda_fixup() to pnv_pci_setup_bridge();
- add pnv_pci_fixup_bridge_resources()
- add an extra reserved PE for the root bus (and all this magic with 
root_pe_idx/root_pe_populated)

- ...




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC FIX PATCH v0] powerpc,numa: Fix memory_hotplug_max()

2016-04-18 Thread Bharata B Rao
On Sat, Apr 09, 2016 at 03:44:31PM +0530, Bharata B Rao wrote:
> On Fri, Apr 08, 2016 at 12:27:44AM -0500, Nathan Fontenot wrote:
> > On 04/06/2016 04:44 AM, Bharata B Rao wrote:
> > > memory_hotplug_max() uses hot_add_drconf_memory_max() to get maxmimum
> > > addressable memory by referring to ibm,dyanamic-memory property. There
> > > are three problems with the current approach:
> > > 
> > > 1 hot_add_drconf_memory_max() assumes that ibm,dynamic-memory includes
> > >   all the LMBs of the guest, but that is not true for PowerKVM which
> > >   populates only DR LMBs (LMBs that can be hotplugged/removed) in that
> > >   property.
> > > 2 hot_add_drconf_memory_max() multiplies lmb-size with lmb-count to arrive
> > >   at the max possible address. Since ibm,dynamic-memory doesn't include
> > >   RMA LMBs, the address thus obtained will be less than the actual max
> > >   address. For example, if max possible memory size is 32G, with lmb-size
> > >   of 256MB there can be 127 LMBs in ibm,dynamic-memory (1 LMB for RMA
> > >   which won't be present here).  hot_add_drconf_memory_max() would then
> > >   return the max addressable memory as 127 * 256MB = 31.75GB, the max
> > >   address should have been 32G which is what ibm,lrdr-capacity shows.
> > > 3 In PowerKVM, there can be a gap between the end of boot time RAM and
> > >   beginning of hotplug RAM area. So just multiplying lmb-count with
> > >   lmb-size will not provide the correct max possible address for PowerKVM.
> > > 
> > > This patch fixes 1 by using ibm,lrdr-capacity property to return the max
> > > addressable memory whenever the property is present. Then it fixes 2 & 3
> > > by fetching the address of the last LMB in ibm,dynamic-memory property.
> > > 
> > > NOTE: There are some unnecessary changes in the patch because of 
> > > converting
> > > spaces to tabs w/o which checkpatch.pl complains.
> > > 
> > > Signed-off-by: Bharata B Rao 
> > > ---
> > >  arch/powerpc/mm/numa.c | 29 ++---
> > >  1 file changed, 22 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> > > index 669a15e..57d5877 100644
> > > --- a/arch/powerpc/mm/numa.c
> > > +++ b/arch/powerpc/mm/numa.c
> > > @@ -1164,17 +1164,32 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
> > >  static u64 hot_add_drconf_memory_max(void)
> > >  {
> > >  struct device_node *memory = NULL;
> > > -unsigned int drconf_cell_cnt = 0;
> > > -u64 lmb_size = 0;
> > > + struct device_node *dn = NULL;
> > > + unsigned int drconf_cell_cnt = 0;
> > > + u64 lmb_size = 0;
> > >   const __be32 *dm = NULL;
> > > + const __be64 *lrdr = NULL;
> > > + struct of_drconf_cell drmem;
> > > +
> > > + dn = of_find_node_by_path("/rtas");
> > > + if (dn) {
> > > + lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
> > > + of_node_put(dn);
> > > + if (lrdr)
> > > + return be64_to_cpup(lrdr);
> > > + }
> > >  
> > >  memory = 
> > > of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
> > >  if (memory) {
> > > -drconf_cell_cnt = of_get_drconf_memory(memory, );
> > > -lmb_size = of_get_lmb_size(memory);
> > > -of_node_put(memory);
> > > -}
> > > -return lmb_size * drconf_cell_cnt;
> > > + drconf_cell_cnt = of_get_drconf_memory(memory, );
> > > + lmb_size = of_get_lmb_size(memory);
> > > +
> > > + /* Advance to the last cell, each cell has 6 32 bit integers */
> > > + dm += (drconf_cell_cnt - 1) * 6;
> > 
> > You could do this as follows to avoid hard-coding 6
> 
> Can't do that since dm is of type __be32 pointer.
> 
> > dm += (drconf_cell_cnt - 1) * sizeof(struct of_drconf_cell)
> > 
> > > + read_drconf_cell(, );
> > > + of_node_put(memory);
> > > + }
> > > + return drmem.base_addr + lmb_size;
> > 
> > I assume it is a safe assumption that there will only be 1 RMA LMB?
> 
> No, I am not assuming RMA to have 1 LMB here. I fetch the last LMB and
> get the max possible address from it by adding the base address of the
> last LMB with the lmb_size.
> 
> > 
> > I do see that the PAPR defines a bit in the flags field for each LMB
> > in ibm,dynamic-memory as 'reserved'. Is this something you could use
> > to flag RMA LMBs and put them in the ibm,dynamic-memory property?
> > 
> > I'm just curious why these LMBs are not in this property.
> 
> Not sure about both the above observations.
> 
> Section B.6.6 of LoPAPR mentions "... called the RMA, that is represented
> by the first value of the reg property of this first /memory node. Additional
> storage regions may each be represented by their own /memory node that
> includes dynamic reconfiguration (DR) properties or by an entry in
> /ibm,dynamic-reconfiguration-memory nodes"
> 
> Section B.6.6.2 says "All memory which is not subject to dynamic
> reconfiguration 

Re: [PATCH v8 20/45] powerpc/powernv: Allocate PE# in reverse order

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

PE number for one particular PE can be allocated dynamically or
reserved according to the consumed M64 (64-bits prefetchable)
segments of the PE. The M64 resources, and hence their segments
and PE number are assigned/reserved in ascending order. The PE
numbers are allocated dynamically in ascending order as well.
It's not a problem as the PE numbers are reserved and then
allocated all at once in fine order. However, it will introduce
conflicts when PCI hotplug is supported: the PE number to be
reserved for newly added PE might have been assigned.

To resolve above conflicts, this forces the PE number to be
allocated dynamically in reverse order. With this patch applied,
the PE numbers are reserved in ascending order, but allocated
dynamically in reverse order.



The patch is probably ok, the commit log is not - I do not follow it. 
Some PEs are reserved (for what? why does the absolute PE number matter? 
put it in the commit log), that means that the corresponding bits in 
pe_alloc[] should be set so when you will be allocating PEs for a just 
plugged device, you won't pick them and you will pick free ones, and the 
order should not matter. I would think that "reservation" happens once at 
the boot time so you set "used" bits for the reserved PEs then and after 
that the dynamic allocator will skip them.





Signed-off-by: Gavin Shan 
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 14 ++
  1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f182ca7..565725b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -144,16 +144,14 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int 
pe_no)

  static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
  {
-   unsigned long pe;
+   unsigned long pe = phb->ioda.total_pe_num - 1;

-   do {
-   pe = find_next_zero_bit(phb->ioda.pe_alloc,
-   phb->ioda.total_pe_num, 0);
-   if (pe >= phb->ioda.total_pe_num)
-   return NULL;
-   } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
+   for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
+   if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
+   return pnv_ioda_init_pe(phb, pe);
+   }

-   return pnv_ioda_init_pe(phb, pe);
+   return NULL;
  }

  static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 19/45] powerpc/powernv: Use PE instead of number during setup and release

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

In current implementation, the PEs that are allocated or picked
from the reserved list are identified by PE number. The PE instance
has to be picked according to the PE number eventually. We have
same issue when PE is released.

For pnv_ioda_pick_m64_pe() and pnv_ioda_alloc_pe(), this returns
PE instance so that pnv_ioda_setup_bus_PE() can use the allocated
or reserved PE instance directly. Also, pnv_ioda_setup_bus_PE()
returns the reserved/allocated PE instance to be used in subsequent
patches. On the other hand, pnv_ioda_free_pe() uses PE instance
(not number) as its argument. No logical changes introduced.

Signed-off-by: Gavin Shan 



Reviewed-by: Alexey Kardashevskiy 



---
  arch/powerpc/platforms/powernv/pci-ioda.c | 104 +-
  arch/powerpc/platforms/powernv/pci.h  |   2 +-
  2 files changed, 59 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7800897..f182ca7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -119,6 +119,14 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
  }

+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+   phb->ioda.pe_array[pe_no].phb = phb;
+   phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+   return &phb->ioda.pe_array[pe_no];
+}
+
  static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
  {
if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
@@ -131,11 +139,10 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int 
pe_no)
pr_debug("%s: PE %d was reserved on PHB#%x\n",
 __func__, pe_no, phb->hose->global_number);

-   phb->ioda.pe_array[pe_no].phb = phb;
-   phb->ioda.pe_array[pe_no].pe_number = pe_no;
+   pnv_ioda_init_pe(phb, pe_no);
  }

-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
  {
unsigned long pe;

@@ -143,20 +150,20 @@ static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
pe = find_next_zero_bit(phb->ioda.pe_alloc,
phb->ioda.total_pe_num, 0);
if (pe >= phb->ioda.total_pe_num)
-   return IODA_INVALID_PE;
+   return NULL;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));

-   phb->ioda.pe_array[pe].phb = phb;
-   phb->ioda.pe_array[pe].pe_number = pe;
-   return pe;
+   return pnv_ioda_init_pe(phb, pe);
  }

-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
  {
-   WARN_ON(phb->ioda.pe_array[pe].pdev);
+   struct pnv_phb *phb = pe->phb;

-   memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
-   clear_bit(pe, phb->ioda.pe_alloc);
+   WARN_ON(pe->pdev);
+
+   memset(pe, 0, sizeof(struct pnv_ioda_pe));
+   clear_bit(pe->pe_number, phb->ioda.pe_alloc);
  }

  /* The default M64 BAR is shared by all PEs */
@@ -316,7 +323,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
}
  }

-static int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
  {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -326,7 +333,7 @@ static int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool 
all)

/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
-   return IODA_INVALID_PE;
+   return NULL;

/* Allocate bitmap */
size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
@@ -334,7 +341,7 @@ static int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool 
all)
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__);
-   return IODA_INVALID_PE;
+   return NULL;
}

/* Figure out reserved PE numbers by the PE */
@@ -347,7 +354,7 @@ static int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool 
all)
 */
if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc);
-   return IODA_INVALID_PE;
+   return NULL;
}

/*
@@ -393,7 +400,7 @@ static int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool 
all)
}

kfree(pe_alloc);
-   return master_pe->pe_number;
+   return master_pe;
  }

  static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -959,7 +966,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(dev);

Re: [PATCH] cxl: Add a kernel thread to check the coherent platform function's state

2016-04-18 Thread Andrew Donnellan

On 18/04/16 23:05, Christophe Lombard wrote:

In the POWERVM environement, the PHYP CoherentAccel component manages


environment


the state of the Coherant Accelerator Processor Interface adapter and


Coherent


virtualizes CAPI resources, handles CAPP, PSL, PSL Slice errors - and
interrupts - and provides a new set of HCALLs for the OS APIs to utilize
AFUs.

During the course of operation, a coherent platform function can
encounter errors. Some possible reason for errors are:
• Hardware recoverable and unrecoverable errors
• Transient and over-threshold correctable errors

PHYP implements its own state model for the coherent platform function.
The current state of this Acclerator Fonction Unit (AFU) is available


Accelerator Function Unit


through a hcall.

In case of low-level troubles (or error injection), The PHYP component


the


may reset the card and change the AFU state. The PHYP interface doesn't
provide any way to be notified when that happens.

The current implementation of the cxl driver, for the POWERVM
environment, follows the general error recovery procedures required to
reset operation of the coherent platform function. The platform firmware
resets and reconfigures hardware when an external action is required -
attach/detach a process, link ok, 

The purpose of this patch is to interact with the external driver
(where the AFU is shown) even if no action is required. A kernel thread
is needed to check every x seconds the current state of the AFU to see
if we need to enter an error recovery path.

Signed-off-by: Christophe Lombard 


A few minor issues below.


diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 8213372..06dfe7f 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -19,6 +19,10 @@
  #define CXL_SLOT_RESET_EVENT  2
  #define CXL_RESUME_EVENT  3

+#define CXL_KTHREAD"cxl_kthread"
+
+void stop_state_thread(struct cxl_afu *afu);


static?

[...]


-static int afu_do_recovery(struct cxl_afu *afu)
+static int handle_state_thread(void *data)
  {
-   int rc;
+   struct cxl_afu *afu;
+   int rc = 0;


It looks like we don't use rc (see also comment below).



-   /* many threads can arrive here, in case of detach_all for example.
-* Only one needs to drive the recovery
-*/
-   if (mutex_trylock(&afu->guest->recovery_lock)) {
-   rc = afu_update_state(afu);
-   mutex_unlock(&afu->guest->recovery_lock);
-   return rc;
+   pr_devel("in %s\n", __func__);
+
+   afu = (struct cxl_afu*)data;


CodingStyle: space between cxl_afu and *


+   do {
+   set_current_state(TASK_INTERRUPTIBLE);
+
+   if (afu) {
+   afu_update_state(afu);


Should we be checking the retval here?


+   if (afu->guest->previous_state == 
H_STATE_PERM_UNAVAILABLE)
+   goto out;
+   } else
+   return -ENODEV;
+   schedule_timeout(msecs_to_jiffies(3000));
+   } while(!kthread_should_stop());


CodingStyle: space between while and (


+
+out:
+   afu->guest->kthread_tsk = NULL;
+   return rc;
+}
+
+void start_state_thread(struct cxl_afu *afu)


static?


+{
+   if (afu->guest->kthread_tsk)
+   return;
+
+   /* start kernel thread to handle the state of the afu */
+   afu->guest->kthread_tsk = kthread_run(&handle_state_thread,
+ (void *)afu, CXL_KTHREAD);
+   if (IS_ERR(afu->guest->kthread_tsk)) {
+   pr_devel("cannot start state kthread\n");
+   afu->guest->kthread_tsk = NULL;
}
-   return 0;
+}
+
+void stop_state_thread(struct cxl_afu *afu)


static?

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 18/45] powerpc/powernv: Increase PE# capacity

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

Each PHB maintains an array helping to translate 2-bytes Request
ID (RID) to PE# with the assumption that PE# takes one byte, meaning
that we can't have more than 256 PEs. However, pci_dn->pe_number
already had 4-bytes for the PE#.

This extends the PE# capacity for every PHB. After that, the PE number
is represented by 4-bytes value. Then we can reuse IODA_INVALID_PE to
check the PE# in phb->pe_rmap[] is valid or not.



This should be merged into "[PATCH v8 21/45] powerpc/powernv: Create PEs at 
PCI hot plugging time" as it does not make sense alone (this patch does the 
initialization but only 3 patches apart this default value is analyzed -> 
hard to review).





Signed-off-by: Gavin Shan 
Reviewed-by: Daniel Axtens 
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 6 +-
  arch/powerpc/platforms/powernv/pci.h  | 7 ++-
  2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 59782fba..7800897 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -757,7 +757,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, 
struct pnv_ioda_pe *pe)

/* Clear the reverse map */
for (rid = pe->rid; rid < rid_end; rid++)
-   phb->ioda.pe_rmap[rid] = 0;
+   phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;

/* Release from all parents PELT-V */
while (parent) {
@@ -3387,6 +3387,10 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
if (prop32)
phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);

+   /* Invalidate RID to PE# mapping */
+   for (i = 0; i < ARRAY_SIZE(phb->ioda.pe_rmap); ++i)
+   phb->ioda.pe_rmap[i] = IODA_INVALID_PE;
+
/* Parse 64-bit MMIO range */
pnv_ioda_parse_m64_window(phb);

diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 350e630..928cf81 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -160,11 +160,8 @@ struct pnv_phb {
struct list_headpe_list;
struct mutexpe_list_mutex;

-   /* Reverse map of PEs, will have to extend if
-* we are to support more than 256 PEs, indexed
-* bus { bus, devfn }
-*/
-   unsigned char   pe_rmap[0x1];
+   /* Reverse map of PEs, indexed by {bus, devfn} */
+   int pe_rmap[0x1];

/* TCE cache invalidate registers (physical and
 * remapped)




--
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: linux-next: manual merge of the livepatching tree with the powerpc tree

2016-04-18 Thread Michael Ellerman
On Mon, 2016-04-18 at 14:27 +1000, Stephen Rothwell wrote:
> Hi Jiri,
> 
> Today's linux-next merge of the livepatching tree got a conflict in:
> 
>   arch/powerpc/kernel/process.c
> 
> between commit:
> 
>   7f92bc569455 ("powerpc: sparse: Include headers for __weak symbols")
> 
> from the powerpc tree and commit:
> 
>   5d31a96e6c01 ("powerpc/livepatch: Add livepatch stack to struct 
> thread_info")
> 
> from the livepatching tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.

Thanks.

This should go away tomorrow because then my next will have the topic branch
merged.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 17/45] powerpc/powernv/ioda1: Improve DMA32 segment track

2016-04-18 Thread Alexey Kardashevskiy

On 02/17/2016 02:44 PM, Gavin Shan wrote:

In the current implementation, the DMA32 segments required by one specific
PE aren't calculated with the information held in the PE independently.
It conflicts with the PCI hotplug design: PE centralized, meaning the
PE's DMA32 segments should be calculated from the information held in
the PE independently.

This introduces an array (@dma32_segmap) for every PHB to track the
DMA32 segment usage. Besides, this moves the logic calculating PE's
consumed DMA32 segments to pnv_pci_ioda1_setup_dma_pe() so that PE's
DMA32 segments are calculated/allocated from the information held in
the PE (DMA32 weight). Also the logic is improved: we try to allocate
as many DMA32 segments as we can. It's acceptable that fewer DMA32
segments than the expected number are allocated.

Signed-off-by: Gavin Shan 



This DMA segments business was the reason why I have not even tried 
implementing DDW for POWER7 - it is way too different from POWER8 and there 
is no chance that anyone outside Ozlabs will ever try using this in 
practice; the same applies to PCI hotplug on POWER7.


I am suggesting to ditch all IODA1 changes from this patchset as this code 
will hang around (unused) for maybe a year or so and then will be gone, like 
p5ioc2.





---
  arch/powerpc/platforms/powernv/pci-ioda.c | 111 +-
  arch/powerpc/platforms/powernv/pci.h  |   7 +-
  2 files changed, 66 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0fc2309..59782fba 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2007,20 +2007,54 @@ static unsigned int 
pnv_pci_ioda_total_dma_weight(struct pnv_phb *phb)
  }

  static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
-  struct pnv_ioda_pe *pe,
-  unsigned int base,
-  unsigned int segs)
+  struct pnv_ioda_pe *pe)
  {

struct page *tce_mem = NULL;
struct iommu_table *tbl;
-   unsigned int tce32_segsz, i;
+   unsigned int weight, total_weight;
+   unsigned int tce32_segsz, base, segs, i;
int64_t rc;
void *addr;

/* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */
+   total_weight = pnv_pci_ioda_total_dma_weight(phb);
+   weight = pnv_pci_ioda_pe_dma_weight(pe);
+
+   segs = (weight * phb->ioda.dma32_count) / total_weight;
+   if (!segs)
+   segs = 1;
+
+   /*
+* Allocate contiguous DMA32 segments. We begin with the expected
+* number of segments. With one more attempt, the number of DMA32
+* segments to be allocated is decreased by one until one segment
+* is allocated successfully.
+*/
+   while (segs) {
+   for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
+   for (i = base; i < base + segs; i++) {
+   if (phb->ioda.dma32_segmap[i] !=
+   IODA_INVALID_PE)
+   break;
+   }
+
+   if (i >= base + segs)
+   break;
+   }
+
+   if (i >= base + segs)
+   break;
+
+   segs--;
+   }
+
+   if (!segs) {
+   pe_warn(pe, "No available DMA32 segments\n");
+   return;
+   }

tbl = pnv_pci_table_alloc(phb->hose->node);
iommu_register_group(&pe->table_group, phb->hose->global_number,
@@ -2028,6 +2062,8 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb 
*phb,
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);

/* Grab a 32-bit TCE table */
+   pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
+   weight, total_weight, base, segs);
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
base * PNV_IODA1_DMA32_SEGSIZE,
(base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
@@ -2064,6 +2100,10 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb 
*phb,
}
}

+   /* Setup DMA32 segment mapping */
+   for (i = base; i < base + segs; i++)
+   phb->ioda.dma32_segmap[i] = pe->pe_number;
+
/* Setup linux iommu table */
pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
  base * PNV_IODA1_DMA32_SEGSIZE,
@@ -2538,70 +2578,34 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
  {
struct pci_controller 

Re: [PATCH kernel v3 9/9] powerpc/powernv/npu: Enable NVLink pass through

2016-04-18 Thread Alexey Kardashevskiy

On 04/18/2016 11:52 AM, Alistair Popple wrote:

Hi David,

On Fri, 15 Apr 2016 14:40:20 David Gibson wrote:

On Tue, Apr 12, 2016 at 06:37:50PM +1000, Alexey Kardashevskiy wrote:

IBM POWER8 NVlink systems come with Tesla K40-ish GPUs each of which
also has a couple of fast speed links (NVLink). The interface to links
is exposed as an emulated PCI bridge which is included into the same
IOMMU group as the corresponding GPU.

In the kernel, NPUs get a separate PHB of the PNV_PHB_NPU type and a PE.

In order to make these links work when GPU is passed to the guest,
these bridges need to be passed as well; otherwise performance will
degrade.

This implements and exports API to manage NPU state in regard to VFIO;
it replicates iommu_table_group_ops.

This defines a new pnv_pci_ioda2_npu_ops which is assigned to
the IODA2 bridge if there are NPUs for a GPU on the bridge.
The new callbacks call the default IODA2 callbacks plus new NPU API.
This adds a gpe_table_group_to_npe() helper to find NPU PE for the IODA2
table_group, it is not expected to fail as the helper is only called
from the pnv_pci_ioda2_npu_ops.

This adds a pnv_pci_npu_setup_iommu() helper which adds NPUs to
the GPU group if any found. The helper uses helpers to look for
the "ibm,gpu" property in the device tree which is a phandle of
the corresponding GPU.

This adds an additional loop over PEs in pnv_ioda_setup_dma() as the main
loop skips NPU PEs as they do not have 32bit DMA segments.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v3:
* moved NPU-to-GPU IOMMU grouping later after all PHBs are discovered
* removed hack to make iommu_add_device() work, iommu_group_add_device()
is used instead
* cleanup in gpe_table_group_to_npe_cb()

v2:
* reimplemented to support NPU + GPU in the same group
* merged "powerpc/powernv/npu: Add NPU devices to IOMMU group" and
"powerpc/powernv/npu: Enable passing through via VFIO" into this patch
---
  arch/powerpc/platforms/powernv/npu-dma.c  | 126 ++
  arch/powerpc/platforms/powernv/pci-ioda.c | 105 +
  arch/powerpc/platforms/powernv/pci.h  |   6 ++
  3 files changed, 237 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
b/arch/powerpc/platforms/powernv/npu-dma.c
index 8e70221..7cb9f6a 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -12,6 +12,7 @@
  #include 
  #include 
  #include 
+#include 

  #include 
  #include 
@@ -262,3 +263,128 @@ void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, 
bool bypass)
}
}
  }
+
+long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+   struct iommu_table *tbl)
+{
+   struct pnv_phb *phb = npe->phb;
+   int64_t rc;
+   const unsigned long size = tbl->it_indirect_levels ?
+   tbl->it_level_size : tbl->it_size;
+   const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+   const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+   pe_info(npe, "Setting up window#%d %llx..%llx pg=%lx\n", num,
+   start_addr, start_addr + win_size - 1,
+   IOMMU_PAGE_SIZE(tbl));
+
+   /* Ignore @num as there is just one window per NPU */
+   rc = opal_pci_map_pe_dma_window(phb->opal_id,
+   npe->pe_number,
+   npe->pe_number,
+   tbl->it_indirect_levels + 1,
+   __pa(tbl->it_base),
+   size << 3,
+   IOMMU_PAGE_SIZE(tbl));
+   if (rc) {
+   pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
+   return rc;
+   }
+
+   pnv_pci_link_table_and_group(phb->hose->node, num,
+   tbl, &npe->table_group);
+   pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+
+   return rc;
+}
+
+long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
+{
+   struct pnv_phb *phb = npe->phb;
+   long ret;
+
+   pe_info(npe, "Removing DMA window #%d\n", num);
+
+   /* Ignore @num as there is just one window per NPU */
+   ret = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+   npe->pe_number,
+   0/* levels */, 0/* table address */,
+   0/* table size */, 0/* page size */);
+   if (ret)
+   pe_warn(npe, "Unmapping failed, ret = %ld\n", ret);
+   else
+   pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+
+   pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
+   &npe->table_group);
+
+   return ret;
+}
+
+/* Switch ownership from platform code to external user (e.g. VFIO) */
+void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+{
+   struct pnv_phb *phb = npe->phb;
+   int64_t ret;
+
+   if (npe->table_group.tables[0]) {
+   /* Disable 32bit window */
+  

Re: [RFC v4] powerpc/devtree: Parse new DRC mem/cpu/dev device tree elements

2016-04-18 Thread Michael Ellerman
On Mon, 2016-04-18 at 09:38 -0500, m...@linux.vnet.ibm.com wrote:

> Several properties in the DRC device tree format are replaced by
> more compact representations to allow, for example, for the encoding
> of vast amounts of memory, and or reduced duplication of information
> in related data structures.
> 
> "ibm,drc-info": This property, when present, replaces the following
> four properties: ibm,drc-indexes, ibm,drc-names, ibm,drc-types
> and ibm,drc-power-domains.  This property is defined for all
> dynamically reconfigurable platform nodes.  The "ibm,drc-info" elements
> are intended to provide a more compact representation, and reduce some
> search overhead.
> 
> "ibm,dynamic-memory-v2": This property replaces the "ibm,dynamic-memory"
> node representation within the "ibm,dynamic-reconfiguration-memory"
> property.  This element format is intended to provide a more compact
> representation of memory, especially, for systems with massive amounts
> of RAM.
> 
> "ibm,architecture.vec": Bit flags are added to this data structure
> by the front end processor to inform the kernel as to whether to expect
> the changes to one or both of the device tree structures "ibm,drc-info"
> and "ibm,dynamic-memory-v2".
> 
> The new element structures, "ibm,dynamic-memory-v2" and "ibm,drc-info",
> should completely replace the previously used structures at execution.
> 
> Signed-off-by: Michael Bringmann 
> ---

What's changed in version 2, 3 and 4?

I also sent you comments on v2, did you see them?

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

unsubscribe

2016-04-18 Thread cybin


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Trouble with DMA on PPC linux question

2016-04-18 Thread Bruce_Leonard
Good afternoon everyone,

We're trying to get some performance gains in an older embedded design by 
adding DMA to our NAND driver.  The HW is an MPC8349 talking across a PCI 
bus to a NAND controller and we have 512Mb of RAM.  We're using the 3.18 
kernel and the Freescale "fsl,mpc8349-dma" driver.  I've verified using a 
bus analyzer that DMA transactions are occurring on the PCI bus correctly 
(correct addresses and the data I'm reading is coming across the bus to 
the processor correctly).  What is going wrong is that, periodically, the data 
being read doesn't make it to RAM.  I've narrowed this down to the 
dma_addr_t I get back from dma_map_single().

Now I'm not an expert on how memory management in the PPC linux kernel 
works, but based on some experimentation and stepping through some of the 
code, translating a kernel virtual address is essentially subtracting 
0xC000 from the virtual address.  I know the equation is a bit more than 
that; I've dug into some of the macros, but many of the constants compile 
to zero on my setup, so the end result is just the subtraction.

On the DMA transactions that work, the virtual address I hand to 
dma_map_single() is something like 0xe084 and the dma_addr_t result is 
0x1084 which is less than my 512Mb limit.  On the transactions that 
don't work, the virtual address is 0xd539 with the mapped result being 
0x2539, which is past my upper bound on my RAM.  In fact it's not even 
in my memory map, there's a hole there.  (Evidently the MPC8349 DMA engine 
bypasses the TLBs, since I'm not getting an exception of any 
kind...learned something new today!)  So on the transactions that don't 
work, they fail because the physical address I give to the DMA engine 
doesn't exist.  The only error indication I get is when I get an ECC error 
because what's pointed to by the virtual address (wherever that may be) 
still contains zeros and it fails the ECC comparison check.

So my question is, where should I be looking or what config option should 
I be checking to try and figure out why the upper layers 
(MTD/UBI/UBIFS/user space) should be giving the NAND layer or my driver a 
virtual address that can't be translated into a physical address?  One 
thing I have noticed (though I don't know if it's relevant or not) is that 
when I get a "good" virtual address it's through a call to 
nand_subpage_read() and when I get a "bad" virtual address it's through a 
call to nand_read_page_swecc().

I'm not asking if someone can solve my problem for me, but any suggestions 
of what rocks I can turn over to look for clues would be greatly 
appreciated.

Thanks for your time and suggestions!

Bruce
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: clock_gettime.2: _COARSE clocks are not always faster...

2016-04-18 Thread Michael Kerrisk (man-pages)
On 04/18/2016 04:12 PM, Michael Kerrisk (man-pages) wrote:
> Hello Rasmus,
> 
> On 04/09/2016 05:50 PM, Rasmus Villemoes wrote:
>> Hi Michael
>>
>> The other day, I was curious how the vdso was implemented on ppc, and I
>> noted that neither ppc32 or ppc64 handle the _COARSE versions of
>> CLOCK_{REALTIME,MONOTONIC} in the vdso, so they fall back to an actual
>> syscall. And sure enough, measuring CLOCK_MONOTONIC
>> vs. CLOCK_MONOTONIC_COARSE shows that the latter has three times as much
>> overhead as the former.
>>
>> Whether it's worth adding a note to the man page is up to you.
> 
> I think it's useful to do so. For both *COARSE flags, I added:
> 
> [[
> Requires per-architecture support, and probably also architecture support
> for this flag in the vdso(7).
> ]]

And in the PowerPC sections of vdso(7), I added:

   The CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE clocks are
   not   supported   by   the   __kernel_clock_getres  and  __ker‐
   nel_clock_gettime interfaces; the kernel falls back to the real
   system call.

Cheers,

Michael

-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: Load Monitor Register Support

2016-04-18 Thread Jack Miller
This enables new registers, LMRR and LMSER, that can trigger an EBB in
userspace code when a monitored load (via the new ldmx instruction)
loads memory from a monitored space. This facility is controlled by a
new FSCR bit, LM.

This patch disables the control bit on CPU setup and enables that bit
when a facility unavailable exception is taken for using it. On context
switch, this bit is then used to determine whether the two relevant
registers are saved and restored. This is done lazily for performance
reasons.

Signed-off-by: Jack Miller 
---
 arch/powerpc/include/asm/processor.h  |  2 ++
 arch/powerpc/include/asm/reg.h|  5 +
 arch/powerpc/kernel/cpu_setup_power.S |  3 ++-
 arch/powerpc/kernel/process.c | 20 
 arch/powerpc/kernel/traps.c   |  4 
 5 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 009fab1..2bb822b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -314,6 +314,8 @@ struct thread_struct {
unsigned long   mmcr2;
unsignedmmcr0;
unsignedused_ebb;
+   unsigned long   lmrr;
+   unsigned long   lmser;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7972c9f..ab98ca4 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -282,6 +282,8 @@
 #define SPRN_HRMOR 0x139   /* Real mode offset register */
 #define SPRN_HSRR0 0x13A   /* Hypervisor Save/Restore 0 */
 #define SPRN_HSRR1 0x13B   /* Hypervisor Save/Restore 1 */
+#define SPRN_LMRR  0x32D   /* Load Monitor Region Register */
+#define SPRN_LMSER 0x32E   /* Load Monitor Section Enable Register */
 #define SPRN_IC0x350   /* Virtual Instruction Count */
 #define SPRN_VTB   0x351   /* Virtual Time Base */
 #define SPRN_LDBAR 0x352   /* LD Base Address Register */
@@ -291,6 +293,7 @@
 #define SPRN_PMCR  0x374   /* Power Management Control Register */
 
 /* HFSCR and FSCR bit numbers are the same */
+#define FSCR_LM_LG 11  /* Enable Load Monitor Registers */
 #define FSCR_TAR_LG8   /* Enable Target Address Register */
 #define FSCR_EBB_LG7   /* Enable Event Based Branching */
 #define FSCR_TM_LG 5   /* Enable Transactional Memory */
@@ -300,10 +303,12 @@
 #define FSCR_VECVSX_LG 1   /* Enable VMX/VSX  */
 #define FSCR_FP_LG 0   /* Enable Floating Point */
 #define SPRN_FSCR  0x099   /* Facility Status & Control Register */
+#define   FSCR_LM  __MASK(FSCR_LM_LG)
 #define   FSCR_TAR __MASK(FSCR_TAR_LG)
 #define   FSCR_EBB __MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR__MASK(FSCR_DSCR_LG)
 #define SPRN_HFSCR 0xbe/* HV=1 Facility Status & Control Register */
+#define   HFSCR_LM __MASK(FSCR_LM_LG)
 #define   HFSCR_TAR__MASK(FSCR_TAR_LG)
 #define   HFSCR_EBB__MASK(FSCR_EBB_LG)
 #define   HFSCR_TM __MASK(FSCR_TM_LG)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
b/arch/powerpc/kernel/cpu_setup_power.S
index 584e119..a232930 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -157,7 +157,8 @@ __init_LPCR:
 
 __init_FSCR:
mfspr   r3,SPRN_FSCR
-   ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
+   ori r3,r3,FSCR_LM|FSCR_TAR|FSCR_DSCR|FSCR_EBB
+   xorir3,r3,FSCR_LM
mtspr   SPRN_FSCR,r3
blr
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 00bf6f5..f0061ec 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1005,6 +1005,14 @@ static inline void save_sprs(struct thread_struct *t)
 */
t->tar = mfspr(SPRN_TAR);
}
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   /* Conditionally save Load Monitor registers, if enabled */
+   if (t->fscr & FSCR_LM) {
+   t->lmrr = mfspr(SPRN_LMRR);
+   t->lmser = mfspr(SPRN_LMSER);
+   }
+   }
 #endif
 }
 
@@ -1041,6 +1049,16 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   /* Conditionally restore Load Monitor registers, if enabled */
+   if (new_thread->fscr & FSCR_LM) {
+   if (old_thread->lmrr != new_thread->lmrr);
+   mtspr(SPRN_LMRR, new_thread->lmrr);
+   if (old_thread->lmser != new_thread->lmser);
+   mtspr(SPRN_LMSER, new_thread->lmser);
+   }
+   }
 #endif
 }
 
@@ -1566,6 +1584,8 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
  

[PATCH 1/3] powerpc: Complete FSCR context switch

2016-04-18 Thread Jack Miller
Previously we just saved the FSCR, but only restored it in some
settings, and never copied it thread to thread. This patch always
restores the FSCR and formalizes new threads inheriting its setting so
that later we can manipulate FSCR bits in start_thread.

Signed-off-by: Jack Miller 
---
 arch/powerpc/kernel/process.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b8500b4..00bf6f5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1019,18 +1019,12 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
 #ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR)) {
u64 dscr = get_paca()->dscr_default;
-   u64 fscr = old_thread->fscr & ~FSCR_DSCR;
 
-   if (new_thread->dscr_inherit) {
+   if (new_thread->dscr_inherit)
dscr = new_thread->dscr;
-   fscr |= FSCR_DSCR;
-   }
 
if (old_thread->dscr != dscr)
mtspr(SPRN_DSCR, dscr);
-
-   if (old_thread->fscr != fscr)
-   mtspr(SPRN_FSCR, fscr);
}
 
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
@@ -1041,6 +1035,9 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
if (old_thread->ebbrr != new_thread->ebbrr)
mtspr(SPRN_EBBRR, new_thread->ebbrr);
 
+   if (old_thread->fscr != new_thread->fscr)
+   mtspr(SPRN_FSCR, new_thread->fscr);
+
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
@@ -1478,6 +1475,9 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
p->thread.ppr = INIT_PPR;
+
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   p->thread.fscr = mfspr(SPRN_FSCR);
 #endif
kregs->nip = ppc_function_entry(f);
return 0;
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/3] powerpc: Load Monitor Register Support

2016-04-18 Thread kbuild test robot
Hi Jack,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.6-rc4 next-20160418]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Jack-Miller/powerpc-Complete-FSCR-context-switch/20160419-031650
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allnoconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   arch/powerpc/kernel/process.c: In function 'start_thread':
>> arch/powerpc/kernel/process.c:1615:17: error: 'struct thread_struct' has no 
>> member named 'fscr'
 current->thread.fscr &= ~FSCR_LM;
^

vim +1615 arch/powerpc/kernel/process.c

  1609  if (cpu_has_feature(CPU_FTR_TM))
  1610  regs->msr |= MSR_TM;
  1611  current->thread.tm_tfhar = 0;
  1612  current->thread.tm_texasr = 0;
  1613  current->thread.tm_tfiar = 0;
  1614  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 1615  current->thread.fscr &= ~FSCR_LM;
  1616  }
  1617  EXPORT_SYMBOL(start_thread);
  1618  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: Load Monitor Register Support

2016-04-18 Thread Jack Miller
This enables new registers, LMRR and LMSER, that can trigger an EBB in
userspace code when a monitored load (via the new ldmx instruction)
loads memory from a monitored space. This facility is controlled by a
new FSCR bit, LM.

This patch disables the control bit on CPU setup and enables that bit
when a facility unavailable exception is taken for using it. On context
switch, this bit is then used to determine whether the two relevant
registers are saved and restored. This is done lazily for performance
reasons.

Signed-off-by: Jack Miller 
---
 arch/powerpc/include/asm/processor.h  |  2 ++
 arch/powerpc/include/asm/reg.h|  5 +
 arch/powerpc/kernel/cpu_setup_power.S |  3 ++-
 arch/powerpc/kernel/process.c | 19 +++
 arch/powerpc/kernel/traps.c   |  4 
 5 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 009fab1..2bb822b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -314,6 +314,8 @@ struct thread_struct {
unsigned long   mmcr2;
unsignedmmcr0;
unsignedused_ebb;
+   unsigned long   lmrr;
+   unsigned long   lmser;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7972c9f..ab98ca4 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -282,6 +282,8 @@
 #define SPRN_HRMOR 0x139   /* Real mode offset register */
 #define SPRN_HSRR0 0x13A   /* Hypervisor Save/Restore 0 */
 #define SPRN_HSRR1 0x13B   /* Hypervisor Save/Restore 1 */
+#define SPRN_LMRR  0x32D   /* Load Monitor Region Register */
+#define SPRN_LMSER 0x32E   /* Load Monitor Section Enable Register */
 #define SPRN_IC0x350   /* Virtual Instruction Count */
 #define SPRN_VTB   0x351   /* Virtual Time Base */
 #define SPRN_LDBAR 0x352   /* LD Base Address Register */
@@ -291,6 +293,7 @@
 #define SPRN_PMCR  0x374   /* Power Management Control Register */
 
 /* HFSCR and FSCR bit numbers are the same */
+#define FSCR_LM_LG 11  /* Enable Load Monitor Registers */
 #define FSCR_TAR_LG8   /* Enable Target Address Register */
 #define FSCR_EBB_LG7   /* Enable Event Based Branching */
 #define FSCR_TM_LG 5   /* Enable Transactional Memory */
@@ -300,10 +303,12 @@
 #define FSCR_VECVSX_LG 1   /* Enable VMX/VSX  */
 #define FSCR_FP_LG 0   /* Enable Floating Point */
 #define SPRN_FSCR  0x099   /* Facility Status & Control Register */
+#define   FSCR_LM  __MASK(FSCR_LM_LG)
 #define   FSCR_TAR __MASK(FSCR_TAR_LG)
 #define   FSCR_EBB __MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR__MASK(FSCR_DSCR_LG)
 #define SPRN_HFSCR 0xbe/* HV=1 Facility Status & Control Register */
+#define   HFSCR_LM __MASK(FSCR_LM_LG)
 #define   HFSCR_TAR__MASK(FSCR_TAR_LG)
 #define   HFSCR_EBB__MASK(FSCR_EBB_LG)
 #define   HFSCR_TM __MASK(FSCR_TM_LG)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
b/arch/powerpc/kernel/cpu_setup_power.S
index 584e119..a232930 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -157,7 +157,8 @@ __init_LPCR:
 
 __init_FSCR:
mfspr   r3,SPRN_FSCR
-   ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
+   ori r3,r3,FSCR_LM|FSCR_TAR|FSCR_DSCR|FSCR_EBB
+   xorir3,r3,FSCR_LM
mtspr   SPRN_FSCR,r3
blr
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 00bf6f5..3e91bd6 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1005,6 +1005,14 @@ static inline void save_sprs(struct thread_struct *t)
 */
t->tar = mfspr(SPRN_TAR);
}
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   /* Conditionally save Load Monitor registers, if enabled */
+   if (t->fscr & FSCR_LM) {
+   t->lmrr = mfspr(SPRN_LMRR);
+   t->lmser = mfspr(SPRN_LMSER);
+   }
+   }
 #endif
 }
 
@@ -1041,6 +1049,16 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   /* Conditionally restore Load Monitor registers, if enabled */
+   if (new_thread->fscr & FSCR_LM) {
+   if (old_thread->lmrr != new_thread->lmrr);
+   mtspr(SPRN_LMRR, new_thread->lmrr);
+   if (old_thread->lmser != new_thread->lmser);
+   mtspr(SPRN_LMSER, new_thread->lmser);
+   }
+   }
 #endif
 }
 
@@ -1592,6 +1610,7 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
   

[PATCH 1/3] powerpc: Complete FSCR context switch

2016-04-18 Thread Jack Miller
Previously we just saved the FSCR, but only restored it in some
settings, and never copied it thread to thread. This patch always
restores the FSCR and formalizes new threads inheriting its setting so
that later we can manipulate FSCR bits in start_thread.

Signed-off-by: Jack Miller 
---
 arch/powerpc/kernel/process.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b8500b4..00bf6f5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1019,18 +1019,12 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
 #ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR)) {
u64 dscr = get_paca()->dscr_default;
-   u64 fscr = old_thread->fscr & ~FSCR_DSCR;
 
-   if (new_thread->dscr_inherit) {
+   if (new_thread->dscr_inherit)
dscr = new_thread->dscr;
-   fscr |= FSCR_DSCR;
-   }
 
if (old_thread->dscr != dscr)
mtspr(SPRN_DSCR, dscr);
-
-   if (old_thread->fscr != fscr)
-   mtspr(SPRN_FSCR, fscr);
}
 
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
@@ -1041,6 +1035,9 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
if (old_thread->ebbrr != new_thread->ebbrr)
mtspr(SPRN_EBBRR, new_thread->ebbrr);
 
+   if (old_thread->fscr != new_thread->fscr)
+   mtspr(SPRN_FSCR, new_thread->fscr);
+
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
@@ -1478,6 +1475,9 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
p->thread.ppr = INIT_PPR;
+
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   p->thread.fscr = mfspr(SPRN_FSCR);
 #endif
kregs->nip = ppc_function_entry(f);
return 0;
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v2] P9 ldmx support

2016-04-18 Thread Jack Miller
Second spin of these patches:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-April/141609.html

Differences from v1:

- As part of the FSCR context switch patch, remove extra FSCR manipulation in
  the DSCR case. If anything set FSCR.DSCR it should automatically be set
  correctly now. If I understand correctly, dscr_inherit is still required
  however and covers the < P8 case.

- Minor assembly readability clean up suggested by Segher (thanks!)

Any further comments or concerns welcome.

- Jack

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: static-ify variables to fix sparse warnings

2016-04-18 Thread Matthew R. Ochs
Reviewed-by: Matthew R. Ochs 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v4] powerpc/devtree: Parse new DRC mem/cpu/dev device tree elements

2016-04-18 Thread mwb
Several properties in the DRC device tree format are replaced by
more compact representations to allow, for example, for the encoding
of vast amounts of memory, and or reduced duplication of information
in related data structures.

"ibm,drc-info": This property, when present, replaces the following
four properties: “ibm,drc-indexes”, “ibm,drc-names”, “ibm,drc-types”
and “ibm,drc-power-domains”.  This property is defined for all
dynamically reconfigurable platform nodes.  The "ibm,drc-info" elements
are intended to provide a more compact representation, and reduce some
search overhead.

"ibm,dynamic-memory-v2": This property replaces the "ibm,dynamic-memory"
node representation within the "ibm,dynamic-reconfiguration-memory"
property.  This element format is intended to provide a more compact
representation of memory, especially, for systems with massive amounts
of RAM.

"ibm,architecture.vec": Bit flags are added to this data structure
by the front end processor to inform the kernel as to whether to expect
the changes to one or both of the device tree structures "ibm,drc-info"
and "ibm,dynamic-memory-v2".

The new element structures, "ibm,dynamic-memory-v2" and "ibm,drc-info",
should completely replace the previously used structures at execution.

Signed-off-by: Michael Bringmann 
---
diff --git a/arch/powerpc/include/asm/firmware.h 
b/arch/powerpc/include/asm/firmware.h
index b062924..a9d66d5 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,8 @@
 #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x8000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0001)
 #define FW_FEATURE_PRRNASM_CONST(0x0002)
+#define FW_FEATURE_RPS_DM2 ASM_CONST(0x0004)
+#define FW_FEATURE_RPS_DRC_INFOASM_CONST(0x0008)
 
 #ifndef __ASSEMBLY__
 
@@ -66,7 +68,8 @@ enum {
FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
-   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
+   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
+   FW_FEATURE_RPS_DM2 | FW_FEATURE_RPS_DRC_INFO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7f436ba..30f8cb2 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -69,6 +69,8 @@ struct boot_param_header {
  * OF address retreival & translation
  */
 
+extern int n_mem_addr_cells;
+
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
@@ -81,8 +83,9 @@ extern void of_instantiate_rtc(void);
 extern int of_get_ibm_chip_id(struct device_node *np);
 
 /* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ * specified in the device tree properties,
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory-v2
  */
 struct of_drconf_cell {
u64 base_addr;
@@ -92,9 +95,61 @@ struct of_drconf_cell {
u32 flags;
 };
 
-#define DRCONF_MEM_ASSIGNED0x0008
-#define DRCONF_MEM_AI_INVALID  0x0040
-#define DRCONF_MEM_RESERVED0x0080
+   /* It is important to note that this structure can not
+* be safely mapped onto the memory containing the
+* 'ibm,dynamic-memory-v2' property due to the issues
+* of compiler alignment.  This structure represents
+* the order of the fields stored, but compiler alignment
+* may insert extra bytes of padding between the fields
+* 'num_seq_lmbs' and 'base_addr'.
+*/
+struct of_drconf_cell_v2 {
+   u32 num_seq_lmbs;
+   u64 base_addr;
+   u32 drc_index;
+   u32 aa_index;
+   u32 flags;
+};
+
+#define DRCONF_MEM_PRESERVED   0x0001
+#define DRCONF_MEM_PRESERVABLE 0x0002
+#define DRCONF_MEM_PRESERVED_STATE 0x0004
+#define DRCONF_MEM_ASSIGNED0x0008
+#define DRCONF_MEM_NO_H_MIGRATE_DATA   0x0010
+#define DRCONF_MEM_DRC_INVALID 0x0020
+#define DRCONF_MEM_AI_INVALID  0x0040
+#define DRCONF_MEM_RESERVED0x0080
+#define DRCONF_MEM_RESERVED_SW 0x8000
+
+#defineDRCONF_V2_CELLS (n_mem_addr_cells + 4)
+#defineDRCONF_V2_CELLS_LEN (DRCONF_V2_CELLS * 
sizeof(unsigned int))
+#defineDRCONF_V2_CELL_OFFSET(i)(i * DRCONF_V2_CELLS_LEN)
+#defineDRCONF_V2_CELL_POSITION(p, i)   \
+   (void *)(((char *)p)+(i * 

Re: [PATCH] powerpc: define the fman node for the kmcoge4 DTS

2016-04-18 Thread Valentin Longchamp
On 17/04/16 03:49, Scott Wood wrote:
> On Thu, 2016-04-07 at 08:14 +0200, Valentin Longchamp wrote:
>> On 06/04/16 23:49, Scott Wood wrote:
>>> On Wed, 2016-04-06 at 15:37 +0200, Valentin Longchamp wrote:
 Now that the FMAN mac driver has been merged the fman node is relevant.

 The kmcoge4 board implements 3 ethernet interfaces, 1 with a RGMII phy
 and 2 with fixed 1 Giga SGMII links.

 Signed-off-by: Valentin Longchamp 
 ---
  arch/powerpc/boot/dts/fsl/kmcoge4.dts | 39
 +++
  1 file changed, 39 insertions(+)

 diff --git a/arch/powerpc/boot/dts/fsl/kmcoge4.dts
 b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
 index 6858ec9..1cec66d 100644
 --- a/arch/powerpc/boot/dts/fsl/kmcoge4.dts
 +++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
 @@ -106,6 +106,45 @@
sata@221000 {
status = "disabled";
};
 +
 +  fman0: fman@40 {
 +  enet0: ethernet@e {
 +  phy-connection-type = "sgmii";
 +  local-mac-address = [00 11 22 33 44
 55];
 +  fixed-link {
 +  speed = <1000>;
 +  full-duplex;
 +  };
 +  };
 +  mdio0: mdio@e1120 {
 +  front_phy: ethernet-phy@11 {
 +  reg = <0x11>;
 +  };
 +  };
 +
 +  enet1: ethernet@e2000 {
 +  phy-connection-type = "sgmii";
 +  local-mac-address = [00 11 22 33 44
 56];
 +  fixed-link {
 +  speed = <1000>;
 +  full-duplex;
 +  };
 +  };
>>>
>>> No hardcoded MAC addresses.
>>>
>>
>> For these 2 interfaces where I have the local-mac-address field, the MAC
>> addresses are set later by an application that reads the real address in
>> some
>> EEPROM. However, in order to let the fman mac_probe to run successfully in
>> the
>> first place I have set non-zero MAC addresses since the local-mac-address
>> fields
>> are not set by u-boot.
> 
> Why can't it be set from U-Boot?

It can and should be set from u-boot. It's an old leftover from the vxworks time
where all the MAC addresses were set from the vxworks application itself. When
the company migrated to embedded Linux, they only implemented setting the MAC
address for the "debug" Ethernet interface in u-boot (for NFS boot). The others
were still controlled by the application after the Kernel boot and since then it
has remained so.

I will write this down on the (long) list of remaining vxworks migration 
cleanups.

> 
> If you absolutely must hardcode a mac address, use one with the locally
> -administered bit set (0x02 in the first byte).

I don't think this is necessary. As you hinted above, the correct way would be
to set them from u-boot and there is nothing that would prevent it other than
actually implementing it (u-boot actually has access to the MAC-address list of
the board).

I will send a v2 patch without these fields.

> 
>> I have found several local-mac-address fields in other DTS files that are
>> all
>> zeros, and thus are rejected by of_get_mac_address. Are they leftovers from
>> the
>> past or should they be used here as well ? If not, I will simply drop these
>> 2
>> fields.
> 
> That's a relic from ancient U-Boots that could only overwrite existing
> properties rather than insert them from scratch.
> 

OK, interesting to know, thanks for the explanation.

Valentin
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Add a kernel thread to check the coherent platform function's state

2016-04-18 Thread Christophe Lombard
In the POWERVM environment, the PHYP CoherentAccel component manages
the state of the Coherent Accelerator Processor Interface adapter and
virtualizes CAPI resources, handles CAPP, PSL, PSL Slice errors - and
interrupts - and provides a new set of HCALLs for the OS APIs to utilize
AFUs.

During the course of operation, a coherent platform function can
encounter errors. Some possible reason for errors are:
• Hardware recoverable and unrecoverable errors
• Transient and over-threshold correctable errors

PHYP implements its own state model for the coherent platform function.
The current state of this Accelerator Function Unit (AFU) is available
through a hcall.

In case of low-level troubles (or error injection), the PHYP component
may reset the card and change the AFU state. The PHYP interface doesn't
provide any way to be notified when that happens.

The current implementation of the cxl driver, for the POWERVM
environment, follows the general error recovery procedures required to
reset operation of the coherent platform function. The platform firmware
resets and reconfigures hardware when an external action is required -
attach/detach a process, link ok, 

The purpose of this patch is to interact with the external driver
(where the AFU is shown) even if no action is required. A kernel thread
is needed to check every x seconds the current state of the AFU to see
if we need to enter an error recovery path.

Signed-off-by: Christophe Lombard 
---
 drivers/misc/cxl/cxl.h   |  3 +-
 drivers/misc/cxl/guest.c | 81 
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 38e21cf..a26c210 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -379,7 +380,7 @@ struct cxl_afu_guest {
phys_addr_t p2n_phys;
u64 p2n_size;
int max_ints;
-   struct mutex recovery_lock;
+   struct task_struct *kthread_tsk;
int previous_state;
 };
 
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 8213372..06dfe7f 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -19,6 +19,10 @@
 #define CXL_SLOT_RESET_EVENT   2
 #define CXL_RESUME_EVENT   3
 
+#define CXL_KTHREAD"cxl_kthread"
+
+void stop_state_thread(struct cxl_afu *afu);
+
 static void pci_error_handlers(struct cxl_afu *afu,
int bus_error_event,
pci_channel_state_t state)
@@ -178,6 +182,9 @@ static int afu_read_error_state(struct cxl_afu *afu, int 
*state_out)
u64 state;
int rc = 0;
 
+   if (!afu)
+   return -EIO;
+
rc = cxl_h_read_error_state(afu->guest->handle, );
if (!rc) {
WARN_ON(state != H_STATE_NORMAL &&
@@ -645,6 +652,8 @@ static void guest_release_afu(struct device *dev)
 
idr_destroy(>contexts_idr);
 
+   stop_state_thread(afu);
+
kfree(afu->guest);
kfree(afu);
 }
@@ -818,7 +827,6 @@ static int afu_update_state(struct cxl_afu *afu)
switch (cur_state) {
case H_STATE_NORMAL:
afu->guest->previous_state = cur_state;
-   rc = 1;
break;
 
case H_STATE_DISABLE:
@@ -834,7 +842,6 @@ static int afu_update_state(struct cxl_afu *afu)
pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
pci_channel_io_normal);
pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
-   rc = 1;
}
afu->guest->previous_state = 0;
break;
@@ -859,39 +866,61 @@ static int afu_update_state(struct cxl_afu *afu)
return rc;
 }
 
-static int afu_do_recovery(struct cxl_afu *afu)
+static int handle_state_thread(void *data)
 {
-   int rc;
+   struct cxl_afu *afu;
+   int rc = 0;
 
-   /* many threads can arrive here, in case of detach_all for example.
-* Only one needs to drive the recovery
-*/
-   if (mutex_trylock(>guest->recovery_lock)) {
-   rc = afu_update_state(afu);
-   mutex_unlock(>guest->recovery_lock);
-   return rc;
+   pr_devel("in %s\n", __func__);
+
+   afu = (struct cxl_afu*)data;
+   do {
+   set_current_state(TASK_INTERRUPTIBLE);
+
+   if (afu) {
+   afu_update_state(afu);
+   if (afu->guest->previous_state == 
H_STATE_PERM_UNAVAILABLE)
+   goto out;
+   } else
+   return -ENODEV;
+   schedule_timeout(msecs_to_jiffies(3000));
+   } while(!kthread_should_stop());
+
+out:
+   afu->guest->kthread_tsk = NULL;
+   return rc;
+}
+
+void 

Re: crash in ppc4xx-rng on canyonland

2016-04-18 Thread Christian Lamparter via Linuxppc-dev
On Monday, April 18, 2016 05:59:39 PM Herbert Xu wrote:
> Christian Lamparter  wrote:
> > 
> > I tried to move ppc4xx-rng into crypto4xx (see attachment - patch #1).
> > The driver works as is. But I can't come up with a way to attach the
> > crypto4xx driver to the ppc4xx-rng OF node cleanly. Basically,
> > I'm looking for a way to have one driver (with one context) be 
> > in charge of two different OF nodes (ppc4xx-rng and ppc4xx-crypto).
> > Is there any solution to this? Because otherwise, I would add a
> 
> Is it possible to have an RNG unit without the crypto unit?
No. In AMCC's product brief the TRNG (true random number generator) is
part of the security engine. The security engine also provides more 
features like a "public key authentication" core which has its own
address range.


> If not then your first patch should be OK, provided that you add
> some error handling when the RNG probe fails.  For example, if 
> the RNG probe fails we should probably not call remove on it later.
I checked the error handling code again and verified it works on the
device. The original code resets the dev->trng_base = NULL and
core_dev->trng = NULL; in the err_out case. The "dev and core_dev"
are coming from the main crypto driver, they will always be
valid. Hence ppc4xx_trng_remove can safely be executed, even if 
ppc4xx_trng_probe fails as devm_hwrng_unregister, iounmap and kfree
can deal with the NULL properly. Nevertheless, I added an early
bailout if core_dev->trng == NULL.

what else I fixed in v1->v2: 
 - added a check to test trng device's status state with
   of_device_is_available.
 - if the hwrng device registration failed, the flag which
   enables the trng was left enabled (note: the v1 code
   disabled the hwrng device as part of crypto4xx_remove.
   so it wasn't enabled when the crypto4xx driver was
   unloaded)

---
From c0b580a50bdade97f0d06c98fc7dccbf64d25eb2 Mon Sep 17 00:00:00 2001
From: Christian Lamparter 
Date: Mon, 18 Apr 2016 12:57:41 +0200
Subject: [PATCH v2] crypto4xx: integrate ppc4xx-rng into crypto4xx

This patch integrates the ppc4xx-rng driver into the existing
crypto4xx. This is because the true random number generator
is controlled and part of the security core.

Signed-off-by: Christian Lamparter 
---
 drivers/char/hw_random/Kconfig  |  13 ---
 drivers/char/hw_random/Makefile |   1 -
 drivers/char/hw_random/ppc4xx-rng.c | 147 
 drivers/crypto/Kconfig  |   8 ++
 drivers/crypto/amcc/Makefile|   1 +
 drivers/crypto/amcc/crypto4xx_core.c|   7 +-
 drivers/crypto/amcc/crypto4xx_core.h|   4 +
 drivers/crypto/amcc/crypto4xx_reg_def.h |   1 +
 drivers/crypto/amcc/crypto4xx_trng.c| 131 
 drivers/crypto/amcc/crypto4xx_trng.h|  34 
 10 files changed, 184 insertions(+), 163 deletions(-)
 delete mode 100644 drivers/char/hw_random/ppc4xx-rng.c
 create mode 100644 drivers/crypto/amcc/crypto4xx_trng.c
 create mode 100644 drivers/crypto/amcc/crypto4xx_trng.h

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 3c5be60..abc8720 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -268,19 +268,6 @@ config HW_RANDOM_NOMADIK
 
  If unsure, say Y.
 
-config HW_RANDOM_PPC4XX
-   tristate "PowerPC 4xx generic true random number generator support"
-   depends on PPC && 4xx
-   default HW_RANDOM
-   ---help---
-This driver provides the kernel-side support for the TRNG hardware
-found in the security function of some PowerPC 4xx SoCs.
-
-To compile this driver as a module, choose M here: the
-module will be called ppc4xx-rng.
-
-If unsure, say N.
-
 config HW_RANDOM_PSERIES
tristate "pSeries HW Random Number Generator support"
depends on PPC64 && IBMVIO
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index f5a6fa7..079745f 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
-obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
 obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o
 obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
diff --git a/drivers/char/hw_random/ppc4xx-rng.c 
b/drivers/char/hw_random/ppc4xx-rng.c
deleted file mode 100644
index c0db438..000
--- a/drivers/char/hw_random/ppc4xx-rng.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Generic PowerPC 44x RNG driver
- *
- * Copyright 2011 IBM Corporation
- *
- * This program is free software; you can redistribute it 

RE: [RFC v6 06/10] PCI: Add a new PCI_BUS_FLAGS_MSI_REMAP flag

2016-04-18 Thread David Laight
From: Yongji Xie
> Sent: 18 April 2016 11:59
> We introduce a new pci_bus_flags, PCI_BUS_FLAGS_MSI_REMAP
> which indicates all devices on the bus are protected by the
> hardware which supports IRQ remapping(intel naming).
> 
> This flag will be used to know whether it's safe to expose
> MSI-X tables of PCI BARs to userspace. Because the capability
> of IRQ remapping can guarantee the PCI device cannot trigger
> MSIs that correspond to interrupt IDs of other devices.

I'm worried that this entire series is going to break drivers
for existing hardware.

I understand some of the reasoning for 'vm pass through' configurations,
but there will be PCIe devices out there that have the MSI-X tables
in the same BAR as other device registers.
If you are lucky nothing else is in the same 4k area, but I wouldn't
assume it.

In any case, if the hardware can't police the card's master transfers
there is nothing to stop a different bus master block on the card
from raising MSI-X interrupts - they are just a PCIe write.
So all you are doing is raising the bar slightly and giving a very false
sense of security.

David

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 10/10] vfio-pci: Allow to mmap MSI-X table if interrupt remapping is supported

2016-04-18 Thread Yongji Xie
This patch enables mmapping MSI-X tables if hardware supports
interrupt remapping which can ensure that a given pci device
can only shoot the MSIs assigned for it.

With MSI-X table mmapped, we also need to expose the
read/write interface which will be used to access MSI-X table.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c  |7 +--
 drivers/vfio/pci/vfio_pci_rdwr.c |3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index dc1779c..b08abe0 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -635,7 +635,9 @@ static long vfio_pci_ioctl(void *device_data,
 VFIO_REGION_INFO_FLAG_WRITE;
if (vdev->bar_mmap_supported[info.index]) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-   if (info.index == vdev->msix_bar) {
+   if (info.index == vdev->msix_bar &&
+   !(pdev->bus->bus_flags &
+   PCI_BUS_FLAGS_MSI_REMAP)) {
ret = msix_sparse_mmap_cap(vdev, );
if (ret)
return ret;
@@ -1067,7 +1069,8 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
if (req_start + req_len > phys_len)
return -EINVAL;
 
-   if (index == vdev->msix_bar) {
+   if (index == vdev->msix_bar &&
+   !(pdev->bus->bus_flags & PCI_BUS_FLAGS_MSI_REMAP)) {
/*
 * Disallow mmaps overlapping the MSI-X table; users don't
 * get to touch this directly.  We could find somewhere
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 5ffd1d9..dbf9cd0 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -164,7 +164,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char 
__user *buf,
} else
io = vdev->barmap[bar];
 
-   if (bar == vdev->msix_bar) {
+   if (bar == vdev->msix_bar &&
+   !(pdev->bus->bus_flags & PCI_BUS_FLAGS_MSI_REMAP)) {
x_start = vdev->msix_offset;
x_end = vdev->msix_offset + vdev->msix_size;
}
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 09/10] pci-ioda: Set PCI_BUS_FLAGS_MSI_REMAP for IODA host bridge

2016-04-18 Thread Yongji Xie
Any IODA host bridge has the capability of IRQ remapping.
So we set PCI_BUS_FLAGS_MSI_REMAP when this kind of host bridge
is detected.

Signed-off-by: Yongji Xie 
---
 arch/powerpc/platforms/powernv/pci-ioda.c |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f90dc04..9557638 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3080,6 +3080,12 @@ static void pnv_pci_ioda_fixup(void)
pnv_npu_ioda_fixup();
 }
 
+int pnv_pci_ioda_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+   bridge->bus->bus_flags |= PCI_BUS_FLAGS_MSI_REMAP;
+   return 0;
+}
+
 /*
  * Returns the alignment for I/O or memory windows for P2P
  * bridges. That actually depends on how PEs are segmented.
@@ -3364,6 +3370,8 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
 */
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 
+   ppc_md.pcibios_root_bridge_prepare = pnv_pci_ioda_root_bridge_prepare;
+
if (phb->type == PNV_PHB_NPU)
hose->controller_ops = pnv_npu_ioda_controller_ops;
else
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 08/10] PCI: Set PCI_BUS_FLAGS_MSI_REMAP if MSI controller supports IRQ remapping

2016-04-18 Thread Yongji Xie
On ARM HW the capability of IRQ remapping is abstracted on
MSI controller side. MSI_FLAG_IRQ_REMAPPING is used to advertise
this [1].

To have a universal flag to test this capability for different
archs on PCI side, we set PCI_BUS_FLAGS_MSI_REMAP for PCI buses
when MSI_FLAG_IRQ_REMAPPING is set.

[1] http://www.spinics.net/lists/kvm/msg130256.html

Signed-off-by: Yongji Xie 
---
 drivers/pci/msi.c   |   12 
 drivers/pci/probe.c |3 +++
 include/linux/msi.h |6 +-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a080f44..1661cdf 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1134,6 +1134,18 @@ void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
 }
 EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata);
 
+void pci_bus_check_msi_remapping(struct pci_bus *bus,
+struct irq_domain *domain)
+{
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+   struct msi_domain_info *info;
+
+   info = msi_get_domain_info(domain);
+   if (info->flags & MSI_FLAG_IRQ_REMAPPING)
+   bus->bus_flags |= PCI_BUS_FLAGS_MSI_REMAP;
+#endif
+}
+
 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
 /**
  * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6d7ab9b..25cf1b1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -696,6 +696,9 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
if (!d)
d = pci_host_bridge_msi_domain(b);
 
+   if (d && b == bus)
+   pci_bus_check_msi_remapping(bus, d);
+
dev_set_msi_domain(>dev, d);
 }
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 03eda72..b4c649e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -15,6 +15,8 @@ extern int pci_msi_ignore_mask;
 struct irq_data;
 struct msi_desc;
 struct pci_dev;
+struct pci_bus;
+struct irq_domain;
 struct platform_msi_priv_data;
 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
@@ -155,6 +157,9 @@ void arch_restore_msi_irqs(struct pci_dev *dev);
 void default_teardown_msi_irqs(struct pci_dev *dev);
 void default_restore_msi_irqs(struct pci_dev *dev);
 
+void pci_bus_check_msi_remapping(struct pci_bus *bus,
+struct irq_domain *domain);
+
 struct msi_controller {
struct module *owner;
struct device *dev;
@@ -173,7 +178,6 @@ struct msi_controller {
 #include 
 #include 
 
-struct irq_domain;
 struct irq_domain_ops;
 struct irq_chip;
 struct device_node;
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 07/10] iommu: Set PCI_BUS_FLAGS_MSI_REMAP if IOMMU have capability of IRQ remapping

2016-04-18 Thread Yongji Xie
The capability of IRQ remapping is abstracted on IOMMU side on
some archs. There is an existing flag IOMMU_CAP_INTR_REMAP for this.

To have a universal flag to test this capability for different
archs on PCI side, we set PCI_BUS_FLAGS_MSI_REMAP for PCI buses
when IOMMU_CAP_INTR_REMAP is set.

Signed-off-by: Yongji Xie 
---
 drivers/iommu/iommu.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0e3b009..5d2b6f6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -813,6 +813,16 @@ struct iommu_group *pci_device_group(struct device *dev)
return group;
 }
 
+static void pci_check_msi_remapping(struct pci_dev *pdev,
+   const struct iommu_ops *ops)
+{
+   struct pci_bus *bus = pdev->bus;
+
+   if (ops->capable(IOMMU_CAP_INTR_REMAP) &&
+   !(bus->bus_flags & PCI_BUS_FLAGS_MSI_REMAP))
+   bus->bus_flags |= PCI_BUS_FLAGS_MSI_REMAP;
+}
+
 /**
  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
  * @dev: target device
@@ -871,6 +881,9 @@ static int add_iommu_group(struct device *dev, void *data)
const struct iommu_ops *ops = cb->ops;
int ret;
 
+   if (dev_is_pci(dev) && ops->capable)
+   pci_check_msi_remapping(to_pci_dev(dev), ops);
+
if (!ops->add_device)
return 0;
 
@@ -913,6 +926,8 @@ static int iommu_bus_notifier(struct notifier_block *nb,
 * result in ADD/DEL notifiers to group->notifier
 */
if (action == BUS_NOTIFY_ADD_DEVICE) {
+   if (dev_is_pci(dev) && ops->capable)
+   pci_check_msi_remapping(to_pci_dev(dev), ops);
if (ops->add_device)
return ops->add_device(dev);
} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 06/10] PCI: Add a new PCI_BUS_FLAGS_MSI_REMAP flag

2016-04-18 Thread Yongji Xie
We introduce a new pci_bus_flags, PCI_BUS_FLAGS_MSI_REMAP
which indicates all devices on the bus are protected by the
hardware which supports IRQ remapping(intel naming).

This flag will be used to know whether it's safe to expose
MSI-X tables of PCI BARs to userspace. Because the capability
of IRQ remapping can guarantee the PCI device cannot trigger
MSIs that correspond to interrupt IDs of other devices.

There is an existing flag for this in the IOMMU space:

enum iommu_cap {
IOMMU_CAP_CACHE_COHERENCY,
--->IOMMU_CAP_INTR_REMAP,
IOMMU_CAP_NOEXEC,
};

and Eric also posted a patchset [1] to abstract this
capability on MSI controller side for ARM. But it would
make sense to have a more common flag like
PCI_BUS_FLAGS_MSI_REMAP so that we can use a universal
flag to test this capability for different archs on
PCI side.

[1] http://www.spinics.net/lists/kvm/msg130256.html

Signed-off-by: Yongji Xie 
---
 include/linux/pci.h |1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27df4a6..d619228 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -193,6 +193,7 @@ typedef unsigned short __bitwise pci_bus_flags_t;
 enum pci_bus_flags {
PCI_BUS_FLAGS_NO_MSI   = (__force pci_bus_flags_t) 1,
PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
+   PCI_BUS_FLAGS_MSI_REMAP = (__force pci_bus_flags_t) 4,
 };
 
 /* These values come from the PCI Express Spec */
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 05/10] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

2016-04-18 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs.

But we should allow to mmap these sub-page MMIO BARs if we can
make sure these BARs' mmio page will not be shared with other BARs.

To achieve that, we first need to enforce all PCI MMIO BARs
to be page aligned like the commit "PCI: Add support for
enforcing all MMIO BARs to be page aligned" does. Most of PCI
BARs will be assigned into an exclusive page with a hole. Then,
we must make sure that hot-add device's BAR will never be assigned
into the hole. So we add shadow resources and put them into the
hole in this patch. With these two guarantees, I think it should
be safe to mmap sub-page BAR.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c |   58 ++-
 drivers/vfio/pci/vfio_pci_private.h |8 +
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 98059df..dc1779c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -110,13 +110,47 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
 }
 
+static bool vfio_pci_bar_mmap_supported(struct vfio_pci_device *vdev, int 
index)
+{
+   struct resource *res = vdev->pdev->resource + index;
+   struct vfio_pci_shadow_resource *shadow_res;
+
+   if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && res->flags & IORESOURCE_MEM &&
+   resource_size(res) > 0) {
+   if (resource_size(res) >= PAGE_SIZE)
+   return true;
+
+   if (!(res->start & ~PAGE_MASK)) {
+   /*
+* Add shadow resource for sub-page bar whose mmio
+* page is exclusive in case that hot-add device's
+* bar is assigned into the mem hole.
+*/
+   shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
+   shadow_res->resource.start = res->end + 1;
+   shadow_res->resource.end = res->start + PAGE_SIZE - 1;
+   shadow_res->resource.flags = res->flags;
+   if (request_resource(res->parent,
+   _res->resource)) {
+   kfree(shadow_res);
+   return false;
+   }
+   shadow_res->index = index;
+   list_add(_res->res_next,
+   >shadow_resources_list);
+   return true;
+   }
+   }
+   return false;
+}
+
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
 
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
struct pci_dev *pdev = vdev->pdev;
-   int ret;
+   int ret, bar;
u16 cmd;
u8 msix_pos;
 
@@ -183,12 +217,17 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
 
+   for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
+   vdev->bar_mmap_supported[bar] =
+   vfio_pci_bar_mmap_supported(vdev, bar);
+   }
return 0;
 }
 
 static void vfio_pci_disable(struct vfio_pci_device *vdev)
 {
struct pci_dev *pdev = vdev->pdev;
+   struct vfio_pci_shadow_resource *shadow_res, *tmp;
int i, bar;
 
/* Stop the device from further DMA */
@@ -217,6 +256,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->barmap[bar] = NULL;
}
 
+   list_for_each_entry_safe(shadow_res, tmp,
+>shadow_resources_list, res_next) {
+   list_del(_res->res_next);
+   release_resource(_res->resource);
+   kfree(shadow_res);
+   }
+
vdev->needs_reset = true;
 
/*
@@ -587,9 +633,7 @@ static long vfio_pci_ioctl(void *device_data,
 
info.flags = VFIO_REGION_INFO_FLAG_READ |
 VFIO_REGION_INFO_FLAG_WRITE;
-   if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
-   pci_resource_flags(pdev, info.index) &
-   IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+   if (vdev->bar_mmap_supported[info.index]) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
if (info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, );
@@ -1011,16 +1055,16 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
return -EINVAL;
if (index >= 

[RFC v6 02/10] PCI: Do not Use IORESOURCE_STARTALIGN to identify bridge resources

2016-04-18 Thread Yongji Xie
Now we use the IORESOURCE_STARTALIGN to identify bridge resources
in __assign_resources_sorted(). That's quite fragile. We can't
make sure that the PCI devices' resources will not use
IORESOURCE_STARTALIGN any more.

In this patch, we try to use a more robust way to identify
bridge resources.

Signed-off-by: Yongji Xie 
---
 drivers/pci/setup-bus.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7796d0a..45fc484 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -391,6 +391,7 @@ static void __assign_resources_sorted(struct list_head 
*head,
struct pci_dev_resource *dev_res, *tmp_res, *dev_res2;
unsigned long fail_type;
resource_size_t add_align, align;
+   int index;
 
/* Check if optional add_size is there */
if (!realloc_head || list_empty(realloc_head))
@@ -411,11 +412,13 @@ static void __assign_resources_sorted(struct list_head 
*head,
 
/*
 * There are two kinds of additional resources in the list:
-* 1. bridge resource  -- IORESOURCE_STARTALIGN
-* 2. SR-IOV resource   -- IORESOURCE_SIZEALIGN
+* 1. bridge resource
+* 2. SR-IOV resource
 * Here just fix the additional alignment for bridge
 */
-   if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+   index = dev_res->res - dev_res->dev->resource;
+   if (index < PCI_BRIDGE_RESOURCES ||
+   index > PCI_BRIDGE_RESOURCE_END)
continue;
 
add_align = get_res_add_align(realloc_head, dev_res->res);
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 04/10] PCI: Add support for enforcing all MMIO BARs to be page aligned

2016-04-18 Thread Yongji Xie
When vfio passes through a PCI device whose MMIO BARs are
smaller than PAGE_SIZE, the guest will not handle the mmio
accesses to the BARs, which leads to mmio emulation in the host.

This is because vfio will not allow to passthrough one BAR's
mmio page which may be shared with other BARs. Otherwise,
there will be a backdoor that guest can use to access BARs
of other guest.

To solve this issue, this patch modifies resource_alignment
to support syntax where multiple devices get the same
alignment. So we can use something like
"pci=resource_alignment=*:*:*.*:noresize" to enforce the
alignment of all MMIO BARs to be at least PAGE_SIZE so that
one BAR's mmio page would not be shared with other BARs.

And we also define a macro PCIBIOS_MIN_ALIGNMENT to enable this
automatically on PPC64 platform which can easily hit this issue
because its PAGE_SIZE is 64KB.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 ++
 arch/powerpc/include/asm/pci.h  |2 ++
 drivers/pci/pci.c   |   64 +--
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index d8b29ab..542be4a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
+   <domain>, <bus>, <slot> and <func> can be set to
+   "*" which means match all values.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a..78f230f 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -30,6 +30,8 @@
 #define PCIBIOS_MIN_IO 0x1000
 #define PCIBIOS_MIN_MEM0x1000
 
+#define PCIBIOS_MIN_ALIGNMENT  PAGE_SIZE
+
 struct pci_dev;
 
 /* Values for the `which' argument to sys_pciconfig_iobase syscall.  */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7564ccc..0381c28 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4605,7 +4605,12 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
+   bool invalid = false;
 
+#ifdef PCIBIOS_MIN_ALIGNMENT
+   align = PCIBIOS_MIN_ALIGNMENT;
+   *resize = false;
+#endif
spin_lock(_alignment_lock);
p = resource_alignment_param;
while (*p) {
@@ -4622,16 +4627,49 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
} else {
align_order = -1;
}
-   if (sscanf(p, "%x:%x:%x.%x%n",
-   , , , , ) != 4) {
+   if (p[0] == '*' && p[1] == ':') {
+   seg = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != ':') {
+   invalid = true;
+   break;
+   }
+   p += count + 1;
+   if (*p == '*') {
+   bus = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1) {
+   invalid = true;
+   break;
+   }
+   p += count;
+   if (*p == '.') {
+   slot = bus;
+   bus = seg;
seg = 0;
-   if (sscanf(p, "%x:%x.%x%n",
-   , , , ) != 3) {
-   /* Invalid format */
-   printk(KERN_ERR "PCI: Can't parse 
resource_alignment parameter: %s\n",
-   p);
+   p++;
+   } else if (*p == ':') {
+   p++;
+   if (p[0] == '*' && p[1] == '.') {
+   slot = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != '.') {
+   invalid = true;
break;
}
+   p += count + 1;
+   } else {
+   invalid = true;
+   break;
+   }
+   if (*p == '*') {
+   func = -1;
+  

[RFC v6 03/10] PCI: Add a new option for resource_alignment to reassign alignment

2016-04-18 Thread Yongji Xie
When using resource_alignment kernel parameter, the current
implementation reassigns the alignment by changing resources' size
which can potentially break some drivers. For example, the driver
uses the size to locate some register whose length is related
to the size.

This patch adds a new option "noresize" for the parameter to
solve this problem.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |5 -
 drivers/pci/pci.c   |   35 +--
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 9a53c92..d8b29ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2912,13 +2912,16 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
window. The default value is 64 megabytes.
resource_alignment=
Format:
-   [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+   [<order of align>@][<domain>:]<bus>:<slot>.<func>
+   [:noresize][; ...]
Specifies alignment and device to reassign
aligned memory resources.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
+   noresize: Don't change the resources' sizes when
+   reassigning alignment.
ecrc=   Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 77b7494..7564ccc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4594,11 +4594,13 @@ static DEFINE_SPINLOCK(resource_alignment_lock);
 /**
  * pci_specified_resource_alignment - get resource alignment specified by user.
  * @dev: the PCI device to get
+ * @resize: whether or not to change resources' size when reassigning alignment
  *
  * RETURNS: Resource alignment if it is specified.
  *  Zero if it is not specified.
  */
-static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
+   bool *resize)
 {
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
@@ -4632,6 +4634,11 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev)
}
}
p += count;
+   if (!strncmp(p, ":noresize", 9)) {
+   *resize = false;
+   p += 9;
+   } else
+   *resize = true;
if (seg == pci_domain_nr(dev->bus) &&
bus == dev->bus->number &&
slot == PCI_SLOT(dev->devfn) &&
@@ -4664,11 +4671,12 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
 {
int i;
struct resource *r;
+   bool resize = true;
resource_size_t align, size;
u16 command;
 
/* check if specified PCI is target device to reassign */
-   align = pci_specified_resource_alignment(dev);
+   align = pci_specified_resource_alignment(dev, );
if (!align)
return;
 
@@ -4690,15 +4698,22 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
if (!(r->flags & IORESOURCE_MEM))
continue;
size = resource_size(r);
-   if (size < align) {
-   size = align;
-   dev_info(>dev,
-   "Rounding up size of resource #%d to %#llx.\n",
-   i, (unsigned long long)size);
+   if (resize) {
+   if (size < align) {
+   size = align;
+   dev_info(>dev,
+   "Rounding up size of resource #%d to 
%#llx.\n",
+   i, (unsigned long long)size);
+   }
+   r->flags |= IORESOURCE_UNSET;
+   r->end = size - 1;
+   r->start = 0;
+   } else {
+   r->flags &= ~IORESOURCE_SIZEALIGN;
+   r->flags |= IORESOURCE_STARTALIGN | IORESOURCE_UNSET;
+   r->start = max(align, size);
+   r->end = r->start + size - 1;
}
-   r->flags |= IORESOURCE_UNSET;
-   r->end = size - 1;
-   

[RFC v6 01/10] PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set

2016-04-18 Thread Yongji Xie
The resource_alignment will release memory resources allocated
by firmware so that kernel can reassign new resources later on.
But this will cause the problem that no resources can be
allocated by kernel if PCI_PROBE_ONLY was set, e.g. on pSeries
platform because PCI_PROBE_ONLY forces the kernel to use firmware
setup and not to reassign any resources.

To solve this problem, this patch ignores resource_alignment
if PCI_PROBE_ONLY was set.

Signed-off-by: Yongji Xie 
---
 drivers/pci/pci.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..77b7494 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4607,6 +4607,12 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev)
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
while (*p) {
+   if (pci_has_flag(PCI_PROBE_ONLY)) {
+   printk(KERN_ERR "PCI: Ignore resource_alignment 
parameter: %s with PCI_PROBE_ONLY set\n",
+   p);
+   *p = 0;
+   break;
+   }
count = 0;
if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
p[count] == '@') {
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC v6 00/10] vfio-pci: Allow to mmap sub-page MMIO BARs and MSI-X table

2016-04-18 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs and MSI-X table. This is because
sub-page BARs' mmio page may be shared with other BARs and MSI-X table
should not be accessed directly from the guest for security reasons.

But it will easily cause some performance issues for mmio accesses
in guest when vfio passthrough sub-page BARs or BARs containing MSI-X
table on PPC64 platform. This is because PAGE_SIZE is 64KB by default
on PPC64 platform and the big page may easily hit the sub-page MMIO
BARs' unmmapping and cause the unmmaping of the mmio page which
MSI-X table locate in, which lead to mmio emulation in host.

For sub-page MMIO BARs' unmmapping, this patchset modifies
resource_alignment kernel parameter to enforce the alignment of all 
MMIO BARs to be at least PAGE_SIZE so that sub-page BAR's mmio page
will not be shared with other BARs. And we also add shadow resources
to the vfio device and put them into the holes of mmio pages in case
that hot-add device's BARs are assigned into the holes. Then we can 
mmap sub-page MMIO BARs safely.

For MSI-X table's unmmapping, we think MSI-X table is safe to access
directly from userspace if hardware supports the capability of  
interrupt remapping which can ensure that a given pci device can 
only shoot the MSIs assigned for it. But the implementation of  
this capability is arch-independent. To have a universal way 
to test this capability on PCI side for different archs, we introduce
a new bus_flags PCI_BUS_FLAGS_MSI_REMAP.

With this patchset applied, we can get almost 100% improvement on
performance for small block 4k random read when we passthrough a FC
HBA containing sub-page BARs and MSI-X BARs to guest on PPC64 in
our test.

Patch 8 is based on the proposed patchset[2].

Changelog v6: 
- Rebase on vfio/next with patchset[2] applied
- Fix some bugs of v5
- Add three patches to make PCI_BUS_FLAGS_MSI_REMAP as
  a universal flag to test IRQ remapping

Changelog v5:
- Rebase on vfio/next
- Change the order of patch 1,2,3
- Move the warning "resource_alignment will not work with
  PCI_PROBE_ONLY set" from documentation to kernel log
- Remove IORESOURCE_WINDOW
- Add description for parameter "resize"
- Add PCIBIOS_MIN_ALIGNMENT to force all MMIO BARs to
  get minimum alignment
- Add shadow resources to make sure sub-page BAR's mmio
  page will not be shared with hot-add BARs.
- Add a new bit to pci_bus_flags to indicate the capability
  of interrupt remapping on PPC64
- Remove IOMMU_CAP_INTR_REMAP on PPC64
- Add a property msi_remap to vfio_pci_device to cache the
  capability of interrupt remapping

Changelog v4:
- Rebase on v4.5-rc6 with patchset[1] applied.
- Remove resource_page_aligned kernel parameter
- Fix some problems with resource_alignment kernel parameter
- Modify resource_alignment kernel parameter to support multiple
  devices.
- Remove host bridge attribute: msi_filtered
- Use IOMMU_CAP_INTR_REMAP to check if MSI-X table can be mmapped
- Add IOMMU_CAP_INTR_REMAP for IODA host bridge on PPC64 platform

Changelog v3:
- Rebase on new linux kernel mainline with the patchset[1] applied.
- Add a function to check whether PCI BARs'mmio page is shared with
  other BARs.
- Add a host bridge attribute to indicate PCI host bridge support
  filtering of MSIs.
- Use the new host bridge attribute to check if MSI-X table can
  be mmapped instead of CONFIG_EEH.
- Remove Kconfig option VFIO_PCI_MMAP_MSIX

Changelog v2:
- Rebase on v4.4-rc6 with the patchset[1] applied.
- Use kernel parameter to enforce all MMIO BARs to be page aligned
  on PCI core code instead of doing it on PPC64 arch code.
- Remove flags: VFIO_DEVICE_FLAGS_PCI_PAGE_ALIGNED

[1] http://www.spinics.net/lists/kvm/msg127812.html
[2] http://www.spinics.net/lists/kvm/msg130256.html

Yongji Xie (10):
  PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set
  PCI: Do not Use IORESOURCE_STARTALIGN to identify bridge resources
  PCI: Add a new option for resource_alignment to reassign alignment
  PCI: Add support for enforcing all MMIO BARs to be page aligned
  vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  PCI: Add a new PCI_BUS_FLAGS_MSI_REMAP flag
  iommu: Set PCI_BUS_FLAGS_MSI_REMAP if IOMMU have capability of IRQ remapping
  PCI: Set PCI_BUS_FLAGS_MSI_REMAP if MSI controller supports IRQ remapping
  pci-ioda: Set PCI_BUS_FLAGS_MSI_REMAP for IODA host bridge
  vfio-pci: Allow to mmap MSI-X table if interrupt remapping is supported

 Documentation/kernel-parameters.txt   |7 +-
 arch/powerpc/include/asm/pci.h|2 +
 arch/powerpc/platforms/powernv/pci-ioda.c |8 +++
 drivers/iommu/iommu.c |   15 +
 drivers/pci/msi.c |   12 
 drivers/pci/pci.c |  105 +++--
 drivers/pci/probe.c   |3 +
 drivers/pci/setup-bus.c   |9 ++-
 drivers/vfio/pci/vfio_pci.c   |  

[PATCH v2 1/3] powerpc: scan_features() updates incorrect bits for REAL_LE

2016-04-18 Thread Michael Ellerman
From: Anton Blanchard 

The REAL_LE feature entry in the ibm_pa_feature struct is missing an MMU
feature value, meaning all the remaining elements initialise the wrong
values.

This means instead of checking for byte 5, bit 0, we check for byte 0,
bit 0, and then we incorrectly set the CPU feature bit as well as MMU
feature bit 1 and CPU user feature bits 0 and 2 (5).

Checking byte 0 bit 0 (IBM numbering), means we're looking at the
"Memory Management Unit (MMU)" feature - ie. does the CPU have an MMU.
In practice that bit is set on all platforms which have the property.

This means we set CPU_FTR_REAL_LE always. In practice that seems not to
matter because all the modern cpus which have this property also
implement REAL_LE, and we've never needed to disable it.

We're also incorrectly setting MMU feature bit 1, which is:

  #define MMU_FTR_TYPE_8xx  0x0002

Luckily the only place that looks for MMU_FTR_TYPE_8xx is in Book3E
code, which can't run on the same cpus as scan_features(). So this also
doesn't matter in practice.

Finally in the CPU user feature mask, we're setting bits 0 and 2. Bit 2
is not currently used, and bit 0 is:

  #define PPC_FEATURE_PPC_LE0x0001

Which says the CPU supports the old style "PPC Little Endian" mode.
Again this should be harmless in practice as no 64-bit CPUs implement
that mode.

Fix the code by adding the missing initialisation of the MMU feature.

Also add a comment marking CPU user feature bit 2 (0x4) as reserved. It
would be unsafe to start using it as old kernels incorrectly set it.

Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out 
MMU-related features")
Signed-off-by: Anton Blanchard 
Cc: sta...@vger.kernel.org
[mpe: Flesh out changelog, add comment reserving 0x4]
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/uapi/asm/cputable.h | 1 +
 arch/powerpc/kernel/prom.c   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/uapi/asm/cputable.h 
b/arch/powerpc/include/uapi/asm/cputable.h
index 8dde19962a5b..f63c96cd3608 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -31,6 +31,7 @@
 #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
0x0040
 
+/* Reserved - do not use   0x0004 */
 #define PPC_FEATURE_TRUE_LE0x0002
 #define PPC_FEATURE_PPC_LE 0x0001
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7030b035905d..080c96b44a7f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -158,7 +158,7 @@ static struct ibm_pa_feature {
{CPU_FTR_NOEXECUTE, 0, 0,   0, 6, 0},
{CPU_FTR_NODSISRALIGN, 0, 0,1, 1, 1},
{0, MMU_FTR_CI_LARGE_PAGE, 0,   1, 2, 0},
-   {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+   {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0},
/*
 * If the kernel doesn't support TM (ie. 
CONFIG_PPC_TRANSACTIONAL_MEM=n),
 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
-- 
2.5.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] cpufreq: qoriq: Don't show cooling device messages if THERMAL_OF undefined

2016-04-18 Thread Viresh Kumar
On 18-04-16, 15:59, Jia Hongtao wrote:
> When THERMAL_OF is undefined the cooling device messages should not be
> shown. -ENOSYS is returned from of_cpufreq_cooling_register() when
> THERMAL_OF is undefined.
> 
> Signed-off-by: Jia Hongtao 
> ---
>  drivers/cpufreq/qoriq-cpufreq.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
> index 1c2fdc1..ff8da83 100644
> --- a/drivers/cpufreq/qoriq-cpufreq.c
> +++ b/drivers/cpufreq/qoriq-cpufreq.c
> @@ -340,8 +340,8 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy 
> *policy)
>   cpud->cdev = of_cpufreq_cooling_register(np,
>policy->related_cpus);
>  
> - if (IS_ERR(cpud->cdev)) {
> - pr_err("Failed to register cooling device cpu%d: %ld\n",
> + if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
> + pr_err("cpu%d is not running as cooling device: %ld\n",
>   policy->cpu, PTR_ERR(cpud->cdev));
>  
>   cpud->cdev = NULL;

Acked-by: Viresh Kumar 

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/2] cpufreq: qoriq: Fix cooling device registration issue during suspend

2016-04-18 Thread Viresh Kumar
On 18-04-16, 15:59, Jia Hongtao wrote:
> Cooling device is registered by ready callback. It's also invoked while
> system resuming from sleep (Enabling non-boot cpus). Thus cooling device
> may be multiple registered. Stop_cpu callback is invoked during suspend
> (Disabling non-boot cpus). So matchable unregistration is added to fix
> this issue.
> 
> Signed-off-by: Jia Hongtao 
> ---
>  drivers/cpufreq/qoriq-cpufreq.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
> index b23e525..1c2fdc1 100644
> --- a/drivers/cpufreq/qoriq-cpufreq.c
> +++ b/drivers/cpufreq/qoriq-cpufreq.c
> @@ -305,6 +305,7 @@ static int __exit qoriq_cpufreq_cpu_exit(struct 
> cpufreq_policy *policy)
>  {
>   struct cpu_data *data = policy->driver_data;
>  
> + cpufreq_cooling_unregister(data->cdev);
>   kfree(data->pclk);
>   kfree(data->table);
>   kfree(data);
> @@ -323,6 +324,12 @@ static int qoriq_cpufreq_target(struct cpufreq_policy 
> *policy,
>   return clk_set_parent(policy->clk, parent);
>  }
>  
> +static void qoriq_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> +{
> + struct cpu_data *cpud = policy->driver_data;
> +
> + cpufreq_cooling_unregister(cpud->cdev);
> +}
>  
>  static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
>  {
> @@ -352,6 +359,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
>   .verify = cpufreq_generic_frequency_table_verify,
>   .target_index   = qoriq_cpufreq_target,
>   .get= cpufreq_generic_get,
> + .stop_cpu   = qoriq_cpufreq_stop_cpu,
>   .ready  = qoriq_cpufreq_ready,
>   .attr   = cpufreq_generic_attr,
>  };

You don't need to do it from stop_cpu(), please use
qoriq_cpufreq_cpu_exit() for this.

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: introduce {cmp}xchg for u8 and u16

2016-04-18 Thread Pan Xinhui


On 2016年04月17日 03:43, Arnd Bergmann wrote:
> On Wednesday 13 April 2016 19:15:17 Pan Xinhui wrote:
>> Hello Peter,
>>
>> On 2016年04月12日 22:30, Peter Zijlstra wrote:
>>> On Sun, Apr 10, 2016 at 10:17:28PM +0800, Pan Xinhui wrote:

 On 2016年04月08日 15:47, Peter Zijlstra wrote:
> On Fri, Apr 08, 2016 at 02:41:46PM +0800, Pan Xinhui wrote:
>> From: pan xinhui 
>>
>> Implement xchg{u8,u16}{local,relaxed}, and
>> cmpxchg{u8,u16}{,local,acquire,relaxed}.
>>
>> Atomic operation on 8-bit and 16-bit data type is supported from power7
>
> And yes I see nothing P7 specific here, this implementation is for
> everything PPC64 afaict, no?
>
 Hello Peter,
No, it's not for every ppc. So yes, I need add #ifdef here. Thanks for 
 pointing it out.
 We might need a new config option and let it depend on POWER7/POWER8_CPU 
 or even POWER9...
>>>
>>> Right, I'm not sure if PPC has alternatives, but you could of course
>>> runtime patch the code from emulated with 32bit ll/sc to native 8/16bit
>>> ll/sc if present on the current CPU if you have infrastructure for these
>>> things.
>>>
>> seems interesting. I have no idea about how to runtime patch the code. I 
>> will try to learn that.
>> If so, we need change {cmp}xchg into uninline functions?
> 
> I think you don't need to, see do_feature_fixups()/patch_feature_section()
> 
Hello, Arnd
thanks for your tips :) I will take a look at them.
This time I will make generic functions for all ppc. But in future, We will 
runtime patch the code.

> Note that an #ifdef by itself has to worry about any combination of
> architectures, so in a kernel that has both POWER6 and POWER7 enabled,
> you cannot call the POWER7-only function.
> 
seems you are right.
While I think it's not a good idea to enable several cpu types. just select the 
minimum supported cpu in real world. :)

thanks
xinhui
>   Arnd
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 2/2] cpufreq: powernv: Ramp-down global pstate slower than local-pstate

2016-04-18 Thread Viresh Kumar
On 15-04-16, 11:58, Akshay Adiga wrote:
>  static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
> - unsigned long action, void *unused)
> +unsigned long action, void *unused)

Unrelated change.. better don't add such changes..

>  {
>   int cpu;
>   struct cpufreq_policy cpu_policy;
> @@ -603,15 +843,18 @@ static struct notifier_block powernv_cpufreq_opal_nb = {
>  static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>  {
>   struct powernv_smp_call_data freq_data;
> -
> + struct global_pstate_info *gpstates = policy->driver_data;

You removed a blank line here and I feel the code looks better with
that.

>   freq_data.pstate_id = powernv_pstate_info.min;
> + freq_data.gpstate_id = powernv_pstate_info.min;
>   smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
> + del_timer_sync(&gpstates->timer);
>  }
>  
>  static struct cpufreq_driver powernv_cpufreq_driver = {
>   .name   = "powernv-cpufreq",
>   .flags  = CPUFREQ_CONST_LOOPS,
>   .init   = powernv_cpufreq_cpu_init,
> + .exit   = powernv_cpufreq_cpu_exit,
>   .verify = cpufreq_generic_frequency_table_verify,
>   .target_index   = powernv_cpufreq_target_index,
>   .get= powernv_cpufreq_get,

None of the above comments are mandatory for you to fix..

Acked-by: Viresh Kumar 

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 1/2] cpufreq: powernv: Remove flag use-case of policy->driver_data

2016-04-18 Thread Viresh Kumar
On 15-04-16, 11:58, Akshay Adiga wrote:
> From: Shilpasri G Bhat 
> 
> commit 1b0289848d5d ("cpufreq: powernv: Add sysfs attributes to show
> throttle stats") used policy->driver_data as a flag for one-time creation
> of throttle sysfs files. Instead of this use 'kernfs_find_and_get()' to
> check if the attribute already exists. This is required as
> policy->driver_data is used for other purposes in the later patch.
> 
> Signed-off-by: Shilpasri G Bhat 
> Signed-off-by: Akshay Adiga 
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)

Reviewed-by: Viresh Kumar 

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: crash in ppc4xx-rng on canyonland

2016-04-18 Thread Herbert Xu
Christian Lamparter  wrote:
> 
> I tried to move ppc4xx-rng into crypto4xx (see attachment - patch #1).
> The driver works as is. But I can't come up with a way to attach the
> crypto4xx driver to the ppc4xx-rng OF node cleanly. Basically,
> I'm looking for a way to have one driver (with one context) be 
> in charge of two different OF nodes (ppc4xx-rng and ppc4xx-crypto).
> Is there any solution to this? Because otherwise, I would add a

Is it possible to have an RNG unit without the crypto unit? If not
then your first patch should be OK, provided that you add some error
handling when the RNG probe fails.  For example, if the RNG probe
fails we should probably not call remove on it later.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V11 0/4]perf/powerpc: Add ability to sample intr machine state in powerpc

2016-04-18 Thread Anju T

On Saturday 20 February 2016 10:32 AM, Anju T wrote:

This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:
available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 
r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr 
link xer ccr softe trap dar dsisr

  usage: perf record [<options>] [<command>]
 or: perf record [<options>] -- <command> [<options>]

 -I, --intr-regs[=]
   sample selected machine registers on interrupt, use 
-I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 
0xc01e538c period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 r00xc01e5e34
 r10xc00fe733f9a0
 r20xc1523100
 r30xc00ffaadeb60
 r40xc3456800
 r50x73a9b5e000
 r60x1e00
 r70x0
 r80x0
 r90x0
 r10   0x1
 r11   0x0
 r12   0x24022822
 r13   0xcfeec180
 r14   0x0
 r15   0xc01e4be18800
 r16   0x0
 r17   0xc00ffaac5000
 r18   0xc00fe733f8a0
 r19   0xc1523100
 r20   0xc009fd1c
 r21   0xc00fcaa69000
 r22   0xc01e4968
 r23   0xc1523100
 r24   0xc00fe733f850
 r25   0xc00fcaa69000
 r26   0xc3b8fcf0
 r27   0xfead
 r28   0x0
 r29   0xc00fcaa69000
 r30   0x1
 r31   0x0
 nip   0xc01dd320
 msr   0x90009032
 orig_r3 0xc01e538c
 ctr   0xc009d550
 link  0xc01e5e34
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
  ... thread: :4522:4522
  .. dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9
:4522  4522   496.768515:  1 cycles:  c01e538c 
.perf_event_context_sched_in (/boot/vmlinux)



Changes from v10:

- Included SOFTE as suggested by mpe
- The name of registers displayed is  changed from
   gpr* to r* also the macro names changed from
   PERF_REG_POWERPC_GPR* to PERF_REG_POWERPC_R*.
- The conflict in returning the ABI is resolved.
- #define PERF_REG_SP  is again changed to  PERF_REG_POWERPC_R1
- Comment in tools/perf/config/Makefile is updated.
- removed the "Reviewed-By" tag as the patch has logic changes.


Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in
   tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
   ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set.Functions added in
arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
   list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test.Since it is not 
required here,
   it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
   perf/powerpc: assign an id to each powerpc register
   perf/powerpc: add support for sampling intr machine state
   tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
   tool/perf: Add sample_reg_mask to include all perf_regs regs


  arch/powerpc/Kconfig|  1 +
  arch/powerpc/include/uapi/asm/perf_regs.h   | 50 
  arch/powerpc/perf/Makefile  |  1 +
  arch/powerpc/perf/perf_regs.c   | 91 +
  tools/perf/arch/powerpc/include/perf_regs.h | 69 ++
  tools/perf/arch/powerpc/util/Build  |  1 +
  tools/perf/arch/powerpc/util/perf_regs.c| 49 
  tools/perf/config/Makefile   

Re: [PATCH 00/10] Enable HugeTLB page migration on POWER

2016-04-18 Thread Anshuman Khandual
On 04/07/2016 11:07 AM, Anshuman Khandual wrote:
> This patch series enables HugeTLB page migration on POWER platform.
> This series has some core VM changes (patch 1, 2, 3) and some powerpc
> specific changes (patch 4, 5, 6, 7, 8, 9, 10). Comments, suggestions
> and inputs are welcome.
> 
> Anshuman Khandual (10):
>   mm/mmap: Replace SHM_HUGE_MASK with MAP_HUGE_MASK inside mmap_pgoff
>   mm/hugetlb: Add PGD based implementation awareness
>   mm/hugetlb: Protect follow_huge_(pud|pgd) functions from race

Hugh/Mel/Naoya/Andrew,

Andrew had already reviewed the changes in the first two patches during
the RFC phase and was okay with them. Could you please review the third
patch here as well and let me know your inputs/suggestions. Currently
the third patch has got build failures on SPARC and S390 platforms
(details of which are on the thread with possible fixes). Thank you.





___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/10] mm/hugetlb: Protect follow_huge_(pud|pgd) functions from race

2016-04-18 Thread Anshuman Khandual
On 04/07/2016 02:46 PM, kbuild test robot wrote:
> Hi Anshuman,
> 
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on v4.6-rc2 next-20160407]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improving the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Anshuman-Khandual/Enable-HugeTLB-page-migration-on-POWER/20160407-165841
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: sparc64-allyesconfig (attached as .config)
> reproduce:
> wget 
> https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
>  -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=sparc64 
> 
> All error/warnings (new ones prefixed by >>):
> 
>mm/hugetlb.c: In function 'follow_huge_pgd':
>>> >> mm/hugetlb.c:4395:3: error: implicit declaration of function 'pgd_page' 
>>> >> [-Werror=implicit-function-declaration]
>   page = pgd_page(*pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
>   ^


The following change seems to fix the build problem on SPARC but will
require some inputs from SPARC maintainers regarding the functional
correctness of the patch.

diff --git a/arch/sparc/include/asm/pgtable_64.h
b/arch/sparc/include/asm/pgtable_64.h
index f089cfa..7b7e6a0 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -804,6 +804,7 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 #define pmd_clear(pmdp)(pmd_val(*(pmdp)) = 0UL)
 #define pud_present(pud)   (pud_val(pud) != 0U)
 #define pud_clear(pudp)(pud_val(*(pudp)) = 0UL)
+#define pgd_page(pgd)  (pgd_val(pgd))
 #define pgd_page_vaddr(pgd)\
((unsigned long) __va(pgd_val(pgd)))
 #define pgd_present(pgd)   (pgd_val(pgd) != 0U)

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/10] mm/hugetlb: Protect follow_huge_(pud|pgd) functions from race

2016-04-18 Thread Anshuman Khandual
On 04/11/2016 11:34 AM, Anshuman Khandual wrote:
> On 04/07/2016 03:04 PM, kbuild test robot wrote:
>> > All errors (new ones prefixed by >>):
>> > 
>> >mm/hugetlb.c: In function 'follow_huge_pud':
>> >>> >> mm/hugetlb.c:4360:3: error: implicit declaration of function 
>> >>> >> 'pud_page' [-Werror=implicit-function-declaration]
>> >   page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
>> >   ^
>> >mm/hugetlb.c:4360:8: warning: assignment makes pointer from integer 
>> > without a cast
>> >   page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
>> >^
>> >mm/hugetlb.c: In function 'follow_huge_pgd':
>> >mm/hugetlb.c:4395:3: error: implicit declaration of function 'pgd_page' 
>> > [-Werror=implicit-function-declaration]
>> >   page = pgd_page(*pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
> Both the build errors here are because of the fact that pgd_page() is
> not available for some platforms and config options. It got missed as
> I ran only powerpc config options for build test purpose. My bad, will
> fix it.

The following change seems to fix the build problem on S390 but will
require some inputs from S390 maintainers regarding the functional
correctness of the patch.

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2f66645..834a8a6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -963,6 +963,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long 
address)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
 
 #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+#define pud_page(pud) pud_val(pud)
+#define pgd_page(pgd) pgd_val(pgd)



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: static-ify variables to fix sparse warnings

2016-04-18 Thread Frederic Barrat


Thanks Andrew!

Reviewed-by: fbar...@linux.vnet.ibm.com

 Fred


Le 18/04/2016 07:03, Andrew Donnellan a écrit :

Make a couple more variables static. Found by sparse.

Signed-off-by: Andrew Donnellan 
---
  drivers/misc/cxl/flash.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
index 68dd0b7..c63d61e 100644
--- a/drivers/misc/cxl/flash.c
+++ b/drivers/misc/cxl/flash.c
@@ -24,8 +24,8 @@ struct ai_header {
  };

  static struct semaphore sem;
-unsigned long *buffer[CXL_AI_MAX_ENTRIES];
-struct sg_list *le;
+static unsigned long *buffer[CXL_AI_MAX_ENTRIES];
+static struct sg_list *le;
  static u64 continue_token;
  static unsigned int transfer;




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] cpufreq: qoriq: Fix cooling device registration issue during suspend

2016-04-18 Thread Jia Hongtao
Cooling device is registered by ready callback. It's also invoked while
system resuming from sleep (Enabling non-boot cpus). Thus cooling device
may be multiple registered. Stop_cpu callback is invoked during suspend
(Disabling non-boot cpus). So matchable unregistration is added to fix
this issue.

Signed-off-by: Jia Hongtao 
---
 drivers/cpufreq/qoriq-cpufreq.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index b23e525..1c2fdc1 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -305,6 +305,7 @@ static int __exit qoriq_cpufreq_cpu_exit(struct 
cpufreq_policy *policy)
 {
struct cpu_data *data = policy->driver_data;
 
+   cpufreq_cooling_unregister(data->cdev);
kfree(data->pclk);
kfree(data->table);
kfree(data);
@@ -323,6 +324,12 @@ static int qoriq_cpufreq_target(struct cpufreq_policy 
*policy,
return clk_set_parent(policy->clk, parent);
 }
 
+static void qoriq_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+{
+   struct cpu_data *cpud = policy->driver_data;
+
+   cpufreq_cooling_unregister(cpud->cdev);
+}
 
 static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
 {
@@ -352,6 +359,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
.verify = cpufreq_generic_frequency_table_verify,
.target_index   = qoriq_cpufreq_target,
.get= cpufreq_generic_get,
+   .stop_cpu   = qoriq_cpufreq_stop_cpu,
.ready  = qoriq_cpufreq_ready,
.attr   = cpufreq_generic_attr,
 };
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] cpufreq: qoriq: Don't show cooling device messages if THERMAL_OF undefined

2016-04-18 Thread Jia Hongtao
When THERMAL_OF is undefined the cooling device messages should not be
shown. -ENOSYS is returned from of_cpufreq_cooling_register() when
THERMAL_OF is undefined.

Signed-off-by: Jia Hongtao 
---
 drivers/cpufreq/qoriq-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 1c2fdc1..ff8da83 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -340,8 +340,8 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy 
*policy)
cpud->cdev = of_cpufreq_cooling_register(np,
 policy->related_cpus);
 
-   if (IS_ERR(cpud->cdev)) {
-   pr_err("Failed to register cooling device cpu%d: %ld\n",
+   if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
+   pr_err("cpu%d is not running as cooling device: %ld\n",
policy->cpu, PTR_ERR(cpud->cdev));
 
cpud->cdev = NULL;
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/1] powerpc/86xx: Add support for Emerson/Artesyn MVME7100

2016-04-18 Thread Alessio Igor Bogani
Add support for the Artesyn MVME7100 Single Board Computer.

The MVME7100 is a 6U form factor VME64 computer with:

- A two e600 cores Freescale MPC8641D CPU
- 2 GB of DDR2 onboard memory
- Four Gigabit Ethernets
- Five 16550 compatible UARTs
- One USB 2.0 port
- Two PCI/PCI eXpress Mezzanine Card (PMC/XMC) Slots
- A DS1375 Real Time Clock (RTC)
- 512 KB of Non-Volatile Memory (NVRAM)
- Two 64 KB EEPROMs
- 128 MB NOR and 4/8 GB NAND Flash

This patch is based on linux-4.6-rc4 and has been only boot tested.

Signed-off-by: Alessio Igor Bogani 
---
v1 -> v2
Fix BCSR handling
Add missing @interrupt-cells in the device tree
to avoid 'of_irq_parse_pci() failed with rc=-22'
Reduce from 3 to 2 the PCI windows to avoid
'Ran out of outbound PCI ATMUs for IO resource'

This patch requires 
https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-April/141813.html
to be built and 
https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-April/141785.html to
work correctly.

Limitations:
This patch covers only models 171 and 173
No plans to support CPLD timers

Known issues:
All four PHYs work in polling mode

Configuration is missing for:
PCI IDSEL and PCI Interrupt definition

Support is missing for:
Cache and memory controllers (which are very similar to the 85xx ones
but right now I don't know if we can re-use their support)
Watchdog, USB, NVRAM, NOR, NAND, EEPROMs, VME, PMC/XMC and RTC

 arch/powerpc/boot/Makefile   |   4 +
 arch/powerpc/boot/dts/fsl/mvme7100.dts   | 206 +++
 arch/powerpc/boot/mvme7100.c |  70 +
 arch/powerpc/boot/ppcboot.h  |   2 +-
 arch/powerpc/boot/wrapper|   4 +
 arch/powerpc/configs/mpc86xx_basic_defconfig |   1 +
 arch/powerpc/platforms/86xx/Kconfig  |   7 +-
 arch/powerpc/platforms/86xx/Makefile |   1 +
 arch/powerpc/platforms/86xx/mvme7100.c   | 124 
 9 files changed, 417 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/fsl/mvme7100.dts
 create mode 100644 arch/powerpc/boot/mvme7100.c
 create mode 100644 arch/powerpc/platforms/86xx/mvme7100.c

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 8fe78a3..2c75fdb 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -113,6 +113,7 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
 src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
 src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
 src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
+src-plat-$(CONFIG_MVME7100) += mvme7100.c
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
@@ -296,6 +297,9 @@ image-$(CONFIG_TQM8560) += 
cuImage.tqm8560
 image-$(CONFIG_SBC8548)+= cuImage.sbc8548
 image-$(CONFIG_KSI8560)+= cuImage.ksi8560
 
+# Board ports in arch/powerpc/platform/86xx/Kconfig
+image-$(CONFIG_MVME7100)+= dtbImage.mvme7100
+
 # Board ports in arch/powerpc/platform/embedded6xx/Kconfig
 image-$(CONFIG_STORCENTER) += cuImage.storcenter
 image-$(CONFIG_MPC7448HPC2)+= cuImage.mpc7448hpc2
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts 
b/arch/powerpc/boot/dts/fsl/mvme7100.dts
new file mode 100644
index 000..2fdb912
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -0,0 +1,206 @@
+/*
+ * Device tree source for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani 
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+   model = "MVME7100";
+   compatible = "artesyn,MVME7100";
+
+   aliases {
+   pci1 = 
+   };
+
+   memory {
+   device_type = "memory";
+   reg = <0x 0x8000>;
+   };
+
+   soc: soc@f100 {
+   ranges = <0x 0xf100 0x0010>;
+
+   i2c@3000 {
+   hwmon@4c {
+   compatible = "dallas,max6649";
+   reg = <0x4c>;
+   };
+
+   rtc@68 {
+   compatible = "dallas,ds1337";
+   reg = <0x68>;
+   };
+   };
+
+
+   enet0: ethernet@24000 {
+   phy-handle = <>;
+   phy-connection-type = "rgmii-id";
+   };
+
+   mdio@24520 {
+   phy0: ethernet-phy@1 {
+

[PATCH 1/1] powerpc/fsl: Fix build of the dtb embedded kernel images

2016-04-18 Thread Alessio Igor Bogani
Commit dc37374 moved a lot of device tree files into the fsl directory,
fixing the Makefile for the cuImage target only. Unfortunately, there are other
targets which require embedding the device tree into the kernel image
(e.g. dtbImage.%). So use a more generic approach.

Signed-off-by: Alessio Igor Bogani 
---
 arch/powerpc/boot/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6116510..8fe78a3 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -362,9 +362,6 @@ $(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
 $(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
 
-$(obj)/cuImage.%: vmlinux $(obj)/fsl/%.dtb $(wrapperbits)
-   $(call if_changed,wrap,cuboot-$*,,$(obj)/fsl/$*.dtb)
-
 $(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
$(call 
if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
 
@@ -381,6 +378,9 @@ $(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
 $(obj)/%.dtb: $(src)/dts/%.dts FORCE
$(call if_changed_dep,dtc)
 
+$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
+   $(call if_changed_dep,dtc)
+
 # If there isn't a platform selected then just strip the vmlinux.
 ifeq (,$(image-y))
 image-y := vmlinux.strip
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 1/1] powerpc/fsl: Fix build of the dtb embedded kernel images

2016-04-18 Thread Alessio Igor Bogani
Scott,

On 17 April 2016 at 03:50, Scott Wood  wrote:
> On Fri, 2016-04-15 at 10:27 +0200, Alessio Igor Bogani wrote:
[...]
>> Any comments?
>
> Looks OK to me.

Thanks for reviewing it. A non-RFC version follows.

Ciao,
Alessio
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev