[PATCH v9 07/26] powerpc/powernv: Fix initial IO and M32 segmap

2016-05-02 Thread Gavin Shan
There are two arrays for IO and M32 segment maps on every PHB.
The arrays are indexed by segment number, and the value stored in
each element is the PE number, indicating that the segment is
assigned to that PE. Initially, all elements in those two arrays
are zero, meaning all segments are assigned to PE#0, which is wrong.

This sets the initial value of every element in those two arrays
to IODA_INVALID_PE, meaning that no segment is assigned to any
PE.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 4aa6cdf..59b20e5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3240,6 +3240,7 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
const __be64 *prop64;
const __be32 *prop32;
int len;
+   unsigned int segno;
u64 phb_id;
void *aux;
long rc;
@@ -3334,8 +3335,13 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
aux = memblock_virt_alloc(size, 0);
phb->ioda.pe_alloc = aux;
phb->ioda.m32_segmap = aux + m32map_off;
-   if (phb->type == PNV_PHB_IODA1)
+   for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+   phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+   if (phb->type == PNV_PHB_IODA1) {
phb->ioda.io_segmap = aux + iomap_off;
+   for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+   phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
+   }
phb->ioda.pe_array = aux + pemap_off;
set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 18/26] powerpc/pci: Rename pcibios_{add, remove}_pci_devices()

2016-05-02 Thread Gavin Shan
This renames pcibios_{add,remove}_pci_devices() to avoid conflicts
with names of the weak functions in PCI subsystem, which have the
prefix "pcibios". No logical changes introduced.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/pci-bridge.h |  4 ++--
 arch/powerpc/kernel/eeh_driver.c  | 12 ++--
 arch/powerpc/kernel/pci-hotplug.c | 15 +++
 drivers/pci/hotplug/rpadlpar_core.c   |  2 +-
 drivers/pci/hotplug/rpaphp_core.c |  4 ++--
 drivers/pci/hotplug/rpaphp_pci.c  |  2 +-
 6 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 220129f..99027b8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -260,10 +260,10 @@ static inline struct eeh_dev *pdn_to_eeh_dev(struct 
pci_dn *pdn)
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void pcibios_remove_pci_devices(struct pci_bus *bus);
+extern void pci_hp_remove_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
-extern void pcibios_add_pci_devices(struct pci_bus *bus);
+extern void pci_hp_add_devices(struct pci_bus *bus);
 
 
 extern void isa_bridge_find_early(struct pci_controller *hose);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index fb6207d..618d13c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -621,7 +621,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
 * We don't remove the corresponding PE instances because
 * we need the information afterwords. The attached EEH
 * devices are expected to be attached soon when calling
-* into pcibios_add_pci_devices().
+* into pci_hp_add_devices().
 */
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
@@ -630,7 +630,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
} else {
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
-   pcibios_remove_pci_devices(bus);
+   pci_hp_remove_devices(bus);
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
@@ -681,7 +681,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
-   pcibios_add_pci_devices(bus);
+   pci_hp_add_devices(bus);
} else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
@@ -691,7 +691,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct 
pci_bus *bus,
if (pe->type & EEH_PE_VF)
eeh_add_virt_device(edev, NULL);
else
-   pcibios_add_pci_devices(frozen_bus);
+   pci_hp_add_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
@@ -896,7 +896,7 @@ perm_error:
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
pci_lock_rescan_remove();
-   pcibios_remove_pci_devices(frozen_bus);
+   pci_hp_remove_devices(frozen_bus);
pci_unlock_rescan_remove();
}
}
@@ -981,7 +981,7 @@ static void eeh_handle_special_event(void)
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
-   pcibios_remove_pci_devices(bus);
+   pci_hp_remove_devices(bus);
}
pci_unlock_rescan_remove();
}
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 59c4361..2d108e5 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -38,20 +38,20 @@ void pcibios_release_device(struct pci_dev *dev)
 }
 
 /**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * pci_hp_remove_devices - remove all devices under this bus
  * @bus: the indicated PCI bus
  *
  * Remove all of the PCI devices under this bus both from the
  * linux pci device tree, and from the powerpc EEH address cache.
  */
-void pcibios_remove_pci_devices(struct pci_bus *bus)
+void pci_hp_remove_devices(struct pci_bus *bus)
 {
struct pci_dev *dev, *tmp;
struct pci_bus *child_bus;
 
/* First go down child busses */
list_for_each_entry(child_bus, >children, node)
-   

[PATCH v9 08/26] powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()

2016-05-02 Thread Gavin Shan
pnv_ioda_setup_pe_seg() associates the IO and M32 segments with the
owner PE. The code mapping segments should be fixed and immune from
logic changes introduced to pnv_ioda_setup_pe_seg().

This moves the code mapping segments to helper pnv_ioda_setup_pe_res().
The data type for @rc is changed to "int64_t". Also, argument @hose is
removed from pnv_ioda_setup_pe_seg() as it can be obtained from @pe. No
functional changes introduced.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 121 +++---
 1 file changed, 62 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 59b20e5..b954fbc 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2929,19 +2929,72 @@ truncate_iov:
 }
 #endif /* CONFIG_PCI_IOV */
 
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+ struct resource *res)
+{
+   struct pnv_phb *phb = pe->phb;
+   struct pci_bus_region region;
+   int index;
+   int64_t rc;
+
+   if (!res || !res->flags || res->start > res->end)
+   return;
+
+   if (res->flags & IORESOURCE_IO) {
+   region.start = res->start - phb->ioda.io_pci_base;
+   region.end   = res->end - phb->ioda.io_pci_base;
+   index = region.start / phb->ioda.io_segsize;
+
+   while (index < phb->ioda.total_pe_num &&
+  region.start <= region.end) {
+   phb->ioda.io_segmap[index] = pe->pe_number;
+   rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+   pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+   if (rc != OPAL_SUCCESS) {
+   pr_err("%s: Error %lld mapping IO segment#%d to 
PE#%d\n",
+  __func__, rc, index, pe->pe_number);
+   break;
+   }
+
+   region.start += phb->ioda.io_segsize;
+   index++;
+   }
+   } else if ((res->flags & IORESOURCE_MEM) &&
+  !pnv_pci_is_mem_pref_64(res->flags)) {
+   region.start = res->start -
+  phb->hose->mem_offset[0] -
+  phb->ioda.m32_pci_base;
+   region.end   = res->end -
+  phb->hose->mem_offset[0] -
+  phb->ioda.m32_pci_base;
+   index = region.start / phb->ioda.m32_segsize;
+
+   while (index < phb->ioda.total_pe_num &&
+  region.start <= region.end) {
+   phb->ioda.m32_segmap[index] = pe->pe_number;
+   rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+   pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+   if (rc != OPAL_SUCCESS) {
+   pr_err("%s: Error %lld mapping M32 segment#%d 
to PE#%d",
+  __func__, rc, index, pe->pe_number);
+   break;
+   }
+
+   region.start += phb->ioda.m32_segsize;
+   index++;
+   }
+   }
+}
+
 /*
  * This function is supposed to be called on basis of PE from top
  * to bottom style. So the the I/O or MMIO segment assigned to
  * parent PE could be overrided by its child PEs if necessary.
  */
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
- struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
 {
-   struct pnv_phb *phb = hose->private_data;
-   struct pci_bus_region region;
struct resource *res;
-   int i, index;
-   int rc;
+   int i;
 
/*
 * NOTE: We only care PCI bus based PE for now. For PCI
@@ -2950,58 +3003,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller 
*hose,
 */
BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
 
-   pci_bus_for_each_resource(pe->pbus, res, i) {
-   if (!res || !res->flags ||
-   res->start > res->end)
-   continue;
-
-   if (res->flags & IORESOURCE_IO) {
-   region.start = res->start - phb->ioda.io_pci_base;
-   region.end   = res->end - phb->ioda.io_pci_base;
-   index = region.start / phb->ioda.io_segsize;
-
-   while (index < phb->ioda.total_pe_num &&
-  region.start <= region.end) {
-   phb->ioda.io_segmap[index] = pe->pe_number;
-   rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- 

[PATCH v9 11/26] powerpc/powernv: Rename M64 related functions

2016-05-02 Thread Gavin Shan
This renames those functions picking PE number based on consumed
M64 segments, mapping M64 segments to PEs as those functions are
going to be shared by IODA1/IODA2 in next patch. No logical changes
introduced.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 832b430..37f22b0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -219,7 +219,7 @@ fail:
return -EIO;
 }
 
-static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
 unsigned long *pe_bitmap)
 {
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -246,22 +246,22 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev 
*pdev,
}
 }
 
-static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
-unsigned long *pe_bitmap,
-bool all)
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+   unsigned long *pe_bitmap,
+   bool all)
 {
struct pci_dev *pdev;
 
list_for_each_entry(pdev, >devices, bus_list) {
-   pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+   pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
 
if (all && pdev->subordinate)
-   pnv_ioda2_reserve_m64_pe(pdev->subordinate,
-pe_bitmap, all);
+   pnv_ioda_reserve_m64_pe(pdev->subordinate,
+   pe_bitmap, all);
}
 }
 
-static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -283,7 +283,7 @@ static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus 
*bus, bool all)
}
 
/* Figure out reserved PE numbers by the PE */
-   pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
+   pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
 
/*
 * the current bus might not own M64 window and that's all
@@ -365,8 +365,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb 
*phb)
/* Use last M64 BAR to cover M64 window */
phb->ioda.m64_bar_idx = 15;
phb->init_m64 = pnv_ioda2_init_m64;
-   phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
-   phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+   phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
+   phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
 }
 
 static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 17/26] powerpc/powernv: Use PE instead of number during setup and release

2016-05-02 Thread Gavin Shan
In the current implementation, the PEs that are allocated or picked
from the reserved list are identified by PE number. The PE instance
has to be looked up from the PE number eventually. We have the
same issue when a PE is released.

This makes pnv_ioda_pick_m64_pe() and pnv_ioda_alloc_pe() return the
PE instance so that pnv_ioda_setup_bus_PE() can use the allocated
or reserved PE instance directly. Also, pnv_ioda_setup_bus_PE()
returns the reserved/allocated PE instance to be used in subsequent
patches. On the other hand, pnv_ioda_free_pe() takes the PE instance
(not the number) as its argument. No logical changes introduced.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 104 +-
 arch/powerpc/platforms/powernv/pci.h  |   2 +-
 2 files changed, 59 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index cfd2906..5ee8a57 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -123,6 +123,14 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long 
flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
 }
 
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+   phb->ioda.pe_array[pe_no].phb = phb;
+   phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+   return >ioda.pe_array[pe_no];
+}
+
 static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 {
if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
@@ -135,11 +143,10 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int 
pe_no)
pr_debug("%s: PE %d was reserved on PHB#%x\n",
 __func__, pe_no, phb->hose->global_number);
 
-   phb->ioda.pe_array[pe_no].phb = phb;
-   phb->ioda.pe_array[pe_no].pe_number = pe_no;
+   pnv_ioda_init_pe(phb, pe_no);
 }
 
-static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
unsigned long pe;
 
@@ -147,20 +154,20 @@ static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb)
pe = find_next_zero_bit(phb->ioda.pe_alloc,
phb->ioda.total_pe_num, 0);
if (pe >= phb->ioda.total_pe_num)
-   return IODA_INVALID_PE;
+   return NULL;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
 
-   phb->ioda.pe_array[pe].phb = phb;
-   phb->ioda.pe_array[pe].pe_number = pe;
-   return pe;
+   return pnv_ioda_init_pe(phb, pe);
 }
 
-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 {
-   WARN_ON(phb->ioda.pe_array[pe].pdev);
+   struct pnv_phb *phb = pe->phb;
 
-   memset(>ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
-   clear_bit(pe, phb->ioda.pe_alloc);
+   WARN_ON(pe->pdev);
+
+   memset(pe, 0, sizeof(struct pnv_ioda_pe));
+   clear_bit(pe->pe_number, phb->ioda.pe_alloc);
 }
 
 /* The default M64 BAR is shared by all PEs */
@@ -320,7 +327,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
}
 }
 
-static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -330,7 +337,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus 
*bus, bool all)
 
/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
-   return IODA_INVALID_PE;
+   return NULL;
 
/* Allocate bitmap */
size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
@@ -338,7 +345,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus 
*bus, bool all)
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__);
-   return IODA_INVALID_PE;
+   return NULL;
}
 
/* Figure out reserved PE numbers by the PE */
@@ -351,7 +358,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus 
*bus, bool all)
 */
if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc);
-   return IODA_INVALID_PE;
+   return NULL;
}
 
/*
@@ -397,7 +404,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus 
*bus, bool all)
}
 
kfree(pe_alloc);
-   return master_pe->pe_number;
+   return master_pe;
 }
 
 static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -963,7 +970,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = 

[PATCH v9 12/26] powerpc/powernv/ioda1: M64 support on P7IOC

2016-05-02 Thread Gavin Shan
This enables the M64 window on P7IOC, which has already been enabled
on PHB3. Unlike PHB3, where 16 M64 BARs are supported and each of
them can be owned by one particular PE# exclusively or divided
evenly into 256 segments, every P7IOC PHB has 16 M64 BARs and each
of them is divided into 8 segments. So every P7IOC PHB supports
128 M64 segments in total. P7IOC has M64DT, which helps map
one particular M64 segment# to an arbitrary PE#. PHB3 doesn't have
M64DT, meaning that one M64 segment can only be pinned to a
fixed PE#.

In order to unify M64 support on P7IOC and PHB3, we just
provide 128 M64 segments on every P7IOC PHB and pin each of them
to a fixed PE#, bypassing the function of M64DT. In turn, we
just need a different phb->init_m64() for P7IOC and PHB3, and map
the M64 segment in pnv_ioda_reserve_m64_pe() for P7IOC; most
of the code is shared by them.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 89 +--
 1 file changed, 86 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 37f22b0..a1b74ec 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -48,6 +48,9 @@
 #include "powernv.h"
 #include "pci.h"
 
+#define PNV_IODA1_M64_NUM  16  /* Number of M64 BARs   */
+#define PNV_IODA1_M64_SEGS 8   /* Segments per M64 BAR */
+
 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
 #define TCE32_TABLE_SIZE   ((0x1000 / 0x1000) * 8)
 
@@ -246,6 +249,64 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev 
*pdev,
}
 }
 
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+   struct resource *r;
+   int index;
+
+   /*
+* There are 16 M64 BARs, each of which has 8 segments. So
+* there are as many M64 segments as the maximum number of
+* PEs, which is 128.
+*/
+   for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
+   unsigned long base, segsz = phb->ioda.m64_segsize;
+   int64_t rc;
+
+   base = phb->ioda.m64_base +
+  index * PNV_IODA1_M64_SEGS * segsz;
+   rc = opal_pci_set_phb_mem_window(phb->opal_id,
+   OPAL_M64_WINDOW_TYPE, index, base, 0,
+   PNV_IODA1_M64_SEGS * segsz);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn("  Error %lld setting M64 PHB#%d-BAR#%d\n",
+   rc, phb->hose->global_number, index);
+   goto fail;
+   }
+
+   rc = opal_pci_phb_mmio_enable(phb->opal_id,
+   OPAL_M64_WINDOW_TYPE, index,
+   OPAL_ENABLE_M64_SPLIT);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn("  Error %lld enabling M64 PHB#%d-BAR#%d\n",
+   rc, phb->hose->global_number, index);
+   goto fail;
+   }
+   }
+
+   /*
+* Exclude the segment used by the reserved PE, which
+* is expected to be 0 or last supported PE#.
+*/
+   r = >hose->mem_resources[1];
+   if (phb->ioda.reserved_pe_idx == 0)
+   r->start += phb->ioda.m64_segsize;
+   else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+   r->end -= phb->ioda.m64_segsize;
+   else
+   pr_warn("  Cannot cut M64 segment for reserved PE#%d\n",
+   phb->ioda.reserved_pe_idx);
+
+   return 0;
+
+fail:
+   for ( ; index >= 0; index--)
+   opal_pci_phb_mmio_enable(phb->opal_id,
+   OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
+
+   return -EIO;
+}
+
 static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
unsigned long *pe_bitmap,
bool all)
@@ -315,6 +376,26 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus 
*bus, bool all)
pe->master = master_pe;
list_add_tail(>list, _pe->slaves);
}
+
+   /*
+* P7IOC supports M64DT, which helps mapping M64 segment
+* to one particular PE#. However, PHB3 has fixed mapping
+* between M64 segment and PE#. In order to have same logic
+* for P7IOC and PHB3, we enforce fixed mapping between M64
+* segment and PE# on P7IOC.
+*/
+   if (phb->type == PNV_PHB_IODA1) {
+   int64_t rc;
+
+   rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+   pe->pe_number, OPAL_M64_WINDOW_TYPE,
+   pe->pe_number / PNV_IODA1_M64_SEGS,
+   

[PATCH v9 22/26] powerpc/pci: Introduce pci_remove_device_node_info()

2016-05-02 Thread Gavin Shan
This implements and exports pci_remove_device_node_info(). It's
used to remove the pdn (struct pci_dn) for the indicated device
node. The function is going to be used by PowerNV PCI hotplug
driver.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/include/asm/pci-bridge.h |  1 +
 arch/powerpc/kernel/pci_dn.c  | 23 +++
 2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 07b94ec..467c0b0 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -237,6 +237,7 @@ extern struct pci_dn *add_dev_pci_data(struct pci_dev 
*pdev);
 extern void remove_dev_pci_data(struct pci_dev *pdev);
 extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
   struct device_node *dn);
+extern void pci_remove_device_node_info(struct device_node *dn);
 
 static inline int pci_device_from_OF_node(struct device_node *np,
  u8 *bus, u8 *devfn)
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0a249ff..ce10281 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -331,6 +331,29 @@ struct pci_dn *pci_add_device_node_info(struct 
pci_controller *hose,
 }
 EXPORT_SYMBOL_GPL(pci_add_device_node_info);
 
+void pci_remove_device_node_info(struct device_node *dn)
+{
+   struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+#ifdef CONFIG_EEH
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+   if (edev)
+   edev->pdn = NULL;
+#endif
+
+   if (!pdn)
+   return;
+
+   WARN_ON(!list_empty(>child_list));
+   list_del(>list);
+   if (pdn->parent)
+   of_node_put(pdn->parent->node);
+
+   dn->data = NULL;
+   kfree(pdn);
+}
+EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
+
 /*
  * Traverse a device tree stopping each PCI device in the tree.
  * This is done depth first.  As each node is processed, a "pre"
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 24/26] powerpc/pci: Don't scan empty slot

2016-05-02 Thread Gavin Shan
In the hotplug case, function pci_hp_add_devices() is called to rescan
the specified PCI bus, which might not have any child devices. Accessing
the PCI bus's child device node in that case will invariably crash the
kernel.

This adds one more check to skip scanning PCI bus that doesn't have
any subordinate devices from device-tree, in order to avoid kernel
crash.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/kernel/pci-hotplug.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 46587a1..2d71269 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -120,7 +120,8 @@ void pci_hp_add_devices(struct pci_bus *bus)
if (mode == PCI_PROBE_DEVTREE) {
/* use ofdt-based probe */
of_rescan_bus(dn, bus);
-   } else if (mode == PCI_PROBE_NORMAL) {
+   } else if (mode == PCI_PROBE_NORMAL &&
+  dn->child && PCI_DN(dn->child)) {
/*
 * Use legacy probe. In the partial hotplug case, we
 * probably have grandchildren devices unplugged. So
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 09/26] powerpc/powernv: IO and M32 mapping based on PCI device resources

2016-05-02 Thread Gavin Shan
Currently, the IO and M32 segments are mapped to the corresponding
PE based on the windows of the parent bridge of the PE's primary bus.
This is not going to work when the windows of the root port or the
upstream port of the PCIe switch behind the root port are extended to
the PHB's apertures in order to support hotplug in a subsequent patch.

This fixes the issue by mapping IO and M32 segments based on the
resources of the PCI devices included in the PE, instead of the
windows of the parent bridge of the PE's primary bus.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index b954fbc..904790b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2993,7 +2993,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
  */
 static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
 {
-   struct resource *res;
+   struct pci_dev *pdev;
int i;
 
/*
@@ -3003,8 +3003,21 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
 */
BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
 
-   pci_bus_for_each_resource(pe->pbus, res, i)
-   pnv_ioda_setup_pe_res(pe, res);
+   list_for_each_entry(pdev, >pbus->devices, bus_list) {
+   for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+   pnv_ioda_setup_pe_res(pe, >resource[i]);
+
+   /*
+* If the PE contains all subordinate PCI buses, the
+* windows of the child bridges should be mapped to
+* the PE as well.
+*/
+   if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+   continue;
+   for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+   pnv_ioda_setup_pe_res(pe,
+   >resource[PCI_BRIDGE_RESOURCES + i]);
+   }
 }
 
 static void pnv_pci_ioda_setup_seg(void)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 06/26] powerpc/powernv: Data type unsigned int for PE number

2016-05-02 Thread Gavin Shan
This changes the data type of PE number from "int" to "unsigned int"
in order to match the fact that a PE number is never negative:

   * The number of PE to which the specified PCI device is attached.
   * The PE number map for SRIOV VFs.
   * The returned PE number from pnv_ioda_alloc_pe().
   * The returned PE number from pnv_ioda2_pick_m64_pe().

Suggested-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/pci-bridge.h | 6 +++---
 arch/powerpc/platforms/powernv/pci-ioda.c | 8 
 arch/powerpc/platforms/powernv/pci.c  | 2 +-
 arch/powerpc/platforms/powernv/pci.h  | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 023c8c8..220129f 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -209,14 +209,14 @@ struct pci_dn {
 #ifdef CONFIG_EEH
struct eeh_dev *edev;   /* eeh device */
 #endif
-#define IODA_INVALID_PE(-1)
+#define IODA_INVALID_PE0x
 #ifdef CONFIG_PPC_POWERNV
-   int pe_number;
+   unsigned int pe_number;
int vf_index;   /* VF index in the PF */
 #ifdef CONFIG_PCI_IOV
u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
u16 num_vfs;/* number of VFs enabled*/
-   int *pe_num_map;/* PE# for the first VF PE or array */
+   unsigned int *pe_num_map;   /* PE# for the first VF PE or array */
boolm64_single_mode;/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64(-1)
int (*m64_map)[PCI_SRIOV_NUM_BARS];
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1d2514f..4aa6cdf 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -138,7 +138,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int 
pe_no)
phb->ioda.pe_array[pe_no].pe_number = pe_no;
 }
 
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
unsigned long pe;
 
@@ -261,7 +261,7 @@ static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
}
 }
 
-static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -919,7 +919,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(dev);
struct pnv_ioda_pe *pe;
-   int pe_num;
+   unsigned int pe_num;
 
if (!pdn) {
pr_err("%s: Device tree node not associated properly\n",
@@ -1010,7 +1010,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, 
bool all)
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
struct pnv_ioda_pe *pe;
-   int pe_num = IODA_INVALID_PE;
+   unsigned int pe_num = IODA_INVALID_PE;
 
/* Check if PE is determined by M64 */
if (phb->pick_m64_pe)
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index afbaa1c..8827461 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -370,7 +370,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
struct pnv_phb *phb = pdn->phb->private_data;
u8  fstate;
__be16  pcierr;
-   int pe_no;
+   unsigned int pe_no;
s64 rc;
 
/*
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 784882a..66f2569 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -113,7 +113,7 @@ struct pnv_phb {
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pci_bus *bus,
   unsigned long *pe_bitmap, bool all);
-   int (*pick_m64_pe)(struct pci_bus *bus, bool all);
+   unsigned int (*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 04/26] powerpc/powernv: Reorder fields in struct pnv_phb

2016-05-02 Thread Gavin Shan
This moves those fields in struct pnv_phb that are related to PE
allocation around. No logical change.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci.h | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 78f035e..f2a1452 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -140,15 +140,14 @@ struct pnv_phb {
unsigned intio_segsize;
unsigned intio_pci_base;
 
-   /* PE allocation bitmap */
-   unsigned long   *pe_alloc;
-   /* PE allocation mutex */
+   /* PE allocation */
struct mutexpe_alloc_mutex;
+   unsigned long   *pe_alloc;
+   struct pnv_ioda_pe  *pe_array;
 
/* M32 & IO segment maps */
unsigned int*m32_segmap;
unsigned int*io_segmap;
-   struct pnv_ioda_pe  *pe_array;
 
/* IRQ chip */
int irq_chip_init;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 00/26] powerpc/powernv: PCI hotplug preparation

2016-05-02 Thread Gavin Shan
The series is split from "[PATCH v8 00/45] powerpc/powernv: PCI hotplug
support". The series does a couple of things, as listed below. The patches
are required to support PCI hotplug on PowerNV platforms. However, the
patches refactor the code with one goal: not affecting the current logic.

   * Code cleanup and refactoring.
   * Track IO/M32/M64 segments consumed by one particular PE.
   * Remove DMA32 list and improve DMA32 segment tracking.
   * M64 support for IODA1 so that we have unified basis for the
 subsequent patches to support PCI hotplug.
   * Couple of fixes to PCI hotplug (used by EEH).
   * Exported functions to be used by PCI hotplug.

Gavin Shan (26):
  powerpc/pci: Cleanup on struct pci_controller_ops
  powerpc/powernv: Cleanup on pci_controller_ops instances
  powerpc/powernv: Drop phb->bdfn_to_pe()
  powerpc/powernv: Reorder fields in struct pnv_phb
  powerpc/powernv: Rename PE# fields in struct pnv_phb
  powerpc/powernv: Data type unsigned int for PE number
  powerpc/powernv: Fix initial IO and M32 segmap
  powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()
  powerpc/powernv: IO and M32 mapping based on PCI device resources
  powerpc/powernv: Track M64 segment consumption
  powerpc/powernv: Rename M64 related functions
  powerpc/powernv/ioda1: M64 support on P7IOC
  powerpc/powernv/ioda1: Rename pnv_pci_ioda_setup_dma_pe()
  powerpc/powernv/ioda1: Introduce PNV_IODA1_DMA32_SEGSIZE
  powerpc/powernv: Remove DMA32 PE list
  powerpc/powernv/ioda1: Improve DMA32 segment track
  powerpc/powernv: Use PE instead of number during setup and release
  powerpc/pci: Rename pcibios_{add,remove}_pci_devices()
  powerpc/pci: Rename pcibios_find_pci_bus()
  powerpc/pci: Move pci_find_bus_by_node() around
  powerpc/pci: Export pci_add_device_node_info()
  powerpc/pci: Introduce pci_remove_device_node_info()
  powerpc/pci: Export pci_traverse_device_nodes()
  powerpc/pci: Don't scan empty slot
  powerpc/powernv: Simplify pnv_eeh_reset()
  powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus()

 arch/powerpc/include/asm/pci-bridge.h|  41 +-
 arch/powerpc/include/asm/ppc-pci.h   |   6 +-
 arch/powerpc/kernel/eeh_driver.c |  12 +-
 arch/powerpc/kernel/pci-hotplug.c|  47 +-
 arch/powerpc/kernel/pci_dn.c |  66 ++-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  81 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c| 719 +++
 arch/powerpc/platforms/powernv/pci.c |   4 +-
 arch/powerpc/platforms/powernv/pci.h |  41 +-
 arch/powerpc/platforms/pseries/msi.c |   4 +-
 arch/powerpc/platforms/pseries/pci_dlpar.c   |  32 --
 arch/powerpc/platforms/pseries/setup.c   |   2 +-
 drivers/pci/hotplug/rpadlpar_core.c  |   8 +-
 drivers/pci/hotplug/rpaphp_core.c|   4 +-
 drivers/pci/hotplug/rpaphp_pci.c |   4 +-
 15 files changed, 592 insertions(+), 479 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 03/26] powerpc/powernv: Drop phb->bdfn_to_pe()

2016-05-02 Thread Gavin Shan
This drops struct pnv_phb::bdfn_to_pe() as nobody uses it.

Signed-off-by: Gavin Shan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 9 -
 arch/powerpc/platforms/powernv/pci.h  | 1 -
 2 files changed, 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 524c9c7..10ecd97 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3195,12 +3195,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev 
*dev)
return true;
 }
 
-static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
-  u32 devfn)
-{
-   return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
-}
-
 static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
struct pnv_phb *phb = hose->private_data;
@@ -3377,9 +3371,6 @@ static void __init pnv_pci_init_ioda_phb(struct 
device_node *np,
phb->freeze_pe = pnv_ioda_freeze_pe;
phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
 
-   /* Setup RID -> PE mapping function */
-   phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
-
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
 
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 3f814f3..78f035e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -110,7 +110,6 @@ struct pnv_phb {
 unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
-   u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pci_bus *bus,
   unsigned long *pe_bitmap, bool all);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 01/26] powerpc/pci: Cleanup on struct pci_controller_ops

2016-05-02 Thread Gavin Shan
Each PHB has one instance of "struct pci_controller_ops" that includes
various callbacks called by PCI subsystem. In the definition of this
struct, some callbacks have explicit names for their arguments, but the
rest don't.

This adds all explicit names of the arguments to the callbacks in
"struct pci_controller_ops" so that the code looks consistent. Also,
argument name @dev is replaced by @pdev as the latter is the
preferred name for a PCI device.

Signed-off-by: Gavin Shan 
Reviewed-by: Daniel Axtens 
Reviewed-by: Andrew Donnellan 
Reviewed-by: Alexey Kardashevskiy 
---
 arch/powerpc/include/asm/pci-bridge.h | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index f5056e3..023c8c8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -17,33 +17,34 @@ struct device_node;
  * PCI controller operations
  */
 struct pci_controller_ops {
-   void(*dma_dev_setup)(struct pci_dev *dev);
+   void(*dma_dev_setup)(struct pci_dev *pdev);
void(*dma_bus_setup)(struct pci_bus *bus);
 
-   int (*probe_mode)(struct pci_bus *);
+   int (*probe_mode)(struct pci_bus *bus);
 
/* Called when pci_enable_device() is called. Returns true to
 * allow assignment/enabling of the device. */
-   bool(*enable_device_hook)(struct pci_dev *);
+   bool(*enable_device_hook)(struct pci_dev *pdev);
 
-   void(*disable_device)(struct pci_dev *);
+   void(*disable_device)(struct pci_dev *pdev);
 
-   void(*release_device)(struct pci_dev *);
+   void(*release_device)(struct pci_dev *pdev);
 
/* Called during PCI resource reassignment */
-   resource_size_t (*window_alignment)(struct pci_bus *, unsigned long 
type);
-   void(*reset_secondary_bus)(struct pci_dev *dev);
+   resource_size_t (*window_alignment)(struct pci_bus *bus,
+   unsigned long type);
+   void(*reset_secondary_bus)(struct pci_dev *pdev);
 
 #ifdef CONFIG_PCI_MSI
-   int (*setup_msi_irqs)(struct pci_dev *dev,
+   int (*setup_msi_irqs)(struct pci_dev *pdev,
  int nvec, int type);
-   void(*teardown_msi_irqs)(struct pci_dev *dev);
+   void(*teardown_msi_irqs)(struct pci_dev *pdev);
 #endif
 
-   int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask);
-   u64 (*dma_get_required_mask)(struct pci_dev *dev);
+   int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask);
+   u64 (*dma_get_required_mask)(struct pci_dev *pdev);
 
-   void(*shutdown)(struct pci_controller *);
+   void(*shutdown)(struct pci_controller *hose);
 };
 
 /*
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH 5/5] vfio-pci: Allow to mmap MSI-X table if interrupt remapping is supported

2016-05-02 Thread Tian, Kevin
> From: Yongji Xie
> Sent: Wednesday, April 27, 2016 8:43 PM
> 
> This patch enables mmapping MSI-X tables if hardware supports
> interrupt remapping which can ensure that a given pci device
> can only shoot the MSIs assigned for it.
> 
> With MSI-X table mmapped, we also need to expose the
> read/write interface which will be used to access MSI-X table.
> 
> Signed-off-by: Yongji Xie 

A curious question here. Does "allow to mmap MSI-X" essentially
mean that KVM guest can directly read/write physical MSI-X
structure then?

Thanks
Kevin
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 7/9] powerpc/powernv: Add platform support for stop instruction

2016-05-02 Thread Michael Neuling

> diff --git a/arch/powerpc/include/asm/cputable.h 
> b/arch/powerpc/include/asm/cputable.h
> index df4fb5f..a4739a1 100644
> --- a/arch/powerpc/include/asm/cputable.h
> +++ b/arch/powerpc/include/asm/cputable.h
> @@ -205,6 +205,7 @@ enum {
>  #define CPU_FTR_DABRX
> LONG_ASM_CONST(0x0800)
>  #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000)
>  #define CPU_FTR_SUBCORE  
> LONG_ASM_CONST(0x2000)
> +#define CPU_FTR_STOP_INSTLONG_ASM_CONST(0x4000)

In general, we are putting all the POWER9 features under CPU_FTR_ARCH_300.
Is there a reason you need this separate bit?

CPU_FTR bits are fairly scarce these days.

Mikey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 0/5] powerpc/pm: QorIQ deep sleep

2016-05-02 Thread Scott Wood
On Tue, 2016-04-26 at 10:27 +, Chenhui Zhao wrote:
> Any comment?
> 
> Thanks,
> Chenhui

Leo already commented on the "add a compatible string" patch (though the
threading got broken thanks to Microsoft) and you said you'd fix it.

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: radeonhd and xorg issues on powerpc p5020 and G5

2016-05-02 Thread luigi burdo
hi michel,
tested with  sapphire 7750Hd 2gb ddr3 on Cyrus+ Amigaone X5000 P5020,
with radeon  selected i have a stalling of fence 0 and video blinking with all
glitched like on 7770hd, only fbdev video gave the desktop ok.
distro is the ubuntu mate 16.04 
kernels used for testing are the 4.6 rc 5 and 4.5 
will build today the 4.4 and test there too.

if something is needed (logs) or some specific tests are required to help the devs
i will be glad to send it.


thanks
and sorry for my english
luigi

Inviato da iPad

> Il giorno 30 apr 2016, alle ore 09:29, luigi burdo  
> ha scritto:
> 
> hi michel,
> without xorg conf on cyrus+ p5020, and g5 quad i have only the black screen 
> with pulse cursor. i need to made it and put in etc/X11/ or not desktop at 
> all.
> 
> about modeset=0 on both cards:
> yes i know is normal dont have video but i think the more strange is have the 
> opposite compared the kernel options . with kernel radeon. modeset=0 and 
> nouveau.modeset=1 look like xorg understand to activate the radeon and 
> deactivate the nouveau .
> and this make the system stalling without any way to recover.
> 
> about radeonsi, will check  on 7750 too and report.
> for now 4xxx,5xxx,6xxx can say are running on ppc system with 3d too , yes 
> many endianess on egl and egl2 colors not right and some creazy texture on 
> some games but ... running ;-)
> 
> ciao
> luigi
> 
> 
> 
> 
> Inviato da iPad
> 
>>> Il giorno 30 apr 2016, alle ore 09:10, Michel Dänzer  
>>> ha scritto:
>>> 
>>> On 23.04.2016 02:06, luigi burdo wrote:
>>> 
>>> On Quad G5 with 2 video boards
>>> if i set radeon.modeset=1  nouveau.modeset=1 Xorg -configure dont found
>>> at all the video boards
>>> if i set radeon.modeset=0  nouveau.modeset=1 Xorg -configure dont found
>>> the nouveau board
>>> if i set radeon.modeset=1  nouveau.modeset=0 Xorg -configure dont found
>>> the radeon board
>> 
>> Xorg -configure has various known issues and shouldn't be used anymore.
>> Xorg is normally able to automatically detect and use all GPUs in a
>> system without any xorg.conf file.
>> 
>> 
>>> if i set radeon.modeset=0  nouveau.modeset=0 no video working ... no tty
>>> too ;-)
>> 
>> That disables both kernel drivers, which are required for fbcon and Xorg.
>> 
>> 
>>> On P5020
>>> last of my test is this machine with a  radeon hd 7770 core edition
>>> if radeon drv is running the system run in softpipe mode and i face many
>>> drm errors and fence issue.
>>> i atteched the dmesg files.
>> 
>> Which kernel version are you testing? There were some fixes in 4.4/4.5
>> which may help for the kernel driver issues, but note that the Mesa
>> radeonsi driver for >= 7xxx Radeons still needs a lot of work to be
>> usable on big endian systems.
>> 
>> 
>> -- 
>> Earthling Michel Dänzer   |   http://www.amd.com
>> Libre software enthusiast | Mesa and X developer
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: powerpc: Add out of bounds check to crash_shutdown_unregister()

2016-05-02 Thread Michael Ellerman
On Thu, 2016-28-04 at 06:17:45 UTC, Suraj Jitindar Singh wrote:
> When unregistering a crash_shutdown_handle in the function
> crash_shutdown_unregister() the other handles are shifted down in the
> array to replace the unregistered handle. The for loop assumes that the
> last element in the array is null and uses this as the stop condition,
> however in the case that the last element is not null there is no check
> to ensure that an out of bounds access is not performed.

But AFAICS the code ensures that entry will always be NULL. So there's no bug at
the moment.

> Add a check to terminate the shift operation when CRASH_HANDLER_MAX is
> reached in order to protect against out of bounds accesses.

Doing it this way is more robust though. The chance of the NULL terminator being
corrupted is definitely higher than the code being corrupted, and if the latter
happens we're probably toast anyway.

> diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
> index 2bb252c..6b267af 100644
> --- a/arch/powerpc/kernel/crash.c
> +++ b/arch/powerpc/kernel/crash.c
> @@ -288,7 +288,7 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
>   rc = 1;
>   } else {
>   /* Shift handles down */
> - for (; crash_shutdown_handles[i]; i++)
> + for (; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++)
>   crash_shutdown_handles[i] =
>   crash_shutdown_handles[i+1];
>   rc = 0;

So if I'm reading it right, with this change we have removed all the code that
uses the NULL-terminated property of the list.

If so we should also shrink the array to be only CRASH_HANDLER_MAX in size, and
remove any references to it being NULL terminated.

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2] cxl: Check periodically the coherent platform function's state

2016-05-02 Thread Ian Munsie
Acked-by: Ian Munsie 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 12/16] rtc: powerpc: provide rtc_class_ops directly

2016-05-02 Thread Michael Ellerman
On Thu, 2016-04-28 at 00:34 +0200, Arnd Bergmann wrote:

> The rtc-generic driver provides an architecture specific
> wrapper on top of the generic rtc_class_ops abstraction,
> and powerpc has another abstraction on top, which is a bit
> silly.
> 
> This changes the powerpc rtc-generic device to provide its
> rtc_class_ops directly, to reduce the number of layers
> by one.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/powerpc/kernel/time.c | 29 -
>  drivers/rtc/rtc-generic.c  |  2 +-
>  2 files changed, 29 insertions(+), 2 deletions(-)

If this hits linux-next it will go through my automated boot testing, which
hopefully would be sufficient to catch any bugs in this patch, cross fingers.

I don't know jack about all the layers of RTC mess, so my ack is basically
worthless here. But if you like you can have one anyway :)

Acked-by: Michael Ellerman 

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

2016-05-02 Thread Tian, Kevin
> From: Yongji Xie
> Sent: Wednesday, April 27, 2016 8:22 PM
> 
> Current vfio-pci implementation disallows to mmap
> sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
> page may be shared with other BARs. This will cause some
> performance issues when we passthrough a PCI device with
> this kind of BARs. Guest will be not able to handle the mmio
> accesses to the BARs which leads to mmio emulations in host.
> 
> However, not all sub-page BARs will share page with other BARs.
> We should allow to mmap those sub-page MMIO BARs which we can
> make sure will not share page with other BARs.
> 
> This patch adds support for this case. And we also try to use
> a shadow resource to reserve the remainder of the page which a hot-added
> device's BAR might be assigned into.

'shadow' usually means you have a corresponding part being
shadowed, whereas here it looks like you mostly want some 'dummy'
resource for reservation purposes?

> +
> + if (!(res->start & ~PAGE_MASK)) {
> + /*
> +  * Add shadow resource for sub-page bar whose mmio
> +  * page is exclusive in case that hot-add device's
> +  * bar is assigned into the mem hole.
> +  */
> + shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL);
> + shadow_res->resource.start = res->end + 1;
> + shadow_res->resource.end = res->start + PAGE_SIZE - 1;

What about when res->start is not page aligned, so you still end up with
a portion before res->start that is not exclusively reserved?

> + shadow_res->resource.flags = res->flags;
> + if (request_resource(res->parent,
> + &shadow_res->resource)) {
> + kfree(shadow_res);
> + return false;
> + }
> + shadow_res->index = index;
> + list_add(&shadow_res->res_next,
> + &vdev->shadow_resources_list);
> + return true;

Thanks
Kevin
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver

2016-05-02 Thread Alexey Kardashevskiy

On 05/03/2016 09:41 AM, Gavin Shan wrote:

On Wed, Apr 20, 2016 at 11:55:56AM +1000, Alistair Popple wrote:

On Tue, 19 Apr 2016 20:36:48 Alexey Kardashevskiy wrote:

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This adds standalone driver to support PCI hotplug for PowerPC PowerNV
platform that runs on top of skiboot firmware. The firmware identifies
hotpluggable slots and marked their device tree node with proper
"ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans
device tree nodes to create/register PCI hotplug slot accordingly.

The PCI slots are organized in fashion of tree, which means one
PCI slot might have parent PCI slot and parent PCI slot possibly
contains multiple child PCI slots. At the plugging time, the parent
PCI slot is populated before its children. The child PCI slots are
removed before their parent PCI slot can be removed from the system.

If the skiboot firmware doesn't support slot status retrieval, the PCI
slot device node shouldn't have property "ibm,reset-by-firmware". In
that case, none of valid PCI slots will be detected from device tree.
The skiboot firmware doesn't export the capability to access attention
LEDs yet and it's something for TBD.

Signed-off-by: Gavin Shan 
Acked-by: Bjorn Helgaas 
---
  drivers/pci/hotplug/Kconfig   |  12 +
  drivers/pci/hotplug/Makefile  |   3 +
  drivers/pci/hotplug/pnv_php.c | 870 ++
  3 files changed, 885 insertions(+)
  create mode 100644 drivers/pci/hotplug/pnv_php.c

diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index df8caec..167c8ce 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC

  When in doubt, say N.

+config HOTPLUG_PCI_POWERNV
+   tristate "PowerPC PowerNV PCI Hotplug driver"
+   depends on PPC_POWERNV && EEH
+   help
+ Say Y here if you run PowerPC PowerNV platform that supports
+ PCI Hotplug
+
+ To compile this driver as a module, choose M here: the
+ module will be called pnv-php.
+
+ When in doubt, say N.
+
  config HOTPLUG_PCI_RPA
tristate "RPA PCI Hotplug driver"
depends on PPC_PSERIES && EEH
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index b616e75..e33cdda 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o
  obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o
  obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC)+= cpcihp_generic.o
  obj-$(CONFIG_HOTPLUG_PCI_SHPC)+= shpchp.o
+obj-$(CONFIG_HOTPLUG_PCI_POWERNV)  += pnv-php.o
  obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o
  obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)   += rpadlpar_io.o
  obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o
@@ -50,6 +51,8 @@ ibmphp-objs   :=  ibmphp_core.o   \
  acpiphp-objs  :=  acpiphp_core.o  \
acpiphp_glue.o

+pnv-php-objs   :=  pnv_php.o
+
  rpaphp-objs   :=  rpaphp_core.o   \
rpaphp_pci.o\
rpaphp_slot.o
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
new file mode 100644
index 000..364ec36
--- /dev/null
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -0,0 +1,870 @@
+/*
+ * PCI Hotplug Driver for PowerPC PowerNV platform.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver"
+
+struct pnv_php_slot {
+   struct hotplug_slot slot;
+   struct hotplug_slot_infoslot_info;
+   uint64_tid;
+   char*name;
+   int slot_no;
+   struct kref kref;
+#define PNV_PHP_STATE_INITIALIZED  0
+#define PNV_PHP_STATE_REGISTERED   1
+#define PNV_PHP_STATE_POPULATED2
+   int state;
+   struct device_node  *dn;
+   struct pci_dev  *pdev;
+   struct pci_bus  *bus;
+   boolpower_state_check;
+   int power_state_confirmed;
+#define PNV_PHP_POWER_CONFIRMED_INVALID0
+#define PNV_PHP_POWER_CONFIRMED_SUCCESS1
+#define PNV_PHP_POWER_CONFIRMED_FAIL   2
+   struct opal_msg

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Andy Lutomirski
On Mon, May 2, 2016 at 1:00 PM, Jiri Kosina  wrote:
> On Mon, 2 May 2016, Jiri Kosina wrote:
>
>> > FWIW, I just tried this:
>> >
>> > static bool is_entry_text(unsigned long addr)
>> > {
>> > return addr >= (unsigned long)__entry_text_start &&
>> > addr < (unsigned long)__entry_text_end;
>> > }
>> >
>> > it works.  So the entry code is already annotated reasonably well :)
>> >
>> > I just hacked it up here:
>> >
>> > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21
>> >
>> > and it seems to work, at least for page faults.  A better
>> > implementation would print out the entire contents of pt_regs so that
>> > people reading the stack trace will know the registers at the time of
>> > the exception, which might be helpful.
>>
>> Sorry for being dense, but how do you distinguish here between a "real"
>> kernel entry, that pushes pt_regs, and any "non-entry" function call that
>> passes pt_regs around?
>
> Umm, actually, the more tricky part is the other way around -- how do you
> make sure that whenever you are calling out from a code between
> __entry_text_start and __entry_text_end, pt_regs will be at the place
> you're looking for it? How's that guaranteed?

It's not guaranteed in my code.  I think we'd want to add a little
table of call sites and their pt_regs offsets.  This was just meant to
test that the general idea works (and it does indeed generate better
traces than the stock kernel, which gets it unconditionally wrong).

--Andy

>
> Thanks,
>
> --
> Jiri Kosina
> SUSE Labs
>



-- 
Andy Lutomirski
AMA Capital Management, LLC
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver

2016-05-02 Thread Gavin Shan
On Wed, Apr 20, 2016 at 11:55:56AM +1000, Alistair Popple wrote:
>On Tue, 19 Apr 2016 20:36:48 Alexey Kardashevskiy wrote:
>> On 02/17/2016 02:44 PM, Gavin Shan wrote:
>> > This adds standalone driver to support PCI hotplug for PowerPC PowerNV
>> > platform that runs on top of skiboot firmware. The firmware identifies
>> > hotpluggable slots and marked their device tree node with proper
>> > "ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans
>> > device tree nodes to create/register PCI hotplug slot accordingly.
>> >
>> > The PCI slots are organized in fashion of tree, which means one
>> > PCI slot might have parent PCI slot and parent PCI slot possibly
>> > contains multiple child PCI slots. At the plugging time, the parent
>> > PCI slot is populated before its children. The child PCI slots are
>> > removed before their parent PCI slot can be removed from the system.
>> >
>> > If the skiboot firmware doesn't support slot status retrieval, the PCI
>> > slot device node shouldn't have property "ibm,reset-by-firmware". In
>> > that case, none of valid PCI slots will be detected from device tree.
>> > The skiboot firmware doesn't export the capability to access attention
>> > LEDs yet and it's something for TBD.
>> >
>> > Signed-off-by: Gavin Shan 
>> > Acked-by: Bjorn Helgaas 
>> > ---
>> >   drivers/pci/hotplug/Kconfig   |  12 +
>> >   drivers/pci/hotplug/Makefile  |   3 +
>> >   drivers/pci/hotplug/pnv_php.c | 870 
>> > ++
>> >   3 files changed, 885 insertions(+)
>> >   create mode 100644 drivers/pci/hotplug/pnv_php.c
>> >
>> > diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
>> > index df8caec..167c8ce 100644
>> > --- a/drivers/pci/hotplug/Kconfig
>> > +++ b/drivers/pci/hotplug/Kconfig
>> > @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC
>> >
>> >  When in doubt, say N.
>> >
>> > +config HOTPLUG_PCI_POWERNV
>> > +  tristate "PowerPC PowerNV PCI Hotplug driver"
>> > +  depends on PPC_POWERNV && EEH
>> > +  help
>> > +Say Y here if you run PowerPC PowerNV platform that supports
>> > +PCI Hotplug
>> > +
>> > +To compile this driver as a module, choose M here: the
>> > +module will be called pnv-php.
>> > +
>> > +When in doubt, say N.
>> > +
>> >   config HOTPLUG_PCI_RPA
>> >tristate "RPA PCI Hotplug driver"
>> >depends on PPC_PSERIES && EEH
>> > diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
>> > index b616e75..e33cdda 100644
>> > --- a/drivers/pci/hotplug/Makefile
>> > +++ b/drivers/pci/hotplug/Makefile
>> > @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)   += pciehp.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)+= cpcihp_zt5550.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC)   += cpcihp_generic.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_SHPC)   += shpchp.o
>> > +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_RPA)+= rpaphp.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)  += rpadlpar_io.o
>> >   obj-$(CONFIG_HOTPLUG_PCI_SGI)+= sgi_hotplug.o
>> > @@ -50,6 +51,8 @@ ibmphp-objs  :=  ibmphp_core.o   \
>> >   acpiphp-objs :=  acpiphp_core.o  \
>> >acpiphp_glue.o
>> >
>> > +pnv-php-objs  :=  pnv_php.o
>> > +
>> >   rpaphp-objs  :=  rpaphp_core.o   \
>> >rpaphp_pci.o\
>> >rpaphp_slot.o
>> > diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
>> > new file mode 100644
>> > index 000..364ec36
>> > --- /dev/null
>> > +++ b/drivers/pci/hotplug/pnv_php.c
>> > @@ -0,0 +1,870 @@
>> > +/*
>> > + * PCI Hotplug Driver for PowerPC PowerNV platform.
>> > + *
>> > + * Copyright Gavin Shan, IBM Corporation 2015.
>> > + *
>> > + * This program is free software; you can redistribute it and/or modify
>> > + * it under the terms of the GNU General Public License as published by
>> > + * the Free Software Foundation; either version 2 of the License, or
>> > + * (at your option) any later version.
>> > + */
>> > +
>> > +#include 
>> > +#include 
>> > +#include 
>> > +#include 
>> > +
>> > +#include 
>> > +#include 
>> > +#include 
>> > +
>> > +#define DRIVER_VERSION"0.1"
>> > +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
>> > +#define DRIVER_DESC   "PowerPC PowerNV PCI Hotplug Driver"
>> > +
>> > +struct pnv_php_slot {
>> > +  struct hotplug_slot slot;
>> > +  struct hotplug_slot_infoslot_info;
>> > +  uint64_tid;
>> > +  char*name;
>> > +  int slot_no;
>> > +  struct kref kref;
>> > +#define PNV_PHP_STATE_INITIALIZED 0
>> > +#define PNV_PHP_STATE_REGISTERED  1
>> > +#define PNV_PHP_STATE_POPULATED   2
>> > +  int state;
>> > +  struct 

Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver

2016-05-02 Thread Gavin Shan
On Mon, May 02, 2016 at 04:11:53PM +1000, Alexey Kardashevskiy wrote:
>On 05/02/2016 01:44 PM, Gavin Shan wrote:
>>On Tue, Apr 19, 2016 at 08:36:48PM +1000, Alexey Kardashevskiy wrote:
>>>On 02/17/2016 02:44 PM, Gavin Shan wrote:
This adds standalone driver to support PCI hotplug for PowerPC PowerNV
platform that runs on top of skiboot firmware. The firmware identifies
hotpluggable slots and marked their device tree node with proper
"ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans
device tree nodes to create/register PCI hotplug slot accordingly.

The PCI slots are organized in fashion of tree, which means one
PCI slot might have parent PCI slot and parent PCI slot possibly
contains multiple child PCI slots. At the plugging time, the parent
PCI slot is populated before its children. The child PCI slots are
removed before their parent PCI slot can be removed from the system.

If the skiboot firmware doesn't support slot status retrieval, the PCI
slot device node shouldn't have property "ibm,reset-by-firmware". In
that case, none of valid PCI slots will be detected from device tree.
The skiboot firmware doesn't export the capability to access attention
LEDs yet and it's something for TBD.

Signed-off-by: Gavin Shan 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/hotplug/Kconfig   |  12 +
 drivers/pci/hotplug/Makefile  |   3 +
 drivers/pci/hotplug/pnv_php.c | 870 
 ++
 3 files changed, 885 insertions(+)
 create mode 100644 drivers/pci/hotplug/pnv_php.c

diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index df8caec..167c8ce 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC

  When in doubt, say N.

+config HOTPLUG_PCI_POWERNV
+   tristate "PowerPC PowerNV PCI Hotplug driver"
+   depends on PPC_POWERNV && EEH
+   help
+ Say Y here if you run PowerPC PowerNV platform that supports
+ PCI Hotplug
+
+ To compile this driver as a module, choose M here: the
+ module will be called pnv-php.
+
+ When in doubt, say N.
+
 config HOTPLUG_PCI_RPA
tristate "RPA PCI Hotplug driver"
depends on PPC_PSERIES && EEH
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index b616e75..e33cdda 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)  += cpcihp_zt5550.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o
 obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o
+obj-$(CONFIG_HOTPLUG_PCI_POWERNV)  += pnv-php.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA)  += rpaphp.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o
 obj-$(CONFIG_HOTPLUG_PCI_SGI)  += sgi_hotplug.o
@@ -50,6 +51,8 @@ ibmphp-objs   :=  ibmphp_core.o   \
 acpiphp-objs   :=  acpiphp_core.o  \
acpiphp_glue.o

+pnv-php-objs   :=  pnv_php.o
+
 rpaphp-objs:=  rpaphp_core.o   \
rpaphp_pci.o\
rpaphp_slot.o
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
new file mode 100644
index 000..364ec36
--- /dev/null
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -0,0 +1,870 @@
+/*
+ * PCI Hotplug Driver for PowerPC PowerNV platform.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver"
+
+struct pnv_php_slot {
+   struct hotplug_slot slot;
+   struct hotplug_slot_infoslot_info;
+   uint64_tid;
+   char*name;
+   int slot_no;
+   struct kref kref;
+#define PNV_PHP_STATE_INITIALIZED  0
+#define PNV_PHP_STATE_REGISTERED   1
+#define PNV_PHP_STATE_POPULATED2
+   int state;
+   struct device_node

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Jiri Kosina
On Mon, 2 May 2016, Jiri Kosina wrote:

> > FWIW, I just tried this:
> > 
> > static bool is_entry_text(unsigned long addr)
> > {
> > return addr >= (unsigned long)__entry_text_start &&
> > addr < (unsigned long)__entry_text_end;
> > }
> > 
> > it works.  So the entry code is already annotated reasonably well :)
> > 
> > I just hacked it up here:
> > 
> > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21
> > 
> > and it seems to work, at least for page faults.  A better
> > implementation would print out the entire contents of pt_regs so that
> > people reading the stack trace will know the registers at the time of
> > the exception, which might be helpful.
> 
> Sorry for being dense, but how do you distinguish here between a "real" 
> kernel entry, that pushes pt_regs, and any "non-entry" function call that 
> passes pt_regs around?

Umm, actually, the more tricky part is the other way around -- how do you 
make sure that whenever you are calling out from a code between 
__entry_text_start and __entry_text_end, pt_regs will be at the place 
you're looking for it? How's that guaranteed?

Thanks,

-- 
Jiri Kosina
SUSE Labs

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Jiri Kosina
On Mon, 2 May 2016, Andy Lutomirski wrote:

> FWIW, I just tried this:
> 
> static bool is_entry_text(unsigned long addr)
> {
> return addr >= (unsigned long)__entry_text_start &&
> addr < (unsigned long)__entry_text_end;
> }
> 
> it works.  So the entry code is already annotated reasonably well :)
> 
> I just hacked it up here:
> 
> https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21
> 
> and it seems to work, at least for page faults.  A better
> implementation would print out the entire contents of pt_regs so that
> people reading the stack trace will know the registers at the time of
> the exception, which might be helpful.

Sorry for being dense, but how do you distinguish here between a "real" 
kernel entry, that pushes pt_regs, and any "non-entry" function call that 
passes pt_regs around?

-- 
Jiri Kosina
SUSE Labs

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Josh Poimboeuf
On Mon, May 02, 2016 at 11:12:39AM -0700, Andy Lutomirski wrote:
> On Mon, May 2, 2016 at 10:31 AM, Josh Poimboeuf  wrote:
> > On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote:
> >> On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf  wrote:
> >> > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote:
> >> >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"  wrote:
> >> >> >
> >> >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote:
> >> >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf 
> >> >> > >  wrote:
> >> >> > > >> I suppose we could try to rejigger the code so that rbp points to
> >> >> > > >> pt_regs or similar.
> >> >> > > >
> >> >> > > > I think we should avoid doing something like that because it 
> >> >> > > > would break
> >> >> > > > gdb and all the other unwinders who don't know about it.
> >> >> > >
> >> >> > > How so?
> >> >> > >
> >> >> > > Currently, rbp in the entry code is meaningless.  I'm suggesting 
> >> >> > > that,
> >> >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to
> >> >> > > the pt_regs.  Currently it points to something stale (which the
> >> >> > > dump_stack code might be relying on.  Hmm.)  But it's probably also
> >> >> > > safe to assume that if you unwind to the 'call \do_sym', then 
> >> >> > > pt_regs
> >> >> > > is the next thing on the stack, so just doing the section thing 
> >> >> > > would
> >> >> > > work.
> >> >> >
> >> >> > Yes, rbp is meaningless on the entry from user space.  But if an
> >> >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have
> >> >> > nested entry, rbp keeps its old value, right?  So the unwinder can 
> >> >> > walk
> >> >> > past the nested entry frame and keep going until it gets to the 
> >> >> > original
> >> >> > entry.
> >> >>
> >> >> Yes.
> >> >>
> >> >> It would be nice if we could do better, though, and actually notice
> >> >> the pt_regs and identify the entry.  For example, I'd love to see
> >> >> "page fault, RIP=xyz" printed in the middle of a stack dump on a
> >> >> crash.
> >> >>
> >> >> Also, I think that just following rbp links will lose the
> >> >> actual function that took the page fault (or whatever function
> >> >> pt_regs->ip actually points to).
> >> >
> >> > Hm.  I think we could fix all that in a more standard way.  Whenever a
> >> > new pt_regs frame gets saved on entry, we could also create a new stack
> >> > frame which points to a fake kernel_entry() function.  That would tell
> >> > the unwinder there's a pt_regs frame without otherwise breaking frame
> >> > pointers across the frame.
> >> >
> >> > Then I guess we wouldn't need my other solution of putting the idt
> >> > entries in a special section.
> >> >
> >> > How does that sound?
> >>
> >> Let me try to understand.
> >>
> >> The normal call sequence is call; push %rbp; mov %rsp, %rbp.  So rbp
> >> points to (prev rbp, prev rip) on the stack, and you can follow the
> >> chain back.  Right now, on a user access page fault or similar, we
> >> have rbp (probably) pointing to the interrupted frame, and the
> >> interrupted rip isn't saved anywhere that a naive unwinder can find
> >> it.  (It's in pt_regs, but the rbp chain skips right over that.)
> >>
> >> We could change the entry code so that an interrupt / idtentry does:
> >>
> >> push pt_regs
> >> push kernel_entry
> >> push %rbp
> >> mov %rsp, %rbp
> >> call handler
> >> pop %rbp
> >> addq $8, %rsp
> >>
> >> or similar.  That would make it appear that the actual C handler was
> >> caused by a dummy function "kernel_entry".  Now the unwinder would get
> >> to kernel_entry, but it *still* wouldn't find its way to the calling
> >> frame, which only solves part of the problem.  We could at least teach
> >> the unwinder how kernel_entry works and let it decode pt_regs to
> >> continue unwinding.  This would be nice, and I think it could work.
> >
> > Yeah, that's about what I had in mind.
> 
> FWIW, I just tried this:
> 
> static bool is_entry_text(unsigned long addr)
> {
> return addr >= (unsigned long)__entry_text_start &&
> addr < (unsigned long)__entry_text_end;
> }
> 
> it works.  So the entry code is already annotated reasonably well :)
> 
> I just hacked it up here:
> 
> https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21
> 
> and it seems to work, at least for page faults.  A better
> implementation would print out the entire contents of pt_regs so that
> people reading the stack trace will know the registers at the time of
> the exception, which might be helpful.

I still think we would need more specific annotations to do that
reliably: a call from entry code doesn't necessarily correlate with a
pt_regs frame.

> >> I think I like this, except that, if it used a separate section, it
> >> could potentially be faster, as, for each actual entry type, the
> >> 

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Ingo Molnar

* Andy Lutomirski  wrote:

> > Another idea to detect missing frames: for each return address on the
> > stack, ensure there's a corresponding "call <func>" instruction
> > immediately preceding the return location, where <func> matches what's
> > on the stack.
> 
> Hmm, interesting.
> 
> I hope your plans include rewriting the current stack unwinder completely.  
> The 
> thing in print_context_stack is (a) hard-to-understand and hard-to-modify 
> crap 
> and (b) is called in a loop from another file using totally ridiculous 
> conventions.

So we had several attempts at making it better, any further improvements 
(including radical rewrites) are more than welcome!

The generalization between the various stack walking methods certainly didn't 
make 
things easier to read - we might want to eliminate that by using better 
primitives 
to iterate over the stack frame.

Thanks,

Ingo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Andy Lutomirski
On Mon, May 2, 2016 at 10:31 AM, Josh Poimboeuf  wrote:
> On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote:
>> On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf  wrote:
>> > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote:
>> >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"  wrote:
>> >> >
>> >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote:
>> >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf  
>> >> > > wrote:
>> >> > > >> I suppose we could try to rejigger the code so that rbp points to
>> >> > > >> pt_regs or similar.
>> >> > > >
>> >> > > > I think we should avoid doing something like that because it would 
>> >> > > > break
>> >> > > > gdb and all the other unwinders who don't know about it.
>> >> > >
>> >> > > How so?
>> >> > >
>> >> > > Currently, rbp in the entry code is meaningless.  I'm suggesting that,
>> >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to
>> >> > > the pt_regs.  Currently it points to something stale (which the
>> >> > > dump_stack code might be relying on.  Hmm.)  But it's probably also
>> >> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs
>> >> > > is the next thing on the stack, so just doing the section thing would
>> >> > > work.
>> >> >
>> >> > Yes, rbp is meaningless on the entry from user space.  But if an
>> >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have
>> >> > nested entry, rbp keeps its old value, right?  So the unwinder can walk
>> >> > past the nested entry frame and keep going until it gets to the original
>> >> > entry.
>> >>
>> >> Yes.
>> >>
>> >> It would be nice if we could do better, though, and actually notice
>> >> the pt_regs and identify the entry.  For example, I'd love to see
>> >> "page fault, RIP=xyz" printed in the middle of a stack dump on a
>> >> crash.
>> >>
>> >> Also, I think that just following rbp links will lose the
>> >> actual function that took the page fault (or whatever function
>> >> pt_regs->ip actually points to).
>> >
>> > Hm.  I think we could fix all that in a more standard way.  Whenever a
>> > new pt_regs frame gets saved on entry, we could also create a new stack
>> > frame which points to a fake kernel_entry() function.  That would tell
>> > the unwinder there's a pt_regs frame without otherwise breaking frame
>> > pointers across the frame.
>> >
>> > Then I guess we wouldn't need my other solution of putting the idt
>> > entries in a special section.
>> >
>> > How does that sound?
>>
>> Let me try to understand.
>>
>> The normal call sequence is call; push %rbp; mov %rsp, %rbp.  So rbp
>> points to (prev rbp, prev rip) on the stack, and you can follow the
>> chain back.  Right now, on a user access page fault or similar, we
>> have rbp (probably) pointing to the interrupted frame, and the
>> interrupted rip isn't saved anywhere that a naive unwinder can find
>> it.  (It's in pt_regs, but the rbp chain skips right over that.)
>>
>> We could change the entry code so that an interrupt / idtentry does:
>>
>> push pt_regs
>> push kernel_entry
>> push %rbp
>> mov %rsp, %rbp
>> call handler
>> pop %rbp
>> addq $8, %rsp
>>
>> or similar.  That would make it appear that the actual C handler was
>> caused by a dummy function "kernel_entry".  Now the unwinder would get
>> to kernel_entry, but it *still* wouldn't find its way to the calling
>> frame, which only solves part of the problem.  We could at least teach
>> the unwinder how kernel_entry works and let it decode pt_regs to
>> continue unwinding.  This would be nice, and I think it could work.
>
> Yeah, that's about what I had in mind.

FWIW, I just tried this:

static bool is_entry_text(unsigned long addr)
{
return addr >= (unsigned long)__entry_text_start &&
addr < (unsigned long)__entry_text_end;
}

it works.  So the entry code is already annotated reasonably well :)

I just hacked it up here:

https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21

and it seems to work, at least for page faults.  A better
implementation would print out the entire contents of pt_regs so that
people reading the stack trace will know the registers at the time of
the exception, which might be helpful.

>
>> I think I like this, except that, if it used a separate section, it
>> could potentially be faster, as, for each actual entry type, the
>> offset from the C handler frame to pt_regs is a foregone conclusion.
>
> Hm, this I don't really follow.  It's true that the unwinder can easily
> find RIP from pt_regs, which will always be a known offset from the
> kernel_entry pointer on the stack.  But why would having the entry code
> in a separate section make that faster?

It doesn't make the unwinder faster -- it makes the entry code faster.

>
>> But this is pretty simple and performance is already abysmal in most
>> 

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Josh Poimboeuf
On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote:
> On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf  wrote:
> > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote:
> >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"  wrote:
> >> >
> >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote:
> >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf  
> >> > > wrote:
> >> > > >> I suppose we could try to rejigger the code so that rbp points to
> >> > > >> pt_regs or similar.
> >> > > >
> >> > > > I think we should avoid doing something like that because it would 
> >> > > > break
> >> > > > gdb and all the other unwinders who don't know about it.
> >> > >
> >> > > How so?
> >> > >
> >> > > Currently, rbp in the entry code is meaningless.  I'm suggesting that,
> >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to
> >> > > the pt_regs.  Currently it points to something stale (which the
> >> > > dump_stack code might be relying on.  Hmm.)  But it's probably also
> >> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs
> >> > > is the next thing on the stack, so just doing the section thing would
> >> > > work.
> >> >
> >> > Yes, rbp is meaningless on the entry from user space.  But if an
> >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have
> >> > nested entry, rbp keeps its old value, right?  So the unwinder can walk
> >> > past the nested entry frame and keep going until it gets to the original
> >> > entry.
> >>
> >> Yes.
> >>
> >> It would be nice if we could do better, though, and actually notice
> >> the pt_regs and identify the entry.  For example, I'd love to see
> >> "page fault, RIP=xyz" printed in the middle of a stack dump on a
> >> crash.
> >>
> >> Also, I think that just following rbp links will lose the
> >> actual function that took the page fault (or whatever function
> >> pt_regs->ip actually points to).
> >
> > Hm.  I think we could fix all that in a more standard way.  Whenever a
> > new pt_regs frame gets saved on entry, we could also create a new stack
> > frame which points to a fake kernel_entry() function.  That would tell
> > the unwinder there's a pt_regs frame without otherwise breaking frame
> > pointers across the frame.
> >
> > Then I guess we wouldn't need my other solution of putting the idt
> > entries in a special section.
> >
> > How does that sound?
> 
> Let me try to understand.
> 
> The normal call sequence is call; push %rbp; mov %rsp, %rbp.  So rbp
> points to (prev rbp, prev rip) on the stack, and you can follow the
> chain back.  Right now, on a user access page fault or similar, we
> have rbp (probably) pointing to the interrupted frame, and the
> interrupted rip isn't saved anywhere that a naive unwinder can find
> it.  (It's in pt_regs, but the rbp chain skips right over that.)
> 
> We could change the entry code so that an interrupt / idtentry does:
> 
> push pt_regs
> push kernel_entry
> push %rbp
> mov %rsp, %rbp
> call handler
> pop %rbp
> addq $8, %rsp
> 
> or similar.  That would make it appear that the actual C handler was
> caused by a dummy function "kernel_entry".  Now the unwinder would get
> to kernel_entry, but it *still* wouldn't find its way to the calling
> frame, which only solves part of the problem.  We could at least teach
> the unwinder how kernel_entry works and let it decode pt_regs to
> continue unwinding.  This would be nice, and I think it could work.

Yeah, that's about what I had in mind.

> I think I like this, except that, if it used a separate section, it
> could potentially be faster, as, for each actual entry type, the
> offset from the C handler frame to pt_regs is a foregone conclusion.

Hm, this I don't really follow.  It's true that the unwinder can easily
find RIP from pt_regs, which will always be a known offset from the
kernel_entry pointer on the stack.  But why would having the entry code
in a separate section make that faster?

> But this is pretty simple and performance is already abysmal in most
> handlers.
> 
> There's an added benefit to using a separate section, though: we could
> also annotate the calls with what type of entry they were so the
> unwinder could print it out nicely.

Yeah, that could be a nice feature... but doesn't printing the name of
the C handler pretty much already give that information?

In any case, once we have a working DWARF unwinder, I think it will show
the name of the idt entry anyway.

> >> Have you looked at my vdso unwinding test at all?  If we could do
> >> something similar for the kernel, IMO it would make testing much more
> >> pleasant.
> >
> > I found it, but I'm not sure what it would mean to do something similar
> > for the kernel.  Do you mean doing something like an NMI sampling-based
> > approach where we periodically do a random stack sanity check?
> 
> I was imagining something a little more strict: single-step
> 

Re: [PATCH v5] powerpc/pci: Assign fixed PHB number based on device-tree properties

2016-05-02 Thread Bjorn Helgaas
On Thu, Apr 14, 2016 at 06:55:24PM -0300, Guilherme G. Piccoli wrote:
> The domain/PHB field of PCI addresses has its value obtained from a
> global variable, incremented each time a new domain (represented by
> struct pci_controller) is added on the system. The domain addition
> process happens during boot or due to PCI device hotplug.
> 
> As recent kernels are using predictable naming for network interfaces,
> the network stack is more tied to PCI naming. This can be a problem in
> hotplug scenarios, because PCI addresses will change if devices are
> removed and then re-added. This situation seems unusual, but it can
> happen if a user wants to replace a NIC without rebooting the machine,
> for example.
> 
> This patch changes the way PCI domain values are generated: now, we use
> device-tree properties to assign fixed PHB numbers to PCI addresses
> when available (meaning pSeries and PowerNV cases). We also use a bitmap
> to allow dynamic PHB numbering when device-tree properties are not
> used. This bitmap keeps track of used PHB numbers and if a PHB is
> released (by hotplug operations for example), it allows the reuse of
> this PHB number, preventing the PCI address from changing when a device is
> removed and re-added soon after. No functional changes were introduced.
> 
> Reviewed-by: Gavin Shan 
> Signed-off-by: Guilherme G. Piccoli 

I assume the powerpc guys will take care of this.  Let me know if you
need me to do anything.

> ---
>  arch/powerpc/kernel/pci-common.c | 66 
> ++--
>  1 file changed, 63 insertions(+), 3 deletions(-)
> 
> v5:
>   * Improved comments.
> 
>   * Changed the Fixed PHB Numbering to set the PHB number bit
>   on the bitmap anyway, avoiding issues when system has virtual PHBs.
> 
>   * Changed the device-tree check order - now, firstly we check for
>   "ibm,opal-phbid" and if it's not available, we try the pSeries case.
> 
> v4:
>   * Minor change (if/else nesting rearranged).
> 
> v3:
>   * Made the bitmap static.
> 
>   * Rearranged if/else statements of Fixed PHB checking.
> 
>   * Improved bitmap checkings, by removing loop and using instead the
>   find_first_zero_bit() function.
> 
>   * Removed the single-statement function release_phb_number() by
>   adding its logic directly into pcibios_free_controller().
> 
>   * Added check for bitmap size before clearing bit, avoiding memory
>   corruption.
> 
> v2:
>   * Added the Fixed PHB Numbering mechanism based on device-tree
>   properties.
> 
>   * Changed list approach to bitmap on the Dynamic PHB Numbering
>   mechanism.
> 
> diff --git a/arch/powerpc/kernel/pci-common.c 
> b/arch/powerpc/kernel/pci-common.c
> index 0f7a60f..ad423c1 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -41,11 +41,17 @@
>  #include 
>  #include 
>  
> +/* hose_spinlock protects accesses to the phb_bitmap. */
>  static DEFINE_SPINLOCK(hose_spinlock);
>  LIST_HEAD(hose_list);
>  
> -/* XXX kill that some day ... */
> -static int global_phb_number;/* Global phb counter */
> +/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
> +#define  MAX_PHBS8192
> +
> +/* For dynamic PHB numbering: used/free PHBs tracking bitmap.
> + * Accesses to this bitmap should be protected by hose_spinlock.
> + */
> +static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
>  
>  /* ISA Memory physical address */
>  resource_size_t isa_mem_base;
> @@ -64,6 +70,55 @@ struct dma_map_ops *get_pci_dma_ops(void)
>  }
>  EXPORT_SYMBOL(get_pci_dma_ops);
>  
> +/* get_phb_number() function should run under locking
> + * protection, specifically hose_spinlock.
> + */
> +static int get_phb_number(struct device_node *dn)
> +{
> + const __be64 *prop64;
> + const __be32 *regs;
> + int phb_id = 0;
> +
> + /* Try fixed PHB numbering first, by checking archs and reading
> +  * the respective device-tree properties. Firstly, try PowerNV by
> +  * reading "ibm,opal-phbid", only present in OPAL environment.
> +  */
> + prop64 = of_get_property(dn, "ibm,opal-phbid", NULL);
> + if (prop64) {
> + phb_id = (int)(be64_to_cpup(prop64) & 0xffff);
> +
> + } else if (machine_is(pseries)) {
> + regs = of_get_property(dn, "reg", NULL);
> + if (regs)
> + phb_id = (int)(be32_to_cpu(regs[1]) & 0xffff);
> + } else {
> + goto dynamic_phb_numbering;
> + }
> +
> + /* If we have a huge PHB number obtained from device-tree, no need
> +  * to worry with the bitmap. Otherwise, we need to be sure we're
> +  * not trying to use the same PHB number twice.
> +  */
> + if (phb_id < MAX_PHBS) {
> + if (test_bit(phb_id, phb_bitmap))
> + goto dynamic_phb_numbering;
> + set_bit(phb_id, phb_bitmap);
> + }
> +
> + return phb_id;
> +
> + /* If not pSeries nor 

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Andy Lutomirski
On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf  wrote:
> On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote:
>> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"  wrote:
>> >
>> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote:
>> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf  
>> > > wrote:
>> > > >> I suppose we could try to rejigger the code so that rbp points to
>> > > >> pt_regs or similar.
>> > > >
>> > > > I think we should avoid doing something like that because it would 
>> > > > break
>> > > > gdb and all the other unwinders who don't know about it.
>> > >
>> > > How so?
>> > >
>> > > Currently, rbp in the entry code is meaningless.  I'm suggesting that,
>> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to
>> > > the pt_regs.  Currently it points to something stale (which the
>> > > dump_stack code might be relying on.  Hmm.)  But it's probably also
>> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs
>> > > is the next thing on the stack, so just doing the section thing would
>> > > work.
>> >
>> > Yes, rbp is meaningless on the entry from user space.  But if an
>> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have
>> > nested entry, rbp keeps its old value, right?  So the unwinder can walk
>> > past the nested entry frame and keep going until it gets to the original
>> > entry.
>>
>> Yes.
>>
>> It would be nice if we could do better, though, and actually notice
>> the pt_regs and identify the entry.  For example, I'd love to see
>> "page fault, RIP=xyz" printed in the middle of a stack dump on a
>> crash.
>>
>> Also, I think that just following rbp links will lose the
>> actual function that took the page fault (or whatever function
>> pt_regs->ip actually points to).
>
> Hm.  I think we could fix all that in a more standard way.  Whenever a
> new pt_regs frame gets saved on entry, we could also create a new stack
> frame which points to a fake kernel_entry() function.  That would tell
> the unwinder there's a pt_regs frame without otherwise breaking frame
> pointers across the frame.
>
> Then I guess we wouldn't need my other solution of putting the idt
> entries in a special section.
>
> How does that sound?

Let me try to understand.

The normal call sequence is call; push %rbp; mov %rsp, %rbp.  So rbp
points to (prev rbp, prev rip) on the stack, and you can follow the
chain back.  Right now, on a user access page fault or similar, we
have rbp (probably) pointing to the interrupted frame, and the
interrupted rip isn't saved anywhere that a naive unwinder can find
it.  (It's in pt_regs, but the rbp chain skips right over that.)

We could change the entry code so that an interrupt / idtentry does:

push pt_regs
push kernel_entry
push %rbp
mov %rsp, %rbp
call handler
pop %rbp
addq $8, %rsp

or similar.  That would make it appear that the actual C handler was
caused by a dummy function "kernel_entry".  Now the unwinder would get
to kernel_entry, but it *still* wouldn't find its way to the calling
frame, which only solves part of the problem.  We could at least teach
the unwinder how kernel_entry works and let it decode pt_regs to
continue unwinding.  This would be nice, and I think it could work.

I think I like this, except that, if it used a separate section, it
could potentially be faster, as, for each actual entry type, the
offset from the C handler frame to pt_regs is a foregone conclusion.
But this is pretty simple and performance is already abysmal in most
handlers.

There's an added benefit to using a separate section, though: we could
also annotate the calls with what type of entry they were so the
unwinder could print it out nicely.

I could be convinced either way.


>
>> Have you looked at my vdso unwinding test at all?  If we could do
>> something similar for the kernel, IMO it would make testing much more
>> pleasant.
>
> I found it, but I'm not sure what it would mean to do something similar
> for the kernel.  Do you mean doing something like an NMI sampling-based
> approach where we periodically do a random stack sanity check?

I was imagining something a little more strict: single-step
interesting parts of the kernel and make sure that each step unwinds
correctly.  That could detect missing frames and similar.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking

2016-05-02 Thread Josh Poimboeuf
On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote:
> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"  wrote:
> >
> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote:
> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf  
> > > wrote:
> > > >> I suppose we could try to rejigger the code so that rbp points to
> > > >> pt_regs or similar.
> > > >
> > > > I think we should avoid doing something like that because it would break
> > > > gdb and all the other unwinders who don't know about it.
> > >
> > > How so?
> > >
> > > Currently, rbp in the entry code is meaningless.  I'm suggesting that,
> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to
> > > the pt_regs.  Currently it points to something stale (which the
> > > dump_stack code might be relying on.  Hmm.)  But it's probably also
> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs
> > > is the next thing on the stack, so just doing the section thing would
> > > work.
> >
> > Yes, rbp is meaningless on the entry from user space.  But if an
> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have
> > nested entry, rbp keeps its old value, right?  So the unwinder can walk
> > past the nested entry frame and keep going until it gets to the original
> > entry.
> 
> Yes.
> 
> It would be nice if we could do better, though, and actually notice
> the pt_regs and identify the entry.  For example, I'd love to see
> "page fault, RIP=xyz" printed in the middle of a stack dump on a
> crash.
>
> Also, I think that just following rbp links will lose the
> actual function that took the page fault (or whatever function
> pt_regs->ip actually points to).

Hm.  I think we could fix all that in a more standard way.  Whenever a
new pt_regs frame gets saved on entry, we could also create a new stack
frame which points to a fake kernel_entry() function.  That would tell
the unwinder there's a pt_regs frame without otherwise breaking frame
pointers across the frame.

Then I guess we wouldn't need my other solution of putting the idt
entries in a special section.

How does that sound?

> Have you looked at my vdso unwinding test at all?  If we could do
> something similar for the kernel, IMO it would make testing much more
> pleasant.

I found it, but I'm not sure what it would mean to do something similar
for the kernel.  Do you mean doing something like an NMI sampling-based
approach where we periodically do a random stack sanity check?

(If so, I do have something like that planned.)

-- 
Josh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/mm/slice: Remove slice_mm_new_context

2016-05-02 Thread Aneesh Kumar K.V
The existing usage is bogus, because we set the context.id value
in the same function. The book3s 64 usage was already removed by an
earlier patch. Hence remove the now-redundant definition.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/page_64.h   | 3 ---
 arch/powerpc/mm/mmu_context_nohash.c | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h 
b/arch/powerpc/include/asm/page_64.h
index 77488857c26d..dd5f0712afa2 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -128,8 +128,6 @@ extern void slice_set_user_psize(struct mm_struct *mm, 
unsigned int psize);
 extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
  unsigned long len, unsigned int psize);
 
-#define slice_mm_new_context(mm)   ((mm)->context.id == MMU_NO_CONTEXT)
-
 #endif /* __ASSEMBLY__ */
 #else
 #define slice_init()
@@ -151,7 +149,6 @@ do {\
 
 #define slice_set_range_psize(mm, start, len, psize)   \
slice_set_user_psize((mm), (psize))
-#define slice_mm_new_context(mm)   1
 #endif /* CONFIG_PPC_MM_SLICES */
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/arch/powerpc/mm/mmu_context_nohash.c 
b/arch/powerpc/mm/mmu_context_nohash.c
index a36c43a27893..7d95bc402dba 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -335,8 +335,7 @@ int init_new_context(struct task_struct *t, struct 
mm_struct *mm)
mm->context.active = 0;
 
 #ifdef CONFIG_PPC_MM_SLICES
-   if (slice_mm_new_context(mm))
-   slice_set_user_psize(mm, mmu_virtual_psize);
+   slice_set_user_psize(mm, mmu_virtual_psize);
 #endif
 
return 0;
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [Qemu-devel] [PATCH v2] spapr: Don't set the TM ibm, pa-features bit in PR KVM mode

2016-05-02 Thread haris iqbal
On Sat, Apr 30, 2016 at 6:18 AM, Anton Blanchard  wrote:
> We don't support transactional memory in PR KVM, so don't tell
> the OS that we do.
>
> Signed-off-by: Anton Blanchard 
> ---
>
> v2: Fix build with CONFIG_KVM disabled, noticed by Alex.
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index b69995e..dc3e3c9 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -696,6 +696,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> *fdt, int offset,
>  } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
>  pa_features = pa_features_207;
>  pa_size = sizeof(pa_features_207);
> +
> +#ifdef CONFIG_KVM
> +/* Don't enable TM in PR KVM mode */
> +if (kvm_enabled() &&
> +kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
> +pa_features[24] &= ~0x80;
> +}
> +#endif
>  }
>  if (env->ci_large_pages) {
>  pa_features[3] |= 0x20;
>

This email was put in the spam folder by Gmail. The message said "It
has a from address in samba.org but has failed samba.org's required
tests for authentication". Just bringing this to people's attention, as
I thought the patch might otherwise go unnoticed.

-- 

With regards,

Md Haris Iqbal,
Placement Coordinator, MTech IT
NITK Surathkal,
Contact: +91 8861996962
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: linux-next: build failure after merge of the akpm-current tree

2016-05-02 Thread Aneesh Kumar K.V
Stephen Rothwell  writes:

> Hi Andrew,
>
> After merging the akpm-current tree, today's linux-next build (powerpc
> allyesconfig and pseries_le_defconfig) failed like this:
>
> In file included from include/linux/mm.h:394:0,
>  from mm/huge_memory.c:10:
> include/linux/huge_mm.h:53:22: error: initializer element is not constant
>  #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
>                       ^
> mm/huge_memory.c:104:62: note: in expansion of macro 'HPAGE_PMD_NR'
>  static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8;
>   ^
>
> Caused by commit
>
>   6d34b9749be2 ("mm: make optimistic check for swapin readahead")
>
> interacting with commit
>
>   dd1842a2a448 ("powerpc/mm: Make page table size a variable")
>
> from the powerpc tree.
>
> I applied this fix patch for today (hopefully this is still initialised
> early enough):
>
> From: Stephen Rothwell 
> Date: Mon, 2 May 2016 18:25:42 +1000
> Subject: [PATCH] mm: make optimistic check for swapin readahead fix
>
> Signed-off-by: Stephen Rothwell 


Reviewed-by: Aneesh Kumar K.V 

> ---
>  mm/huge_memory.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index f0cd9dbc1157..6aabfa166b6d 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -101,7 +101,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
>   * fault.
>   */
>  static unsigned int khugepaged_max_ptes_none __read_mostly;
> -static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8;
> +static unsigned int khugepaged_max_ptes_swap __read_mostly;
>  static unsigned long allocstall;
>
>  static int khugepaged(void *none);
> @@ -703,6 +703,7 @@ static int __init hugepage_init(void)
>
>   khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
>   khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
> + khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8;
>   /*
>* hugepages can't be allocated by the buddy allocator
>*/
> -- 
> 2.7.0
>
>
>
>
> -- 
> Cheers,
> Stephen Rothwell

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] selftests/powerpc: Fix subpage_prot test to return !0 on failure

2016-05-02 Thread Aneesh Kumar K.V
Michael Ellerman  writes:

> It's helpful for automated testing if the test returns error codes back
> to the calling program.
>
> Signed-off-by: Michael Ellerman 


Reviewed-by: Aneesh Kumar K.V 

> ---
>  tools/testing/selftests/powerpc/mm/subpage_prot.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c 
> b/tools/testing/selftests/powerpc/mm/subpage_prot.c
> index 440180ff8089..7ccdc96b977d 100644
> --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c
> +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
> @@ -207,14 +207,16 @@ int test_file(void)
>
>  int main(int argc, char *argv[])
>  {
> - test_harness(test_anon, "subpage_prot_anon");
> + int rc;
> +
> + rc = test_harness(test_anon, "subpage_prot_anon");
> + if (rc)
> + return rc;
>
>   if (argc > 1)
>   file_name = argv[1];
>   else
>   file_name = "tempfile";
>
> - test_harness(test_file, "subpage_prot_file");
> -
> - return 0;
> + return test_harness(test_file, "subpage_prot_file");
>  }
> -- 
> 2.5.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc/mm/subpage: Fix subpage protection with 4K hpte config

2016-05-02 Thread Aneesh Kumar K.V
With Linux page size of 64K and hardware only supporting 4K hpte, if
we use subpage protection, we always fail for the subpage 0 as shown
below (using the selftest subpage_prot test).

4520175565: (4520111850): Failed at 0x0x3fffad4b (p=13,sp=0,w=0), 
want=fault, got=pass !
4520890210: (4520826495): Failed at 0x0x3fffad5b (p=29,sp=0,w=0), 
want=fault, got=pass !
4521574251: (4521510536): Failed at 0x0x3fffad6b (p=45,sp=0,w=0), 
want=fault, got=pass !
4522258324: (4522194609): Failed at 0x0x3fffad7b (p=61,sp=0,w=0), 
want=fault, got=pass !

This is because hash preload wrongly inserts the hpte entry for subpage 0
without looking at the subpage protection information. Don't do hash
page table entry preload if we have subpage protection configured for
that range.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/hash_utils_64.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 262082e51db1..b5a454415215 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1329,15 +1329,26 @@ void hash_preload(struct mm_struct *mm, unsigned long 
ea,
unsigned long vsid;
pgd_t *pgdir;
pte_t *ptep;
+   int psize;
unsigned long flags;
int rc, ssize, update_flags = 0;
 
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
 
 #ifdef CONFIG_PPC_MM_SLICES
-   /* We only prefault standard pages for now */
-   if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
+   psize = get_slice_psize(mm, ea);
+   /*
+* We only prefault standard pages
+*/
+   if (psize != mm->context.user_psize)
return;
+#ifdef CONFIG_PPC_64K_PAGES
+   /*
+* Don't prefault is subpage protection is enabled for that ea
+*/
+   if ((psize == MMU_PAGE_4K) && subpage_protection(mm, ea))
+   return;
+#endif
 #endif
 
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc/mm/subpage: Fix subpage protection with 4K hpte config

2016-05-02 Thread Aneesh Kumar K.V
With Linux page size of 64K and hardware only supporting 4K hpte, if
we use subpage protection, we always fail for the subpage 0 as shown
below (using the selftest subpage_prot test).

4520175565: (4520111850): Failed at 0x0x3fffad4b (p=13,sp=0,w=0), 
want=fault, got=pass !
4520890210: (4520826495): Failed at 0x0x3fffad5b (p=29,sp=0,w=0), 
want=fault, got=pass !
4521574251: (4521510536): Failed at 0x0x3fffad6b (p=45,sp=0,w=0), 
want=fault, got=pass !
4522258324: (4522194609): Failed at 0x0x3fffad7b (p=61,sp=0,w=0), 
want=fault, got=pass !

This is because hash preload wrongly inserts the hpte entry for subpage 0
without looking at the subpage protection information. Don't do hash
page table entry preload if we have subpage protection configured for
that range.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/hash_utils_64.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 262082e51db1..b5a454415215 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1329,15 +1329,26 @@ void hash_preload(struct mm_struct *mm, unsigned long 
ea,
unsigned long vsid;
pgd_t *pgdir;
pte_t *ptep;
+   int psize;
unsigned long flags;
int rc, ssize, update_flags = 0;
 
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
 
 #ifdef CONFIG_PPC_MM_SLICES
-   /* We only prefault standard pages for now */
-   if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
+   psize = get_slice_psize(mm, ea);
+   /*
+* We only prefault standard pages
+*/
+   if (psize != mm->context.user_psize)
return;
+#ifdef CONFIG_PPC_64K_PAGES
+   /*
+* Don't prefault is subpage protection is enabled for that ea
+*/
+   if ((psize == MMU_PAGE_4K) && subpage_protection(mm, ea))
+   return;
+#endif
 #endif
 
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] powerpc/mm/subpage: Init user psize correctly

2016-05-02 Thread Aneesh Kumar K.V
Check against a context.id value of zero instead of MMU_NO_CONTEXT
when doing a slice psize init. Without this patch we end up with
a slice psize value of zero and we always end up using 4K hpte.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/mmu_context_book3s64.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c 
b/arch/powerpc/mm/mmu_context_book3s64.c
index b5288b460bef..a28ed6a96286 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -85,8 +85,16 @@ int init_new_context(struct task_struct *tsk, struct 
mm_struct *mm)
/* The old code would re-promote on fork, we don't do that
 * when using slices as it could cause problem promoting slices
 * that have been forced down to 4K
+*
+* For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+* explicitly against context.id == 0. This ensures that we
+* properly initialize context slice details for newly allocated
+* mm and don't alter context slice inherited via fork.
+*
+* We should not be calling init_new_context on init_mm. Hence a
+* check against 0 is ok.
 */
-   if (slice_mm_new_context(mm))
+   if (mm->context.id == 0)
slice_set_user_psize(mm, mmu_virtual_psize);
subpage_prot_init_new_context(mm);
}
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] selftests/powerpc: Test cp_abort during context switch

2016-05-02 Thread Cyril Bur
On Mon, 2 May 2016 13:51:38 +1000
Chris Smart  wrote:

> Test that performing a copy paste sequence in userspace on P9 does not
> result in a leak of the copy into the paste of another process.
> 
> This is based on Anton Blanchard's context_switch benchmarking code. It
> sets up two processes tied to the same CPU, one which copies and one
> which pastes.
> 
> The paste should never succeed and the test fails if it does.
> 
> This is a test for commit, "8a64904 powerpc: Add support for userspace
> P9 copy paste."
> 

Hi Chris,

I must admit I didn't run it on real hardware ;).

Looks good.

> Patch created with much assistance from Michael Neuling
> 
> 
> Signed-off-by: Chris Smart 

Reviewed-by: Cyril Bur 

> ---
>  tools/testing/selftests/powerpc/Makefile   |   1 +
>  .../selftests/powerpc/context_switch/.gitignore|   1 +
>  .../selftests/powerpc/context_switch/Makefile  |  10 ++
>  .../selftests/powerpc/context_switch/cp_abort.c| 110 
> +
>  tools/testing/selftests/powerpc/utils.h|   7 ++
>  5 files changed, 129 insertions(+)
>  create mode 100644 tools/testing/selftests/powerpc/context_switch/.gitignore
>  create mode 100644 tools/testing/selftests/powerpc/context_switch/Makefile
>  create mode 100644 tools/testing/selftests/powerpc/context_switch/cp_abort.c
> 
> diff --git a/tools/testing/selftests/powerpc/Makefile 
> b/tools/testing/selftests/powerpc/Makefile
> index b08f77cbe31b..4ca83fe80654 100644
> --- a/tools/testing/selftests/powerpc/Makefile
> +++ b/tools/testing/selftests/powerpc/Makefile
> @@ -14,6 +14,7 @@ export CFLAGS
>  
>  SUB_DIRS = benchmarks\
>  copyloops\
> +context_switch   \
>  dscr \
>  mm   \
>  pmu  \
> diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore 
> b/tools/testing/selftests/powerpc/context_switch/.gitignore
> new file mode 100644
> index ..c1431af7b51c
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/context_switch/.gitignore
> @@ -0,0 +1 @@
> +cp_abort
> diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile 
> b/tools/testing/selftests/powerpc/context_switch/Makefile
> new file mode 100644
> index ..e164d1466466
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/context_switch/Makefile
> @@ -0,0 +1,10 @@
> +TEST_PROGS := cp_abort
> +
> +all: $(TEST_PROGS)
> +
> +$(TEST_PROGS): ../harness.c ../utils.c
> +
> +include ../../lib.mk
> +
> +clean:
> + rm -f $(TEST_PROGS)
> diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c 
> b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> new file mode 100644
> index ..5a5b55afda0e
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> @@ -0,0 +1,110 @@
> +/*
> + * Adapted from Anton Blanchard's context switch microbenchmark.
> + *
> + * Copyright 2009, Anton Blanchard, IBM Corporation.
> + * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + *
> + * This program tests the copy paste abort functionality of a P9
> + * (or later) by setting up two processes on the same CPU, one
> + * which executes the copy instruction and the other which
> + * executes paste.
> + *
> + * The paste instruction should never succeed, as the cp_abort
> + * instruction is called by the kernel during a context switch.
> + *
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include 
> +#include 
> +#include 
> +#include "utils.h"
> +#include 
> +
> +#define READ_FD 0
> +#define WRITE_FD 1
> +
> +#define NUM_LOOPS 1000
> +
> +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 
> 4.4. */
> +#define PASTE(RA, RB, L, RC) \
> + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) 
> | (RC) << (31-31))
> +
> +int paste(void *i)
> +{
> + int cr;
> +
> + asm volatile(str(PASTE(0, %1, 1, 1))";"
> + "mfcr %0;"
> + : "=r" (cr)
> + : "b" (i)
> + : "memory"
> + );
> + return cr;
> +}
> +
> +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 
> 4.4. */
> +#define COPY(RA, RB, L) \
> + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
> +
> +void copy(void *i)
> +{
> + asm volatile(str(COPY(0, %0, 1))";"
> + :
> + : "b" (i)
> + : "memory"
> + );
> +}
> +
> +int test_cp_abort(void)
> +{
> + /* 128 bytes for a full cache line */

Re: [PATCH v3 00/16] genrtc removal

2016-05-02 Thread Geert Uytterhoeven
Hi Arnd,

On Thu, Apr 28, 2016 at 9:48 AM, Geert Uytterhoeven
 wrote:
> On Thu, Apr 28, 2016 at 12:34 AM, Arnd Bergmann  wrote:
>> I ended up stuffing the two patch series into one, as they are now
>> more dependent on one another. This now thoroughly removes the
>> genrtc driver including the asm/rtc.h headers it uses. For all
>> architectures that still have a meaningful asm/rtc.h, this goes
>> through two stages:
>>
>> 1) make the rtc-generic implementation independent of asm/rtc.h
>> 2) remove the asm/rtc.h header and disallow the gen_rtc driver
>>
>> As the last step, the driver itself gets removed.
>
> In general, after fixing the minor nit:
> Acked-by: Geert Uytterhoeven 
>
> For the m68k bits:
> Tested-by: Geert Uytterhoeven 

More build coverage uncovered two build failures on m68k due to "[PATCH v3
02/16] rtc: cmos: move mc146818rtc code out of asm-generic/rtc.h":

bvme6000_defconfig:

In file included from arch/m68k/bvme6000/rtc.c:19:
include/linux/mc146818rtc.h: In function ‘mc146818_is_updating’:
include/linux/mc146818rtc.h:138: error: implicit declaration of
function ‘CMOS_READ’
include/linux/mc146818rtc.h: In function ‘mc146818_get_time’:
include/linux/mc146818rtc.h:189: error: ‘RTC_ALWAYS_BCD’
undeclared (first use in this function)
include/linux/mc146818rtc.h:189: error: (Each undeclared
identifier is reported only once
include/linux/mc146818rtc.h:189: error: for each function it appears in.)
include/linux/mc146818rtc.h: In function ‘mc146818_set_time’:
include/linux/mc146818rtc.h:279: error: ‘RTC_ALWAYS_BCD’
undeclared (first use in this function)
include/linux/mc146818rtc.h:290: error: implicit declaration of
function ‘CMOS_WRITE’

mvme16x_defconfig:

In file included from arch/m68k/mvme16x/rtc.c:18:
include/linux/mc146818rtc.h: In function ‘mc146818_is_updating’:
include/linux/mc146818rtc.h:138: error: implicit declaration of
function ‘CMOS_READ’
include/linux/mc146818rtc.h: In function ‘mc146818_get_time’:
include/linux/mc146818rtc.h:189: error: ‘RTC_ALWAYS_BCD’
undeclared (first use in this function)
include/linux/mc146818rtc.h:189: error: (Each undeclared
identifier is reported only once
include/linux/mc146818rtc.h:189: error: for each function it appears in.)
include/linux/mc146818rtc.h: In function ‘mc146818_set_time’:
include/linux/mc146818rtc.h:279: error: ‘RTC_ALWAYS_BCD’
undeclared (first use in this function)
include/linux/mc146818rtc.h:290: error: implicit declaration of
function ‘CMOS_WRITE’

These do not show up with a multi-platform config including Atari
support, as arch/m68k/include/asm/mc146818rtc.h provides a definition of
CMOS_READ() if CONFIG_ATARI=y.

Fortunately the fixes are simple: replace <linux/mc146818rtc.h> by
<linux/rtc.h>. Will send patches to fix...

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

linux-next: build failure after merge of the akpm-current tree

2016-05-02 Thread Stephen Rothwell
Hi Andrew,

After merging the akpm-current tree, today's linux-next build (powerpc
allyesconfig and pseries_le_defconfig) failed like this:

In file included from include/linux/mm.h:394:0,
 from mm/huge_memory.c:10:
include/linux/huge_mm.h:53:22: error: initializer element is not constant
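 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
                      ^
mm/huge_memory.c:104:62: note: in expansion of macro 'HPAGE_PMD_NR'
 static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8;
                                                              ^

Caused by commit

  6d34b9749be2 ("mm: make optimistic check for swapin readahead")

interacting with commit

  dd1842a2a448 ("powerpc/mm: Make page table size a variable")

from the powerpc tree.

I applied this fix patch for today (hopefully this is still initialised
early enough):

From: Stephen Rothwell 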
Date: Mon, 2 May 2016 18:25:42 +1000
Subject: [PATCH] mm: make optimistic check for swapin readahead fix

Signed-off-by: Stephen Rothwell 
---
 mm/huge_memory.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f0cd9dbc1157..6aabfa166b6d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -101,7 +101,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  * fault.
  */
 static unsigned int khugepaged_max_ptes_none __read_mostly;
-static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8;
+static unsigned int khugepaged_max_ptes_swap __read_mostly;
 static unsigned long allocstall;
 
 static int khugepaged(void *none);
@@ -703,6 +703,7 @@ static int __init hugepage_init(void)
 
khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
+   khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8;
/*
 * hugepages can't be allocated by the buddy allocator
 */
-- 
2.7.0




-- 
Cheers,
Stephen Rothwell
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH kernel v2] powerpc/powernv: Fix debug macro

2016-05-02 Thread Alexey Kardashevskiy
When cfg_dbg() is enabled (i.e. mapped to printk()), gcc produces
errors because the __func__ argument is missing in pnv_pci_cfg_write()
(pnv_pci_cfg_read() already passes it); this adds the missing argument.
Since cfg_dbg() is used for more than just config space accesses, this
also replaces it with the well-known pr_devel().

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* s/cfg_dbg/pr_devel/

---
 arch/powerpc/platforms/powernv/pci.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 73c8dc2..0db20ae 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -39,9 +39,6 @@
 /* Delay in usec */
 #define PCI_RESET_DELAY_US 300
 
-#define cfg_dbg(fmt...)do { } while(0)
-//#define cfg_dbg(fmt...)  printk(fmt)
-
 #ifdef CONFIG_PCI_MSI
 int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
@@ -402,8 +399,8 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
}
}
 
-   cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
-   (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+   pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+(pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
 
/* Clear the frozen state if applicable */
if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
@@ -451,8 +448,8 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
 
-   cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
-   __func__, pdn->busno, pdn->devfn, where, size, *val);
+   pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+__func__, pdn->busno, pdn->devfn, where, size, *val);
return PCIBIOS_SUCCESSFUL;
 }
 
@@ -462,8 +459,8 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 
-   cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
-   pdn->busno, pdn->devfn, where, size, val);
+   pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+__func__, pdn->busno, pdn->devfn, where, size, val);
switch (size) {
case 1:
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
-- 
2.5.0.rc3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver

2016-05-02 Thread Alexey Kardashevskiy

On 05/02/2016 01:44 PM, Gavin Shan wrote:

On Tue, Apr 19, 2016 at 08:36:48PM +1000, Alexey Kardashevskiy wrote:

On 02/17/2016 02:44 PM, Gavin Shan wrote:

This adds a standalone driver to support PCI hotplug for the PowerPC
PowerNV platform, which runs on top of skiboot firmware. The firmware
identifies hotpluggable slots and marks their device tree nodes with the
"ibm,slot-pluggable" and "ibm,reset-by-firmware" properties. The driver
scans the device tree nodes to create/register PCI hotplug slots accordingly.

The PCI slots are organized as a tree, which means one
PCI slot might have a parent PCI slot, and a parent PCI slot possibly
contains multiple child PCI slots. At the plugging time, the parent
PCI slot is populated before its children. The child PCI slots are
removed before their parent PCI slot can be removed from the system.

If the skiboot firmware doesn't support slot status retrieval, the PCI
slot device node shouldn't have the property "ibm,reset-by-firmware". In
that case, no valid PCI slots will be detected from the device tree.
The skiboot firmware doesn't export the capability to access attention
LEDs yet; supporting them is still to be done.

Signed-off-by: Gavin Shan 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/hotplug/Kconfig   |  12 +
 drivers/pci/hotplug/Makefile  |   3 +
 drivers/pci/hotplug/pnv_php.c | 870 ++
 3 files changed, 885 insertions(+)
 create mode 100644 drivers/pci/hotplug/pnv_php.c

diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index df8caec..167c8ce 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC

  When in doubt, say N.

+config HOTPLUG_PCI_POWERNV
+   tristate "PowerPC PowerNV PCI Hotplug driver"
+   depends on PPC_POWERNV && EEH
+   help
+ Say Y here if you run PowerPC PowerNV platform that supports
+ PCI Hotplug
+
+ To compile this driver as a module, choose M here: the
+ module will be called pnv-php.
+
+ When in doubt, say N.
+
 config HOTPLUG_PCI_RPA
tristate "RPA PCI Hotplug driver"
depends on PPC_PSERIES && EEH
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index b616e75..e33cdda 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)  += cpcihp_zt5550.o
 obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o
 obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o
+obj-$(CONFIG_HOTPLUG_PCI_POWERNV)  += pnv-php.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA)  += rpaphp.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o
 obj-$(CONFIG_HOTPLUG_PCI_SGI)  += sgi_hotplug.o
@@ -50,6 +51,8 @@ ibmphp-objs   :=  ibmphp_core.o   \
 acpiphp-objs   :=  acpiphp_core.o  \
acpiphp_glue.o

+pnv-php-objs   :=  pnv_php.o
+
 rpaphp-objs:=  rpaphp_core.o   \
rpaphp_pci.o\
rpaphp_slot.o
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
new file mode 100644
index 000..364ec36
--- /dev/null
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -0,0 +1,870 @@
+/*
+ * PCI Hotplug Driver for PowerPC PowerNV platform.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver"
+
+struct pnv_php_slot {
+   struct hotplug_slot slot;
+   struct hotplug_slot_infoslot_info;
+   uint64_tid;
+   char*name;
+   int slot_no;
+   struct kref kref;
+#define PNV_PHP_STATE_INITIALIZED  0
+#define PNV_PHP_STATE_REGISTERED   1
+#define PNV_PHP_STATE_POPULATED2
+   int state;
+   struct device_node  *dn;
+   struct pci_dev  *pdev;
+   struct pci_bus  *bus;
+   boolpower_state_check;
+   int power_state_confirmed;
+#define PNV_PHP_POWER_CONFIRMED_INVALID0
+#define PNV_PHP_POWER_CONFIRMED_SUCCESS1
+#define PNV_PHP_POWER_CONFIRMED_FAIL   2
+   struct opal_msg *msg;
+   void*fdt;
+   

[PATCH] powerpc: Remove unnecessary CONFIG_SMP #ifdefs

2016-05-02 Thread Chris Smart

The code in machine_restart/power_off/halt() includes #ifdefs around
calls to smp_send_stop(), however these are not required as
include/linux/smp.h includes an empty version of this function for
CONFIG_SMP=n builds.

Signed-off-by: Chris Smart 
---
arch/powerpc/kernel/setup-common.c | 6 --
1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 44c8d03558ac..8ca79b7503d8 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -128,9 +128,7 @@ void machine_restart(char *cmd)
machine_shutdown();
if (ppc_md.restart)
ppc_md.restart(cmd);
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
@@ -141,9 +139,7 @@ void machine_power_off(void)
machine_shutdown();
if (pm_power_off)
pm_power_off();
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
@@ -159,9 +155,7 @@ void machine_halt(void)
machine_shutdown();
if (ppc_md.halt)
ppc_md.halt();
-#ifdef CONFIG_SMP
smp_send_stop();
-#endif
printk(KERN_EMERG "System Halted, OK to turn off power\n");
local_irq_disable();
while (1) ;
--
2.5.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev