[PATCH v9 07/26] powerpc/powernv: Fix initial IO and M32 segmap
There are two arrays for the IO and M32 segment maps on every PHB. The arrays are indexed by segment number, and the value stored in each element is the number of the PE to which that segment is assigned. Initially, all elements in those two arrays are zero, meaning all segments are assigned to PE#0, which is wrong. This initializes the elements of those two arrays to IODA_INVALID_PE, meaning that no segment is assigned to any PE. Signed-off-by: Gavin Shan --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4aa6cdf..59b20e5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3240,6 +3240,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, const __be64 *prop64; const __be32 *prop32; int len; + unsigned int segno; u64 phb_id; void *aux; long rc; @@ -3334,8 +3335,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; - if (phb->type == PNV_PHB_IODA1) + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) + phb->ioda.m32_segmap[segno] = IODA_INVALID_PE; + if (phb->type == PNV_PHB_IODA1) { phb->ioda.io_segmap = aux + iomap_off; + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) + phb->ioda.io_segmap[segno] = IODA_INVALID_PE; + } phb->ioda.pe_array = aux + pemap_off; set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 18/26] powerpc/pci: Rename pcibios_{add, remove}_pci_devices()
This renames pcibios_{add,remove}_pci_devices() to avoid conflicts with names of the weak functions in PCI subsystem, which have the prefix "pcibios". No logical changes introduced. Signed-off-by: Gavin Shan--- arch/powerpc/include/asm/pci-bridge.h | 4 ++-- arch/powerpc/kernel/eeh_driver.c | 12 ++-- arch/powerpc/kernel/pci-hotplug.c | 15 +++ drivers/pci/hotplug/rpadlpar_core.c | 2 +- drivers/pci/hotplug/rpaphp_core.c | 4 ++-- drivers/pci/hotplug/rpaphp_pci.c | 2 +- 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 220129f..99027b8 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -260,10 +260,10 @@ static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn) extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn); /** Remove all of the PCI devices under this bus */ -extern void pcibios_remove_pci_devices(struct pci_bus *bus); +extern void pci_hp_remove_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ -extern void pcibios_add_pci_devices(struct pci_bus *bus); +extern void pci_hp_add_devices(struct pci_bus *bus); extern void isa_bridge_find_early(struct pci_controller *hose); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index fb6207d..618d13c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -621,7 +621,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * We don't remove the corresponding PE instances because * we need the information afterwords. The attached EEH * devices are expected to be attached soon when calling -* into pcibios_add_pci_devices(). +* into pci_hp_add_devices(). 
*/ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { @@ -630,7 +630,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, } else { eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); + pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); } } else if (frozen_bus) { @@ -681,7 +681,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) eeh_add_virt_device(edev, NULL); else - pcibios_add_pci_devices(bus); + pci_hp_add_devices(bus); } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); @@ -691,7 +691,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) eeh_add_virt_device(edev, NULL); else - pcibios_add_pci_devices(frozen_bus); + pci_hp_add_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -896,7 +896,7 @@ perm_error: eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); + pci_hp_remove_devices(frozen_bus); pci_unlock_rescan_remove(); } } @@ -981,7 +981,7 @@ static void eeh_handle_special_event(void) bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - pcibios_remove_pci_devices(bus); + pci_hp_remove_devices(bus); } pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 59c4361..2d108e5 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -38,20 +38,20 @@ void pcibios_release_device(struct pci_dev *dev) } /** - * pcibios_remove_pci_devices - remove all devices under this bus + * pci_hp_remove_devices - remove all devices under this bus * @bus: the indicated PCI bus * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. 
*/ -void pcibios_remove_pci_devices(struct pci_bus *bus) +void pci_hp_remove_devices(struct pci_bus *bus) { struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, >children, node) -
[PATCH v9 08/26] powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()
pnv_ioda_setup_pe_seg() associates the IO and M32 segments with the owner PE. The code mapping segments should be fixed and immune from logic changes introduced to pnv_ioda_setup_pe_seg(). This moves the code mapping segments to helper pnv_ioda_setup_pe_res(). The data type for @rc is changed to "int64_t". Also, argument @hose is removed from pnv_ioda_setup_pe() as it can be got from @pe. No functional changes introduced. Signed-off-by: Gavin Shan--- arch/powerpc/platforms/powernv/pci-ioda.c | 121 +++--- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 59b20e5..b954fbc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2929,19 +2929,72 @@ truncate_iov: } #endif /* CONFIG_PCI_IOV */ +static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, + struct resource *res) +{ + struct pnv_phb *phb = pe->phb; + struct pci_bus_region region; + int index; + int64_t rc; + + if (!res || !res->flags || res->start > res->end) + return; + + if (res->flags & IORESOURCE_IO) { + region.start = res->start - phb->ioda.io_pci_base; + region.end = res->end - phb->ioda.io_pci_base; + index = region.start / phb->ioda.io_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.io_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.io_segsize; + index++; + } + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_mem_pref_64(res->flags)) { + region.start = res->start - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + region.end = res->end - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + index = region.start / 
phb->ioda.m32_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.m32_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.m32_segsize; + index++; + } + } +} + /* * This function is supposed to be called on basis of PE from top * to bottom style. So the the I/O or MMIO segment assigned to * parent PE could be overrided by its child PEs if necessary. */ -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, - struct pnv_ioda_pe *pe) +static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) { - struct pnv_phb *phb = hose->private_data; - struct pci_bus_region region; struct resource *res; - int i, index; - int rc; + int i; /* * NOTE: We only care PCI bus based PE for now. For PCI @@ -2950,58 +3003,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, */ BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); - pci_bus_for_each_resource(pe->pbus, res, i) { - if (!res || !res->flags || - res->start > res->end) - continue; - - if (res->flags & IORESOURCE_IO) { - region.start = res->start - phb->ioda.io_pci_base; - region.end = res->end - phb->ioda.io_pci_base; - index = region.start / phb->ioda.io_segsize; - - while (index < phb->ioda.total_pe_num && - region.start <= region.end) { - phb->ioda.io_segmap[index] = pe->pe_number; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, -
[PATCH v9 11/26] powerpc/powernv: Rename M64 related functions
This renames those functions picking PE number based on consumed M64 segments, mapping M64 segments to PEs as those functions are going to be shared by IODA1/IODA2 in next patch. No logical changes introduced. Signed-off-by: Gavin ShanReviewed-by: Alexey Kardashevskiy --- arch/powerpc/platforms/powernv/pci-ioda.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 832b430..37f22b0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -219,7 +219,7 @@ fail: return -EIO; } -static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev, +static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, unsigned long *pe_bitmap) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); @@ -246,22 +246,22 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev, } } -static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus, -unsigned long *pe_bitmap, -bool all) +static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, + unsigned long *pe_bitmap, + bool all) { struct pci_dev *pdev; list_for_each_entry(pdev, >devices, bus_list) { - pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap); + pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap); if (all && pdev->subordinate) - pnv_ioda2_reserve_m64_pe(pdev->subordinate, -pe_bitmap, all); + pnv_ioda_reserve_m64_pe(pdev->subordinate, + pe_bitmap, all); } } -static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) +static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -283,7 +283,7 @@ static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) } /* Figure out reserved PE numbers by the PE */ - pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all); + pnv_ioda_reserve_m64_pe(bus, pe_alloc, all); /* * the current bus might not own M64 window 
and that's all @@ -365,8 +365,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; - phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; + phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe; + phb->pick_m64_pe = pnv_ioda_pick_m64_pe; } static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 17/26] powerpc/powernv: Use PE instead of number during setup and release
In current implementation, the PEs that are allocated or picked from the reserved list are identified by PE number. The PE instance has to be picked according to the PE number eventually. We have same issue when PE is released. For pnv_ioda_pick_m64_pe() and pnv_ioda_alloc_pe(), this returns PE instance so that pnv_ioda_setup_bus_PE() can use the allocated or reserved PE instance directly. Also, pnv_ioda_setup_bus_PE() returns the reserved/allocated PE instance to be used in subsequent patches. On the other hand, pnv_ioda_free_pe() uses PE instance (not number) as its argument. No logical changes introduced. Signed-off-by: Gavin ShanReviewed-by: Alexey Kardashevskiy --- arch/powerpc/platforms/powernv/pci-ioda.c | 104 +- arch/powerpc/platforms/powernv/pci.h | 2 +- 2 files changed, 59 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cfd2906..5ee8a57 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -123,6 +123,14 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) +{ + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; + + return >ioda.pe_array[pe_no]; +} + static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { @@ -135,11 +143,10 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) pr_debug("%s: PE %d was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); - phb->ioda.pe_array[pe_no].phb = phb; - phb->ioda.pe_array[pe_no].pe_number = pe_no; + pnv_ioda_init_pe(phb, pe_no); } -static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb) +static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -147,20 +154,20 @@ static unsigned int 
pnv_ioda_alloc_pe(struct pnv_phb *phb) pe = find_next_zero_bit(phb->ioda.pe_alloc, phb->ioda.total_pe_num, 0); if (pe >= phb->ioda.total_pe_num) - return IODA_INVALID_PE; + return NULL; } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); - phb->ioda.pe_array[pe].phb = phb; - phb->ioda.pe_array[pe].pe_number = pe; - return pe; + return pnv_ioda_init_pe(phb, pe); } -static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) +static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { - WARN_ON(phb->ioda.pe_array[pe].pdev); + struct pnv_phb *phb = pe->phb; - memset(>ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); - clear_bit(pe, phb->ioda.pe_alloc); + WARN_ON(pe->pdev); + + memset(pe, 0, sizeof(struct pnv_ioda_pe)); + clear_bit(pe->pe_number, phb->ioda.pe_alloc); } /* The default M64 BAR is shared by all PEs */ @@ -320,7 +327,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, } } -static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) +static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -330,7 +337,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) /* Root bus shouldn't use M64 */ if (pci_is_root_bus(bus)) - return IODA_INVALID_PE; + return NULL; /* Allocate bitmap */ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); @@ -338,7 +345,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) if (!pe_alloc) { pr_warn("%s: Out of memory !\n", __func__); - return IODA_INVALID_PE; + return NULL; } /* Figure out reserved PE numbers by the PE */ @@ -351,7 +358,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) */ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) { kfree(pe_alloc); - return IODA_INVALID_PE; + return NULL; } /* @@ -397,7 +404,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) } kfree(pe_alloc); - 
return master_pe->pe_number; + return master_pe; } static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) @@ -963,7 +970,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn =
[PATCH v9 12/26] powerpc/powernv/ioda1: M64 support on P7IOC
This enables the M64 window on P7IOC, which has already been enabled on PHB3. Unlike PHB3, where 16 M64 BARs are supported and each of them can be owned exclusively by one particular PE# or divided evenly into 256 segments, every P7IOC PHB has 16 M64 BARs and each of them is divided into 8 segments. So every P7IOC PHB supports 128 M64 segments in total. P7IOC has M64DT, which helps map one particular M64 segment# to an arbitrary PE#. PHB3 doesn't have M64DT, meaning that one M64 segment can only be pinned to a fixed PE#. In order to unify M64 support on P7IOC and PHB3, we just provide 128 M64 segments on every P7IOC PHB, and each of them is pinned to a fixed PE# by bypassing the function of M64DT. In turn, we just need a different phb->init_m64() for P7IOC and PHB3, and to map the M64 segment in pnv_ioda_pick_m64_pe() for P7IOC; most of the code is shared by them. Signed-off-by: Gavin Shan --- arch/powerpc/platforms/powernv/pci-ioda.c | 89 +-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 37f22b0..a1b74ec 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -48,6 +48,9 @@ #include "powernv.h" #include "pci.h" +#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ +#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ + /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x1000 / 0x1000) * 8) @@ -246,6 +249,64 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, } } +static int pnv_ioda1_init_m64(struct pnv_phb *phb) +{ + struct resource *r; + int index; + + /* +* There are 16 M64 BARs, each of which has 8 segments. So +* there are as many M64 segments as the maximum number of +* PEs, which is 128. 
+*/ + for (index = 0; index < PNV_IODA1_M64_NUM; index++) { + unsigned long base, segsz = phb->ioda.m64_segsize; + int64_t rc; + + base = phb->ioda.m64_base + + index * PNV_IODA1_M64_SEGS * segsz; + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, base, 0, + PNV_IODA1_M64_SEGS * segsz); + if (rc != OPAL_SUCCESS) { + pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n", + rc, phb->hose->global_number, index); + goto fail; + } + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n", + rc, phb->hose->global_number, index); + goto fail; + } + } + + /* +* Exclude the segment used by the reserved PE, which +* is expected to be 0 or last supported PE#. +*/ + r = >hose->mem_resources[1]; + if (phb->ioda.reserved_pe_idx == 0) + r->start += phb->ioda.m64_segsize; + else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) + r->end -= phb->ioda.m64_segsize; + else + pr_warn(" Cannot cut M64 segment for reserved PE#%d\n", + phb->ioda.reserved_pe_idx); + + return 0; + +fail: + for ( ; index >= 0; index--) + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64); + + return -EIO; +} + static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, unsigned long *pe_bitmap, bool all) @@ -315,6 +376,26 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->master = master_pe; list_add_tail(>list, _pe->slaves); } + + /* +* P7IOC supports M64DT, which helps mapping M64 segment +* to one particular PE#. However, PHB3 has fixed mapping +* between M64 segment and PE#. In order to have same logic +* for P7IOC and PHB3, we enforce fixed mapping between M64 +* segment and PE# on P7IOC. +*/ + if (phb->type == PNV_PHB_IODA1) { + int64_t rc; + + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M64_WINDOW_TYPE, + pe->pe_number / PNV_IODA1_M64_SEGS, +
[PATCH v9 22/26] powerpc/pci: Introduce pci_remove_device_node_info()
This implements and exports pci_remove_device_node_info(). It's used to remove the pdn (struct pci_dn) for the indicated device node. The function is going to be used by the PowerNV PCI hotplug driver. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy --- arch/powerpc/include/asm/pci-bridge.h | 1 + arch/powerpc/kernel/pci_dn.c | 23 +++ 2 files changed, 24 insertions(+) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 07b94ec..467c0b0 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -237,6 +237,7 @@ extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); extern void remove_dev_pci_data(struct pci_dev *pdev); extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, struct device_node *dn); +extern void pci_remove_device_node_info(struct device_node *dn); static inline int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0a249ff..ce10281 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -331,6 +331,29 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, } EXPORT_SYMBOL_GPL(pci_add_device_node_info); +void pci_remove_device_node_info(struct device_node *dn) +{ + struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL; +#ifdef CONFIG_EEH + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + if (edev) + edev->pdn = NULL; +#endif + + if (!pdn) + return; + + WARN_ON(!list_empty(&pdn->child_list)); + list_del(&pdn->list); + if (pdn->parent) + of_node_put(pdn->parent->node); + + dn->data = NULL; + kfree(pdn); +} +EXPORT_SYMBOL_GPL(pci_remove_device_node_info); + /* * Traverse a device tree stopping each PCI device in the tree. * This is done depth first. As each node is processed, a "pre" -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 24/26] powerpc/pci: Don't scan empty slot
In the hotplug case, function pci_hp_add_devices() is called to rescan the specified PCI bus, which might not have any child devices. Accessing the PCI bus's child device node in that case invariably crashes the kernel. This adds one more check to skip scanning a PCI bus that doesn't have any subordinate devices in the device tree, in order to avoid the kernel crash. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy --- arch/powerpc/kernel/pci-hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 46587a1..2d71269 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -120,7 +120,8 @@ void pci_hp_add_devices(struct pci_bus *bus) if (mode == PCI_PROBE_DEVTREE) { /* use ofdt-based probe */ of_rescan_bus(dn, bus); - } else if (mode == PCI_PROBE_NORMAL) { + } else if (mode == PCI_PROBE_NORMAL && + dn->child && PCI_DN(dn->child)) { /* * Use legacy probe. In the partial hotplug case, we * probably have grandchildren devices unplugged. So -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 09/26] powerpc/powernv: IO and M32 mapping based on PCI device resources
Currently, the IO and M32 segments are mapped to the corresponding PE based on the windows of the parent bridge of the PE's primary bus. That is not going to work when the windows of the root port, or of the upstream port of the PCIe switch behind the root port, are extended to the PHB's apertures in order to support hotplug in a subsequent patch. This fixes the issue by mapping IO and M32 segments based on the resources of the PCI devices included in the PE, instead of the windows of the parent bridge of the PE's primary bus. Signed-off-by: Gavin Shan --- arch/powerpc/platforms/powernv/pci-ioda.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b954fbc..904790b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2993,7 +2993,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, */ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) { - struct resource *res; + struct pci_dev *pdev; int i; /* @@ -3003,8 +3003,21 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) */ BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); - pci_bus_for_each_resource(pe->pbus, res, i) - pnv_ioda_setup_pe_res(pe, res); + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) + pnv_ioda_setup_pe_res(pe, &pdev->resource[i]); + + /* +* If the PE contains all subordinate PCI buses, the +* windows of the child bridges should be mapped to +* the PE as well. +*/ + if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev)) + continue; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) + pnv_ioda_setup_pe_res(pe, + &pdev->resource[PCI_BRIDGE_RESOURCES + i]); + } } static void pnv_pci_ioda_setup_seg(void) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 06/26] powerpc/powernv: Data type unsigned int for PE number
This changes the data type of PE number from "int" to "unsigned int" in order to match the fact PE number is never negative: * The number of PE to which the specified PCI device is attached. * The PE number map for SRIOV VFs. * The returned PE number from pnv_ioda_alloc_pe(). * The returned PE number from pnv_ioda2_pick_m64_pe(). Suggested-by: Alexey KardashevskiySigned-off-by: Gavin Shan --- arch/powerpc/include/asm/pci-bridge.h | 6 +++--- arch/powerpc/platforms/powernv/pci-ioda.c | 8 arch/powerpc/platforms/powernv/pci.c | 2 +- arch/powerpc/platforms/powernv/pci.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 023c8c8..220129f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -209,14 +209,14 @@ struct pci_dn { #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ #endif -#define IODA_INVALID_PE(-1) +#define IODA_INVALID_PE0x #ifdef CONFIG_PPC_POWERNV - int pe_number; + unsigned int pe_number; int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs;/* number of VFs enabled*/ - int *pe_num_map;/* PE# for the first VF PE or array */ + unsigned int *pe_num_map; /* PE# for the first VF PE or array */ boolm64_single_mode;/* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64(-1) int (*m64_map)[PCI_SRIOV_NUM_BARS]; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1d2514f..4aa6cdf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -138,7 +138,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) phb->ioda.pe_array[pe_no].pe_number = pe_no; } -static int pnv_ioda_alloc_pe(struct pnv_phb *phb) +static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -261,7 +261,7 @@ static void 
pnv_ioda2_reserve_m64_pe(struct pci_bus *bus, } } -static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) +static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -919,7 +919,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; - int pe_num; + unsigned int pe_num; if (!pdn) { pr_err("%s: Device tree node not associated properly\n", @@ -1010,7 +1010,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe; - int pe_num = IODA_INVALID_PE; + unsigned int pe_num = IODA_INVALID_PE; /* Check if PE is determined by M64 */ if (phb->pick_m64_pe) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index afbaa1c..8827461 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -370,7 +370,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) struct pnv_phb *phb = pdn->phb->private_data; u8 fstate; __be16 pcierr; - int pe_no; + unsigned int pe_no; s64 rc; /* diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 784882a..66f2569 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -113,7 +113,7 @@ struct pnv_phb { int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pci_bus *bus, unsigned long *pe_bitmap, bool all); - int (*pick_m64_pe)(struct pci_bus *bus, bool all); + unsigned int (*pick_m64_pe)(struct pci_bus *bus, bool all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); -- 2.1.0 ___ Linuxppc-dev mailing 
list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 04/26] powerpc/powernv: Reorder fields in struct pnv_phb
This moves those fields in struct pnv_phb that are related to PE allocation around. No logical change. Signed-off-by: Gavin ShanReviewed-by: Alexey Kardashevskiy --- arch/powerpc/platforms/powernv/pci.h | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 78f035e..f2a1452 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -140,15 +140,14 @@ struct pnv_phb { unsigned intio_segsize; unsigned intio_pci_base; - /* PE allocation bitmap */ - unsigned long *pe_alloc; - /* PE allocation mutex */ + /* PE allocation */ struct mutexpe_alloc_mutex; + unsigned long *pe_alloc; + struct pnv_ioda_pe *pe_array; /* M32 & IO segment maps */ unsigned int*m32_segmap; unsigned int*io_segmap; - struct pnv_ioda_pe *pe_array; /* IRQ chip */ int irq_chip_init; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 00/26] powerpc/powernv: PCI hotplug preparation
The series is split from "[PATCH v8 00/45] powerpc/powernv: PCI hotplug support". The series does a couple of things, as listed below. The patches are required to support PCI hotplug on PowerNV platforms. However, the patches refactor the code with the goal of not affecting the current logic. * Code cleanup and refactoring. * Track IO/M32/M64 segments consumed by one particular PE. * Remove DMA32 list and improve DMA32 segment tracking. * M64 support for IODA1 so that we have unified basis for the subsequent patches to support PCI hotplug. * Couple of fixes to PCI hotplug (used by EEH). * Exported functions to be used by PCI hotplug. Gavin Shan (26): powerpc/pci: Cleanup on struct pci_controller_ops powerpc/powernv: Cleanup on pci_controller_ops instances powerpc/powernv: Drop phb->bdfn_to_pe() powerpc/powernv: Reorder fields in struct pnv_phb powerpc/powernv: Rename PE# fields in struct pnv_phb powerpc/powernv: Data type unsigned int for PE number powerpc/powernv: Fix initial IO and M32 segmap powerpc/powernv: Simplify pnv_ioda_setup_pe_seg() powerpc/powernv: IO and M32 mapping based on PCI device resources powerpc/powernv: Track M64 segment consumption powerpc/powernv: Rename M64 related functions powerpc/powernv/ioda1: M64 support on P7IOC powerpc/powernv/ioda1: Rename pnv_pci_ioda_setup_dma_pe() powerpc/powernv/ioda1: Introduce PNV_IODA1_DMA32_SEGSIZE powerpc/powernv: Remove DMA32 PE list powerpc/powernv/ioda1: Improve DMA32 segment track powerpc/powernv: Use PE instead of number during setup and release powerpc/pci: Rename pcibios_{add,remove}_pci_devices() powerpc/pci: Rename pcibios_find_pci_bus() powerpc/pci: Move pci_find_bus_by_node() around powerpc/pci: Export pci_add_device_node_info() powerpc/pci: Introduce pci_remove_device_node_info() powerpc/pci: Export pci_traverse_device_nodes() powerpc/pci: Don't scan empty slot powerpc/powernv: Simplify pnv_eeh_reset() powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus() arch/powerpc/include/asm/pci-bridge.h| 41 
+- arch/powerpc/include/asm/ppc-pci.h | 6 +- arch/powerpc/kernel/eeh_driver.c | 12 +- arch/powerpc/kernel/pci-hotplug.c| 47 +- arch/powerpc/kernel/pci_dn.c | 66 ++- arch/powerpc/platforms/powernv/eeh-powernv.c | 81 ++- arch/powerpc/platforms/powernv/pci-ioda.c| 719 +++ arch/powerpc/platforms/powernv/pci.c | 4 +- arch/powerpc/platforms/powernv/pci.h | 41 +- arch/powerpc/platforms/pseries/msi.c | 4 +- arch/powerpc/platforms/pseries/pci_dlpar.c | 32 -- arch/powerpc/platforms/pseries/setup.c | 2 +- drivers/pci/hotplug/rpadlpar_core.c | 8 +- drivers/pci/hotplug/rpaphp_core.c| 4 +- drivers/pci/hotplug/rpaphp_pci.c | 4 +- 15 files changed, 592 insertions(+), 479 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 03/26] powerpc/powernv: Drop phb->bdfn_to_pe()
This drops struct pnv_phb::bdfn_to_pe() as nobody uses it. Signed-off-by: Gavin ShanReviewed-by: Alexey Kardashevskiy --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 - arch/powerpc/platforms/powernv/pci.h | 1 - 2 files changed, 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 524c9c7..10ecd97 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3195,12 +3195,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } -static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, - u32 devfn) -{ - return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; -} - static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; @@ -3377,9 +3371,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->freeze_pe = pnv_ioda_freeze_pe; phb->unfreeze_pe = pnv_ioda_unfreeze_pe; - /* Setup RID -> PE mapping function */ - phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; - /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 3f814f3..78f035e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -110,7 +110,6 @@ struct pnv_phb { unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); - u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pci_bus *bus, unsigned long *pe_bitmap, bool all); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v9 01/26] powerpc/pci: Cleanup on struct pci_controller_ops
Each PHB has one instance of "struct pci_controller_ops" that includes various callbacks called by PCI subsystem. In the definition of this struct, some callbacks have explicit names for its arguments, but the left don't have. This adds all explicit names of the arguments to the callbacks in "struct pci_controller_ops" so that the code looks consistent. Also, argument name @dev is replaced by @pdev as the later one is the preferred name for PCI device. Signed-off-by: Gavin ShanReviewed-by: Daniel Axtens Reviewed-by: Andrew Donnellan Reviewed-by: Alexey Kardashevskiy --- arch/powerpc/include/asm/pci-bridge.h | 25 + 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index f5056e3..023c8c8 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -17,33 +17,34 @@ struct device_node; * PCI controller operations */ struct pci_controller_ops { - void(*dma_dev_setup)(struct pci_dev *dev); + void(*dma_dev_setup)(struct pci_dev *pdev); void(*dma_bus_setup)(struct pci_bus *bus); - int (*probe_mode)(struct pci_bus *); + int (*probe_mode)(struct pci_bus *bus); /* Called when pci_enable_device() is called. Returns true to * allow assignment/enabling of the device. 
*/ - bool(*enable_device_hook)(struct pci_dev *); + bool(*enable_device_hook)(struct pci_dev *pdev); - void(*disable_device)(struct pci_dev *); + void(*disable_device)(struct pci_dev *pdev); - void(*release_device)(struct pci_dev *); + void(*release_device)(struct pci_dev *pdev); /* Called during PCI resource reassignment */ - resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); - void(*reset_secondary_bus)(struct pci_dev *dev); + resource_size_t (*window_alignment)(struct pci_bus *bus, + unsigned long type); + void(*reset_secondary_bus)(struct pci_dev *pdev); #ifdef CONFIG_PCI_MSI - int (*setup_msi_irqs)(struct pci_dev *dev, + int (*setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); - void(*teardown_msi_irqs)(struct pci_dev *dev); + void(*teardown_msi_irqs)(struct pci_dev *pdev); #endif - int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); - u64 (*dma_get_required_mask)(struct pci_dev *dev); + int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pci_dev *pdev); - void(*shutdown)(struct pci_controller *); + void(*shutdown)(struct pci_controller *hose); }; /* -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH 5/5] vfio-pci: Allow to mmap MSI-X table if interrupt remapping is supported
> From: Yongji Xie > Sent: Wednesday, April 27, 2016 8:43 PM > > This patch enables mmapping MSI-X tables if hardware supports > interrupt remapping which can ensure that a given pci device > can only shoot the MSIs assigned for it. > > With MSI-X table mmapped, we also need to expose the > read/write interface which will be used to access MSI-X table. > > Signed-off-by: Yongji Xie A curious question here. Does "allow to mmap MSI-X" essentially mean that KVM guest can directly read/write physical MSI-X structure then? Thanks Kevin ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 7/9] powerpc/powernv: Add platform support for stop instruction
> diff --git a/arch/powerpc/include/asm/cputable.h > b/arch/powerpc/include/asm/cputable.h > index df4fb5f..a4739a1 100644 > --- a/arch/powerpc/include/asm/cputable.h > +++ b/arch/powerpc/include/asm/cputable.h > @@ -205,6 +205,7 @@ enum { > #define CPU_FTR_DABRX > LONG_ASM_CONST(0x0800000000000000) > #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) > #define CPU_FTR_SUBCORE > LONG_ASM_CONST(0x2000000000000000) > +#define CPU_FTR_STOP_INST LONG_ASM_CONST(0x4000000000000000) In general, we are putting all the POWER9 features under CPU_FTR_ARCH_300. Is there a reason you need this separate bit? CPU_FTR bits are fairly scarce these days. Mikey ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 0/5] powerpc/pm: QorIQ deep sleep
On Tue, 2016-04-26 at 10:27 +0000, Chenhui Zhao wrote: > Any comment? > > Thanks, > Chenhui Leo already commented on the "add a compatible string" patch (though the threading got broken thanks to Microsoft) and you said you'd fix it. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: radeonhd and xorg issues on powerpc p5020 and G5
hi michel, tested with sapphire 7750Hd 2gb ddr3 on Cyrus+ Amigaone X5000 P5020, with radeon selected i have a stalling of fence 0 and video binking with all glitched like on 7770hd, only fbdev video gave the desktop ok. distro is the ubuntu mate 16.04 kernels used for testing are the 4.6 rc 5 and 4.5 will build today the 4.4 and test there too. if something needed (logs)and some specific test are requied for help the devs i will glad to send. thanks and sorry for my english luigi Inviato da iPad > Il giorno 30 apr 2016, alle ore 09:29, luigi burdo> ha scritto: > > hi michel, > without xorg conf on cyrus+ p5020, and g5 quad i have only the black screen > with pulse cursor. i need to made it and put in etc/X11/ or not desktop at > all. > > about modeset=0 on both cards: > yes i know is normal dont have video but i think the more strange is have the > opposite compared the kernel options . with kernel radeon. modeset=0 and > nouveau.modeset=1 look like xorg understand to activate the radeon and > deactivate the nouveau . > and this make the system stalling without any way to recover. > > about radeonsi, will check on 7750 too and report. > for now 4xxx,5xxx,6xxx can say are running on ppc system with 3d too , yes > many endianess on egl and egl2 colors not right and some creazy texture on > some games but ... running ;-) > > ciao > luigi > > > > > Inviato da iPad > >>> Il giorno 30 apr 2016, alle ore 09:10, Michel Dänzer >>> ha scritto: >>> >>> On 23.04.2016 02:06, luigi burdo wrote: >>> >>> On Quad G5 with 2 video boards >>> if i set radeon.modeset=1 nouveau.modeset=1 Xorg -configure dont found >>> at all the video boards >>> if i set radeon.modeset=0 nouveau.modeset=1 Xorg -configure dont found >>> the nouveau board >>> if i set radeon.modeset=1 nouveau.modeset=0 Xorg -configure dont found >>> the radeon board >> >> Xorg -configure has various known issues and shouldn't be used anymore. 
>> Xorg is normally able to automatically detect and use all GPUs in a >> system without any xorg.conf file. >> >> >>> if i set radeon.modeset=0 nouveau.modeset=0 no video working ... no tty >>> too ;-) >> >> That disables both kernel drivers, which are required for fbcon and Xorg. >> >> >>> On P5020 >>> last of my test is this machine with a radeon hd 7770 core edition >>> if radeon drv is running the system run in softpipe mode and i face many >>> drm errors and fence issue. >>> i atteched the dmesg files. >> >> Which kernel version are you testing? There were some fixes in 4.4/4.5 >> which may help for the kernel driver issues, but note that the Mesa >> radeonsi driver for >= 7xxx Radeons still needs a lot of work to be >> usable on big endian systems. >> >> >> -- >> Earthling Michel Dänzer | http://www.amd.com >> Libre software enthusiast | Mesa and X developer > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: powerpc: Add out of bounds check to crash_shutdown_unregister()
On Thu, 2016-28-04 at 06:17:45 UTC, Suraj Jitindar Singh wrote: > When unregistering a crash_shutdown_handle in the function > crash_shutdown_unregister() the other handles are shifted down in the > array to replace the unregistered handle. The for loop assumes that the > last element in the array is null and uses this as the stop condition, > however in the case that the last element is not null there is no check > to ensure that an out of bounds access is not performed. But AFAICS the code ensures that entry will always be NULL. So there's no bug at the moment. > Add a check to terminate the shift operation when CRASH_HANDLER_MAX is > reached in order to protect against out of bounds accesses. Doing it this way is more robust though. The chance of the NULL terminator being corrupted is definitely higher than the code being corrupted, and if the latter happens we're probably toast anyway. > diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c > index 2bb252c..6b267af 100644 > --- a/arch/powerpc/kernel/crash.c > +++ b/arch/powerpc/kernel/crash.c > @@ -288,7 +288,7 @@ int crash_shutdown_unregister(crash_shutdown_t handler) > rc = 1; > } else { > /* Shift handles down */ > - for (; crash_shutdown_handles[i]; i++) > + for (; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) > crash_shutdown_handles[i] = > crash_shutdown_handles[i+1]; > rc = 0; So if I'm reading it right, with this change we have removed all the code that uses the NULL-terminated property of the list. If so we should also shrink the array to be only CRASH_HANDLER_MAX in size, and remove any references to it being NULL terminated. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2] cxl: Check periodically the coherent platform function's state
Acked-by: Ian Munsie ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 12/16] rtc: powerpc: provide rtc_class_ops directly
On Thu, 2016-04-28 at 00:34 +0200, Arnd Bergmann wrote: > The rtc-generic driver provides an architecture specific > wrapper on top of the generic rtc_class_ops abstraction, > and powerpc has another abstraction on top, which is a bit > silly. > > This changes the powerpc rtc-generic device to provide its > rtc_class_ops directly, to reduce the number of layers > by one. > > Signed-off-by: Arnd Bergmann > --- > arch/powerpc/kernel/time.c | 29 - > drivers/rtc/rtc-generic.c | 2 +- > 2 files changed, 29 insertions(+), 2 deletions(-) If this hits linux-next it will go through my automated boot testing, which hopefully would be sufficient to catch any bugs in this patch, cross fingers. I don't know jack about all the layers of RTC mess, so my ack is basically worthless here. But if you like you can have one anyway :) Acked-by: Michael Ellerman cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
> From: Yongji Xie > Sent: Wednesday, April 27, 2016 8:22 PM > > Current vfio-pci implementation disallows to mmap > sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio > page may be shared with other BARs. This will cause some > performance issues when we passthrough a PCI device with > this kind of BARs. Guest will be not able to handle the mmio > accesses to the BARs which leads to mmio emulations in host. > > However, not all sub-page BARs will share page with other BARs. > We should allow to mmap those sub-page MMIO BARs which we can > make sure will not share page with other BARs. > > This patch adds support for this case. And we also try to use > shadow resource to reserve the remainder of the page which hot-add > device's BAR might be assigned into. 'shadow' usually means you have a corresponding part being shadowed, while here looks you mostly want some 'dummy' resource for reservation purpose? > + > + if (!(res->start & ~PAGE_MASK)) { > + /* > + * Add shadow resource for sub-page bar whose mmio > + * page is exclusive in case that hot-add device's > + * bar is assigned into the mem hole. > + */ > + shadow_res = kzalloc(sizeof(*shadow_res), GFP_KERNEL); > + shadow_res->resource.start = res->end + 1; > + shadow_res->resource.end = res->start + PAGE_SIZE - 1; What about res->start not page aligned so you end up still having a portion before res->start not exclusively reserved? > + shadow_res->resource.flags = res->flags; > + if (request_resource(res->parent, > + &shadow_res->resource)) { > + kfree(shadow_res); > + return false; > + } > + shadow_res->index = index; > + list_add(&shadow_res->res_next, > + &vdev->shadow_resources_list); > + return true; Thanks Kevin ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver
On 05/03/2016 09:41 AM, Gavin Shan wrote: On Wed, Apr 20, 2016 at 11:55:56AM +1000, Alistair Popple wrote: On Tue, 19 Apr 2016 20:36:48 Alexey Kardashevskiy wrote: On 02/17/2016 02:44 PM, Gavin Shan wrote: This adds standalone driver to support PCI hotplug for PowerPC PowerNV platform that runs on top of skiboot firmware. The firmware identifies hotpluggable slots and marked their device tree node with proper "ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans device tree nodes to create/register PCI hotplug slot accordingly. The PCI slots are organized in fashion of tree, which means one PCI slot might have parent PCI slot and parent PCI slot possibly contains multiple child PCI slots. At the plugging time, the parent PCI slot is populated before its children. The child PCI slots are removed before their parent PCI slot can be removed from the system. If the skiboot firmware doesn't support slot status retrieval, the PCI slot device node shouldn't have property "ibm,reset-by-firmware". In that case, none of valid PCI slots will be detected from device tree. The skiboot firmware doesn't export the capability to access attention LEDs yet and it's something for TBD. Signed-off-by: Gavin ShanAcked-by: Bjorn Helgaas --- drivers/pci/hotplug/Kconfig | 12 + drivers/pci/hotplug/Makefile | 3 + drivers/pci/hotplug/pnv_php.c | 870 ++ 3 files changed, 885 insertions(+) create mode 100644 drivers/pci/hotplug/pnv_php.c diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..167c8ce 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC When in doubt, say N. +config HOTPLUG_PCI_POWERNV + tristate "PowerPC PowerNV PCI Hotplug driver" + depends on PPC_POWERNV && EEH + help + Say Y here if you run PowerPC PowerNV platform that supports + PCI Hotplug + + To compile this driver as a module, choose M here: the + module will be called pnv-php. + + When in doubt, say N. 
+ config HOTPLUG_PCI_RPA tristate "RPA PCI Hotplug driver" depends on PPC_PSERIES && EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index b616e75..e33cdda 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC)+= cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC)+= shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +pnv-php-objs := pnv_php.o + rpaphp-objs := rpaphp_core.o \ rpaphp_pci.o\ rpaphp_slot.o diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c new file mode 100644 index 000..364ec36 --- /dev/null +++ b/drivers/pci/hotplug/pnv_php.c @@ -0,0 +1,870 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. + * + * Copyright Gavin Shan, IBM Corporation 2015. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver" + +struct pnv_php_slot { + struct hotplug_slot slot; + struct hotplug_slot_infoslot_info; + uint64_tid; + char*name; + int slot_no; + struct kref kref; +#define PNV_PHP_STATE_INITIALIZED 0 +#define PNV_PHP_STATE_REGISTERED 1 +#define PNV_PHP_STATE_POPULATED2 + int state; + struct device_node *dn; + struct pci_dev *pdev; + struct pci_bus *bus; + boolpower_state_check; + int power_state_confirmed; +#define PNV_PHP_POWER_CONFIRMED_INVALID0 +#define PNV_PHP_POWER_CONFIRMED_SUCCESS1 +#define PNV_PHP_POWER_CONFIRMED_FAIL 2 + struct opal_msg
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, May 2, 2016 at 1:00 PM, Jiri Kosina wrote: > On Mon, 2 May 2016, Jiri Kosina wrote: > >> > FWIW, I just tried this: >> > >> > static bool is_entry_text(unsigned long addr) >> > { >> > return addr >= (unsigned long)__entry_text_start && >> > addr < (unsigned long)__entry_text_end; >> > } >> > >> > it works. So the entry code is already annotated reasonably well :) >> > >> > I just hacked it up here: >> > >> > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack&id=085eacfe0edfc18768e48340084415dba9a6bd21 >> > >> > and it seems to work, at least for page faults. A better >> > implementation would print out the entire contents of pt_regs so that >> > people reading the stack trace will know the registers at the time of >> > the exception, which might be helpful. >> >> Sorry for being dense, but how do you distinguish here between a "real" >> kernel entry, that pushes pt_regs, and any "non-entry" function call that >> passes pt_regs around? > > Umm, actually, the more tricky part is the other way around -- how do you > make sure that whenever you are calling out from a code between > __entry_text_start and __entry_text_end, pt_regs will be at the place > you're looking for it? How's that guaranteed? It's not guaranteed in my code. I think we'd want to add a little table of call sites and their pt_regs offsets. This was just meant to test that the general idea works (and it does indeed generate better traces than the stock kernel, which gets it unconditionally wrong). --Andy > > Thanks, > > -- > Jiri Kosina > SUSE Labs > -- Andy Lutomirski AMA Capital Management, LLC ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver
On Wed, Apr 20, 2016 at 11:55:56AM +1000, Alistair Popple wrote: >On Tue, 19 Apr 2016 20:36:48 Alexey Kardashevskiy wrote: >> On 02/17/2016 02:44 PM, Gavin Shan wrote: >> > This adds standalone driver to support PCI hotplug for PowerPC PowerNV >> > platform that runs on top of skiboot firmware. The firmware identifies >> > hotpluggable slots and marked their device tree node with proper >> > "ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans >> > device tree nodes to create/register PCI hotplug slot accordingly. >> > >> > The PCI slots are organized in fashion of tree, which means one >> > PCI slot might have parent PCI slot and parent PCI slot possibly >> > contains multiple child PCI slots. At the plugging time, the parent >> > PCI slot is populated before its children. The child PCI slots are >> > removed before their parent PCI slot can be removed from the system. >> > >> > If the skiboot firmware doesn't support slot status retrieval, the PCI >> > slot device node shouldn't have property "ibm,reset-by-firmware". In >> > that case, none of valid PCI slots will be detected from device tree. >> > The skiboot firmware doesn't export the capability to access attention >> > LEDs yet and it's something for TBD. >> > >> > Signed-off-by: Gavin Shan>> > Acked-by: Bjorn Helgaas >> > --- >> > drivers/pci/hotplug/Kconfig | 12 + >> > drivers/pci/hotplug/Makefile | 3 + >> > drivers/pci/hotplug/pnv_php.c | 870 >> > ++ >> > 3 files changed, 885 insertions(+) >> > create mode 100644 drivers/pci/hotplug/pnv_php.c >> > >> > diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig >> > index df8caec..167c8ce 100644 >> > --- a/drivers/pci/hotplug/Kconfig >> > +++ b/drivers/pci/hotplug/Kconfig >> > @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC >> > >> > When in doubt, say N. 
>> > >> > +config HOTPLUG_PCI_POWERNV >> > + tristate "PowerPC PowerNV PCI Hotplug driver" >> > + depends on PPC_POWERNV && EEH >> > + help >> > +Say Y here if you run PowerPC PowerNV platform that supports >> > +PCI Hotplug >> > + >> > +To compile this driver as a module, choose M here: the >> > +module will be called pnv-php. >> > + >> > +When in doubt, say N. >> > + >> > config HOTPLUG_PCI_RPA >> >tristate "RPA PCI Hotplug driver" >> >depends on PPC_PSERIES && EEH >> > diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile >> > index b616e75..e33cdda 100644 >> > --- a/drivers/pci/hotplug/Makefile >> > +++ b/drivers/pci/hotplug/Makefile >> > @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o >> > obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)+= cpcihp_zt5550.o >> > obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o >> > obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o >> > +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o >> > obj-$(CONFIG_HOTPLUG_PCI_RPA)+= rpaphp.o >> > obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o >> > obj-$(CONFIG_HOTPLUG_PCI_SGI)+= sgi_hotplug.o >> > @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ >> > acpiphp-objs := acpiphp_core.o \ >> >acpiphp_glue.o >> > >> > +pnv-php-objs := pnv_php.o >> > + >> > rpaphp-objs := rpaphp_core.o \ >> >rpaphp_pci.o\ >> >rpaphp_slot.o >> > diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c >> > new file mode 100644 >> > index 000..364ec36 >> > --- /dev/null >> > +++ b/drivers/pci/hotplug/pnv_php.c >> > @@ -0,0 +1,870 @@ >> > +/* >> > + * PCI Hotplug Driver for PowerPC PowerNV platform. >> > + * >> > + * Copyright Gavin Shan, IBM Corporation 2015. >> > + * >> > + * This program is free software; you can redistribute it and/or modify >> > + * it under the terms of the GNU General Public License as published by >> > + * the Free Software Foundation; either version 2 of the License, or >> > + * (at your option) any later version. 
>> > + */ >> > + >> > +#include >> > +#include >> > +#include >> > +#include >> > + >> > +#include >> > +#include >> > +#include >> > + >> > +#define DRIVER_VERSION"0.1" >> > +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" >> > +#define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver" >> > + >> > +struct pnv_php_slot { >> > + struct hotplug_slot slot; >> > + struct hotplug_slot_infoslot_info; >> > + uint64_tid; >> > + char*name; >> > + int slot_no; >> > + struct kref kref; >> > +#define PNV_PHP_STATE_INITIALIZED 0 >> > +#define PNV_PHP_STATE_REGISTERED 1 >> > +#define PNV_PHP_STATE_POPULATED 2 >> > + int state; >> > + struct
Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver
On Mon, May 02, 2016 at 04:11:53PM +1000, Alexey Kardashevskiy wrote: >On 05/02/2016 01:44 PM, Gavin Shan wrote: >>On Tue, Apr 19, 2016 at 08:36:48PM +1000, Alexey Kardashevskiy wrote: >>>On 02/17/2016 02:44 PM, Gavin Shan wrote: This adds standalone driver to support PCI hotplug for PowerPC PowerNV platform that runs on top of skiboot firmware. The firmware identifies hotpluggable slots and marked their device tree node with proper "ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans device tree nodes to create/register PCI hotplug slot accordingly. The PCI slots are organized in fashion of tree, which means one PCI slot might have parent PCI slot and parent PCI slot possibly contains multiple child PCI slots. At the plugging time, the parent PCI slot is populated before its children. The child PCI slots are removed before their parent PCI slot can be removed from the system. If the skiboot firmware doesn't support slot status retrieval, the PCI slot device node shouldn't have property "ibm,reset-by-firmware". In that case, none of valid PCI slots will be detected from device tree. The skiboot firmware doesn't export the capability to access attention LEDs yet and it's something for TBD. Signed-off-by: Gavin ShanAcked-by: Bjorn Helgaas --- drivers/pci/hotplug/Kconfig | 12 + drivers/pci/hotplug/Makefile | 3 + drivers/pci/hotplug/pnv_php.c | 870 ++ 3 files changed, 885 insertions(+) create mode 100644 drivers/pci/hotplug/pnv_php.c diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..167c8ce 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC When in doubt, say N. +config HOTPLUG_PCI_POWERNV + tristate "PowerPC PowerNV PCI Hotplug driver" + depends on PPC_POWERNV && EEH + help + Say Y here if you run PowerPC PowerNV platform that supports + PCI Hotplug + + To compile this driver as a module, choose M here: the + module will be called pnv-php. 
+ + When in doubt, say N. + config HOTPLUG_PCI_RPA tristate "RPA PCI Hotplug driver" depends on PPC_PSERIES && EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index b616e75..e33cdda 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +pnv-php-objs := pnv_php.o + rpaphp-objs:= rpaphp_core.o \ rpaphp_pci.o\ rpaphp_slot.o diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c new file mode 100644 index 000..364ec36 --- /dev/null +++ b/drivers/pci/hotplug/pnv_php.c @@ -0,0 +1,870 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. + * + * Copyright Gavin Shan, IBM Corporation 2015. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver" + +struct pnv_php_slot { + struct hotplug_slot slot; + struct hotplug_slot_infoslot_info; + uint64_tid; + char*name; + int slot_no; + struct kref kref; +#define PNV_PHP_STATE_INITIALIZED 0 +#define PNV_PHP_STATE_REGISTERED 1 +#define PNV_PHP_STATE_POPULATED2 + int state; + struct device_node
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, 2 May 2016, Jiri Kosina wrote: > > FWIW, I just tried this: > > > > static bool is_entry_text(unsigned long addr) > > { > > return addr >= (unsigned long)__entry_text_start && > > addr < (unsigned long)__entry_text_end; > > } > > > > it works. So the entry code is already annotated reasonably well :) > > > > I just hacked it up here: > > > > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack&id=085eacfe0edfc18768e48340084415dba9a6bd21 > > > > and it seems to work, at least for page faults. A better > > implementation would print out the entire contents of pt_regs so that > > people reading the stack trace will know the registers at the time of > > the exception, which might be helpful. > > Sorry for being dense, but how do you distinguish here between a "real" > kernel entry, that pushes pt_regs, and any "non-entry" function call that > passes pt_regs around? Umm, actually, the more tricky part is the other way around -- how do you make sure that whenever you are calling out from a code between __entry_text_start and __entry_text_end, pt_regs will be at the place you're looking for it? How's that guaranteed? Thanks, -- Jiri Kosina SUSE Labs ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, 2 May 2016, Andy Lutomirski wrote: > FWIW, I just tried this: > > static bool is_entry_text(unsigned long addr) > { > return addr >= (unsigned long)__entry_text_start && > addr < (unsigned long)__entry_text_end; > } > > it works. So the entry code is already annotated reasonably well :) > > I just hacked it up here: > > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack&id=085eacfe0edfc18768e48340084415dba9a6bd21 > > and it seems to work, at least for page faults. A better > implementation would print out the entire contents of pt_regs so that > people reading the stack trace will know the registers at the time of > the exception, which might be helpful. Sorry for being dense, but how do you distinguish here between a "real" kernel entry, that pushes pt_regs, and any "non-entry" function call that passes pt_regs around? -- Jiri Kosina SUSE Labs ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, May 02, 2016 at 11:12:39AM -0700, Andy Lutomirski wrote: > On Mon, May 2, 2016 at 10:31 AM, Josh Poimboeufwrote: > > On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote: > >> On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf wrote: > >> > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote: > >> >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf" wrote: > >> >> > > >> >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote: > >> >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf > >> >> > > wrote: > >> >> > > >> I suppose we could try to rejigger the code so that rbp points to > >> >> > > >> pt_regs or similar. > >> >> > > > > >> >> > > > I think we should avoid doing something like that because it > >> >> > > > would break > >> >> > > > gdb and all the other unwinders who don't know about it. > >> >> > > > >> >> > > How so? > >> >> > > > >> >> > > Currently, rbp in the entry code is meaningless. I'm suggesting > >> >> > > that, > >> >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to > >> >> > > the pt_regs. Currently it points to something stale (which the > >> >> > > dump_stack code might be relying on. Hmm.) But it's probably also > >> >> > > safe to assume that if you unwind to the 'call \do_sym', then > >> >> > > pt_regs > >> >> > > is the next thing on the stack, so just doing the section thing > >> >> > > would > >> >> > > work. > >> >> > > >> >> > Yes, rbp is meaningless on the entry from user space. But if an > >> >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have > >> >> > nested entry, rbp keeps its old value, right? So the unwinder can > >> >> > walk > >> >> > past the nested entry frame and keep going until it gets to the > >> >> > original > >> >> > entry. > >> >> > >> >> Yes. > >> >> > >> >> It would be nice if we could do better, though, and actually notice > >> >> the pt_regs and identify the entry. 
For example, I'd love to see > >> >> "page fault, RIP=xyz" printed in the middle of a stack dump on a > >> >> crash. > >> >> > >> >> Also, I think that just following rbp links will lose the > >> >> actual function that took the page fault (or whatever function > >> >> pt_regs->ip actually points to). > >> > > >> > Hm. I think we could fix all that in a more standard way. Whenever a > >> > new pt_regs frame gets saved on entry, we could also create a new stack > >> > frame which points to a fake kernel_entry() function. That would tell > >> > the unwinder there's a pt_regs frame without otherwise breaking frame > >> > pointers across the frame. > >> > > >> > Then I guess we wouldn't need my other solution of putting the idt > >> > entries in a special section. > >> > > >> > How does that sound? > >> > >> Let me try to understand. > >> > >> The normal call sequence is call; push %rbp; mov %rsp, %rbp. So rbp > >> points to (prev rbp, prev rip) on the stack, and you can follow the > >> chain back. Right now, on a user access page fault or similar, we > >> have rbp (probably) pointing to the interrupted frame, and the > >> interrupted rip isn't saved anywhere that a naive unwinder can find > >> it. (It's in pt_regs, but the rbp chain skips right over that.) > >> > >> We could change the entry code so that an interrupt / idtentry does: > >> > >> push pt_regs > >> push kernel_entry > >> push %rbp > >> mov %rsp, %rbp > >> call handler > >> pop %rbp > >> addq $8, %rsp > >> > >> or similar. That would make it appear that the actual C handler was > >> caused by a dummy function "kernel_entry". Now the unwinder would get > >> to kernel_entry, but it *still* wouldn't find its way to the calling > >> frame, which only solves part of the problem. We could at least teach > >> the unwinder how kernel_entry works and let it decode pt_regs to > >> continue unwinding. This would be nice, and I think it could work. > > > > Yeah, that's about what I had in mind. 
> > FWIW, I just tried this: > > static bool is_entry_text(unsigned long addr) > { > return addr >= (unsigned long)__entry_text_start && > addr < (unsigned long)__entry_text_end; > } > > it works. So the entry code is already annotated reasonably well :) > > I just hacked it up here: > > https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21 > > and it seems to work, at least for page faults. A better > implementation would print out the entire contents of pt_regs so that > people reading the stack trace will know the registers at the time of > the exception, which might be helpful. I still think we would need more specific annotations to do that reliably: a call from entry code doesn't necessarily correlate with a pt_regs frame. > >> I think I like this, except that, if it used a separate section, it > >> could potentially be faster, as, for each actual entry type, the > >>
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
* Andy Lutomirski wrote: > > Another idea to detect missing frames: for each return address on the > > stack, > > ensure there's a corresponding "call <func>" instruction immediately > > preceding > > the return location, where <func> matches what's on the stack. > > Hmm, interesting. > > I hope your plans include rewriting the current stack unwinder completely. > The > thing in print_context_stack is (a) hard-to-understand and hard-to-modify > crap > and (b) is called in a loop from another file using totally ridiculous > conventions. So we had several attempts at making it better, any further improvements (including radical rewrites) are more than welcome! The generalization between the various stack walking methods certainly didn't make things easier to read - we might want to eliminate that by using better primitives to iterate over the stack frame. Thanks, Ingo ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, May 2, 2016 at 10:31 AM, Josh Poimboeufwrote: > On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote: >> On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeuf wrote: >> > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote: >> >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf" wrote: >> >> > >> >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote: >> >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf >> >> > > wrote: >> >> > > >> I suppose we could try to rejigger the code so that rbp points to >> >> > > >> pt_regs or similar. >> >> > > > >> >> > > > I think we should avoid doing something like that because it would >> >> > > > break >> >> > > > gdb and all the other unwinders who don't know about it. >> >> > > >> >> > > How so? >> >> > > >> >> > > Currently, rbp in the entry code is meaningless. I'm suggesting that, >> >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to >> >> > > the pt_regs. Currently it points to something stale (which the >> >> > > dump_stack code might be relying on. Hmm.) But it's probably also >> >> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs >> >> > > is the next thing on the stack, so just doing the section thing would >> >> > > work. >> >> > >> >> > Yes, rbp is meaningless on the entry from user space. But if an >> >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have >> >> > nested entry, rbp keeps its old value, right? So the unwinder can walk >> >> > past the nested entry frame and keep going until it gets to the original >> >> > entry. >> >> >> >> Yes. >> >> >> >> It would be nice if we could do better, though, and actually notice >> >> the pt_regs and identify the entry. For example, I'd love to see >> >> "page fault, RIP=xyz" printed in the middle of a stack dump on a >> >> crash. 
>> >> >> >> Also, I think that just following rbp links will lose the >> >> actual function that took the page fault (or whatever function >> >> pt_regs->ip actually points to). >> > >> > Hm. I think we could fix all that in a more standard way. Whenever a >> > new pt_regs frame gets saved on entry, we could also create a new stack >> > frame which points to a fake kernel_entry() function. That would tell >> > the unwinder there's a pt_regs frame without otherwise breaking frame >> > pointers across the frame. >> > >> > Then I guess we wouldn't need my other solution of putting the idt >> > entries in a special section. >> > >> > How does that sound? >> >> Let me try to understand. >> >> The normal call sequence is call; push %rbp; mov %rsp, %rbp. So rbp >> points to (prev rbp, prev rip) on the stack, and you can follow the >> chain back. Right now, on a user access page fault or similar, we >> have rbp (probably) pointing to the interrupted frame, and the >> interrupted rip isn't saved anywhere that a naive unwinder can find >> it. (It's in pt_regs, but the rbp chain skips right over that.) >> >> We could change the entry code so that an interrupt / idtentry does: >> >> push pt_regs >> push kernel_entry >> push %rbp >> mov %rsp, %rbp >> call handler >> pop %rbp >> addq $8, %rsp >> >> or similar. That would make it appear that the actual C handler was >> caused by a dummy function "kernel_entry". Now the unwinder would get >> to kernel_entry, but it *still* wouldn't find its way to the calling >> frame, which only solves part of the problem. We could at least teach >> the unwinder how kernel_entry works and let it decode pt_regs to >> continue unwinding. This would be nice, and I think it could work. > > Yeah, that's about what I had in mind. FWIW, I just tried this: static bool is_entry_text(unsigned long addr) { return addr >= (unsigned long)__entry_text_start && addr < (unsigned long)__entry_text_end; } it works. 
So the entry code is already annotated reasonably well :) I just hacked it up here: https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=stack=085eacfe0edfc18768e48340084415dba9a6bd21 and it seems to work, at least for page faults. A better implementation would print out the entire contents of pt_regs so that people reading the stack trace will know the registers at the time of the exception, which might be helpful. > >> I think I like this, except that, if it used a separate section, it >> could potentially be faster, as, for each actual entry type, the >> offset from the C handler frame to pt_regs is a foregone conclusion. > > Hm, this I don't really follow. It's true that the unwinder can easily > find RIP from pt_regs, which will always be a known offset from the > kernel_entry pointer on the stack. But why would having the entry code > in a separate section make that faster? It doesn't make the unwinder faster -- it makes the entry code faster. > >> But this is pretty simple and performance is already abysmal in most >>
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, May 02, 2016 at 08:52:41AM -0700, Andy Lutomirski wrote: > On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeufwrote: > > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote: > >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf" wrote: > >> > > >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote: > >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf > >> > > wrote: > >> > > >> I suppose we could try to rejigger the code so that rbp points to > >> > > >> pt_regs or similar. > >> > > > > >> > > > I think we should avoid doing something like that because it would > >> > > > break > >> > > > gdb and all the other unwinders who don't know about it. > >> > > > >> > > How so? > >> > > > >> > > Currently, rbp in the entry code is meaningless. I'm suggesting that, > >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to > >> > > the pt_regs. Currently it points to something stale (which the > >> > > dump_stack code might be relying on. Hmm.) But it's probably also > >> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs > >> > > is the next thing on the stack, so just doing the section thing would > >> > > work. > >> > > >> > Yes, rbp is meaningless on the entry from user space. But if an > >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have > >> > nested entry, rbp keeps its old value, right? So the unwinder can walk > >> > past the nested entry frame and keep going until it gets to the original > >> > entry. > >> > >> Yes. > >> > >> It would be nice if we could do better, though, and actually notice > >> the pt_regs and identify the entry. For example, I'd love to see > >> "page fault, RIP=xyz" printed in the middle of a stack dump on a > >> crash. > >> > >> Also, I think that just following rbp links will lose the > >> actual function that took the page fault (or whatever function > >> pt_regs->ip actually points to). > > > > Hm. 
I think we could fix all that in a more standard way. Whenever a > > new pt_regs frame gets saved on entry, we could also create a new stack > > frame which points to a fake kernel_entry() function. That would tell > > the unwinder there's a pt_regs frame without otherwise breaking frame > > pointers across the frame. > > > > Then I guess we wouldn't need my other solution of putting the idt > > entries in a special section. > > > > How does that sound? > > Let me try to understand. > > The normal call sequence is call; push %rbp; mov %rsp, %rbp. So rbp > points to (prev rbp, prev rip) on the stack, and you can follow the > chain back. Right now, on a user access page fault or similar, we > have rbp (probably) pointing to the interrupted frame, and the > interrupted rip isn't saved anywhere that a naive unwinder can find > it. (It's in pt_regs, but the rbp chain skips right over that.) > > We could change the entry code so that an interrupt / idtentry does: > > push pt_regs > push kernel_entry > push %rbp > mov %rsp, %rbp > call handler > pop %rbp > addq $8, %rsp > > or similar. That would make it appear that the actual C handler was > caused by a dummy function "kernel_entry". Now the unwinder would get > to kernel_entry, but it *still* wouldn't find its way to the calling > frame, which only solves part of the problem. We could at least teach > the unwinder how kernel_entry works and let it decode pt_regs to > continue unwinding. This would be nice, and I think it could work. Yeah, that's about what I had in mind. > I think I like this, except that, if it used a separate section, it > could potentially be faster, as, for each actual entry type, the > offset from the C handler frame to pt_regs is a foregone conclusion. Hm, this I don't really follow. It's true that the unwinder can easily find RIP from pt_regs, which will always be a known offset from the kernel_entry pointer on the stack. But why would having the entry code in a separate section make that faster? 
> But this is pretty simple and performance is already abysmal in most > handlers. > > There's an added benefit to using a separate section, though: we could > also annotate the calls with what type of entry they were so the > unwinder could print it out nicely. Yeah, that could be a nice feature... but doesn't printing the name of the C handler pretty much already give that information? In any case, once we have a working DWARF unwinder, I think it will show the name of the idt entry anyway. > >> Have you looked at my vdso unwinding test at all? If we could do > >> something similar for the kernel, IMO it would make testing much more > >> pleasant. > > > > I found it, but I'm not sure what it would mean to do something similar > > for the kernel. Do you mean doing something like an NMI sampling-based > > approach where we periodically do a random stack sanity check? > > I was imagining something a little more strict: single-step >
Re: [PATCH v5] powerpc/pci: Assign fixed PHB number based on device-tree properties
On Thu, Apr 14, 2016 at 06:55:24PM -0300, Guilherme G. Piccoli wrote: > The domain/PHB field of PCI addresses has its value obtained from a > global variable, incremented each time a new domain (represented by > struct pci_controller) is added on the system. The domain addition > process happens during boot or due to PCI device hotplug. > > As recent kernels are using predictable naming for network interfaces, > the network stack is more tied to PCI naming. This can be a problem in > hotplug scenarios, because PCI addresses will change if devices are > removed and then re-added. This situation seems unusual, but it can > happen if a user wants to replace a NIC without rebooting the machine, > for example. > > This patch changes the way PCI domain values are generated: now, we use > device-tree properties to assign fixed PHB numbers to PCI addresses > when available (meaning pSeries and PowerNV cases). We also use a bitmap > to allow dynamic PHB numbering when device-tree properties are not > used. This bitmap keeps track of used PHB numbers and if a PHB is > released (by hotplug operations for example), it allows the reuse of > this PHB number, avoiding PCI address to change in case of device remove > and re-add soon after. No functional changes were introduced. > > Reviewed-by: Gavin Shan> Signed-off-by: Guilherme G. Piccoli I assume the powerpc guys will take care of this. Let me know if you need me to do anything. > --- > arch/powerpc/kernel/pci-common.c | 66 > ++-- > 1 file changed, 63 insertions(+), 3 deletions(-) > > v5: > * Improved comments. > > * Changed the the Fixed PHB Numbering to set the PHB number bit > on the bitmap anyway, avoiding issues when system has virtual PHBs. > > * Changed the device-tree check order - now, firstly we check for > "ibm,opal-phbid" and if it's not available, we try the pSeries case. > > v4: > * Minor change (if/else nesting rearranged). > > v3: > * Made the bitmap static. 
> > * Rearranged if/else statements of Fixed PHB checking. > > * Improved bitmap checkings, by removing loop and using instead the > find_first_zero_bit() function. > > * Removed the single-statement function release_phb_number() by > adding its logic directly into pcibios_free_controller(). > > *Added check for bitmap size before clearing bit, avoiding memory > corruption. > > v2: > * Added the Fixed PHB Numbering mechanism based on device-tree > properties. > > * Changed list approach to bitmap on the Dynamic PHB Numbering > mechanism. > > diff --git a/arch/powerpc/kernel/pci-common.c > b/arch/powerpc/kernel/pci-common.c > index 0f7a60f..ad423c1 100644 > --- a/arch/powerpc/kernel/pci-common.c > +++ b/arch/powerpc/kernel/pci-common.c > @@ -41,11 +41,17 @@ > #include > #include > > +/* hose_spinlock protects accesses to the the phb_bitmap. */ > static DEFINE_SPINLOCK(hose_spinlock); > LIST_HEAD(hose_list); > > -/* XXX kill that some day ... */ > -static int global_phb_number;/* Global phb counter */ > +/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */ > +#define MAX_PHBS8192 > + > +/* For dynamic PHB numbering: used/free PHBs tracking bitmap. > + * Accesses to this bitmap should be protected by hose_spinlock. > + */ > +static DECLARE_BITMAP(phb_bitmap, MAX_PHBS); > > /* ISA Memory physical address */ > resource_size_t isa_mem_base; > @@ -64,6 +70,55 @@ struct dma_map_ops *get_pci_dma_ops(void) > } > EXPORT_SYMBOL(get_pci_dma_ops); > > +/* get_phb_number() function should run under locking > + * protection, specifically hose_spinlock. > + */ > +static int get_phb_number(struct device_node *dn) > +{ > + const __be64 *prop64; > + const __be32 *regs; > + int phb_id = 0; > + > + /* Try fixed PHB numbering first, by checking archs and reading > + * the respective device-tree properties. Firstly, try PowerNV by > + * reading "ibm,opal-phbid", only present in OPAL environment. 
> + */ > + prop64 = of_get_property(dn, "ibm,opal-phbid", NULL); > + if (prop64) { > + phb_id = (int)(be64_to_cpup(prop64) & 0x); > + > + } else if (machine_is(pseries)) { > + regs = of_get_property(dn, "reg", NULL); > + if (regs) > + phb_id = (int)(be32_to_cpu(regs[1]) & 0x); > + } else { > + goto dynamic_phb_numbering; > + } > + > + /* If we have a huge PHB number obtained from device-tree, no need > + * to worry with the bitmap. Otherwise, we need to be sure we're > + * not trying to use the same PHB number twice. > + */ > + if (phb_id < MAX_PHBS) { > + if (test_bit(phb_id, phb_bitmap)) > + goto dynamic_phb_numbering; > + set_bit(phb_id, phb_bitmap); > + } > + > + return phb_id; > + > + /* If not pSeries nor
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Mon, May 2, 2016 at 6:52 AM, Josh Poimboeufwrote: > On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote: >> On Apr 29, 2016 3:41 PM, "Josh Poimboeuf" wrote: >> > >> > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote: >> > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf >> > > wrote: >> > > >> I suppose we could try to rejigger the code so that rbp points to >> > > >> pt_regs or similar. >> > > > >> > > > I think we should avoid doing something like that because it would >> > > > break >> > > > gdb and all the other unwinders who don't know about it. >> > > >> > > How so? >> > > >> > > Currently, rbp in the entry code is meaningless. I'm suggesting that, >> > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to >> > > the pt_regs. Currently it points to something stale (which the >> > > dump_stack code might be relying on. Hmm.) But it's probably also >> > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs >> > > is the next thing on the stack, so just doing the section thing would >> > > work. >> > >> > Yes, rbp is meaningless on the entry from user space. But if an >> > in-kernel interrupt occurs (e.g. page fault, preemption) and you have >> > nested entry, rbp keeps its old value, right? So the unwinder can walk >> > past the nested entry frame and keep going until it gets to the original >> > entry. >> >> Yes. >> >> It would be nice if we could do better, though, and actually notice >> the pt_regs and identify the entry. For example, I'd love to see >> "page fault, RIP=xyz" printed in the middle of a stack dump on a >> crash. >> >> Also, I think that just following rbp links will lose the >> actual function that took the page fault (or whatever function >> pt_regs->ip actually points to). > > Hm. I think we could fix all that in a more standard way. Whenever a > new pt_regs frame gets saved on entry, we could also create a new stack > frame which points to a fake kernel_entry() function. 
That would tell > the unwinder there's a pt_regs frame without otherwise breaking frame > pointers across the frame. > > Then I guess we wouldn't need my other solution of putting the idt > entries in a special section. > > How does that sound? Let me try to understand. The normal call sequence is call; push %rbp; mov %rsp, %rbp. So rbp points to (prev rbp, prev rip) on the stack, and you can follow the chain back. Right now, on a user access page fault or similar, we have rbp (probably) pointing to the interrupted frame, and the interrupted rip isn't saved anywhere that a naive unwinder can find it. (It's in pt_regs, but the rbp chain skips right over that.) We could change the entry code so that an interrupt / idtentry does: push pt_regs push kernel_entry push %rbp mov %rsp, %rbp call handler pop %rbp addq $8, %rsp or similar. That would make it appear that the actual C handler was caused by a dummy function "kernel_entry". Now the unwinder would get to kernel_entry, but it *still* wouldn't find its way to the calling frame, which only solves part of the problem. We could at least teach the unwinder how kernel_entry works and let it decode pt_regs to continue unwinding. This would be nice, and I think it could work. I think I like this, except that, if it used a separate section, it could potentially be faster, as, for each actual entry type, the offset from the C handler frame to pt_regs is a foregone conclusion. But this is pretty simple and performance is already abysmal in most handlers. There's an added benefit to using a separate section, though: we could also annotate the calls with what type of entry they were so the unwinder could print it out nicely. I could be convinced either way. > >> Have you looked at my vdso unwinding test at all? If we could do >> something similar for the kernel, IMO it would make testing much more >> pleasant. > > I found it, but I'm not sure what it would mean to do something similar > for the kernel. 
Do you mean doing something like an NMI sampling-based > approach where we periodically do a random stack sanity check? I was imagining something a little more strict: single-step interesting parts of the kernel and make sure that each step unwinds correctly. That could detect missing frames and similar. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking
On Fri, Apr 29, 2016 at 05:08:50PM -0700, Andy Lutomirski wrote: > On Apr 29, 2016 3:41 PM, "Josh Poimboeuf"wrote: > > > > On Fri, Apr 29, 2016 at 02:37:41PM -0700, Andy Lutomirski wrote: > > > On Fri, Apr 29, 2016 at 2:25 PM, Josh Poimboeuf > > > wrote: > > > >> I suppose we could try to rejigger the code so that rbp points to > > > >> pt_regs or similar. > > > > > > > > I think we should avoid doing something like that because it would break > > > > gdb and all the other unwinders who don't know about it. > > > > > > How so? > > > > > > Currently, rbp in the entry code is meaningless. I'm suggesting that, > > > when we do, for example, 'call \do_sym' in idtentry, we point rbp to > > > the pt_regs. Currently it points to something stale (which the > > > dump_stack code might be relying on. Hmm.) But it's probably also > > > safe to assume that if you unwind to the 'call \do_sym', then pt_regs > > > is the next thing on the stack, so just doing the section thing would > > > work. > > > > Yes, rbp is meaningless on the entry from user space. But if an > > in-kernel interrupt occurs (e.g. page fault, preemption) and you have > > nested entry, rbp keeps its old value, right? So the unwinder can walk > > past the nested entry frame and keep going until it gets to the original > > entry. > > Yes. > > It would be nice if we could do better, though, and actually notice > the pt_regs and identify the entry. For example, I'd love to see > "page fault, RIP=xyz" printed in the middle of a stack dump on a > crash. > > Also, I think that just following rbp links will lose the > actual function that took the page fault (or whatever function > pt_regs->ip actually points to). Hm. I think we could fix all that in a more standard way. Whenever a new pt_regs frame gets saved on entry, we could also create a new stack frame which points to a fake kernel_entry() function. That would tell the unwinder there's a pt_regs frame without otherwise breaking frame pointers across the frame. 
Then I guess we wouldn't need my other solution of putting the idt entries in a special section. How does that sound? > Have you looked at my vdso unwinding test at all? If we could do > something similar for the kernel, IMO it would make testing much more > pleasant. I found it, but I'm not sure what it would mean to do something similar for the kernel. Do you mean doing something like an NMI sampling-based approach where we periodically do a random stack sanity check? (If so, I do have something like that planned.) -- Josh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/mm/slice: Remove slice_mm_new_context
The existing usage is bogus, because we set the context.id value in the same function. The book3s 64 got removed in the old patch. Hence remove the redundant definition. Signed-off-by: Aneesh Kumar K.V--- arch/powerpc/include/asm/page_64.h | 3 --- arch/powerpc/mm/mmu_context_nohash.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 77488857c26d..dd5f0712afa2 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -128,8 +128,6 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); -#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT) - #endif /* __ASSEMBLY__ */ #else #define slice_init() @@ -151,7 +149,6 @@ do {\ #define slice_set_range_psize(mm, start, len, psize) \ slice_set_user_psize((mm), (psize)) -#define slice_mm_new_context(mm) 1 #endif /* CONFIG_PPC_MM_SLICES */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index a36c43a27893..7d95bc402dba 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -335,8 +335,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.active = 0; #ifdef CONFIG_PPC_MM_SLICES - if (slice_mm_new_context(mm)) - slice_set_user_psize(mm, mmu_virtual_psize); + slice_set_user_psize(mm, mmu_virtual_psize); #endif return 0; -- 2.7.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [Qemu-devel] [PATCH v2] spapr: Don't set the TM ibm, pa-features bit in PR KVM mode
On Sat, Apr 30, 2016 at 6:18 AM, Anton Blanchard wrote: > We don't support transactional memory in PR KVM, so don't tell > the OS that we do. > > Signed-off-by: Anton Blanchard > --- > > v2: Fix build with CONFIG_KVM disabled, noticed by Alex. > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index b69995e..dc3e3c9 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -696,6 +696,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void > *fdt, int offset, > } else /* env->mmu_model == POWERPC_MMU_2_07 */ { > pa_features = pa_features_207; > pa_size = sizeof(pa_features_207); > + > +#ifdef CONFIG_KVM > +/* Don't enable TM in PR KVM mode */ > +if (kvm_enabled() && > +kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { > +pa_features[24] &= ~0x80; > +} > +#endif > } > if (env->ci_large_pages) { > pa_features[3] |= 0x20; > This email was put in the spam folder by gmail. The message said "It has a from address in samba.org but has failed samba.org's required tests for authentication". Just bringing this to people's attention. I thought the patch might go unnoticed otherwise. -- With regards, Md Haris Iqbal, Placement Coordinator, MTech IT NITK Surathkal, Contact: +91 8861996962 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: linux-next: build failure after merge of the akpm-current tree
Stephen Rothwell writes: > Hi Andrew, > > After merging the akpm-current tree, today's linux-next build (powerpc > allyesconfig and pseries_le_defconfig) failed like this: > > In file included from include/linux/mm.h:394:0, > from mm/huge_memory.c:10: > include/linux/huge_mm.h:53:22: error: initializer element is not constant > #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) > ^ > mm/huge_memory.c:104:62: note: in expansion of macro 'HPAGE_PMD_NR' > static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8; > ^ > > Caused by commit > > 6d34b9749be2 ("mm: make optimistic check for swapin readahead") > > interacting with commit > > dd1842a2a448 ("powerpc/mm: Make page table size a variable") > > from the powerpc tree. > > I applied this fix patch for today (hopefully this is still initialised > early enough): > > From: Stephen Rothwell > Date: Mon, 2 May 2016 18:25:42 +1000 > Subject: [PATCH] mm: make optimistic check for swapin readahead fix > > Signed-off-by: Stephen Rothwell Reviewed-by: Aneesh Kumar K.V > --- > mm/huge_memory.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index f0cd9dbc1157..6aabfa166b6d 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -101,7 +101,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); > * fault. 
> */ > static unsigned int khugepaged_max_ptes_none __read_mostly; > -static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8; > +static unsigned int khugepaged_max_ptes_swap __read_mostly; > static unsigned long allocstall; > > static int khugepaged(void *none); > @@ -703,6 +703,7 @@ static int __init hugepage_init(void) > > khugepaged_pages_to_scan = HPAGE_PMD_NR * 8; > khugepaged_max_ptes_none = HPAGE_PMD_NR - 1; > + khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8; > /* >* hugepages can't be allocated by the buddy allocator >*/ > -- > 2.7.0 > > > > > -- > Cheers, > Stephen Rothwell ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] selftests/powerpc: Fix subpage_prot test to return !0 on failure
Michael Ellerman writes: > It's helpful for automated testing if the test returns error codes back > to the calling program. > > Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V > --- > tools/testing/selftests/powerpc/mm/subpage_prot.c | 10 ++ > 1 file changed, 6 insertions(+), 4 deletions(-) > > diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c > b/tools/testing/selftests/powerpc/mm/subpage_prot.c > index 440180ff8089..7ccdc96b977d 100644 > --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c > +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c > @@ -207,14 +207,16 @@ int test_file(void) > > int main(int argc, char *argv[]) > { > - test_harness(test_anon, "subpage_prot_anon"); > + int rc; > + > + rc = test_harness(test_anon, "subpage_prot_anon"); > + if (rc) > + return rc; > > if (argc > 1) > file_name = argv[1]; > else > file_name = "tempfile"; > > - test_harness(test_file, "subpage_prot_file"); > - > - return 0; > + return test_harness(test_file, "subpage_prot_file"); > } > -- > 2.5.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/2] powerpc/mm/subpage: Fix subpage protection with 4K hpte config
With Linux page size of 64K and hardware only supporting 4K hpte, if we use subpage protection, we always fail for the subpage 0 as shown below (using the selftest subpage_prot test). 520175565: (4520111850): Failed at 0x0x3fffad4b (p=13,sp=0,w=0), want=fault, got=pass ! 4520890210: (4520826495): Failed at 0x0x3fffad5b (p=29,sp=0,w=0), want=fault, got=pass ! 4521574251: (4521510536): Failed at 0x0x3fffad6b (p=45,sp=0,w=0), want=fault, got=pass ! 4522258324: (4522194609): Failed at 0x0x3fffad7b (p=61,sp=0,w=0), want=fault, got=pass ! This is because hash preload wrongly inserts the hpte entry for subpage 0 without looking at the subpage protection information. Don't do hash page table entry preload if we have subpage protection configured for that range. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/hash_utils_64.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 262082e51db1..b5a454415215 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1329,15 +1329,26 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long vsid; pgd_t *pgdir; pte_t *ptep; + int psize; unsigned long flags; int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); #ifdef CONFIG_PPC_MM_SLICES - /* We only prefault standard pages for now */ - if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + psize = get_slice_psize(mm, ea); + /* +* We only prefault standard pages +*/ + if (psize != mm->context.user_psize) return; +#ifdef CONFIG_PPC_64K_PAGES + /* +* Don't prefault is subpage protection is enabled for that ea +*/ + if ((psize == MMU_PAGE_4K) && subpage_protection(mm, ea)) + return; +#endif #endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," -- 2.7.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/2] powerpc/mm/subpage: Fix subpage protection with 4K hpte config
With Linux page size of 64K and hardware only supporting 4K hpte, if we use subpage protection, we always fail for the subpage 0 as shown below (using the selftest subpage_prot test). 520175565: (4520111850): Failed at 0x0x3fffad4b (p=13,sp=0,w=0), want=fault, got=pass ! 4520890210: (4520826495): Failed at 0x0x3fffad5b (p=29,sp=0,w=0), want=fault, got=pass ! 4521574251: (4521510536): Failed at 0x0x3fffad6b (p=45,sp=0,w=0), want=fault, got=pass ! 4522258324: (4522194609): Failed at 0x0x3fffad7b (p=61,sp=0,w=0), want=fault, got=pass ! This is because hash preload wrongly inserts the hpte entry for subpage 0 without looking at the subpage protection information. Don't do hash page table entry preload if we have subpage protection configured for that range. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/hash_utils_64.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 262082e51db1..b5a454415215 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1329,15 +1329,26 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long vsid; pgd_t *pgdir; pte_t *ptep; + int psize; unsigned long flags; int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); #ifdef CONFIG_PPC_MM_SLICES - /* We only prefault standard pages for now */ - if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + psize = get_slice_psize(mm, ea); + /* +* We only prefault standard pages +*/ + if (psize != mm->context.user_psize) return; +#ifdef CONFIG_PPC_64K_PAGES + /* +* Don't prefault is subpage protection is enabled for that ea +*/ + if ((psize == MMU_PAGE_4K) && subpage_protection(mm, ea)) + return; +#endif #endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," -- 2.7.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] powerpc/mm/subpage: Init user psize correctly
Check against a context.id value of zero instead of MMU_NO_CONTEXT when doing a slice psize init. Without this patch we end up with a slice psize value of zero and we always end up using 4K hpte. Signed-off-by: Aneesh Kumar K.V--- arch/powerpc/mm/mmu_context_book3s64.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index b5288b460bef..a28ed6a96286 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -85,8 +85,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* The old code would re-promote on fork, we don't do that * when using slices as it could cause problem promoting slices * that have been forced down to 4K +* +* For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check +* explicitly against context.id == 0. This ensures that we +* properly initialize context slice details for newly allocated +* mm and don't alter context slice inherited via fork. +* +* We should not be calling init_new_context on init_mm. Hence a +* check against 0 is ok. */ - if (slice_mm_new_context(mm)) + if (mm->context.id == 0) slice_set_user_psize(mm, mmu_virtual_psize); subpage_prot_init_new_context(mm); } -- 2.7.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] selftests/powerpc: Test cp_abort during context switch
On Mon, 2 May 2016 13:51:38 +1000 Chris Smart wrote: > Test that performing a copy paste sequence in userspace on P9 does not > result in a leak of the copy into the paste of another process. > > This is based on Anton Blanchard's context_switch benchmarking code. It > sets up two processes tied to the same CPU, one which copies and one > which pastes. > > The paste should never succeed and the test fails if it does. > > This is a test for commit, "8a64904 powerpc: Add support for userspace > P9 copy paste." > Hi Chris, I must admit I didn't run it on real hardware ;). Looks good. > Patch created with much assistance from Michael Neuling > > > Signed-off-by: Chris Smart Reviewed-by: Cyril Bur > --- > tools/testing/selftests/powerpc/Makefile | 1 + > .../selftests/powerpc/context_switch/.gitignore| 1 + > .../selftests/powerpc/context_switch/Makefile | 10 ++ > .../selftests/powerpc/context_switch/cp_abort.c| 110 > + > tools/testing/selftests/powerpc/utils.h| 7 ++ > 5 files changed, 129 insertions(+) > create mode 100644 tools/testing/selftests/powerpc/context_switch/.gitignore > create mode 100644 tools/testing/selftests/powerpc/context_switch/Makefile > create mode 100644 tools/testing/selftests/powerpc/context_switch/cp_abort.c > > diff --git a/tools/testing/selftests/powerpc/Makefile > b/tools/testing/selftests/powerpc/Makefile > index b08f77cbe31b..4ca83fe80654 100644 > --- a/tools/testing/selftests/powerpc/Makefile > +++ b/tools/testing/selftests/powerpc/Makefile > @@ -14,6 +14,7 @@ export CFLAGS > > SUB_DIRS = benchmarks\ > copyloops\ > +context_switch \ > dscr \ > mm \ > pmu \ > diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore > b/tools/testing/selftests/powerpc/context_switch/.gitignore > new file mode 100644 > index ..c1431af7b51c > --- /dev/null > +++ b/tools/testing/selftests/powerpc/context_switch/.gitignore > @@ -0,0 +1 @@ > +cp_abort > diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile > 
b/tools/testing/selftests/powerpc/context_switch/Makefile > new file mode 100644 > index ..e164d1466466 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/context_switch/Makefile > @@ -0,0 +1,10 @@ > +TEST_PROGS := cp_abort > + > +all: $(TEST_PROGS) > + > +$(TEST_PROGS): ../harness.c ../utils.c > + > +include ../../lib.mk > + > +clean: > + rm -f $(TEST_PROGS) > diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c > b/tools/testing/selftests/powerpc/context_switch/cp_abort.c > new file mode 100644 > index ..5a5b55afda0e > --- /dev/null > +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c > @@ -0,0 +1,110 @@ > +/* > + * Adapted from Anton Blanchard's context switch microbenchmark. > + * > + * Copyright 2009, Anton Blanchard, IBM Corporation. > + * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + * > + * This program tests the copy paste abort functionality of a P9 > + * (or later) by setting up two processes on the same CPU, one > + * which executes the copy instruction and the other which > + * executes paste. > + * > + * The paste instruction should never succeed, as the cp_abort > + * instruction is called by the kernel during a context switch. > + * > + */ > + > +#define _GNU_SOURCE > + > +#include > +#include > +#include > +#include "utils.h" > +#include > + > +#define READ_FD 0 > +#define WRITE_FD 1 > + > +#define NUM_LOOPS 1000 > + > +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section > 4.4. 
*/ > +#define PASTE(RA, RB, L, RC) \ > + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) > | (RC) << (31-31)) > + > +int paste(void *i) > +{ > + int cr; > + > + asm volatile(str(PASTE(0, %1, 1, 1))";" > + "mfcr %0;" > + : "=r" (cr) > + : "b" (i) > + : "memory" > + ); > + return cr; > +} > + > +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section > 4.4. */ > +#define COPY(RA, RB, L) \ > + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) > + > +void copy(void *i) > +{ > + asm volatile(str(COPY(0, %0, 1))";" > + : > + : "b" (i) > + : "memory" > + ); > +} > + > +int test_cp_abort(void) > +{ > + /* 128 bytes for a full cache line */
Re: [PATCH v3 00/16] genrtc removal
Hi Arnd, On Thu, Apr 28, 2016 at 9:48 AM, Geert Uytterhoevenwrote: > On Thu, Apr 28, 2016 at 12:34 AM, Arnd Bergmann wrote: >> I ended up stuffing the two patch series into one, as they are now >> more dependent on one another. This now thoroughly removes the >> genrtc driver including the asm/rtc.h headers it uses. For all >> architectures that still have a meaningful asm/rtc.h, this goes >> through two stages: >> >> 1) make the rtc-generic implementation independent of asm/rtc.h >> 2) remove the asm/rtc.h header and disallow the gen_rtc driver >> >> As the last step, the driver itself gets removed. > > In general, after fixing the minor nit: > Acked-by: Geert Uytterhoeven > > For the m68k bits: > Tested-by: Geert Uytterhoeven More build coverage uncovered two build failures on m68k due to "[PATCH v3 02/16] rtc: cmos: move mc146818rtc code out of asm-generic/rtc.h": bvme6000_defconfig: In file included from arch/m68k/bvme6000/rtc.c:19: include/linux/mc146818rtc.h: In function ‘mc146818_is_updating’: include/linux/mc146818rtc.h:138: error: implicit declaration of function ‘CMOS_READ’ include/linux/mc146818rtc.h: In function ‘mc146818_get_time’: include/linux/mc146818rtc.h:189: error: ‘RTC_ALWAYS_BCD’ undeclared (first use in this function) include/linux/mc146818rtc.h:189: error: (Each undeclared identifier is reported only once include/linux/mc146818rtc.h:189: error: for each function it appears in.) 
include/linux/mc146818rtc.h: In function ‘mc146818_set_time’: include/linux/mc146818rtc.h:279: error: ‘RTC_ALWAYS_BCD’ undeclared (first use in this function) include/linux/mc146818rtc.h:290: error: implicit declaration of function ‘CMOS_WRITE’ mvme16x_defconfig: In file included from arch/m68k/mvme16x/rtc.c:18: include/linux/mc146818rtc.h: In function ‘mc146818_is_updating’: include/linux/mc146818rtc.h:138: error: implicit declaration of function ‘CMOS_READ’ include/linux/mc146818rtc.h: In function ‘mc146818_get_time’: include/linux/mc146818rtc.h:189: error: ‘RTC_ALWAYS_BCD’ undeclared (first use in this function) include/linux/mc146818rtc.h:189: error: (Each undeclared identifier is reported only once include/linux/mc146818rtc.h:189: error: for each function it appears in.) include/linux/mc146818rtc.h: In function ‘mc146818_set_time’: include/linux/mc146818rtc.h:279: error: ‘RTC_ALWAYS_BCD’ undeclared (first use in this function) include/linux/mc146818rtc.h:290: error: implicit declaration of function ‘CMOS_WRITE’ These do not show up with a multi-platform config including Atari support, as arch/m68k/include/asm/mc146818rtc.h provides a definition of CMOS_READ() if CONFIG_ATARI=y. Fortunately the fixes are simple: replace by . Will send patches to fix... Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
linux-next: build failure after merge of the akpm-current tree
Hi Andrew, After merging the akpm-current tree, today's linux-next build (powerpc allyesconfig and pseries_le_defconfig) failed like this: In file included from include/linux/mm.h:394:0, from mm/huge_memory.c:10: include/linux/huge_mm.h:53:22: error: initializer element is not constant #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) ^ mm/huge_memory.c:104:62: note: in expansion of macro 'HPAGE_PMD_NR' static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8; ^ Caused by commit 6d34b9749be2 ("mm: make optimistic check for swapin readahead") interacting with commit dd1842a2a448 ("powerpc/mm: Make page table size a variable") from the powerpc tree. I applied this fix patch for today (hopefully this is still initialised early enough): From: Stephen Rothwell Date: Mon, 2 May 2016 18:25:42 +1000 Subject: [PATCH] mm: make optimistic check for swapin readahead fix Signed-off-by: Stephen Rothwell --- mm/huge_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f0cd9dbc1157..6aabfa166b6d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -101,7 +101,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); * fault. */ static unsigned int khugepaged_max_ptes_none __read_mostly; -static unsigned int khugepaged_max_ptes_swap __read_mostly = HPAGE_PMD_NR/8; +static unsigned int khugepaged_max_ptes_swap __read_mostly; static unsigned long allocstall; static int khugepaged(void *none); @@ -703,6 +703,7 @@ static int __init hugepage_init(void) khugepaged_pages_to_scan = HPAGE_PMD_NR * 8; khugepaged_max_ptes_none = HPAGE_PMD_NR - 1; + khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8; /* * hugepages can't be allocated by the buddy allocator */ -- 2.7.0 -- Cheers, Stephen Rothwell ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH kernel v2] powerpc/powernv: Fix debug macro
When cfg_dbg() is enabled (i.e. mapped to printk()), gcc produces errors as the __func__ parameter is missing (pnv_pci_cfg_read() has one); this adds the missing parameter. Since cfg_dbg() is used not just for config space access, this replaces it with well-known pr_devel(). Signed-off-by: Alexey Kardashevskiy--- Changes: v2: * s/cfg_dbg/pr_devel/ --- arch/powerpc/platforms/powernv/pci.c | 15 ++- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 73c8dc2..0db20ae 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -39,9 +39,6 @@ /* Delay in usec */ #define PCI_RESET_DELAY_US 300 -#define cfg_dbg(fmt...)do { } while(0) -//#define cfg_dbg(fmt...) printk(fmt) - #ifdef CONFIG_PCI_MSI int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { @@ -402,8 +399,8 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) } } - cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", - (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); + pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", +(pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || @@ -451,8 +448,8 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", - __func__, pdn->busno, pdn->devfn, where, size, *val); + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", +__func__, pdn->busno, pdn->devfn, where, size, *val); return PCIBIOS_SUCCESSFUL; } @@ -462,8 +459,8 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; - cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", - pdn->busno, pdn->devfn, where, size, val); + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", +__func__, pdn->busno, pdn->devfn, where, size, val); switch (size) { case 1: 
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); -- 2.5.0.rc3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v8 45/45] PCI/hotplug: PowerPC PowerNV PCI hotplug driver
On 05/02/2016 01:44 PM, Gavin Shan wrote: On Tue, Apr 19, 2016 at 08:36:48PM +1000, Alexey Kardashevskiy wrote: On 02/17/2016 02:44 PM, Gavin Shan wrote: This adds standalone driver to support PCI hotplug for PowerPC PowerNV platform that runs on top of skiboot firmware. The firmware identifies hotpluggable slots and marked their device tree node with proper "ibm,slot-pluggable" and "ibm,reset-by-firmware". The driver scans device tree nodes to create/register PCI hotplug slot accordingly. The PCI slots are organized in fashion of tree, which means one PCI slot might have parent PCI slot and parent PCI slot possibly contains multiple child PCI slots. At the plugging time, the parent PCI slot is populated before its children. The child PCI slots are removed before their parent PCI slot can be removed from the system. If the skiboot firmware doesn't support slot status retrieval, the PCI slot device node shouldn't have property "ibm,reset-by-firmware". In that case, none of valid PCI slots will be detected from device tree. The skiboot firmware doesn't export the capability to access attention LEDs yet and it's something for TBD. Signed-off-by: Gavin ShanAcked-by: Bjorn Helgaas --- drivers/pci/hotplug/Kconfig | 12 + drivers/pci/hotplug/Makefile | 3 + drivers/pci/hotplug/pnv_php.c | 870 ++ 3 files changed, 885 insertions(+) create mode 100644 drivers/pci/hotplug/pnv_php.c diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..167c8ce 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,18 @@ config HOTPLUG_PCI_SHPC When in doubt, say N. +config HOTPLUG_PCI_POWERNV + tristate "PowerPC PowerNV PCI Hotplug driver" + depends on PPC_POWERNV && EEH + help + Say Y here if you run PowerPC PowerNV platform that supports + PCI Hotplug + + To compile this driver as a module, choose M here: the + module will be called pnv-php. + + When in doubt, say N. 
+ config HOTPLUG_PCI_RPA tristate "RPA PCI Hotplug driver" depends on PPC_PSERIES && EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index b616e75..e33cdda 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)+= pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)+= rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +pnv-php-objs := pnv_php.o + rpaphp-objs:= rpaphp_core.o \ rpaphp_pci.o\ rpaphp_slot.o diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c new file mode 100644 index 000..364ec36 --- /dev/null +++ b/drivers/pci/hotplug/pnv_php.c @@ -0,0 +1,870 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. + * + * Copyright Gavin Shan, IBM Corporation 2015. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver" + +struct pnv_php_slot { + struct hotplug_slot slot; + struct hotplug_slot_infoslot_info; + uint64_tid; + char*name; + int slot_no; + struct kref kref; +#define PNV_PHP_STATE_INITIALIZED 0 +#define PNV_PHP_STATE_REGISTERED 1 +#define PNV_PHP_STATE_POPULATED2 + int state; + struct device_node *dn; + struct pci_dev *pdev; + struct pci_bus *bus; + boolpower_state_check; + int power_state_confirmed; +#define PNV_PHP_POWER_CONFIRMED_INVALID0 +#define PNV_PHP_POWER_CONFIRMED_SUCCESS1 +#define PNV_PHP_POWER_CONFIRMED_FAIL 2 + struct opal_msg *msg; + void*fdt; +
[PATCH] powerpc: Remove unnecessary CONFIG_SMP #ifdefs
The code in machine_restart/power_off/halt() includes #ifdefs around calls to smp_send_stop(), however these are not required as include/linux/smp.h includes an empty version of this function for CONFIG_SMP=n builds. Signed-off-by: Chris Smart--- arch/powerpc/kernel/setup-common.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 44c8d03558ac..8ca79b7503d8 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -128,9 +128,7 @@ void machine_restart(char *cmd) machine_shutdown(); if (ppc_md.restart) ppc_md.restart(cmd); -#ifdef CONFIG_SMP smp_send_stop(); -#endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); local_irq_disable(); while (1) ; @@ -141,9 +139,7 @@ void machine_power_off(void) machine_shutdown(); if (pm_power_off) pm_power_off(); -#ifdef CONFIG_SMP smp_send_stop(); -#endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); local_irq_disable(); while (1) ; @@ -159,9 +155,7 @@ void machine_halt(void) machine_shutdown(); if (ppc_md.halt) ppc_md.halt(); -#ifdef CONFIG_SMP smp_send_stop(); -#endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); local_irq_disable(); while (1) ; -- 2.5.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev