Re: [PATCH V2] crypto/NX: Set receive window credits to max number of CRBs in RxFIFO
Haren Myneni wrote: > > System gets checkstop if RxFIFO overruns with more requests than the > maximum possible number of CRBs in FIFO at the same time. The max number > of requests per window is controlled by window credits. So find max > CRBs from FIFO size and set it to receive window credits. > > Fixes: b0d6c9bab5e4 ("crypto/nx: Add P9 NX support for 842 compression > engine") > CC: sta...@vger.kernel.org # v4.14+ > Signed-off-by: Haren Myneni I presume this is being picked up by the powerpc tree? Thanks, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] recordmcount: Fix spurious mcount entries on powerpc
"Naveen N. Rao" writes: > The recent change enabling HAVE_C_RECORDMCOUNT on powerpc started > showing the following issue: > > # modprobe kprobe_example >ftrace-powerpc: Not expected bl: opcode is 3c4c0001 >WARNING: CPU: 0 PID: 227 at kernel/trace/ftrace.c:2001 > ftrace_bug+0x90/0x318 >Modules linked in: >CPU: 0 PID: 227 Comm: modprobe Not tainted 5.2.0-rc6-00678-g1c329100b942 #2 >NIP: c0264318 LR: c025d694 CTR: c0f5cd30 >REGS: c1f2b7b0 TRAP: 0700 Not tainted > (5.2.0-rc6-00678-g1c329100b942) >MSR: 90010282b033 CR: > 28228222 XER: >CFAR: c02642fc IRQMASK: 0 > >NIP [c0264318] ftrace_bug+0x90/0x318 >LR [c025d694] ftrace_process_locs+0x4f4/0x5e0 >Call Trace: >[c1f2ba40] [0004] 0x4 (unreliable) >[c1f2bad0] [c025d694] ftrace_process_locs+0x4f4/0x5e0 >[c1f2bb90] [c020ff10] load_module+0x25b0/0x30c0 >[c1f2bd00] [c0210cb0] sys_finit_module+0xc0/0x130 >[c1f2be20] [c000bda4] system_call+0x5c/0x70 >Instruction dump: >419e0018 2f83 419e00bc 2f83ffea 409e00cc 481c 0fe0 3c62ff96 >3901 3940 386386d0 48c4 <0fe0> 3ce20003 3901 3c62ff96 >---[ end trace 4c438d5cebf78381 ]--- >ftrace failed to modify >[] 0xc008012a0008 > actual: 01:00:4c:3c >Initializing ftrace call sites >ftrace record flags: 200 > (0) > expected tramp: c006af4c Aha, thanks. I saw that on one of my text boxes but hadn't pinned it down to this commit. > Fixes: c7d64b560ce80 ("powerpc/ftrace: Enable C Version of recordmcount") That commit is the tip of my next, so I'll drop it for now and merge them in the other order so there's breakage. Steve are you OK if I merge this via the powerpc tree? I'll reword the commit message so that it makes sense coming prior to the commit mentioned above. cheers > Signed-off-by: Naveen N. 
Rao > --- > scripts/recordmcount.h | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h > index 13c5e6c8829c..47fca2c69a73 100644 > --- a/scripts/recordmcount.h > +++ b/scripts/recordmcount.h > @@ -325,7 +325,8 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, > if (!mcountsym) > mcountsym = get_mcountsym(sym0, relp, str0); > > - if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { > + if (mcountsym && mcountsym == Elf_r_sym(relp) && > + !is_fake_mcount(relp)) { > uint_t const addend = > _w(_w(relp->r_offset) - recval + mcount_adjust); > mrelp->r_offset = _w(offbase > -- > 2.22.0
[PATCH 0/2] migration/prrn instrumentation tweaks
Mainly this produces better information about what's happening with the device tree as a result of LPM or PRRN. Nathan Lynch (2): powerpc/pseries/mobility: set pr_fmt powerpc/pseries/mobility: add pr_debug for device tree changes arch/powerpc/platforms/pseries/mobility.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) -- 2.20.1
[PATCH 1/2] powerpc/pseries/mobility: set pr_fmt
The pr_err callsites in mobility.c already manually include a "mobility:" prefix, let's make it official for the benefit of messages to be added later. Signed-off-by: Nathan Lynch --- arch/powerpc/platforms/pseries/mobility.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index b8c8096907d4..f9a1287925a8 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -9,6 +9,9 @@ * 2 as published by the Free Software Foundation. */ + +#define pr_fmt(fmt) "mobility: " fmt + #include #include #include @@ -418,11 +421,11 @@ static int __init mobility_sysfs_init(void) rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); if (rc) - pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); + pr_err("unable to create migration sysfs file (%d)\n", rc); rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); if (rc) - pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + pr_err("unable to create api_version sysfs file (%d)\n", rc); return 0; } -- 2.20.1
[PATCH 2/2] powerpc/pseries/mobility: add pr_debug for device tree changes
When investigating issues with partition migration or resource reassignments it is helpful to have a log of which nodes and properties in the device tree have changed. Use pr_debug() so it's easy to enable these at runtime with the dynamic debug facility. Signed-off-by: Nathan Lynch --- arch/powerpc/platforms/pseries/mobility.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f9a1287925a8..5270ac00279b 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -69,6 +69,8 @@ static int delete_dt_node(__be32 phandle) if (!dn) return -ENOENT; + pr_debug("removing node %pOFfp\n", dn); + dlpar_detach_node(dn); of_node_put(dn); return 0; @@ -127,6 +129,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { + pr_debug("updating node %pOF property %s\n", dn, name); of_update_property(dn, new_prop); *prop = NULL; } @@ -241,6 +244,8 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) if (rc) dlpar_free_cc_nodes(dn); + pr_debug("added node %pOFfp\n", dn); + of_node_put(parent_dn); return rc; } @@ -256,6 +261,7 @@ static void prrn_update_node(__be32 phandle) */ dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (dn) { + pr_debug("ignoring PRRN for %pOFfp\n", dn); of_node_put(dn); return; } @@ -265,6 +271,8 @@ static void prrn_update_node(__be32 phandle) hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; hp_elog._drc_u.drc_index = phandle; + pr_debug("handling PRRN for LMB DRC index 0x%x\n", be32_to_cpu(phandle)); + handle_dlpar_errorlog(&hp_elog); } -- 2.20.1
[PATCH] powerpc/configs: Disable /dev/port in skiroot defconfig
While reviewing lockdown patches, I discovered that we still enable /dev/port (CONFIG_DEVPORT) in skiroot. We don't need it. Deselect CONFIG_DEVPORT for skiroot. Signed-off-by: Daniel Axtens --- arch/powerpc/configs/skiroot_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 5ba131c30f6b..b2e8f37156eb 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -212,6 +212,7 @@ CONFIG_IPMI_WATCHDOG=y CONFIG_HW_RANDOM=y CONFIG_TCG_TPM=y CONFIG_TCG_TIS_I2C_NUVOTON=y +# CONFIG_DEVPORT is not set CONFIG_I2C=y # CONFIG_I2C_COMPAT is not set CONFIG_I2C_CHARDEV=y -- 2.20.1
[PATCH 4/4] powerpc/cacheinfo: warn if cache object chain becomes unordered
This can catch cases where the device tree has gotten mishandled into an inconsistent state at runtime, e.g. the cache nodes for both the source and the destination are present after a migration. Signed-off-by: Nathan Lynch --- arch/powerpc/kernel/cacheinfo.c | 9 + 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 30be2cdc8aa9..5938aeeba8c0 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -425,6 +425,15 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) } smaller->next_local = bigger; + + /* +* The cache->next_local list sorts by level ascending: +* L1d -> L1i -> L2 -> L3 ... +*/ + WARN_ONCE((smaller->level == 1 && bigger->level > 2) || + (smaller->level > 1 && bigger->level != smaller->level + 1), + "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n", + smaller->level, smaller->ofnode, bigger->level, bigger->ofnode); } static void do_subsidiary_caches_debugcheck(struct cache *cache) -- 2.20.1
[PATCH 3/4] powerpc/cacheinfo: improve diagnostics about malformed cache lists
If we have a bug which causes us to start with the wrong kind of OF node when linking up the cache tree, it's helpful for debugging to print information about what we found vs what we expected. So replace uses of WARN_ON_ONCE with WARN_ONCE, which lets us include an informative message instead of a contentless backtrace. Signed-off-by: Nathan Lynch --- arch/powerpc/kernel/cacheinfo.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index d8200f7e1075..30be2cdc8aa9 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -429,8 +429,14 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) static void do_subsidiary_caches_debugcheck(struct cache *cache) { - WARN_ON_ONCE(cache->level != 1); - WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu")); + WARN_ONCE(cache->level != 1, + "instantiating cache chain from L%d %s cache for " + "%pOFP instead of an L1\n", cache->level, + cache_type_string(cache), cache->ofnode); + WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"), + "instantiating cache chain from node %pOFP of type '%s' " + "instead of a cpu node\n", cache->ofnode, + of_node_get_device_type(cache->ofnode)); } static void do_subsidiary_caches(struct cache *cache) -- 2.20.1
[PATCH 0/4] cacheinfo instrumentation tweaks
A few changes that would have aided debugging this code's interactions with partition migration, maybe they'll help with the next thing (hibernation?). Nathan Lynch (4): powerpc/cacheinfo: set pr_fmt powerpc/cacheinfo: name@unit instead of full DT path in debug messages powerpc/cacheinfo: improve diagnostics about malformed cache lists powerpc/cacheinfo: warn if cache object chain becomes unordered arch/powerpc/kernel/cacheinfo.c | 37 - 1 file changed, 27 insertions(+), 10 deletions(-) -- 2.20.1
[PATCH 1/4] powerpc/cacheinfo: set pr_fmt
Set pr_fmt so we get a nice prefix on messages. Signed-off-by: Nathan Lynch --- arch/powerpc/kernel/cacheinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 42c559efe060..295af19e00f0 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -10,6 +10,8 @@ * 2 as published by the Free Software Foundation. */ +#define pr_fmt(fmt) "cacheinfo: " fmt + #include #include #include -- 2.20.1
[PATCH 2/4] powerpc/cacheinfo: name@unit instead of full DT path in debug messages
We know that every OF node we deal with in this code is under /cpus, so we can make the debug messages a little less verbose without losing information. E.g. cacheinfo: creating L1 dcache and icache for /cpus/PowerPC,POWER8@0 cacheinfo: creating L2 ucache for /cpus/l2-cache@2006 cacheinfo: creating L3 ucache for /cpus/l3-cache@3106 becomes cacheinfo: creating L1 dcache and icache for PowerPC,POWER8@0 cacheinfo: creating L2 ucache for l2-cache@2006 cacheinfo: creating L3 ucache for l3-cache@3106 Replace all '%pOF' specifiers with '%pOFP'. Signed-off-by: Nathan Lynch --- arch/powerpc/kernel/cacheinfo.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 295af19e00f0..d8200f7e1075 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -171,7 +171,7 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, &cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n", iter->ofnode, cache_type_string(iter), cache->ofnode, @@ -183,7 +183,7 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + pr_debug("freeing L%d %s cache for %pOFP\n", cache->level, cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); @@ -198,7 +198,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %pOF(%s)\n", + "CPU %i already accounted in %pOFP(%s)\n", cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); @@ -359,7 +359,7 @@ static int cache_is_unified_d(const struct device_node *np) */ static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for 
%pOF\n", level, node); + pr_debug("creating L%d ucache for %pOFP\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -369,7 +369,7 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %pOF\n", level, + pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); dcache = new_cache(CACHE_TYPE_DATA, level, node); @@ -740,13 +740,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " -"%pOF(%s) (rc = %zd)\n", +"%pOFP(%s) (rc = %zd)\n", attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %pOF(%s)\n", + pr_debug("could not create %s attribute for %pOFP(%s)\n", attr->attr.name, cache->ofnode, cache_type); } @@ -862,7 +862,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %pOF(%s)\n", + "CPU %i not accounted in %pOFP(%s)\n", cpu, cache->ofnode, cache_type_string(cache)); -- 2.20.1
Re: [PATCH] powerpc/rtas: retry when cpu offline races with suspend/migration
Juliet Kim writes: > On 6/25/19 1:51 PM, Nathan Lynch wrote: >> Juliet Kim writes: >> >>> There's some concern this could retry forever, resulting in live lock. >> First of all the system will make progress in other areas even if there >> are repeated retries; we're not indefinitely holding locks or anything >> like that. > > For instance, system admin runs a script that picks and offlines CPUs in a > loop to keep a certain rate of onlined CPUs for energy saving. If LPM keeps > putting CPUs back online, that would never finish, and would keep generating > new offline requests > >> Second, Linux checks the H_VASI_STATE result on every retry. If the >> platform wants to terminate the migration (say, if it imposes a >> timeout), Linux will abandon it when H_VASI_STATE fails to return >> H_VASI_SUSPENDING. And it seems incorrect to bail out before that >> happens, absent hard errors on the Linux side such as allocation >> failures. > I confirmed with the PHYP and HMC folks that they wouldn't time out the LPM > request including H_VASI_STATE, so if the LPM retries were unlucky enough to > encounter repeated CPU offline attempts (maybe some customer code retrying > that), then the retries could continue indefinitely, or until some manual > intervention. And in the mean time, the LPM delay here would cause PHYP to > block other operations. That sounds like a PHYP bug to me. cheers
Re: [PATCH] powerpc/64s/radix: Define arch_ioremap_p4d_supported()
Anshuman Khandual writes: > Recent core ioremap changes require HAVE_ARCH_HUGE_VMAP subscribing archs > provide arch_ioremap_p4d_supported() failing which will result in a build > failure like the following. > > ld: lib/ioremap.o: in function `.ioremap_huge_init': > ioremap.c:(.init.text+0x3c): undefined reference to > `.arch_ioremap_p4d_supported' > > This defines a stub implementation for arch_ioremap_p4d_supported() keeping > it disabled for now to fix the build problem. The easiest option is for this to be folded into your patch that creates the requirement for arch_ioremap_p4d_supported(). Andrew might do that for you, or you could send a v2. This looks fine from a powerpc POV: Acked-by: Michael Ellerman cheers > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: "Aneesh Kumar K.V" > Cc: Nicholas Piggin > Cc: Andrew Morton > Cc: Stephen Rothwell > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ker...@vger.kernel.org > Cc: linux-n...@vger.kernel.org > > Signed-off-by: Anshuman Khandual > --- > This has been just build tested and fixes the problem reported earlier. > > arch/powerpc/mm/book3s64/radix_pgtable.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c > index 8904aa1..c81da88 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -1124,6 +1124,11 @@ void radix__ptep_modify_prot_commit(struct > vm_area_struct *vma, > set_pte_at(mm, addr, ptep, pte); > } > > +int __init arch_ioremap_p4d_supported(void) > +{ > + return 0; > +} > + > int __init arch_ioremap_pud_supported(void) > { > /* HPT does not cope with large pages in the vmalloc area */ > -- > 2.7.4
Re: [PATCH] powerpc/64s/radix: Define arch_ioremap_p4d_supported()
On 06/26/2019 01:21 PM, Anshuman Khandual wrote: Recent core ioremap changes require HAVE_ARCH_HUGE_VMAP subscribing archs provide arch_ioremap_p4d_supported() failing which will result in a build failure like the following. ld: lib/ioremap.o: in function `.ioremap_huge_init': ioremap.c:(.init.text+0x3c): undefined reference to `.arch_ioremap_p4d_supported' This defines a stub implementation for arch_ioremap_p4d_supported() keeping it disabled for now to fix the build problem. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Aneesh Kumar K.V" Cc: Nicholas Piggin Cc: Andrew Morton Cc: Stephen Rothwell Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org Cc: linux-n...@vger.kernel.org Signed-off-by: Anshuman Khandual Add a Fixes: tag ? For instance: Fixes: d909f9109c30 ("powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP") Christophe --- This has been just build tested and fixes the problem reported earlier. arch/powerpc/mm/book3s64/radix_pgtable.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 8904aa1..c81da88 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1124,6 +1124,11 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { /* HPT does not cope with large pages in the vmalloc area */
Re: [PATCH v1] powerpc: Fix BUG_ON during memory unplug on radix
Bharata B Rao writes: > We hit the following BUG_ON when memory hotplugged before reboot > is unplugged after reboot: > > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113! > > remove_pagetable+0x594/0x6a0 > (unreliable) > remove_pagetable+0x94/0x6a0 > vmemmap_free+0x394/0x410 > sparse_remove_one_section+0x26c/0x2e8 > __remove_pages+0x428/0x540 > arch_remove_memory+0xd0/0x170 > __remove_memory+0xd4/0x1a0 > dlpar_remove_lmb+0xbc/0x110 > dlpar_memory+0xa80/0xd20 > handle_dlpar_errorlog+0xa8/0x160 > pseries_hp_work_fn+0x2c/0x60 > process_one_work+0x46c/0x860 > worker_thread+0x364/0x5e0 > kthread+0x1b0/0x1c0 > ret_from_kernel_thread+0x5c/0x68 > > This occurs because, during reboot-after-hotplug, the hotplugged > memory range gets initialized as regular memory and page > tables are setup using memblock allocator. This means that we > wouldn't have initialized the PMD or PTE fragment count for > those PMD or PTE pages. > > Fixing this includes 3 parts: > > - Re-walk the init_mm page tables from mem_init() and initialize > the PMD and PTE fragment counts appropriately. So PMD and PTE > table pages allocated during early allocation will have a > fragment count of 1. > - Convert the pages from memblock pages to regular pages so that > they can be freed back to buddy allocator seamlessly. However > we do this for only PMD and PTE pages and not for PUD pages. > PUD pages are freed using kmem_cache_free() and we need to > identify memblock pages and free them differently. > - When we do early memblock based allocation of PMD and PUD pages, > allocate in PAGE_SIZE granularity so that we are sure the > complete page is used as pagetable page. PAGE_SIZE allocations will > have an implication on the amount of memory used for page tables, > an example of which is shown below: > > Since we now do PAGE_SIZE allocations for both PUD table and > PMD table (Note that PTE table allocation is already of PAGE_SIZE), > we end up allocating more memory for the same amount of system RAM. 
> Here is an example of how much more we end up allocating for > page tables in case of 64T system RAM: > > 1. Mapping system RAM > > With each PGD entry spanning 512G, 64TB RAM would need 128 entries > and hence 128 PUD tables. We use 1G mapping at PUD level (level 2) > > With default PUD_TABLE_SIZE(4K), 128*4K=512K (8 64K pages) > With PAGE_SIZE(64K) allocations, 128*64K=8192K (128 64K pages) > > 2. Mapping struct pages (memmap) > > 64T RAM would need 64G for memmap with struct page size being 64B. > Since memmap array is mapped using 64K mappings, we would need > 64 PUD entries or 64 PMD tables (level 3) in total. > > With default PMD_TABLE_SIZE(4K), 64*4K=256K (4 64K pages) > With PAGE_SIZE(64K) allocations, 64*64K=4096K (64 64K pages) > > There is no change in PTE table (level 4) allocation requirement as > early page table allocation is already using PAGE_SIZE PTE tables. > > So essentially with this change we would use 180 64K pages > more for 64T system. > Reviewed-by: Aneesh Kumar K.V > Reported-by: Srikanth Aithal > Signed-off-by: Bharata B Rao > --- > v0 - https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-June/192242.html > Changes in v1: > - Handling PUD table freeing too. 
> - Added details about how much extra memory we use up with > this approach into the commit message > - A few cleanups and renames > > arch/powerpc/include/asm/book3s/64/pgalloc.h | 7 +- > arch/powerpc/include/asm/book3s/64/radix.h | 1 + > arch/powerpc/include/asm/sparsemem.h | 1 + > arch/powerpc/mm/book3s64/pgtable.c | 15 +++- > arch/powerpc/mm/book3s64/radix_pgtable.c | 79 +++- > arch/powerpc/mm/mem.c| 5 ++ > 6 files changed, 104 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h > b/arch/powerpc/include/asm/book3s/64/pgalloc.h > index d5a44912902f..9ae134f260be 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h > +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h > @@ -111,7 +111,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, > unsigned long addr) > > static inline void pud_free(struct mm_struct *mm, pud_t *pud) > { > - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); > + struct page *page = virt_to_page(pud); > + > + if (PageReserved(page)) > + free_reserved_page(page); > + else > + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); > } > > static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h > b/arch/powerpc/include/asm/book3s/64/radix.h > index 574eca33f893..4320f2790e8d 100644 > --- a/arch/powerpc/include/asm/book3s/64/radix.h > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void) > #ifdef CONFIG_MEMORY_HOTPLUG > int radix__create_section_mapping(unsigned long start, uns
Re: [PATCH v3 3/3] powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails
On Thu, Jun 27, 2019 at 12:58 PM Aneesh Kumar K.V wrote: > > "Oliver O'Halloran" writes: > > >> > + rc = drc_pmem_bind(p); > >> > + } > >> > + > >> > if (rc) > >> > goto err; > >> > > >> > >> I am also not sure about the module reference count here. Should we > >> increment the module reference count after a bind so that we can track > >> failures in unbind and fail the module unload? > > > > I don't really get what you're concerned about here. The error > > handling path calls drc_pmem_unbind() so if there's a bind error we > > should never leave probe with memory still bound. > > > > In the remove callback, if the unbind fail should we allow the module > unload? If the drc_pmem_unbind() in the driver's remove function fails the driver will still be unbound from the platform device for that DRC. You can try re-bind the driver to the platform device and it'll hit the unbind-then-bind again error path this series introduces (which might also fail), but that's fine. The only reasons I can think of for drc_pmem_unbind() failing are bugs in the hypervisor or bugs in the driver. Forcing the module to stay loaded doesn't help either case so I'm not seeing the benefit. Oliver
Re: [PATCH v3 3/3] powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails
"Oliver O'Halloran" writes: > As I said in the comments on v1, do we have any actual numbers on how > long the bind step takes? From memory, you could bind ~32GB in a > single bind h-call before phyp would hit it's time limit of 250us and > return a continue token. Assuming that holds we'll be saving a few > dozen milliseconds at best. > >> > + rc = drc_pmem_bind(p); >> > + } >> > + >> > if (rc) >> > goto err; >> > >> >> I am also not sure about the module reference count here. Should we >> increment the module reference count after a bind so that we can track >> failures in ubind and fail the module unload? > > I don't really get what you're concerned about here. The error > handling path calls drc_pmem_unbind() so if there's a bind error we > should never leave probe with memory still bound. > In the remove callback, if the ubind fail should we allow the module unload? -aneesh
RE: [EXT] Re: [PATCHv2 2/2] PCI: layerscape: EP and RC drivers are compiled separately
Hi Bjorn, > -Original Message- > From: Bjorn Helgaas > Sent: 2019年6月27日 1:52 > To: Xiaowei Bao > Cc: robh...@kernel.org; mark.rutl...@arm.com; shawn...@kernel.org; Leo > Li ; kis...@ti.com; lorenzo.pieral...@arm.com; > a...@arndb.de; gre...@linuxfoundation.org; M.h. Lian > ; Mingkai Hu ; Roy Zang > ; kstew...@linuxfoundation.org; > pombreda...@nexb.com; shawn@rock-chips.com; > linux-...@vger.kernel.org; devicet...@vger.kernel.org; > linux-ker...@vger.kernel.org; linux-arm-ker...@lists.infradead.org; > linuxppc-dev@lists.ozlabs.org > Subject: [EXT] Re: [PATCHv2 2/2] PCI: layerscape: EP and RC drivers are > compiled separately > > Caution: EXT Email > > If you post another revision for any reason, please change the subject so it's > worded as a command and mentions the new config options, e.g., > > PCI: layerscape: Add CONFIG_PCI_LAYERSCAPE_EP to build EP/RC > separately [Xiaowei Bao] OK, thanks, this subject looks well. > > On Wed, Jun 26, 2019 at 07:11:39PM +0800, Xiaowei Bao wrote: > > Compile the EP and RC drivers separately with different configuration > > options, this looks clearer. > > > > Signed-off-by: Xiaowei Bao > > --- > > v2: > > - No change. > > > > drivers/pci/controller/dwc/Kconfig | 20 ++-- > > drivers/pci/controller/dwc/Makefile |3 ++- > > 2 files changed, 20 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/pci/controller/dwc/Kconfig > > b/drivers/pci/controller/dwc/Kconfig > > index a6ce1ee..a41ccf5 100644 > > --- a/drivers/pci/controller/dwc/Kconfig > > +++ b/drivers/pci/controller/dwc/Kconfig > > @@ -131,13 +131,29 @@ config PCI_KEYSTONE_EP > > DesignWare core functions to implement the driver. 
> > > > config PCI_LAYERSCAPE > > - bool "Freescale Layerscape PCIe controller" > > + bool "Freescale Layerscape PCIe controller - Host mode" > > depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) > > depends on PCI_MSI_IRQ_DOMAIN > > select MFD_SYSCON > > select PCIE_DW_HOST > > help > > - Say Y here if you want PCIe controller support on Layerscape SoCs. > > + Say Y here if you want to enable PCIe controller support on > Layerscape > > + SoCs to work in Host mode. > > + This controller can work either as EP or RC. The > RCW[HOST_AGT_PEX] > > + determines which PCIe controller works in EP mode and which > PCIe > > + controller works in RC mode. > > + > > +config PCI_LAYERSCAPE_EP > > + bool "Freescale Layerscape PCIe controller - Endpoint mode" > > + depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) > > + depends on PCI_ENDPOINT > > + select PCIE_DW_EP > > + help > > + Say Y here if you want to enable PCIe controller support on > Layerscape > > + SoCs to work in Endpoint mode. > > + This controller can work either as EP or RC. The > RCW[HOST_AGT_PEX] > > + determines which PCIe controller works in EP mode and which > PCIe > > + controller works in RC mode. 
> > > > config PCI_HISI > > depends on OF && (ARM64 || COMPILE_TEST) diff --git > > a/drivers/pci/controller/dwc/Makefile > > b/drivers/pci/controller/dwc/Makefile > > index b085dfd..824fde7 100644 > > --- a/drivers/pci/controller/dwc/Makefile > > +++ b/drivers/pci/controller/dwc/Makefile > > @@ -8,7 +8,8 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o > > obj-$(CONFIG_PCI_IMX6) += pci-imx6.o > > obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o > > obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o > > -obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o pci-layerscape-ep.o > > +obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o > > +obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o > > obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o > > obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o > > obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o > > -- > > 1.7.1 > >
Re: [PATCH v3 3/3] powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails
On Thu, Jun 27, 2019 at 2:58 AM Aneesh Kumar K.V wrote: > > Vaibhav Jain writes: > > *snip* > > + /* If phyp says drc memory still bound then force unbound and retry */ > > + if (rc == -EBUSY) { > > + dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); > > + drc_pmem_unbind(p); > This should only be caused by kexec right? We should only ever hit this path if there's an unclean shutdown, so kdump or fadump. For a normal kexec the previous kernel should have torn down the binding for us. > And considering kernel nor > hypervisor won't change device binding details, can you check switching > this to H_SCM_QUERY_BLOCK_MEM_BINDING? I thought about using the QUERY_BLOCK_MEM_BINDING call, but I'm not sure it's a good idea. It bakes in assumptions about what the *previous* kernel did with the SCM volume that might not be valid. A panic while unbinding a region would result in a partially-bound region which might break the query call. Also, it's possible that we might have SCM drivers in the future that do something other than just binding the volume in one contiguous chunk. UNBIND_ALL is robust against all of these and robustness is what you want out of an error handling mechanism. > Will that result in faster boot? As I said in the comments on v1, do we have any actual numbers on how long the bind step takes? From memory, you could bind ~32GB in a single bind h-call before phyp would hit its time limit of 250us and return a continue token. Assuming that holds we'll be saving a few dozen milliseconds at best. > > + rc = drc_pmem_bind(p); > > + } > > + > > if (rc) > > goto err; > > > > I am also not sure about the module reference count here. Should we > increment the module reference count after a bind so that we can track > failures in unbind and fail the module unload? I don't really get what you're concerned about here. The error handling path calls drc_pmem_unbind() so if there's a bind error we should never leave probe with memory still bound. > -aneesh >
Re: [PATCH v3 1/3] powerpc/pseries: Update SCM hcall op-codes in hvcall.h
On Thu, Jun 27, 2019 at 2:53 AM Aneesh Kumar K.V wrote: > > Vaibhav Jain writes: > > > Update the hvcalls.h to include op-codes for new hcalls introduce to > > manage SCM memory. Also update existing hcall definitions to reflect > > current papr specification for SCM. > > > > Signed-off-by: Vaibhav Jain > > > Why split this as a separate patch? You should fold this to the next one > where we actually use the constant. I figured if you're going to update the hcall numbers to reflect reality then it should be done in a separate patch. Single logical change per patch and all that. Reviewed-by: Oliver O'Halloran
[PATCH AUTOSEL 4.14 17/35] ibmvnic: Refresh device multicast list after reset
From: Thomas Falcon [ Upstream commit be32a24372cf162e825332da1a7ccef058d4f20b ] It was observed that multicast packets were no longer received after a device reset. The fix is to resend the current multicast list to the backing device after recovery. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c914b338691b..956fbb164e6f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1489,6 +1489,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } + /* refresh device's multicast list */ + ibmvnic_set_multi(netdev); + /* kick napi */ for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); -- 2.20.1
[PATCH AUTOSEL 4.19 28/60] ibmvnic: Fix unchecked return codes of memory allocations
From: Thomas Falcon [ Upstream commit 7c940b1a5291e5069d561f5b8f0e51db6b7a259a ] The return values for these memory allocations are unchecked, which may cause an oops if the driver does not handle them after a failure. Fix by checking the function's return code. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b88af81499e8..0ae43d27cdcf 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -438,9 +438,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { free_long_term_buff(adapter, &rx_pool->long_term_buff); rx_pool->buff_size = be64_to_cpu(size_array[i]); - alloc_long_term_buff(adapter, &rx_pool->long_term_buff, -rx_pool->size * -rx_pool->buff_size); + rc = alloc_long_term_buff(adapter, + &rx_pool->long_term_buff, + rx_pool->size * + rx_pool->buff_size); } else { rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff); @@ -706,9 +707,9 @@ static int init_tx_pools(struct net_device *netdev) return rc; } - init_one_tx_pool(netdev, &adapter->tso_pool[i], -IBMVNIC_TSO_BUFS, -IBMVNIC_TSO_BUF_SZ); + rc = init_one_tx_pool(netdev, &adapter->tso_pool[i], + IBMVNIC_TSO_BUFS, + IBMVNIC_TSO_BUF_SZ); if (rc) { release_tx_pools(adapter); return rc; -- 2.20.1
[PATCH AUTOSEL 4.19 27/60] ibmvnic: Refresh device multicast list after reset
From: Thomas Falcon [ Upstream commit be32a24372cf162e825332da1a7ccef058d4f20b ] It was observed that multicast packets were no longer received after a device reset. The fix is to resend the current multicast list to the backing device after recovery. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bf0a5fe0da17..b88af81499e8 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1854,6 +1854,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } + /* refresh device's multicast list */ + ibmvnic_set_multi(netdev); + /* kick napi */ for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); -- 2.20.1
[PATCH AUTOSEL 4.19 26/60] ibmvnic: Do not close unopened driver during reset
From: Thomas Falcon [ Upstream commit 1f94608b0ce141be5286dde31270590bdf35b86a ] Check driver state before halting it during a reset. If the driver is not running, do nothing. Otherwise, a request to deactivate a down link can cause an error and the reset will fail. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 426789e2c23d..bf0a5fe0da17 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1754,7 +1754,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, ibmvnic_cleanup(netdev); - if (adapter->reset_reason != VNIC_RESET_MOBILITY && + if (reset_state == VNIC_OPEN && + adapter->reset_reason != VNIC_RESET_MOBILITY && adapter->reset_reason != VNIC_RESET_FAILOVER) { rc = __ibmvnic_close(netdev); if (rc) -- 2.20.1
[PATCH AUTOSEL 5.1 89/95] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac
From: Christoph Hellwig [ Upstream commit 9739ab7eda459f0669ec9807e0d9be5020bab88c ] With the strict dma mask checking introduced with the switch to the generic DMA direct code common wifi chips on 32-bit powerbooks stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds to allow them to reliably allocate dma coherent memory. Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported") Reported-by: Aaro Koskinen Signed-off-by: Christoph Hellwig Tested-by: Larry Finger Acked-by: Larry Finger Tested-by: Aaro Koskinen Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/page.h | 7 +++ arch/powerpc/mm/mem.c | 3 ++- arch/powerpc/platforms/powermac/Kconfig | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index ed870468ef6f..d408711d09fb 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -330,6 +330,13 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include +/* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. 
+ */ +#ifdef CONFIG_PPC32 +#define ARCH_ZONE_DMA_BITS 30 +#else #define ARCH_ZONE_DMA_BITS 31 +#endif #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f6787f90e158..b98ce400a889 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -255,7 +255,8 @@ void __init paging_init(void) (long int)((top_of_ram - total_ram) >> 20)); #ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL >> PAGE_SHIFT); + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index f834a19ed772..c02d8c503b29 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -7,6 +7,7 @@ config PPC_PMAC select PPC_INDIRECT_PCI if PPC32 select PPC_MPC106 if PPC32 select PPC_NATIVE + select ZONE_DMA if PPC32 default y config PPC_PMAC64 -- 2.20.1
[PATCH AUTOSEL 5.1 45/95] ibmvnic: Fix unchecked return codes of memory allocations
From: Thomas Falcon [ Upstream commit 7c940b1a5291e5069d561f5b8f0e51db6b7a259a ] The return values for these memory allocations are unchecked, which may cause an oops if the driver does not handle them after a failure. Fix by checking the function's return code. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 664e52fa7919..0e4029c54241 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -438,9 +438,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { free_long_term_buff(adapter, &rx_pool->long_term_buff); rx_pool->buff_size = be64_to_cpu(size_array[i]); - alloc_long_term_buff(adapter, &rx_pool->long_term_buff, -rx_pool->size * -rx_pool->buff_size); + rc = alloc_long_term_buff(adapter, + &rx_pool->long_term_buff, + rx_pool->size * + rx_pool->buff_size); } else { rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff); @@ -706,9 +707,9 @@ static int init_tx_pools(struct net_device *netdev) return rc; } - init_one_tx_pool(netdev, &adapter->tso_pool[i], -IBMVNIC_TSO_BUFS, -IBMVNIC_TSO_BUF_SZ); + rc = init_one_tx_pool(netdev, &adapter->tso_pool[i], + IBMVNIC_TSO_BUFS, + IBMVNIC_TSO_BUF_SZ); if (rc) { release_tx_pools(adapter); return rc; -- 2.20.1
[PATCH AUTOSEL 5.1 44/95] ibmvnic: Refresh device multicast list after reset
From: Thomas Falcon [ Upstream commit be32a24372cf162e825332da1a7ccef058d4f20b ] It was observed that multicast packets were no longer received after a device reset. The fix is to resend the current multicast list to the backing device after recovery. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 71bf895409a1..664e52fa7919 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1851,6 +1851,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } + /* refresh device's multicast list */ + ibmvnic_set_multi(netdev); + /* kick napi */ for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); -- 2.20.1
[PATCH AUTOSEL 5.1 43/95] ibmvnic: Do not close unopened driver during reset
From: Thomas Falcon [ Upstream commit 1f94608b0ce141be5286dde31270590bdf35b86a ] Check driver state before halting it during a reset. If the driver is not running, do nothing. Otherwise, a request to deactivate a down link can cause an error and the reset will fail. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 3dfb2d131eb7..71bf895409a1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1751,7 +1751,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, ibmvnic_cleanup(netdev); - if (adapter->reset_reason != VNIC_RESET_MOBILITY && + if (reset_state == VNIC_OPEN && + adapter->reset_reason != VNIC_RESET_MOBILITY && adapter->reset_reason != VNIC_RESET_FAILOVER) { rc = __ibmvnic_close(netdev); if (rc) -- 2.20.1
Re: Bisected regression in v5.1 on PowerBook G3 (Wallstreet)
On Wed, 26 Jun 2019, Christophe Leroy wrote: > Hi Finn, > > On 06/26/2019 02:06 AM, Finn Thain wrote: > > Hi Christophe, > > > > I received a report of a regression between v5.0 and v5.1 which causes > > the current release to crash during boot with a machine check > > exception. Please see console log below. > > > > Stan (whom I've Cc'd) tells me that this happens on every attempt to > > boot. I asked him to try 'git bisect'. The results are given below. > > Can you see anything in commit 93c4a162b014 that might explain this? > > Might be a false positive. That commit has a problem, but that problem > is fixed by 4622a2d43101 ("powerpc/6xx: fix setup and use of > SPRN_SPRG_PGDIR for hash32") > > I would bet your problem is related to commit f7354ccac844 ("powerpc/32: > Remove CURRENT_THREAD_INFO and rename TI_CPU"). That problem is fixed by > commit 397d2300b08c ("powerpc/32s: fix flush_hash_pages() on SMP") > upstream, and in linux 5.1.4 by commit fda49aec2515 on > stable/linux-5.1.y > I see. I've just discovered that this issue has already been covered on this list. I should have done a bit more research. > Can you test ? > Stan did some more tests and confirmed that the problem has been fixed in 397d2300b08c and stable/linux-5.1.y. Thanks. -- > Thanks > Christophe >
Re: [PATCH 3/4] powerpc/powernv: remove unused NPU DMA code
On 26/06/2019 17:49, Christoph Hellwig wrote: > On Wed, Jun 26, 2019 at 10:44:38AM +1000, Alexey Kardashevskiy wrote: >> >> >> On 26/06/2019 00:52, Christoph Hellwig wrote: >>> None of these routines were ever used anywhere in the kernel tree >>> since they were added to the kernel. >> >> >> So none of my comments has been addressed. Nice. > > Which comment? Last time I asked you complaint "it is still used in > exactly the same way as before" which you later clarified that you > have a hidden out of tree user somewhere, and you only objected to It is not hidden, anyone can download and inspect that GPL driver. > the word "dead". That has been fixed and there were no further > comments. You still have it in the cover letter so at the very least 3/4 is not a part of this patchset then. And I still want to see a formal statement about out-of-tree drivers support/tolerance. If you manage to remove this code, I'll have to post a revert (again and again) but I would rather know the exact list of what we do and what we do not do about such drivers and if the list 1) exists 2) is reasonable then I could try to come up with a better solution or point others to the policy and push them to do the right thing. Right now it is just you pretending that the nVidia driver does not exist, and this is not helping. Thanks, -- Alexey
Re: [PATCH] powerpc/64s/radix: Define arch_ioremap_p4d_supported()
Hi Anshuman, On Wed, 26 Jun 2019 18:51:00 +0530 Anshuman Khandual wrote: > > Recent core ioremap changes require HAVE_ARCH_HUGE_VMAP subscribing archs > provide arch_ioremap_p4d_supported() failing which will result in a build > failure like the following. > > ld: lib/ioremap.o: in function `.ioremap_huge_init': > ioremap.c:(.init.text+0x3c): undefined reference to > `.arch_ioremap_p4d_supported' > > This defines a stub implementation for arch_ioremap_p4d_supported() keeping > it disabled for now to fix the build problem. > > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: "Aneesh Kumar K.V" > Cc: Nicholas Piggin > Cc: Andrew Morton > Cc: Stephen Rothwell > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ker...@vger.kernel.org > Cc: linux-n...@vger.kernel.org > > Signed-off-by: Anshuman Khandual > --- > This has been just build tested and fixes the problem reported earlier. > > arch/powerpc/mm/book3s64/radix_pgtable.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c > index 8904aa1..c81da88 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -1124,6 +1124,11 @@ void radix__ptep_modify_prot_commit(struct > vm_area_struct *vma, > set_pte_at(mm, addr, ptep, pte); > } > > +int __init arch_ioremap_p4d_supported(void) > +{ > + return 0; > +} > + > int __init arch_ioremap_pud_supported(void) > { > /* HPT does not cope with large pages in the vmalloc area */ > -- > 2.7.4 > I will add that as a merge resolution patch for the akpm-current tree merge today. -- Cheers, Stephen Rothwell pgp4F7Hznlsp1.pgp Description: OpenPGP digital signature
Re: [PATCH] powerpc/rtas: Fix hang in race against concurrent cpu offline
Hi Juliet, Juliet Kim writes: > On 6/25/19 12:29 PM, Nathan Lynch wrote: >> Juliet Kim writes: >>> >>> However, that fix failed to notify Hypervisor that the LPM attempted >>> had been abandoned which results in a system hang. > >> It is surprising to me that leaving a migration unterminated would cause >> Linux to hang. Can you explain more about how that happens? >> > PHYP will block further requests(next partition migration, dlpar etc) while > it's in suspending state. That would have a follow-on effect on the HMC and > potentially this and other partitions. I can believe that operations on _this LPAR_ would be blocked by the platform and/or management console while the migration remains unterminated, but the OS should not be able to perpetrate a denial of service on other partitions or the management console simply by botching the LPM protocol. If it can, that's not Linux's bug to fix. >>> Fix this by sending a signal PHYP to cancel the migration, so that PHYP >>> can stop waiting, and clean up the migration. >> >> This is well-spotted and rtas_ibm_suspend_me() needs to signal >> cancellation in several error paths. But I don't agree that this is one >> of them: this race is going to be a temporary condition in any >> production setting, and retrying would allow the migration to >> succeed. > > If LPM and CPU offine requests conflict with one another, it might be better > to let them fail and let the customer decide which he prefers. Hmm I don't think so. When (if ever) this happens in production it would be the result of an unlucky race with a power management daemon or similar, not a conscious decision of the administrator in the moment. > IBM i cancels migration if the other OS components/operations veto > migration. It’s consistent with other OS behavior for LPM. But this situation isn't really like that. If we were to have a real veto mechanism, it would only make sense to have it run as early as possible, before the platform has done a bunch of work. 
This benign, recoverable race is occurring right before we complete the migration, which at this point has been copying state to the destination for minutes or hours. It doesn't make sense to error out like this. As I mentioned earlier though, it does make sense to signal a cancellation for these less-recoverable error conditions in rtas_ibm_suspend_me(): - rtas_online_cpus_mask() failure - alloc_cpumask_var() failure - the atomic_read(&data.error) != 0 case after returning from the IPI
Re: [PATCH] powerpc/pseries: Fix maximum memory value
Aravinda Prasad writes: > Calculating the maximum memory based on the number of lmbs > and lmb size does not account for the RMA region. Hence > use drmem_lmb_memory_max(), which already accounts for the > RMA region, to fetch the maximum memory value. > > Fixes: 772b039fd9a7: ("powerpc/pseries: Export maximum memory value") > Signed-off-by: Aravinda Prasad > --- > arch/powerpc/platforms/pseries/lparcfg.c |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/powerpc/platforms/pseries/lparcfg.c > b/arch/powerpc/platforms/pseries/lparcfg.c > index e33e8bc..f425842 100644 > --- a/arch/powerpc/platforms/pseries/lparcfg.c > +++ b/arch/powerpc/platforms/pseries/lparcfg.c > @@ -435,7 +435,7 @@ static void maxmem_data(struct seq_file *m) > { > unsigned long maxmem = 0; > > - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; > + maxmem += drmem_lmb_memory_max(); Would memory_hotplug_max() be better here? There's no guarantee an LPAR will have the device tree node/properties that populate drmem.
Re: [PATCH] powerpc/rtas: Fix hang in race against concurrent cpu offline
On 6/25/19 12:29 PM, Nathan Lynch wrote: > Juliet Kim writes: >> The commit >> (“powerpc/rtas: Fix a potential race between CPU-Offline & Migration) >> attempted to fix a hang in Live Partition Mobility(LPM) by abandoning >> the LPM attempt if a race between LPM and concurrent CPU offline was >> detected. >> >> However, that fix failed to notify Hypervisor that the LPM attempted >> had been abandoned which results in a system hang. > It is surprising to me that leaving a migration unterminated would cause > Linux to hang. Can you explain more about how that happens? > PHYP will block further requests(next partition migration, dlpar etc) while it's in suspending state. That would have a follow-on effect on the HMC and potentially this and other partitions. >> Fix this by sending a signal PHYP to cancel the migration, so that PHYP >> can stop waiting, and clean up the migration. > This is well-spotted and rtas_ibm_suspend_me() needs to signal > cancellation in several error paths. But I don't agree that this is one > of them: this race is going to be a temporary condition in any > production setting, and retrying would allow the migration to succeed. If LPM and CPU offline requests conflict with one another, it might be better to let them fail and let the customer decide which he prefers. IBM i cancels migration if the other OS components/operations veto migration. It’s consistent with other OS behavior for LPM. I think all the IBM products should have a consistent customer experience. Even if the race can be temporary, it still could happen and can cause livelock.
Re: [PATCH] powerpc/rtas: retry when cpu offline races with suspend/migration
On 6/25/19 1:51 PM, Nathan Lynch wrote: > Juliet Kim writes: > >> There's some concern this could retry forever, resulting in live lock. > First of all the system will make progress in other areas even if there > are repeated retries; we're not indefinitely holding locks or anything > like that. For instance, system admin runs a script that picks and offlines CPUs in a loop to keep a certain rate of onlined CPUs for energy saving. If LPM keeps putting CPUs back online, that would never finish, and would keep generating new offline requests > Second, Linux checks the H_VASI_STATE result on every retry. If the > platform wants to terminate the migration (say, if it imposes a > timeout), Linux will abandon it when H_VASI_STATE fails to return > H_VASI_SUSPENDING. And it seems incorrect to bail out before that > happens, absent hard errors on the Linux side such as allocation > failures. I confirmed with the PHYP and HMC folks that they wouldn't time out the LPM request including H_VASI_STATE, so if the LPM retries were unlucky enough to encounter repeated CPU offline attempts (maybe some customer code retrying that), then the retries could continue indefinitely, or until some manual intervention. And in the meantime, the LPM delay here would cause PHYP to block other operations.
[PATCH] recordmcount: Fix spurious mcount entries on powerpc
The recent change enabling HAVE_C_RECORDMCOUNT on powerpc started showing the following issue: # modprobe kprobe_example ftrace-powerpc: Not expected bl: opcode is 3c4c0001 WARNING: CPU: 0 PID: 227 at kernel/trace/ftrace.c:2001 ftrace_bug+0x90/0x318 Modules linked in: CPU: 0 PID: 227 Comm: modprobe Not tainted 5.2.0-rc6-00678-g1c329100b942 #2 NIP: c0264318 LR: c025d694 CTR: c0f5cd30 REGS: c1f2b7b0 TRAP: 0700 Not tainted (5.2.0-rc6-00678-g1c329100b942) MSR: 90010282b033 CR: 28228222 XER: CFAR: c02642fc IRQMASK: 0 NIP [c0264318] ftrace_bug+0x90/0x318 LR [c025d694] ftrace_process_locs+0x4f4/0x5e0 Call Trace: [c1f2ba40] [0004] 0x4 (unreliable) [c1f2bad0] [c025d694] ftrace_process_locs+0x4f4/0x5e0 [c1f2bb90] [c020ff10] load_module+0x25b0/0x30c0 [c1f2bd00] [c0210cb0] sys_finit_module+0xc0/0x130 [c1f2be20] [c000bda4] system_call+0x5c/0x70 Instruction dump: 419e0018 2f83 419e00bc 2f83ffea 409e00cc 481c 0fe0 3c62ff96 3901 3940 386386d0 48c4 <0fe0> 3ce20003 3901 3c62ff96 ---[ end trace 4c438d5cebf78381 ]--- ftrace failed to modify [] 0xc008012a0008 actual: 01:00:4c:3c Initializing ftrace call sites ftrace record flags: 200 (0) expected tramp: c006af4c Looking at the relocation records in __mcount_loc showed a few spurious entries: RELOCATION RECORDS FOR [__mcount_loc]: OFFSET TYPE VALUE R_PPC64_ADDR64.text.unlikely+0x0008 0008 R_PPC64_ADDR64.text.unlikely+0x0014 0010 R_PPC64_ADDR64.text.unlikely+0x0060 0018 R_PPC64_ADDR64.text.unlikely+0x00b4 0020 R_PPC64_ADDR64.init.text+0x0008 0028 R_PPC64_ADDR64.init.text+0x0014 The first entry in each section is incorrect. Looking at the relocation records, the spurious entries correspond to the R_PPC64_ENTRY records: RELOCATION RECORDS FOR [.text.unlikely]: OFFSET TYPE VALUE R_PPC64_REL64 .TOC.-0x0008 0008 R_PPC64_ENTRY *ABS* 0014 R_PPC64_REL24 _mcount The problem is that we are not validating the return value from get_mcountsym() in sift_rel_mcount(). With this entry, mcountsym is 0, but Elf_r_sym(relp) also ends up being 0. 
Fix this by ensuring mcountsym is valid before processing the entry. Fixes: c7d64b560ce80 ("powerpc/ftrace: Enable C Version of recordmcount") Signed-off-by: Naveen N. Rao --- scripts/recordmcount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 13c5e6c8829c..47fca2c69a73 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -325,7 +325,8 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, if (!mcountsym) mcountsym = get_mcountsym(sym0, relp, str0); - if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { + if (mcountsym && mcountsym == Elf_r_sym(relp) && + !is_fake_mcount(relp)) { uint_t const addend = _w(_w(relp->r_offset) - recval + mcount_adjust); mrelp->r_offset = _w(offbase -- 2.22.0
Re: [PATCHv2 2/2] PCI: layerscape: EP and RC drivers are compiled separately
If you post another revision for any reason, please change the subject so it's worded as a command and mentions the new config options, e.g., PCI: layerscape: Add CONFIG_PCI_LAYERSCAPE_EP to build EP/RC separately On Wed, Jun 26, 2019 at 07:11:39PM +0800, Xiaowei Bao wrote: > Compile the EP and RC drivers separately with different configuration > options, this looks clearer. > > Signed-off-by: Xiaowei Bao > --- > v2: > - No change. > > drivers/pci/controller/dwc/Kconfig | 20 ++-- > drivers/pci/controller/dwc/Makefile |3 ++- > 2 files changed, 20 insertions(+), 3 deletions(-) > > diff --git a/drivers/pci/controller/dwc/Kconfig > b/drivers/pci/controller/dwc/Kconfig > index a6ce1ee..a41ccf5 100644 > --- a/drivers/pci/controller/dwc/Kconfig > +++ b/drivers/pci/controller/dwc/Kconfig > @@ -131,13 +131,29 @@ config PCI_KEYSTONE_EP > DesignWare core functions to implement the driver. > > config PCI_LAYERSCAPE > - bool "Freescale Layerscape PCIe controller" > + bool "Freescale Layerscape PCIe controller - Host mode" > depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) > depends on PCI_MSI_IRQ_DOMAIN > select MFD_SYSCON > select PCIE_DW_HOST > help > - Say Y here if you want PCIe controller support on Layerscape SoCs. > + Say Y here if you want to enable PCIe controller support on Layerscape > + SoCs to work in Host mode. > + This controller can work either as EP or RC. The RCW[HOST_AGT_PEX] > + determines which PCIe controller works in EP mode and which PCIe > + controller works in RC mode. > + > +config PCI_LAYERSCAPE_EP > + bool "Freescale Layerscape PCIe controller - Endpoint mode" > + depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) > + depends on PCI_ENDPOINT > + select PCIE_DW_EP > + help > + Say Y here if you want to enable PCIe controller support on Layerscape > + SoCs to work in Endpoint mode. > + This controller can work either as EP or RC. 
The RCW[HOST_AGT_PEX] > + determines which PCIe controller works in EP mode and which PCIe > + controller works in RC mode. > > config PCI_HISI > depends on OF && (ARM64 || COMPILE_TEST) > diff --git a/drivers/pci/controller/dwc/Makefile > b/drivers/pci/controller/dwc/Makefile > index b085dfd..824fde7 100644 > --- a/drivers/pci/controller/dwc/Makefile > +++ b/drivers/pci/controller/dwc/Makefile > @@ -8,7 +8,8 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o > obj-$(CONFIG_PCI_IMX6) += pci-imx6.o > obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o > obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o > -obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o pci-layerscape-ep.o > +obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o > +obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o > obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o > obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o > obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o > -- > 1.7.1 >
Re: [PATCH v3 3/3] powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails
Vaibhav Jain writes: > In some cases initial bind of scm memory for an lpar can fail if > previously it wasn't released using a scm-unbind hcall. This situation > can arise due to panic of the previous kernel or forced lpar > fadump. In such cases the H_SCM_BIND_MEM return a H_OVERLAP error. > > To mitigate such cases the patch updates papr_scm_probe() to force a > call to drc_pmem_unbind() in case the initial bind of scm memory fails > with EBUSY error. In case scm-bind operation again fails after the > forced scm-unbind then we follow the existing error path. We also > update drc_pmem_bind() to handle the H_OVERLAP error returned by phyp > and indicate it as a EBUSY error back to the caller. > > Suggested-by: "Oliver O'Halloran" > Signed-off-by: Vaibhav Jain > Reviewed-by: Oliver O'Halloran > --- > Change-log: > v3: > * Minor update to a code comment. [Oliver] > > v2: > * Moved the retry code from drc_pmem_bind() to papr_scm_probe() > [Oliver] > * Changed the type of variable 'rc' in drc_pmem_bind() to > int64_t. 
[Oliver] > --- > arch/powerpc/platforms/pseries/papr_scm.c | 15 ++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c > b/arch/powerpc/platforms/pseries/papr_scm.c > index c01a03fd3ee7..7c5e10c063a0 100644 > --- a/arch/powerpc/platforms/pseries/papr_scm.c > +++ b/arch/powerpc/platforms/pseries/papr_scm.c > @@ -43,8 +43,9 @@ struct papr_scm_priv { > static int drc_pmem_bind(struct papr_scm_priv *p) > { > unsigned long ret[PLPAR_HCALL_BUFSIZE]; > - uint64_t rc, token; > uint64_t saved = 0; > + uint64_t token; > + int64_t rc; > > /* >* When the hypervisor cannot map all the requested memory in a single > @@ -64,6 +65,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p) > } while (rc == H_BUSY); > > if (rc) { > + /* H_OVERLAP needs a separate error path */ > + if (rc == H_OVERLAP) > + return -EBUSY; > + > dev_err(&p->pdev->dev, "bind err: %lld\n", rc); > return -ENXIO; > } > @@ -331,6 +336,14 @@ static int papr_scm_probe(struct platform_device *pdev) > > /* request the hypervisor to bind this region to somewhere in memory */ > rc = drc_pmem_bind(p); > + > + /* If phyp says drc memory still bound then force unbound and retry */ > + if (rc == -EBUSY) { > + dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); > + drc_pmem_unbind(p); This should only be caused by kexec right? And considering kernel nor hypervisor won't change device binding details, can you check switching this to H_SCM_QUERY_BLOCK_MEM_BINDING? Will that result in faster boot? > + rc = drc_pmem_bind(p); > + } > + > if (rc) > goto err; > I am also not sure about the module reference count here. Should we increment the module reference count after a bind so that we can track failures in ubind and fail the module unload? -aneesh
Re: [PATCH v3 1/3] powerpc/pseries: Update SCM hcall op-codes in hvcall.h
Vaibhav Jain writes: > Update the hvcalls.h to include op-codes for new hcalls introduce to > manage SCM memory. Also update existing hcall definitions to reflect > current papr specification for SCM. > > Signed-off-by: Vaibhav Jain Why split this as a separate patch? You should fold this to the next one where we actually use the constant. > --- > Change-log: > > v3: > * Added updated opcode for H_SCM_HEALTH [Oliver] > > v2: > * None new patch in this series. > --- > arch/powerpc/include/asm/hvcall.h | 11 --- > 1 file changed, 8 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/hvcall.h > b/arch/powerpc/include/asm/hvcall.h > index 463c63a9fcf1..2023e327 100644 > --- a/arch/powerpc/include/asm/hvcall.h > +++ b/arch/powerpc/include/asm/hvcall.h > @@ -302,9 +302,14 @@ > #define H_SCM_UNBIND_MEM0x3F0 > #define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4 > #define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8 > -#define H_SCM_MEM_QUERY 0x3FC > -#define H_SCM_BLOCK_CLEAR 0x400 > -#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR > +#define H_SCM_UNBIND_ALL0x3FC > +#define H_SCM_HEALTH0x400 > +#define H_SCM_PERFORMANCE_STATS 0x418 > +#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS > + > +/* Scope args for H_SCM_UNBIND_ALL */ > +#define H_UNBIND_SCOPE_ALL (0x1) > +#define H_UNBIND_SCOPE_DRC (0x2) > > /* H_VIOCTL functions */ > #define H_GET_VIOA_DUMP_SIZE 0x01 > -- > 2.21.0
Re: [PATCH RFC] generic ELF support for kexec
Hi Sven, On 06/25/2019 06:54 PM, Sven Schnelle wrote: Hi List, i recently started working on kexec for PA-RISC. While doing so, i figured that powerpc already has support for reading ELF images inside of the Kernel. My first attempt was to steal the source code and modify it for PA-RISC, but it turned out that i didn't have to change much. Only ARM specific stuff like fdt blob fetching had to be removed. So instead of duplicating the code, i thought about moving the ELF stuff to the core kexec code, and exposing several functions to use that code from the arch specific code. I'm attaching the patch to this Mail. What do you think about that change? s390 also uses ELF files, and (maybe?) could also switch to this implementation. But i don't know anything about S/390 and don't have one in my basement. So i'll leave s390 to the IBM folks. I haven't really tested PowerPC yet. Can anyone give me a helping hand what would be a good target to test this code in QEMU? Or even better, test this code on real Hardware? Where did you start from ? Your patch doesn't apply on latest powerpc/merge branch (https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git): [root@localhost linux-powerpc]# git am -3 /root/Downloads/RFC-generic-ELF-support-for-kexec.patch Applying: generic ELF support for kexec Using index info to reconstruct a base tree... M arch/powerpc/kernel/kexec_elf_64.c M kernel/Makefile Falling back to patching base and 3-way merge... Auto-merging kernel/Makefile Auto-merging arch/powerpc/kernel/kexec_elf_64.c CONFLICT (content): Merge conflict in arch/powerpc/kernel/kexec_elf_64.c error: Failed to merge in the changes. Patch failed at 0001 generic ELF support for kexec Neither does it apply on 5.2-rc6 Looks like it cleanly applies on 5.1 Could you generate your patch using 'git format-patch -M -C ' ? 
It would be a lot easier to see the real changes: arch/Kconfig | 3 + arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/kexec_elf_64.c | 547 + include/linux/kexec.h | 35 ++ kernel/Makefile| 1 + .../kexec_elf_64.c => kernel/kexec_file_elf.c | 192 ++-- 6 files changed, 96 insertions(+), 683 deletions(-) copy arch/powerpc/kernel/kexec_elf_64.c => kernel/kexec_file_elf.c (77%) Thanks Christophe If that change is acceptable i would finish the patch and submit it. I think best would be to push this change through Helge's parisc tree, so we don't have any dependencies to sort out. Regards, Sven [PATCH] kexec: add generic support for elf kernel images Signed-off-by: Sven Schnelle --- arch/Kconfig | 3 + arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/kexec_elf_64.c | 547 +-- include/linux/kexec.h | 35 ++ kernel/Makefile| 1 + kernel/kexec_file_elf.c| 574 + 6 files changed, 619 insertions(+), 542 deletions(-) create mode 100644 kernel/kexec_file_elf.c diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..de7520100136 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -18,6 +18,9 @@ config KEXEC_CORE select CRASH_CORE bool +config KEXEC_FILE_ELF + bool + config HAVE_IMA_KEXEC bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8c1c636308c8..48241260b6ae 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -502,6 +502,7 @@ config KEXEC_FILE select KEXEC_CORE select HAVE_IMA_KEXEC select BUILD_BIN2C + select KEXEC_FILE_ELF depends on PPC64 depends on CRYPTO=y depends on CRYPTO_SHA256=y diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index ba4f18a43ee8..0059e36913e9 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -21,8 +21,6 @@ * GNU General Public License for more details. 
*/ -#define pr_fmt(fmt) "kexec_elf: " fmt - #include #include #include @@ -31,540 +29,6 @@ #include #include -#define PURGATORY_STACK_SIZE (16 * 1024) - -#define elf_addr_to_cpuelf64_to_cpu - -#ifndef Elf_Rel -#define Elf_RelElf64_Rel -#endif /* Elf_Rel */ - -struct elf_info { - /* -* Where the ELF binary contents are kept. -* Memory managed by the user of the struct. -*/ - const char *buffer; - - const struct elfhdr *ehdr; - const struct elf_phdr *proghdrs; - struct elf_shdr *sechdrs; -}; - -static inline bool elf_is_elf_file(const struct elfhdr *ehdr) -{ - return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; -} - -static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) -{ - if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - va
Re: DMA coherency in drivers/tty/serial/mpsc.c
On Wed, Jun 26, 2019 at 08:48:37AM +0200, Christoph Hellwig wrote: > On Tue, Jun 25, 2019 at 09:37:22AM -0700, Mark Greer wrote: > > Yeah, the mpsc driver had lots of ugly cache related hacks because of > > cache coherency bugs in the early version of the MV64x60 bridge chips > > that it was embedded in. That chip is pretty much dead now and I've > > removed core support for it from the powerpc tree. Removing the mpsc > > driver is on my todo list but I've been busy and lazy. So, to sum it > > up, don't spend any more time worrying about it as it should be removed. > > > > I'll post a patch to do that tonight and I'm sorry for any time you've > > spent looking at it so far. > > No problem. And if future such broken chips show up we now have > support for per-device DMA coherency settings and could actually > handle it in a reasonably clean way. Ah, good to know - thanks. BTW, I just submitted a patch to remove the driver. Mark --
Re: [PATCH v3 2/3] powerpc/papr_scm: Update drc_pmem_unbind() to use H_SCM_UNBIND_ALL
On 6/26/19 7:34 PM, Vaibhav Jain wrote: The new hcall named H_SCM_UNBIND_ALL has been introduce that can unbind all or specific scm memory assigned to an lpar. This is more efficient than using H_SCM_UNBIND_MEM as currently we don't support partial unbind of scm memory. Hence this patch proposes following changes to drc_pmem_unbind(): * Update drc_pmem_unbind() to replace hcall H_SCM_UNBIND_MEM to H_SCM_UNBIND_ALL. * Update drc_pmem_unbind() to handles cases when PHYP asks the guest kernel to wait for specific amount of time before retrying the hcall via the 'LONG_BUSY' return value. * Ensure appropriate error code is returned back from the function in case of an error. Reviewed-by: Oliver O'Halloran Signed-off-by: Vaibhav Jain --- Change-log: v3: * Fixed a build warning reported by kbuild-robot. * Updated patch description to put emphasis on 'scm memory' instead of 'scm drc memory blocks' as for phyp there is a stark difference between how drc are managed for scm memory v/s regular memory. [Oliver] v2: * Added a dev_dbg when unbind operation succeeds [Oliver] * Changed type of variable 'rc' to int64_t [Oliver] * Removed the code that was logging a warning in case bind operation takes >1-seconds [Oliver] * Spinned off changes to hvcall.h as a separate patch. 
[Oliver] --- arch/powerpc/platforms/pseries/papr_scm.c | 29 +-- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 96c53b23e58f..c01a03fd3ee7 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -77,22 +78,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p) static int drc_pmem_unbind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; + uint64_t token = 0; + int64_t rc; - token = 0; + dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); - /* NB: unbind has the same retry requirements mentioned above */ + /* NB: unbind has the same retry requirements as drc_pmem_bind() */ do { - rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, - p->bound_addr, p->blocks, token); + + /* Unbind of all SCM resources associated with drcIndex */ + rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, +p->drc_index, token); token = ret[0]; - cond_resched(); + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); if (rc) dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + else + dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", + p->drc_index); Can we add p->drc_index as part of these messages? Also s/%x/0x%x ? - return !!rc; + return rc == H_SUCCESS ? 0 : -ENXIO; } The error -ENXIO is confusing. Can we keep the HCALL error as return for this? We don't check error from this. If we can't take any action based on the return. Then may be make it void? static int papr_scm_meta_get(struct papr_scm_priv *p, -aneesh
[PATCH v3 3/3] powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails
In some cases initial bind of scm memory for an lpar can fail if previously it wasn't released using a scm-unbind hcall. This situation can arise due to panic of the previous kernel or forced lpar fadump. In such cases the H_SCM_BIND_MEM return a H_OVERLAP error. To mitigate such cases the patch updates papr_scm_probe() to force a call to drc_pmem_unbind() in case the initial bind of scm memory fails with EBUSY error. In case scm-bind operation again fails after the forced scm-unbind then we follow the existing error path. We also update drc_pmem_bind() to handle the H_OVERLAP error returned by phyp and indicate it as a EBUSY error back to the caller. Suggested-by: "Oliver O'Halloran" Signed-off-by: Vaibhav Jain Reviewed-by: Oliver O'Halloran --- Change-log: v3: * Minor update to a code comment. [Oliver] v2: * Moved the retry code from drc_pmem_bind() to papr_scm_probe() [Oliver] * Changed the type of variable 'rc' in drc_pmem_bind() to int64_t. [Oliver] --- arch/powerpc/platforms/pseries/papr_scm.c | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c01a03fd3ee7..7c5e10c063a0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -43,8 +43,9 @@ struct papr_scm_priv { static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; uint64_t saved = 0; + uint64_t token; + int64_t rc; /* * When the hypervisor cannot map all the requested memory in a single @@ -64,6 +65,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p) } while (rc == H_BUSY); if (rc) { + /* H_OVERLAP needs a separate error path */ + if (rc == H_OVERLAP) + return -EBUSY; + dev_err(&p->pdev->dev, "bind err: %lld\n", rc); return -ENXIO; } @@ -331,6 +336,14 @@ static int papr_scm_probe(struct platform_device *pdev) /* request the hypervisor to bind this region to somewhere in memory */ rc = 
drc_pmem_bind(p); + + /* If phyp says drc memory still bound then force unbound and retry */ + if (rc == -EBUSY) { + dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); + drc_pmem_unbind(p); + rc = drc_pmem_bind(p); + } + if (rc) goto err; -- 2.21.0
[PATCH v3 1/3] powerpc/pseries: Update SCM hcall op-codes in hvcall.h
Update the hvcalls.h to include op-codes for new hcalls introduce to manage SCM memory. Also update existing hcall definitions to reflect current papr specification for SCM. Signed-off-by: Vaibhav Jain --- Change-log: v3: * Added updated opcode for H_SCM_HEALTH [Oliver] v2: * None new patch in this series. --- arch/powerpc/include/asm/hvcall.h | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 463c63a9fcf1..2023e327 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -302,9 +302,14 @@ #define H_SCM_UNBIND_MEM0x3F0 #define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4 #define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8 -#define H_SCM_MEM_QUERY0x3FC -#define H_SCM_BLOCK_CLEAR 0x400 -#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR +#define H_SCM_UNBIND_ALL0x3FC +#define H_SCM_HEALTH0x400 +#define H_SCM_PERFORMANCE_STATS 0x418 +#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS + +/* Scope args for H_SCM_UNBIND_ALL */ +#define H_UNBIND_SCOPE_ALL (0x1) +#define H_UNBIND_SCOPE_DRC (0x2) /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 -- 2.21.0
[PATCH v3 2/3] powerpc/papr_scm: Update drc_pmem_unbind() to use H_SCM_UNBIND_ALL
The new hcall named H_SCM_UNBIND_ALL has been introduced that can unbind all or specific scm memory assigned to an lpar. This is more efficient than using H_SCM_UNBIND_MEM as currently we don't support partial unbind of scm memory. Hence this patch proposes following changes to drc_pmem_unbind(): * Update drc_pmem_unbind() to replace hcall H_SCM_UNBIND_MEM to H_SCM_UNBIND_ALL. * Update drc_pmem_unbind() to handle cases when PHYP asks the guest kernel to wait for specific amount of time before retrying the hcall via the 'LONG_BUSY' return value. * Ensure appropriate error code is returned back from the function in case of an error. Reviewed-by: Oliver O'Halloran Signed-off-by: Vaibhav Jain --- Change-log: v3: * Fixed a build warning reported by kbuild-robot. * Updated patch description to put emphasis on 'scm memory' instead of 'scm drc memory blocks' as for phyp there is a stark difference between how drc are managed for scm memory v/s regular memory. [Oliver] v2: * Added a dev_dbg when unbind operation succeeds [Oliver] * Changed type of variable 'rc' to int64_t [Oliver] * Removed the code that was logging a warning in case bind operation takes >1-seconds [Oliver] * Spinned off changes to hvcall.h as a separate patch. 
[Oliver] --- arch/powerpc/platforms/pseries/papr_scm.c | 29 +-- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 96c53b23e58f..c01a03fd3ee7 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -77,22 +78,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p) static int drc_pmem_unbind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; + uint64_t token = 0; + int64_t rc; - token = 0; + dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); - /* NB: unbind has the same retry requirements mentioned above */ + /* NB: unbind has the same retry requirements as drc_pmem_bind() */ do { - rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, - p->bound_addr, p->blocks, token); + + /* Unbind of all SCM resources associated with drcIndex */ + rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, +p->drc_index, token); token = ret[0]; - cond_resched(); + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); if (rc) dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + else + dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", + p->drc_index); - return !!rc; + return rc == H_SUCCESS ? 0 : -ENXIO; } static int papr_scm_meta_get(struct papr_scm_priv *p, -- 2.21.0
[PATCH v3 0/3] powerpc/papr_scm: Workaround for failure of drc bind after kexec
Presently an error is returned in response to hcall H_SCM_BIND_MEM when a new kernel boots on lpar via kexec. This prevents papr_scm from registering drc memory regions with nvdimm. The error reported is of the form below: "papr_scm ibm,persistent-memory:ibm,pmemory@4412: bind err: -68" On investigation it was revealed that phyp returns this error as previous kernel did not completely release bindings for drc scm-memory blocks and hence phyp rejected request for re-binding these block to lpar with error H_OVERLAP. Also support for a new H_SCM_UNBIND_ALL is recently added which is better suited for releasing all the bound scm-memory block from an lpar. So leveraging new hcall H_SCM_UNBIND_ALL, we can workaround H_OVERLAP issue during kexec by forcing an unbind of all drm scm-memory blocks and issuing H_SCM_BIND_MEM to re-bind the drc scm-memory blocks to lpar. This sequence will also be needed when a new kernel boot on lpar after previous kernel panicked and it never got an opportunity to call H_SCM_UNBIND_MEM/ALL. Hence this patch-set implements following changes to papr_scm module: * Update hvcall.h to include opcodes for new hcall H_SCM_UNBIND_ALL. * Update it to use H_SCM_UNBIND_ALL instead of H_SCM_UNBIND_MEM * In case hcall H_SCM_BIND_MEM fails with error H_OVERLAP, force H_SCM_UNBIND_ALL and retry the bind operation again. With the patch-set applied re-bind of drc scm-memory to lpar succeeds after a kexec to new kernel as illustrated below: # Old kernel $ sudo ndctl list -R [ { "dev":"region0", } ] # kexec to new kernel $ sudo kexec --initrd=... vmlinux ... ... I'm in purgatory ... papr_scm ibm,persistent-memory:ibm,pmemory@4412: Un-binding and retrying ... # New kernel $ sudo ndctl list -R [ { "dev":"region0", } ] --- Change-log: v3: * Fixed a build warning reported by kbuild test robot. * Updated the hcall opcode from latest papr-scm specification. * Fixed a minor code comment & patch description as pointed out by Oliver. 
v2: * Addressed review comments from Oliver on v1 patchset. Vaibhav Jain (3): powerpc/pseries: Update SCM hcall op-codes in hvcall.h powerpc/papr_scm: Update drc_pmem_unbind() to use H_SCM_UNBIND_ALL powerpc/papr_scm: Force a scm-unbind if initial scm-bind fails arch/powerpc/include/asm/hvcall.h | 11 -- arch/powerpc/platforms/pseries/papr_scm.c | 44 ++- 2 files changed, 44 insertions(+), 11 deletions(-) -- 2.21.0
Re: [PATCH 1/2] x86, numa: always initialize all possible nodes
On Thu 02-05-19 09:00:31, Michal Hocko wrote: > On Wed 01-05-19 15:12:32, Barret Rhoden wrote: > [...] > > A more elegant solution may be to avoid registering with sysfs during early > > boot, or something else entirely. But I figured I'd ask for help at this > > point. =) > > Thanks for the report and an excellent analysis! This is really helpful. > I will think about this some more but I am traveling this week. It seems > really awkward to register a sysfs file for an empty range. That looks > like a bug to me. I am sorry, but I didn't get to this for a long time and I am still busy. The patch has been dropped from the mm tree (thus linux-next). I hope I can revisit this or somebody else will take over and finish this work. This is much more trickier than I anticipated unfortunately. -- Michal Hocko SUSE Labs
[PATCH] powerpc/64s/radix: Define arch_ioremap_p4d_supported()
Recent core ioremap changes require HAVE_ARCH_HUGE_VMAP subscribing archs provide arch_ioremap_p4d_supported() failing which will result in a build failure like the following. ld: lib/ioremap.o: in function `.ioremap_huge_init': ioremap.c:(.init.text+0x3c): undefined reference to `.arch_ioremap_p4d_supported' This defines a stub implementation for arch_ioremap_p4d_supported() keeping it disabled for now to fix the build problem. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Aneesh Kumar K.V" Cc: Nicholas Piggin Cc: Andrew Morton Cc: Stephen Rothwell Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org Cc: linux-n...@vger.kernel.org Signed-off-by: Anshuman Khandual --- This has been just build tested and fixes the problem reported earlier. arch/powerpc/mm/book3s64/radix_pgtable.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 8904aa1..c81da88 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1124,6 +1124,11 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { /* HPT does not cope with large pages in the vmalloc area */ -- 2.7.4
[PATCHv2 2/2] PCI: layerscape: EP and RC drivers are compiled separately
Compile the EP and RC drivers separately with different configuration options, this looks clearer. Signed-off-by: Xiaowei Bao --- v2: - No change. drivers/pci/controller/dwc/Kconfig | 20 ++-- drivers/pci/controller/dwc/Makefile |3 ++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index a6ce1ee..a41ccf5 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -131,13 +131,29 @@ config PCI_KEYSTONE_EP DesignWare core functions to implement the driver. config PCI_LAYERSCAPE - bool "Freescale Layerscape PCIe controller" + bool "Freescale Layerscape PCIe controller - Host mode" depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) depends on PCI_MSI_IRQ_DOMAIN select MFD_SYSCON select PCIE_DW_HOST help - Say Y here if you want PCIe controller support on Layerscape SoCs. + Say Y here if you want to enable PCIe controller support on Layerscape + SoCs to work in Host mode. + This controller can work either as EP or RC. The RCW[HOST_AGT_PEX] + determines which PCIe controller works in EP mode and which PCIe + controller works in RC mode. + +config PCI_LAYERSCAPE_EP + bool "Freescale Layerscape PCIe controller - Endpoint mode" + depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) + depends on PCI_ENDPOINT + select PCIE_DW_EP + help + Say Y here if you want to enable PCIe controller support on Layerscape + SoCs to work in Endpoint mode. + This controller can work either as EP or RC. The RCW[HOST_AGT_PEX] + determines which PCIe controller works in EP mode and which PCIe + controller works in RC mode. 
config PCI_HISI depends on OF && (ARM64 || COMPILE_TEST) diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile index b085dfd..824fde7 100644 --- a/drivers/pci/controller/dwc/Makefile +++ b/drivers/pci/controller/dwc/Makefile @@ -8,7 +8,8 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o obj-$(CONFIG_PCI_IMX6) += pci-imx6.o obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o -obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o pci-layerscape-ep.o +obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o +obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o -- 1.7.1
[PATCHv2 1/2] PCI: layerscape: Add the bar_fixed_64bit property in EP driver.
The PCIe controller of layerscape just have 4 BARs, BAR0 and BAR1 is 32bit, BAR3 and BAR4 is 64bit, this is determined by hardware, so set the bar_fixed_64bit with 0x14. Signed-off-by: Xiaowei Bao --- v2: - Replace value 0x14 with a macro. drivers/pci/controller/dwc/pci-layerscape-ep.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index be61d96..227c33b 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -44,6 +44,7 @@ static int ls_pcie_establish_link(struct dw_pcie *pci) .linkup_notifier = false, .msi_capable = true, .msix_capable = false, + .bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4), }; static const struct pci_epc_features* -- 1.7.1
Re: [PATCH 7/7] powerpc/kprobes: Allow probing on any ftrace address
Masami Hiramatsu wrote: On Tue, 18 Jun 2019 20:17:06 +0530 "Naveen N. Rao" wrote: With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions that branch to _mcount (referred to as ftrace location). With -mprofile-kernel, we now include the preceding 'mflr r0' as being part of the ftrace location. However, by default, probing on an instruction that is not actually the branch to _mcount() is prohibited, as that is considered to not be at an instruction boundary. This is not the case on powerpc, so allow the same by overriding arch_check_ftrace_location() In addition, we update kprobe_ftrace_handler() to detect this scenarios and to pass the proper nip to the pre and post probe handlers. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/kprobes-ftrace.c | 30 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 972cb28174b2..6a0bd3c16cb6 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -12,14 +12,34 @@ #include #include +/* + * With -mprofile-kernel, we patch two instructions -- the branch to _mcount + * as well as the preceding 'mflr r0'. Both these instructions are claimed + * by ftrace and we should allow probing on either instruction. + */ +int arch_check_ftrace_location(struct kprobe *p) +{ + if (ftrace_location((unsigned long)p->addr)) + p->flags |= KPROBE_FLAG_FTRACE; + return 0; +} + /* Ftrace callback handler for kprobes */ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; + int mflr_kprobe = 0; struct kprobe_ctlblk *kcb; p = get_kprobe((kprobe_opcode_t *)nip); + if (unlikely(!p)) { Hmm, is this really unlikely? If we put a kprobe on the second instruction address, we will see p == NULL always. 
+ p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE)); + if (!p) Here will be unlikely, because we can not find kprobe at both of nip and nip - MCOUNT_INSN_SIZE. + return; + mflr_kprobe = 1; + } + if (unlikely(!p) || kprobe_disabled(p)) "unlikely(!p)" is not needed here. ... Joe Perches wrote: On Fri, 2019-06-21 at 23:50 +0900, Masami Hiramatsu wrote: On Tue, 18 Jun 2019 20:17:06 +0530 "Naveen N. Rao" wrote: trivia: > diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c [] > @@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler); > > int arch_prepare_kprobe_ftrace(struct kprobe *p) > { > + if ((unsigned long)p->addr & 0x03) { > + printk("Attempt to register kprobe at an unaligned address\n"); Please use the appropriate KERN_ or pr_ All good points. Thanks for the review. - Naveen
[PATCH] powerpc/pseries: Fix maximum memory value
Calculating the maximum memory based on the number of lmbs and lmb size does not account for the RMA region. Hence use drmem_lmb_memory_max(), which already accounts for the RMA region, to fetch the maximum memory value. Fixes: 772b039fd9a7: ("powerpc/pseries: Export maximum memory value") Signed-off-by: Aravinda Prasad --- arch/powerpc/platforms/pseries/lparcfg.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc..f425842 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -435,7 +435,7 @@ static void maxmem_data(struct seq_file *m) { unsigned long maxmem = 0; - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += drmem_lmb_memory_max(); maxmem += hugetlb_total_pages() * PAGE_SIZE; seq_printf(m, "MaxMem=%ld\n", maxmem);
Re: [PATCH v2 1/1] cpuidle-powernv : forced wakeup for stop states
Hi Nick, On 06/19/2019 03:39 PM, Nicholas Piggin wrote: Abhishek's on June 19, 2019 7:08 pm: Hi Nick, Thanks for the review. Some replies below. On 06/19/2019 09:53 AM, Nicholas Piggin wrote: Abhishek Goel's on June 17, 2019 7:56 pm: Currently, the cpuidle governors determine what idle state a idling CPU should enter into based on heuristics that depend on the idle history on that CPU. Given that no predictive heuristic is perfect, there are cases where the governor predicts a shallow idle state, hoping that the CPU will be busy soon. However, if no new workload is scheduled on that CPU in the near future, the CPU may end up in the shallow state. This is problematic, when the predicted state in the aforementioned scenario is a shallow stop state on a tickless system. As we might get stuck into shallow states for hours, in absence of ticks or interrupts. To address this, We forcefully wakeup the cpu by setting the decrementer. The decrementer is set to a value that corresponds with the residency of the next available state. Thus firing up a timer that will forcefully wakeup the cpu. Few such iterations will essentially train the governor to select a deeper state for that cpu, as the timer here corresponds to the next available cpuidle state residency. Thus, cpu will eventually end up in the deepest possible state. Signed-off-by: Abhishek Goel --- Auto-promotion v1 : started as auto promotion logic for cpuidle states in generic driver v2 : Removed timeout_needed and rebased the code to upstream kernel Forced-wakeup v1 : New patch with name of forced wakeup started v2 : Extending the forced wakeup logic for all states. Setting the decrementer instead of queuing up a hrtimer to implement the logic. 
drivers/cpuidle/cpuidle-powernv.c | 38 +++ 1 file changed, 38 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 84b1ebe212b3..bc9ca18ae7e3 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -46,6 +46,26 @@ static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly static u64 default_snooze_timeout __read_mostly; static bool snooze_timeout_en __read_mostly; +static u64 forced_wakeup_timeout(struct cpuidle_device *dev, +struct cpuidle_driver *drv, +int index) +{ + int i; + + for (i = index + 1; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + struct cpuidle_state_usage *su = &dev->states_usage[i]; + + if (s->disabled || su->disable) + continue; + + return (s->target_residency + 2 * s->exit_latency) * + tb_ticks_per_usec; + } + + return 0; +} It would be nice to not have this kind of loop iteration in the idle fast path. Can we add a flag or something to the idle state? Currently, we do not have any callback notification or some feedback that notifies the driver everytime some state is enabled/disabled. So we have to parse everytime to get the next enabled state. Ahh, that's why you're doing that. Are you suggesting to add something like next_enabled_state in cpuidle state structure itself which will be updated when a state is enabled or disabled? Hmm, I guess it normally should not iterate over more than one state unless some idle states are disabled. What would have been nice is each state just have its own timeout field with ticks already calculated, if that could be updated when a state is enabled or disabled. How hard is that to add to the cpuidle core? I have implemented a prototype which does what you have asked for. Added a disable_callback which will update timeout whenever a state is enabled or disabled. But It would mean adding some code to cpuidle.h and cpuidle/sysfs.c. 
If that is not an issue, should I go ahead and post it? + static u64 get_snooze_timeout(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -144,8 +164,26 @@ static int stop_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + u64 dec_expiry_tb, dec, timeout_tb, forced_wakeup; + + dec = mfspr(SPRN_DEC); + timeout_tb = forced_wakeup_timeout(dev, drv, index); + forced_wakeup = 0; + + if (timeout_tb && timeout_tb < dec) { + forced_wakeup = 1; + dec_expiry_tb = mftb() + dec; + } The compiler probably can't optimise away the SPR manipulations so try to avoid them if possible. Are you suggesting something like set_dec_before_idle?(in line with what you have suggested to do after idle, reset_dec_after_idle) I should have been clear, I meant don't mfspr(SPRN_DEC) until you have tested timeout_tb. + + if (forced_wakeup)
Re: [PATCH 3/4] powerpc/powernv: remove unused NPU DMA code
On Wed, Jun 26, 2019 at 10:44:38AM +1000, Alexey Kardashevskiy wrote: > > > On 26/06/2019 00:52, Christoph Hellwig wrote: > > None of these routines were ever used anywhere in the kernel tree > > since they were added to the kernel. > > > So none of my comments have been addressed. Nice. Which comment? Last time I asked you complained "it is still used in exactly the same way as before" which you later clarified that you have a hidden out of tree user somewhere, and you only objected to the word "dead". That has been fixed and there were no further comments.
Re: [RFC PATCH 03/12] powerpc/prom_init: Add the ESM call to prom_init
On 21/05/2019 14:49, Thiago Jung Bauermann wrote: > From: Ram Pai > > Make the Enter-Secure-Mode (ESM) ultravisor call to switch the VM to secure > mode. Add "svm=" command line option to turn off switching to secure mode. > Introduce CONFIG_PPC_SVM to control support for secure guests. > > Signed-off-by: Ram Pai > [ Generate an RTAS os-term hcall when the ESM ucall fails. ] > Signed-off-by: Michael Anderson > [ Cleaned up the code a bit. ] > Signed-off-by: Thiago Jung Bauermann > --- > .../admin-guide/kernel-parameters.txt | 5 + > arch/powerpc/include/asm/ultravisor-api.h | 1 + > arch/powerpc/kernel/prom_init.c | 124 ++ > 3 files changed, 130 insertions(+) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > b/Documentation/admin-guide/kernel-parameters.txt > index c45a19d654f3..7237d86b25c6 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -4501,6 +4501,11 @@ > /sys/power/pm_test). Only available when CONFIG_PM_DEBUG > is set. Default value is 5. > > + svm=[PPC] > + Format: { on | off | y | n | 1 | 0 } > + This parameter controls use of the Protected > + Execution Facility on pSeries. 
> + > swapaccount=[0|1] > [KNL] Enable accounting of swap in memory resource > controller if no parameter or 1 is given or disable > diff --git a/arch/powerpc/include/asm/ultravisor-api.h > b/arch/powerpc/include/asm/ultravisor-api.h > index 15e6ce77a131..0e8b72081718 100644 > --- a/arch/powerpc/include/asm/ultravisor-api.h > +++ b/arch/powerpc/include/asm/ultravisor-api.h > @@ -19,6 +19,7 @@ > > /* opcodes */ > #define UV_WRITE_PATE0xF104 > +#define UV_ESM 0xF110 > #define UV_RETURN0xF11C > > #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ > diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c > index 523bb99d7676..5d8a3efb54f2 100644 > --- a/arch/powerpc/kernel/prom_init.c > +++ b/arch/powerpc/kernel/prom_init.c > @@ -44,6 +44,7 @@ > #include > #include > #include > +#include > > #include > > @@ -174,6 +175,10 @@ static unsigned long __prombss prom_tce_alloc_end; > static bool __prombss prom_radix_disable; > #endif > > +#ifdef CONFIG_PPC_SVM > +static bool __prombss prom_svm_disable; > +#endif > + > struct platform_support { > bool hash_mmu; > bool radix_mmu; > @@ -809,6 +814,17 @@ static void __init early_cmdline_parse(void) > if (prom_radix_disable) > prom_debug("Radix disabled from cmdline\n"); > #endif /* CONFIG_PPC_PSERIES */ > + > +#ifdef CONFIG_PPC_SVM > + opt = prom_strstr(prom_cmd_line, "svm="); > + if (opt) { > + bool val; > + > + opt += sizeof("svm=") - 1; > + if (!prom_strtobool(opt, &val)) > + prom_svm_disable = !val; > + } > +#endif /* CONFIG_PPC_SVM */ > } > > #ifdef CONFIG_PPC_PSERIES > @@ -1707,6 +1723,43 @@ static void __init prom_close_stdin(void) > } > } > > +#ifdef CONFIG_PPC_SVM > +static int prom_rtas_os_term_hcall(uint64_t args) This is just an rtas hcall, nothing special about "os-term". 
> +{ > + register uint64_t arg1 asm("r3") = 0xf000; > + register uint64_t arg2 asm("r4") = args; > + > + asm volatile("sc 1\n" : "=r" (arg1) : > + "r" (arg1), > + "r" (arg2) :); > + return arg1; > +} > + > +static struct rtas_args __prombss os_term_args; > + > +static void __init prom_rtas_os_term(char *str) > +{ > + phandle rtas_node; > + __be32 val; > + u32 token; > + > + prom_printf("%s: start...\n", __func__); > + rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas")); > + prom_printf("rtas_node: %x\n", rtas_node); > + if (!PHANDLE_VALID(rtas_node)) > + return; > + > + val = 0; > + prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val)); > + token = be32_to_cpu(val); > + prom_printf("ibm,os-term: %x\n", token); > + if (token == 0) > + prom_panic("Could not get token for ibm,os-term\n"); > + os_term_args.token = cpu_to_be32(token); > + prom_rtas_os_term_hcall((uint64_t)&os_term_args); > +} > +#endif /* CONFIG_PPC_SVM */ > + > /* > * Allocate room for and instantiate RTAS > */ > @@ -3162,6 +3215,74 @@ static void unreloc_toc(void) > #endif > #endif > > +#ifdef CONFIG_PPC_SVM > +/* > + * The ESM blob is a data structure with information needed by the > Ultravisor to > + * validate the integrity of the secure guest. > + */ > +static void *get_esm_blob(void) > +{ > + /* > + * FIXME: We are still finalizing the details on how prom_init will grab > + * the ESM blob. When that is done, this function will be