Re: [PATCH 10/13] powerpc: Add "bitmask" parameter to MASKABLE_* macros
On Friday 16 September 2016 04:33 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:32:00 +0530 Madhavan Srinivasanwrote: Make it explicit the interrupt masking supported by a gievn interrupt handler. Patch correspondingly extends the MASKABLE_* macros with an addition's parameter. "bitmask" parameter is passed to SOFTEN_TEST macro to decide on masking the interrupt. Signed-off-by: Madhavan Srinivasan --- arch/powerpc/include/asm/exception-64s.h | 62 arch/powerpc/kernel/exceptions-64s.S | 36 --- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1eea4ab75607..41be0c2d7658 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -179,9 +179,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) * checking of the interrupt maskable level in the SOFTEN_TEST. * Intended to be used in MASKABLE_EXCPETION_* macros. */ -#define __EXCEPTION_PROLOG_1(area, extra, vec) \ +#define __EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ __EXCEPTION_PROLOG_1_PRE(area); \ - extra(vec); \ + extra(vec, bitmask);\ __EXCEPTION_PROLOG_1_POST(area); /* Is __EXCEPTION_PROLOG_1 now for maskable exceptions, and EXCEPTION_PROLOG_1 for unmaskable? Does it make sense to rename __EXCEPTION_PROLOG_1 to MASKABLE_EXCEPTION_PROLOG_1? Reducing the mystery underscores in this file would be nice! Yes. That is true. Will make the changes. Maddy This worked out nicely with mask bit being passed in by the exception handlers. Very neat. Thanks. Reviewed-by: Nicholas Piggin
Re: [PATCH 09/13] powerpc: Introduce new mask bit for soft_enabled
On Friday 16 September 2016 03:46 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:59 +0530 Madhavan Srinivasanwrote: Currently soft_enabled is used as the flag to determine the interrupt state. Patch extends the soft_enabled to be used as a mask instead of a flag. This should be the title of the patch, IMO. Introducing the new mask bit is incidental (and could be moved to patch 11). The key here of course is switching the tests from a flag to a mask. Very cool that you got this all worked out without adding any new instructions. Will make the changes accordingly. Thanks Reviewed-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/include/asm/hw_irq.h| 1 + arch/powerpc/include/asm/irqflags.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/exceptions-64e.S | 6 +++--- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index dd3253bd0d8e..1eea4ab75607 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -430,9 +430,9 @@ label##_relon_hv: \ #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ - cmpwi r10,IRQ_DISABLE_MASK_LINUX; \ + andi. 
r10,r10,IRQ_DISABLE_MASK_LINUX; \ li r10,SOFTEN_VALUE_##vec; \ - beq masked_##h##interrupt + bne masked_##h##interrupt #define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index fd9b421f9020..245262c02bab 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -32,6 +32,7 @@ */ #define IRQ_DISABLE_MASK_NONE 0 #define IRQ_DISABLE_MASK_LINUX1 +#define IRQ_DISABLE_MASK_PMU 2 #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index d0ed2a7d7d10..9ff09747a226 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -48,11 +48,11 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACASOFTIRQEN(r13);\ lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,IRQ_DISABLE_MASK_LINUX;\ + andi. __rA,__rA,IRQ_DISABLE_MASK_LINUX;\ li __rA,IRQ_DISABLE_MASK_LINUX;\ ori __rB,__rB,PACA_IRQ_HARD_DIS;\ stb __rB,PACAIRQHAPPENED(r13); \ - beq 44f;\ + bne 44f;\ stb __rA,PACASOFTIRQEN(r13);\ TRACE_DISABLE_INTS; \ 44: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 879aeb11ad29..533e363914a9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -764,8 +764,8 @@ restore: */ ld r5,SOFTE(r1) lbz r6,PACASOFTIRQEN(r13) - cmpwi cr0,r5,IRQ_DISABLE_MASK_LINUX - beq restore_irq_off + andi. r5,r5,IRQ_DISABLE_MASK_LINUX + bne restore_irq_off /* We are enabling, were we already enabled ? Yes, just return */ cmpwi cr0,r6,IRQ_DISABLE_MASK_NONE diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 5c628b5696f6..8e40df2c2f30 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -212,8 +212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) /* Interrupts had better not already be enabled... 
*/ twnei r6,IRQ_DISABLE_MASK_LINUX - cmpwi cr0,r5,IRQ_DISABLE_MASK_LINUX - beq 1f + andi. r5,r5,IRQ_DISABLE_MASK_LINUX + bne 1f TRACE_ENABLE_INTS stb r5,PACASOFTIRQEN(r13) @@ -352,7 +352,7 @@ ret_from_mc_except: #define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r10,IRQ_DISABLE_MASK_LINUX;/* yes -> go out of line */ \ + andi. r10,r10,IRQ_DISABLE_MASK_LINUX;/* yes -> go out of line */ \ beq masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \
Re: [PATCH 08/13] powerpc: Add new _EXCEPTION_PROLOG_1 macro
On Friday 16 September 2016 03:42 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:58 +0530 Madhavan Srinivasanwrote: To support addition of "bitmask" to MASKABLE_* macros, factor out the EXCPETION_PROLOG_1 macro. Signed-off-by: Madhavan Srinivasan Really minor nit, but as a matter of readability of the series, would you consider moving this next to patch 10 where it's used, if you submit again? Yes sure. Make sense. Will do it. Maddy Reviewed-by: Nicholas Piggin
Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic
Christophe Leroywrites: > +#else > +static void hugepd_free(struct mmu_gather *tlb, void *hugepte) > +{ > + BUG(); > +} > + > #endif I was expecting that BUG will get removed in the next patch. But I don't see it in the next patch. Considering @@ -475,11 +453,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; -#ifdef CONFIG_PPC_FSL_BOOK3E - hugepd_free(tlb, hugepte); -#else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); -#endif + if (shift >= pdshift) + hugepd_free(tlb, hugepte); + else + pgtable_free_tlb(tlb, hugepte, pdshift - shift); } What is that I am missing ? -aneesh
Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic
Christophe Leroywrites: > Today there are two implementations of hugetlbpages which are managed > by exclusive #ifdefs: > * FSL_BOOKE: several directory entries points to the same single hugepage > * BOOK3S: one upper level directory entry points to a table of hugepages > > In preparation of implementation of hugepage support on the 8xx, we > need a mix of the two above solutions, because the 8xx needs both cases > depending on the size of pages: > * In 4k page size mode, each PGD entry covers a 4M bytes area. It means > that 2 PGD entries will be necessary to cover an 8M hugepage while a > single PGD entry will cover 8x 512k hugepages. > * In 16 page size mode, each PGD entry covers a 64M bytes area. It means > that 8x 8M hugepages will be covered by one PGD entry and 64x 512k > hugepages will be covers by one PGD entry. > > This patch: > * removes #ifdefs in favor of if/else based on the range sizes > * merges the two huge_pte_alloc() functions as they are pretty similar > * merges the two hugetlbpage_init() functions as they are pretty similar > > Signed-off-by: Christophe Leroy > --- > v2: This part is new and results from a split of last patch of v1 serie in > two parts > > arch/powerpc/mm/hugetlbpage.c | 189 > +- > 1 file changed, 77 insertions(+), 112 deletions(-) > > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c > index 8a512b1..2119f00 100644 > --- a/arch/powerpc/mm/hugetlbpage.c > +++ b/arch/powerpc/mm/hugetlbpage.c > @@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t > *hpdp, > { > struct kmem_cache *cachep; > pte_t *new; > - > -#ifdef CONFIG_PPC_FSL_BOOK3E > int i; > - int num_hugepd = 1 << (pshift - pdshift); > - cachep = hugepte_cache; > -#else > - cachep = PGT_CACHE(pdshift - pshift); > -#endif > + int num_hugepd; > + > + if (pshift >= pdshift) { > + cachep = hugepte_cache; > + num_hugepd = 1 << (pshift - pdshift); > + } else { > + cachep = PGT_CACHE(pdshift - pshift); > + num_hugepd = 1; > + } 
Is there a way to hint likely/unlikely branch based on the page size selected at build time ? > > new = kmem_cache_zalloc(cachep, GFP_KERNEL); > > @@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t > *hpdp, > smp_wmb(); > > spin_lock(>page_table_lock); > -#ifdef CONFIG_PPC_FSL_BOOK3E > + > /* >* We have multiple higher-level entries that point to the same >* actual pte location. Fill in each as we go and backtrack on error. > @@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, > hugepd_t *hpdp, > if (unlikely(!hugepd_none(*hpdp))) > break; > else > -#ifdef CONFIG_PPC_FSL_BOOK3E > struct kmem_cache *hugepte_cache; > static int __init hugetlbpage_init(void) > { > int psize; > > - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { > - unsigned shift; > - > - if (!mmu_psize_defs[psize].shift) > - continue; > - > - shift = mmu_psize_to_shift(psize); > - > - /* Don't treat normal page sizes as huge... */ > - if (shift != PAGE_SHIFT) > - if (add_huge_page_size(1ULL << shift) < 0) > - continue; > - } > - > - /* > - * Create a kmem cache for hugeptes. The bottom bits in the pte have > - * size information encoded in them, so align them to allow this > - */ > - hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), > -HUGEPD_SHIFT_MASK + 1, 0, NULL); > - if (hugepte_cache == NULL) > - panic("%s: Unable to create kmem cache for hugeptes\n", > - __func__); > - > - /* Default hpage size = 4M */ > - if (mmu_psize_defs[MMU_PAGE_4M].shift) > - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; > - else > - panic("%s: Unable to set default huge page size\n", __func__); > - > - > - return 0; > -} > -#else > -static int __init hugetlbpage_init(void) > -{ > - int psize; > - > +#if !defined(CONFIG_PPC_FSL_BOOK3E) > if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) > return -ENODEV; > - > +#endif Do we need that #if ? radix_enabled() should become 0 and that if condition should be removed at compile time isn't it ? 
or are you finding errors with that ? > for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { > unsigned shift; > unsigned pdshift; > @@ -860,16 +807,31 @@ static int __init hugetlbpage_init(void) >* if we have pdshift and shift value same, we don't >* use pgt cache for hugepd. >*/ > - if (pdshift !=
Re: [PATCH 06/13] powerpc: reverse the soft_enable logic
On Friday 16 September 2016 03:35 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:56 +0530 Madhavan Srinivasanwrote: "paca->soft_enabled" is used as a flag to mask some of interrupts. Currently supported flags values and their details: soft_enabledMSR[EE] 0 0 Disabled (PMI and HMI not masked) 1 1 Enabled "paca->soft_enabled" is initialized to 1 to make the interripts as enabled. arch_local_irq_disable() will toggle the value when interrupts needs to disbled. At this point, the interrupts are not actually disabled, instead, interrupt vector has code to check for the flag and mask it when it occurs. By "mask it", it update interrupt paca->irq_happened and return. arch_local_irq_restore() is called to re-enable interrupts, which checks and replays interrupts if any occured. Now, as mentioned, current logic doesnot mask "performance monitoring interrupts" and PMIs are implemented as NMI. But this patchset depends on local_irq_* for a successful local_* update. Meaning, mask all possible interrupts during local_* update and replay them after the update. So the idea here is to reserve the "paca->soft_enabled" logic. New values and details: soft_enabledMSR[EE] 1 0 Disabled (PMI and HMI not masked) 0 1 Enabled Reason for the this change is to create foundation for a third flag value "2" for "soft_enabled" to add support to mask PMIs. When ->soft_enabled is set to a value "2", PMI interrupts are mask and when set to a value of "1", PMI are not mask. 
Signed-off-by: Madhavan Srinivasan --- arch/powerpc/include/asm/hw_irq.h | 4 ++-- arch/powerpc/kernel/entry_64.S| 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index dc3c248f9244..fd9b421f9020 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -30,8 +30,8 @@ /* * flags for paca->soft_enabled */ -#define IRQ_DISABLE_MASK_NONE 1 -#define IRQ_DISABLE_MASK_LINUX 0 +#define IRQ_DISABLE_MASK_NONE 0 +#define IRQ_DISABLE_MASK_LINUX 1 #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index aef7b64cbbeb..879aeb11ad29 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -131,8 +131,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) lbz r10,PACASOFTIRQEN(r13) - xorir10,r10,IRQ_DISABLE_MASK_NONE -1: tdnei r10,0 +1: tdnei r10,IRQ_DISABLE_MASK_NONE EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -1012,7 +1011,7 @@ _GLOBAL(enter_rtas) * check it with the asm equivalent of WARN_ON */ lbz r0,PACASOFTIRQEN(r13) -1: tdnei r0,IRQ_DISABLE_MASK_LINUX +1: tdeqi r0,IRQ_DISABLE_MASK_NONE EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif We specifically want to ensure that _LINUX interrupts are disabled here. Not that we allow masking of others without _LINUX now, but current behavior is checking that LINUX ones are masked. Otherwise it seems okay. It might be nice after this series to do a pass and rename soft_enabled to soft_masked. Yes definitely, I do have it in my todo. Maddy Reviewed-by: Nicholas Piggin
Re: [PATCH 05/13] powerpc: Add soft_enabled manipulation functions
On Friday 16 September 2016 03:27 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:55 +0530 Madhavan Srinivasanwrote: Add new soft_enabled_* manipulation function and implement arch_local_* using the soft_enabled_* wrappers. Signed-off-by: Madhavan Srinivasan --- arch/powerpc/include/asm/hw_irq.h | 32 ++-- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f828b8f8df02..dc3c248f9244 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -53,21 +53,7 @@ static inline notrace void soft_enabled_set(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) -{ - unsigned long flags; - - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled)),\ - "r" (enable) - : "memory"); - - return flags; -} - -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long soft_enabled_return(void) { unsigned long flags; @@ -79,20 +65,30 @@ static inline unsigned long arch_local_save_flags(void) return flags; } -static inline unsigned long arch_local_irq_disable(void) +static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) { unsigned long flags, zero; asm volatile( - "li %1,%3; lbz %0,%2(13); stb %1,%2(13)" + "mr %1,%3; lbz %0,%2(13); stb %1,%2(13)" : "=r" (flags), "=" (zero) : "i" (offsetof(struct paca_struct, soft_enabled)),\ - "i" (IRQ_DISABLE_MASK_LINUX) + "r" (enable) : "memory"); return flags; } As we talked about earlier, it would be nice to add builtin_constant variants to avoid the extra instruction. If you prefer to do that after this series, that's fine. True. my bad. Will look into this. Maddy Reviewed-by: Nicholas Piggin
Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled
On Monday 19 September 2016 08:22 AM, Nicholas Piggin wrote: On Fri, 16 Sep 2016 13:22:24 + David Laightwrote: From: Nicholas Piggin Sent: 16 September 2016 12:59 On Fri, 16 Sep 2016 11:43:13 + David Laight wrote: From: Nicholas Piggin Sent: 16 September 2016 10:53 On Thu, 15 Sep 2016 18:31:54 +0530 Madhavan Srinivasan wrote: Force use of soft_enabled_set() wrapper to update paca-soft_enabled wherever possisble. Also add a new wrapper function, soft_enabled_set_return(), added to force the paca->soft_enabled updates. ... diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 8fad8c24760b..f828b8f8df02 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) +{ + unsigned long flags; + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)),\ + "r" (enable) + : "memory"); + + return flags; +} Why do you have the "memory" clobber here while soft_enabled_set() does not? I wondered about the missing memory clobber earlier. Any 'clobber' ought to be restricted to the referenced memory area. If the structure is only referenced by r13 through 'asm volatile' it isn't needed. Well a clobber (compiler barrier) at some point is needed in irq_disable and irq_enable paths, so we correctly open and close the critical section vs interrupts. I just wonder about these helpers. It might be better to take the clobbers out of there and add barrier(); in callers, which would make it more obvious. If the memory clobber is needed to synchronise with the rest of the code rather than just ensuring the compiler doesn't reorder accesses via r13 then I'd add an explicit barrier() somewhere - even if in these helpers. 
Potentially the helper wants a memory clobber for the (r13) area and a separate barrier() to ensure the interrupts are masked for the right code. Even if both are together in the same helper. Good point. Some of the existing modification helpers don't seem to have clobbers for modifying the r13->soft_enabled memory itself, but they do have the memory clobber where a critical section barrier is required. The former may not be a problem if the helpers are used very carefully, but probably should be commented at best, if not fixed. Yes. Agreed. Will add comments So after Madhi's patches, we should make all accesses go via the helper functions, so a clobber for the soft_enabled modification may not be required (this should be commented). I think it may be cleaner to specify the location in the constraints, but maybe that doesn't generate the best code -- something to investigate. Then, I'd like to see barrier()s for interrupt critical sections placed in the callers of these helpers, which will make the code more obvious. Ok will look into this. Thanks, Nick
Re: [PATCH 2/2] powerpc: tm: Enable transactional memory (TM) lazily for userspace
On Mon, 2016-09-19 at 12:47 +0800, Simon Guo wrote: > On Wed, Sep 14, 2016 at 06:02:16PM +1000, Cyril Bur wrote: > > > > @@ -954,8 +963,16 @@ static inline void __switch_to_tm(struct > > task_struct *prev, > > struct task_struct *new) > > { > > if (cpu_has_feature(CPU_FTR_TM)) { > > - tm_enable(); > > - tm_reclaim_task(prev); > > + if (tm_enabled(prev) || tm_enabled(new)) > > + tm_enable(); > > + > > + if (tm_enabled(prev)) { > > + prev->thread.load_tm++; > > + tm_reclaim_task(prev); > > + if (!MSR_TM_ACTIVE(prev->thread.regs->msr) > > && prev->thread.load_tm == 0) > > + prev->thread.regs->msr &= ~MSR_TM; > > + } > Hi Cyril, > > If MSR_TM_ACTIVE(), is it better to reset load_tm to 0? > Other looks good to me. > Doing so would extend the window that we keep TM enabled for when we might not need to. It is possible that we could assume that if MSR_TM_ACTIVE() then they're in codepathes that will reuse TM again soon so load_tm = 0 could be a good idea but there's really no way to know. Food for thought I guess... Maybe? Good thought, Cyril > Thanks, > - Simon
[PATCH v7 6/6] powerpc: pSeries: Add pv-qspinlock build config/make
pSeries run as a guest and might need pv-qspinlock. Signed-off-by: Pan Xinhui--- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/platforms/pseries/Kconfig | 8 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe4c075..efd2f3d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index f669323..46632e4 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -128,3 +128,11 @@ config HV_PERF_CTRS systems. 24x7 is available on Power 8 systems. If unsure, select Y. + +config PARAVIRT_SPINLOCKS + bool "Paravirtialization support for qspinlock" + depends on PPC_SPLPAR && QUEUED_SPINLOCKS + default y + help + If platform supports virtualization, for example PowerVM, this option + can let guest have a better performace. -- 2.4.11
[PATCH v7 5/6] powerpc/pv-qspinlock: powerpc support pv-qspinlock
The default pv-qspinlock uses qspinlock(native version of pv-qspinlock). pv_lock initialization should be done in bootstage with irq disabled. And if we run as a guest with powerKVM/pHyp shared_processor mode, restore pv_lock_ops callbacks to pv-qspinlock(pv version) which makes full use of virtualization. There is a hash table, we store cpu number into it and the key is lock. So everytime pv_wait can know who is the lock holder by searching the lock. Also store the lock in a per_cpu struct, and remove it when we own the lock. Then pv_wait can know which lock we are spinning on. But the cpu in the hash table might not be the correct lock holder, as for performace issue, we does not take care of hash conflict. Also introduce spin_lock_holder, which tells who owns the lock now. currently the only user is spin_unlock_wait. Signed-off-by: Pan Xinhui--- arch/powerpc/include/asm/qspinlock.h | 29 +++- arch/powerpc/include/asm/qspinlock_paravirt.h | 36 + .../powerpc/include/asm/qspinlock_paravirt_types.h | 13 ++ arch/powerpc/kernel/paravirt.c | 153 + arch/powerpc/lib/locks.c | 8 +- arch/powerpc/platforms/pseries/setup.c | 5 + 6 files changed, 241 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h create mode 100644 arch/powerpc/kernel/paravirt.c diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 881a186..23459fb 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -15,7 +15,7 @@ static inline u8 * __qspinlock_lock_byte(struct qspinlock *lock) return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } -static inline void queued_spin_unlock(struct qspinlock *lock) +static inline void native_queued_spin_unlock(struct qspinlock *lock) { /* release semantics is required */ smp_store_release(__qspinlock_lock_byte(lock), 0); @@ -27,6 +27,33 @@ static inline int 
queued_spin_is_locked(struct qspinlock *lock) return atomic_read(>val); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#include +/* + * try to know who is the lock holder, however it is not always true + * Return: + * -1, we did not know the lock holder. + * other value, likely is the lock holder. + */ +extern int spin_lock_holder(void *lock); + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} +#else +#define spin_lock_holder(l) (-1) +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} +#endif + #include /* we need override it as ppc has io_sync stuff */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h new file mode 100644 index 000..d87cda0 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt.h @@ -0,0 +1,36 @@ +#ifndef CONFIG_PARAVIRT_SPINLOCKS +#error "do not include this file" +#endif + +#ifndef _ASM_QSPINLOCK_PARAVIRT_H +#define _ASM_QSPINLOCK_PARAVIRT_H + +#include + +extern void pv_lock_init(void); +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val) +{ + pv_lock_op.lock(lock, val); +} + +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + pv_lock_op.unlock(lock); +} + +static inline void pv_wait(u8 *ptr, u8 val) +{ + pv_lock_op.wait(ptr, val); +} + +static inline void pv_kick(int cpu) +{ + pv_lock_op.kick(cpu); +} + +#endif diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h b/arch/powerpc/include/asm/qspinlock_paravirt_types.h new file mode 100644 index 000..83611ed --- 
/dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h @@ -0,0 +1,13 @@ +#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H +#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H + +struct pv_lock_ops { + void (*lock)(struct qspinlock *lock, u32 val); + void (*unlock)(struct qspinlock *lock); + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); +}; + +extern struct pv_lock_ops pv_lock_op; + +#endif diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c new file mode 100644 index 000..e697b17 --- /dev/null +++ b/arch/powerpc/kernel/paravirt.c @@ -0,0 +1,153 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + *
[PATCH v7 4/6] powerpc: lib/locks.c: Add cpu yield/wake helper function
Add two corresponding helper functions to support pv-qspinlock. For normal use, __spin_yield_cpu will confer current vcpu slices to the target vcpu(say, a lock holder). If target vcpu is not specified or it is in running state, such conferging to lpar happens or not depends. Because hcall itself will introduce latency and a little overhead. And we do NOT want to suffer any latency on some cases, e.g. in interrupt handler. The second parameter *confer* can indicate such case. __spin_wake_cpu is simpiler, it will wake up one vcpu regardless of its current vcpu state. Signed-off-by: Pan Xinhui--- arch/powerpc/include/asm/spinlock.h | 4 +++ arch/powerpc/lib/locks.c| 59 + 2 files changed, 63 insertions(+) diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 6aef8dd..abb6b0f 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -56,9 +56,13 @@ /* We only yield to the hypervisor if we are in shared processor mode */ #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) extern void __spin_yield(arch_spinlock_t *lock); +extern void __spin_yield_cpu(int cpu, int confer); +extern void __spin_wake_cpu(int cpu); extern void __rw_yield(arch_rwlock_t *lock); #else /* SPLPAR */ #define __spin_yield(x)barrier() +#define __spin_yield_cpu(x,y) barrier() +#define __spin_wake_cpu(x) barrier() #define __rw_yield(x) barrier() #define SHARED_PROCESSOR 0 #endif diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6574626..892df7d 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -23,6 +23,65 @@ #include #include +/* + * confer our slices to a specified cpu and return. If it is already running or + * cpu is -1, then we will check confer. If confer is NULL, we will return + * otherwise we confer our slices to lpar. 
+ */ +void __spin_yield_cpu(int cpu, int confer) +{ + unsigned int holder_cpu = cpu, yield_count; + + if (cpu == -1) + goto yield_to_lpar; + + BUG_ON(holder_cpu >= nr_cpu_ids); + yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + /* if cpu is running, confer slices to lpar conditionally*/ + if ((yield_count & 1) == 0) + goto yield_to_lpar; + + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); + return; + +yield_to_lpar: + if (confer) + plpar_hcall_norets(H_CONFER, -1, 0); +} +EXPORT_SYMBOL_GPL(__spin_yield_cpu); + +void __spin_wake_cpu(int cpu) +{ + unsigned int holder_cpu = cpu; + + BUG_ON(holder_cpu >= nr_cpu_ids); + /* +* NOTE: we should always do this hcall regardless of +* the yield_count of the holder_cpu. +* as thers might be a case like below; +* CPU 1 2 +* yielded = true +* if (yielded) +* __spin_wake_cpu() +* __spin_yield_cpu() +* +* So we might lose a wake if we check the yield_count and +* return directly if the holder_cpu is running. +* IOW. do NOT code like below. +* yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); +* if ((yield_count & 1) == 0) +* return; +* +* a PROD hcall marks the target_cpu proded, which cause the next cede or confer +* called on the target_cpu invalid. +*/ + plpar_hcall_norets(H_PROD, + get_hard_smp_processor_id(holder_cpu)); +} +EXPORT_SYMBOL_GPL(__spin_wake_cpu); + #ifndef CONFIG_QUEUED_SPINLOCKS void __spin_yield(arch_spinlock_t *lock) { -- 2.4.11
[PATCH v7 3/6] powerpc: pseries/Kconfig: Add qspinlock build config
pseries will use qspinlock by default. Signed-off-by: Pan Xinhui--- arch/powerpc/platforms/pseries/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index bec90fb..f669323 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HOTPLUG_CPU if SMP select ARCH_RANDOM select PPC_DOORBELL + select ARCH_USE_QUEUED_SPINLOCKS default y config PPC_SPLPAR -- 2.4.11
[PATCH v7 2/6] powerpc/qspinlock: powerpc support qspinlock
This patch add basic code to enable qspinlock on powerpc. qspinlock is one kind of fairlock implemention. And seen some performance improvement under some scenarios. queued_spin_unlock() release the lock by just one write of NULL to the ->locked field which sits at different places in the two endianness system. We override some arch_spin_xxx as powerpc has io_sync stuff which makes sure the io operations are protected by the lock correctly. There is another special case, see commit 2c610022711 ("locking/qspinlock: Fix spin_unlock_wait() some more") Signed-off-by: Pan Xinhui--- arch/powerpc/include/asm/qspinlock.h | 66 +++ arch/powerpc/include/asm/spinlock.h | 31 +-- arch/powerpc/include/asm/spinlock_types.h | 4 ++ arch/powerpc/lib/locks.c | 59 +++ 4 files changed, 147 insertions(+), 13 deletions(-) create mode 100644 arch/powerpc/include/asm/qspinlock.h diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 000..881a186 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,66 @@ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include + +#define SPIN_THRESHOLD (1 << 15) +#define queued_spin_unlock queued_spin_unlock +#define queued_spin_is_locked queued_spin_is_locked +#define queued_spin_unlock_wait queued_spin_unlock_wait + +extern void queued_spin_unlock_wait(struct qspinlock *lock); + +static inline u8 * __qspinlock_lock_byte(struct qspinlock *lock) +{ + return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + /* release semantics is required */ + smp_store_release(__qspinlock_lock_byte(lock), 0); +} + +static inline int queued_spin_is_locked(struct qspinlock *lock) +{ + smp_mb(); + return atomic_read(>val); +} + +#include + +/* we need override it as ppc has io_sync stuff */ +#undef arch_spin_trylock +#undef arch_spin_lock +#undef arch_spin_lock_flags +#undef arch_spin_unlock +#define 
arch_spin_trylock arch_spin_trylock +#define arch_spin_lock arch_spin_lock +#define arch_spin_lock_flags arch_spin_lock_flags +#define arch_spin_unlock arch_spin_unlock + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + CLEAR_IO_SYNC; + return queued_spin_trylock(lock); +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + CLEAR_IO_SYNC; + queued_spin_lock(lock); +} + +static inline +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +{ + CLEAR_IO_SYNC; + queued_spin_lock(lock); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + SYNC_IO; + queued_spin_unlock(lock); +} +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index fa37fe9..6aef8dd 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -52,6 +52,23 @@ #define SYNC_IO #endif +#if defined(CONFIG_PPC_SPLPAR) +/* We only yield to the hypervisor if we are in shared processor mode */ +#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) +extern void __spin_yield(arch_spinlock_t *lock); +extern void __rw_yield(arch_rwlock_t *lock); +#else /* SPLPAR */ +#define __spin_yield(x)barrier() +#define __rw_yield(x) barrier() +#define SHARED_PROCESSOR 0 +#endif + +#ifdef CONFIG_QUEUED_SPINLOCKS +#include +#else + +#define arch_spin_relax(lock) __spin_yield(lock) + static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.slock == 0; @@ -106,18 +123,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * held. Conveniently, we have a word in the paca that holds this * value. 
*/ - -#if defined(CONFIG_PPC_SPLPAR) -/* We only yield to the hypervisor if we are in shared processor mode */ -#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) -extern void __spin_yield(arch_spinlock_t *lock); -extern void __rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR */ -#define __spin_yield(x)barrier() -#define __rw_yield(x) barrier() -#define SHARED_PROCESSOR 0 -#endif - static inline void arch_spin_lock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; @@ -195,6 +200,7 @@ out: smp_mb(); } +#endif /* !CONFIG_QUEUED_SPINLOCKS */ /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -330,7 +336,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) __spin_yield(lock) #define arch_read_relax(lock) __rw_yield(lock) #define arch_write_relax(lock) __rw_yield(lock) diff --git
[PATCH v7 1/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
cmpxchg_release() is more lightweight than cmpxchg() on some archs(e.g. PPC), moreover, in __pv_queued_spin_unlock() we only need a RELEASE in the fast path(pairing with *_try_lock() or *_lock()). And the slow path has smp_store_release too. So it's safe to use cmpxchg_release here. Suggested-by: Boqun Feng Signed-off-by: Pan Xinhui --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 8a99abf..ce655aa 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -544,7 +544,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); if (likely(locked == _Q_LOCKED_VAL)) return; -- 2.4.11
[PATCH v7 0/6] Implement qspinlock/pv-qspinlock on ppc
Hi All, this is the fairlock patchset. You can apply them and build successfully. patches are based on 4.8-rc4. qspinlock can avoid waiter starved issue. It has about the same speed in single-thread and it can be much faster in high contention situations especially when the spinlock is embedded within the data structure to be protected. v6 -> v7: rebase onto 4.8-rc4 no changelog anymore, sorry for that. I hope there is a very careful review. Todo: we can save one function call overhead. As we can use feature-fixup to patch the binary code. Currently there is pv_lock_ops->lock(lock) and ->unlock(lock) to acquire/release the lock. some benchmark result below perf bench these numbers are ops per sec, So the higher the better. *** on pSeries with 32 vcpus, 32Gb memory, pHyp. test case | pv-qspinlock | qspinlock| current-spinlock futex hash | 618572| 552332| 553788 futex lock-pi | 364 | 364 | 364 sched pipe | 78984 | 76060 | 81454 unix bench: these numbers are scores, So the higher the better. on PowerNV with 16 cores(cpus) (smt off), 32Gb memory: - pv-qspinlock and qspinlock have very similar results because pv-qspinlock use native version which is only having one callback overhead test case | pv-qspinlock and qspinlock | current-spinlock Execl Throughput 761.1 761.4 File Copy 1024 bufsize 2000 maxblocks 1259.81286.6 File Copy 256 bufsize 500 maxblocks782.2 790.3 File Copy 4096 bufsize 8000 maxblocks 2741.52817.4 Pipe Throughput 1063.21036.7 Pipe-based Context Switching 284.7 281.1 Process Creation 679.6 649.1 Shell Scripts (1 concurrent) 1933.21922.9 Shell Scripts (8 concurrent) 5003.34899.8 System Call Overhead 900.6 896.8 == System Benchmarks Index Score 1139.3 1133.0 --- - *** on pSeries with 32 vcpus, 32Gb memory, pHyp. 
test case | pv-qspinlock | qspinlock | current-spinlock Execl Throughput 877.1 891.2 872.8 File Copy 1024 bufsize 2000 maxblocks 1390.41399.21395.0 File Copy 256 bufsize 500 maxblocks 882.4 889.5 881.8 File Copy 4096 bufsize 8000 maxblocks 3112.33113.43121.7 Pipe Throughput 1095.81162.61158.5 Pipe-based Context Switching 194.9 192.7 200.7 Process Creation 518.4 526.4 509.1 Shell Scripts (1 concurrent)1401.91413.91402.2 Shell Scripts (8 concurrent)3215.63246.63229.1 System Call Overhead 833.2 892.4 888.1 System Benchmarks Index Score 1033.71052.51047.8 ** on pSeries with 32 vcpus, 16Gb memory, KVM. test case | pv-qspinlock | qspinlock | current-spinlock Execl Throughput 497.4518.7 497.8 File Copy 1024 bufsize 2000 maxblocks 1368.8 1390.11343.3 File Copy 256 bufsize 500 maxblocks 857.7859.8 831.4 File Copy 4096 bufsize 8000 maxblocks 2851.7 2838.12785.5 Pipe Throughput 1221.9
Re: [PATCH v2 1/3] powerpc: port 64 bits pgtable_cache to 32 bits
Christophe Leroywrites: > Today powerpc64 uses a set of pgtable_caches while powerpc32 uses > standard pages when using 4k pages and a single pgtable_cache > if using other size pages. > > In preparation of implementing huge pages on the 8xx, this patch > replaces the specific powerpc32 handling by the 64 bits approach. > > This is done by: > * moving 64 bits pgtable_cache_add() and pgtable_cache_init() > in a new file called init-common.c > * modifying pgtable_cache_init() to also handle the case > without PMD > * removing the 32 bits version of pgtable_cache_add() and > pgtable_cache_init() > * copying related header contents from 64 bits into both the > book3s/32 and nohash/32 header files > > On the 8xx, the following cache sizes will be used: > * 4k pages mode: > - PGT_CACHE(10) for PGD > - PGT_CACHE(3) for 512k hugepage tables > * 16k pages mode: > - PGT_CACHE(6) for PGD > - PGT_CACHE(7) for 512k hugepage tables > - PGT_CACHE(3) for 8M hugepage tables > > Signed-off-by: Christophe Leroy > --- > v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1). > This modification has been removed from v2. 
> > arch/powerpc/include/asm/book3s/32/pgalloc.h | 44 ++-- > arch/powerpc/include/asm/book3s/32/pgtable.h | 43 > arch/powerpc/include/asm/book3s/64/pgtable.h | 3 - > arch/powerpc/include/asm/nohash/32/pgalloc.h | 44 ++-- > arch/powerpc/include/asm/nohash/32/pgtable.h | 45 > arch/powerpc/include/asm/nohash/64/pgtable.h | 2 - > arch/powerpc/include/asm/pgtable.h | 2 + > arch/powerpc/mm/Makefile | 3 +- > arch/powerpc/mm/init-common.c| 147 > +++ > arch/powerpc/mm/init_64.c| 77 -- > arch/powerpc/mm/pgtable_32.c | 37 --- > 11 files changed, 273 insertions(+), 174 deletions(-) > create mode 100644 arch/powerpc/mm/init-common.c > > diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h > b/arch/powerpc/include/asm/book3s/32/pgalloc.h > index 8e21bb4..d310546 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h > +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h > @@ -2,14 +2,42 @@ > #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H > > #include > +#include > > -/* For 32-bit, all levels of page tables are just drawn from get_free_page() > */ > -#define MAX_PGTABLE_INDEX_SIZE 0 > +/* > + * Functions that deal with pagetables that could be at any level of > + * the table need to be passed an "index_size" so they know how to > + * handle allocation. For PTE pages (which are linked to a struct > + * page for now, and drawn from the main get_free_pages() pool), the > + * allocation size will be (2^index_size * sizeof(pointer)) and > + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). > + * > + * The maximum index size needs to be big enough to allow any > + * pagetable sizes we need, but small enough to fit in the low bits of > + * any page table pointer. In other words all pagetables, even tiny > + * ones, must be aligned to allow at least enough low 0 bits to > + * contain this value. This value is also used as a mask, so it must > + * be one less than a power of two. 
> + */ > +#define MAX_PGTABLE_INDEX_SIZE 0xf > > extern void __bad_pte(pmd_t *pmd); > > -extern pgd_t *pgd_alloc(struct mm_struct *mm); > -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); > +extern struct kmem_cache *pgtable_cache[]; > +#define PGT_CACHE(shift) ({ \ > + BUG_ON(!(shift)); \ > + pgtable_cache[(shift) - 1]; \ > + }) > + > +static inline pgd_t *pgd_alloc(struct mm_struct *mm) > +{ > + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); > +} > + > +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > +{ > + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); > +} > > /* > * We don't have any real pmd's, and this code never triggers because > @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, > pgtable_t ptepage) > > static inline void pgtable_free(void *table, unsigned index_size) > { > - BUG_ON(index_size); /* 32-bit doesn't use this */ > - free_page((unsigned long)table); > + if (!index_size) { > + free_page((unsigned long)table); > + } else { > + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); > + kmem_cache_free(PGT_CACHE(index_size), table); > + } > } > > #define check_pgt_cache()do { } while (0) > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h > b/arch/powerpc/include/asm/book3s/32/pgtable.h > index 6b8b2d5..f887499 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > @@ -8,6 +8,26 @@ > /* And here we include common definitions */ > #include > > +#define
Re: [PATCH][RFC] Implement arch primitives for busywait loops
> diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h > index 3b5bafc..695743c 100644 > --- a/include/linux/bit_spinlock.h > +++ b/include/linux/bit_spinlock.h > @@ -25,9 +25,8 @@ static inline void bit_spin_lock(int bitnum, unsigned long > *addr) > #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) > while (unlikely(test_and_set_bit_lock(bitnum, addr))) { > preempt_enable(); > - do { > - cpu_relax(); > - } while (test_bit(bitnum, addr)); > + spin_do { > + } spin_while (test_bit(bitnum, addr)); > preempt_disable(); > } > #endif That usage is strange, with nothing in the do{ }while loop. Maybe add a macro for usages like this? -aneesh
Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled
On Friday 16 September 2016 05:13 PM, David Laight wrote: From: Nicholas Piggin Sent: 16 September 2016 10:53 On Thu, 15 Sep 2016 18:31:54 +0530 Madhavan Srinivasanwrote: Force use of soft_enabled_set() wrapper to update paca-soft_enabled wherever possisble. Also add a new wrapper function, soft_enabled_set_return(), added to force the paca->soft_enabled updates. ... diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 8fad8c24760b..f828b8f8df02 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) +{ + unsigned long flags; + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)),\ + "r" (enable) + : "memory"); + + return flags; +} Why do you have the "memory" clobber here while soft_enabled_set() does not? I wondered about the missing memory clobber earlier. Any 'clobber' ought to be restricted to the referenced memory area. If the structure is only referenced by r13 through 'asm volatile' it isn't needed. OTOH why not allocate a global register variable to r13 and access through that? I do see this in asm/paca.h "register struct paca_struct *local_paca asm("r13"); " and __check_irq_replay() in kernel/irq.c do updates the "irq_happened" as mentioned. But existing helpers in hw_irq update the soft_enabled via asm volatile and i did the same. Maddy David
Re: [PATCH 2/2] powerpc: tm: Enable transactional memory (TM) lazily for userspace
On Wed, Sep 14, 2016 at 06:02:16PM +1000, Cyril Bur wrote: > @@ -954,8 +963,16 @@ static inline void __switch_to_tm(struct task_struct > *prev, > struct task_struct *new) > { > if (cpu_has_feature(CPU_FTR_TM)) { > - tm_enable(); > - tm_reclaim_task(prev); > + if (tm_enabled(prev) || tm_enabled(new)) > + tm_enable(); > + > + if (tm_enabled(prev)) { > + prev->thread.load_tm++; > + tm_reclaim_task(prev); > + if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && > prev->thread.load_tm == 0) > + prev->thread.regs->msr &= ~MSR_TM; > + } Hi Cyril, If MSR_TM_ACTIVE(), is it better to reset load_tm to 0? Other looks good to me. Thanks, - Simon
Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled
On Friday 16 September 2016 03:23 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:54 +0530 Madhavan Srinivasanwrote: Force use of soft_enabled_set() wrapper to update paca-soft_enabled wherever possisble. Also add a new wrapper function, soft_enabled_set_return(), added to force the paca->soft_enabled updates. Signed-off-by: Madhavan Srinivasan --- arch/powerpc/include/asm/hw_irq.h | 14 ++ arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/kernel/time.c | 6 +++--- 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 8fad8c24760b..f828b8f8df02 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) +{ + unsigned long flags; + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)),\ + "r" (enable) + : "memory"); + + return flags; +} Why do you have the "memory" clobber here while soft_enabled_set() does not? I did change to function to include a local variable and update the soft_enabled. But, my bad. It was in the next patch. I should make the change here. Yes. we dont a "memory" clobber here which is right. But, this change is not complete and I will correct it. Maddy Thanks, Nick
Re: [PATCH 03/13] powerpc: move set_soft_enabled() and rename
On Friday 16 September 2016 03:20 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:53 +0530 Madhavan Srinivasan wrote: Move set_soft_enabled() from powerpc/kernel/irq.c to asm/hw_irq.c. and rename it soft_enabled_set(). This way paca->soft_enabled updates can be forced. Could you just tidy up the changelog a little? You are renaming it I assume because you are going to introduce more soft_enabled_x() functions, and that the namespace works better as a prefix than a postfix. You are moving it so all paca->soft_enabled updates can be done via these access functions rather than open coded. Did I get that right? Yes. That is right. My bad. Will fix the commit message and changelog. Reviewed-by: Nicholas Piggin
Re: [PATCH 02/13] powerpc: Cleanup to use IRQ_DISABLE_MASK_* macros for paca->soft_enabled update
On Friday 16 September 2016 03:17 PM, Nicholas Piggin wrote: On Thu, 15 Sep 2016 18:31:52 +0530 Madhavan Srinivasanwrote: Replace the hardcoded values used when updating paca->soft_enabled with IRQ_DISABLE_MASK_* #def. No logic change. This could be folded with patch 1. Sure. Will do it. Reviewed-by: Nicholas Piggin
Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled
On Fri, 16 Sep 2016 13:22:24 + David Laightwrote: > From: Nicholas Piggin > > Sent: 16 September 2016 12:59 > > On Fri, 16 Sep 2016 11:43:13 + > > David Laight wrote: > > > > > From: Nicholas Piggin > > > > Sent: 16 September 2016 10:53 > > > > On Thu, 15 Sep 2016 18:31:54 +0530 > > > > Madhavan Srinivasan wrote: > > > > > > > > > Force use of soft_enabled_set() wrapper to update paca-soft_enabled > > > > > wherever possisble. Also add a new wrapper function, > > > > > soft_enabled_set_return(), > > > > > added to force the paca->soft_enabled updates. > > > ... > > > > > diff --git a/arch/powerpc/include/asm/hw_irq.h > > > > > b/arch/powerpc/include/asm/hw_irq.h > > > > > index 8fad8c24760b..f828b8f8df02 100644 > > > > > --- a/arch/powerpc/include/asm/hw_irq.h > > > > > +++ b/arch/powerpc/include/asm/hw_irq.h > > > > > @@ -53,6 +53,20 @@ static inline notrace void > > > > > soft_enabled_set(unsigned long enable) > > > > > : : "r" (enable), "i" (offsetof(struct paca_struct, > > > > > soft_enabled))); > > > > > } > > > > > > > > > > +static inline notrace unsigned long soft_enabled_set_return(unsigned > > > > > long enable) > > > > > +{ > > > > > + unsigned long flags; > > > > > + > > > > > + asm volatile( > > > > > + "lbz %0,%1(13); stb %2,%1(13)" > > > > > + : "=r" (flags) > > > > > + : "i" (offsetof(struct paca_struct, soft_enabled)),\ > > > > > + "r" (enable) > > > > > + : "memory"); > > > > > + > > > > > + return flags; > > > > > +} > > > > > > > > Why do you have the "memory" clobber here while soft_enabled_set() does > > > > not? > > > > > > I wondered about the missing memory clobber earlier. > > > > > > Any 'clobber' ought to be restricted to the referenced memory area. > > > If the structure is only referenced by r13 through 'asm volatile' it > > > isn't needed. > > > > Well a clobber (compiler barrier) at some point is needed in irq_disable and > > irq_enable paths, so we correctly open and close the critical section vs > > interrupts. 
> > I just wonder about these helpers. It might be better to take the clobbers > > out of > > there and add barrier(); in callers, which would make it more obvious. > > If the memory clobber is needed to synchronise with the rest of the code > rather than just ensuring the compiler doesn't reorder accesses via r13 > then I'd add an explicit barrier() somewhere - even if in these helpers. > > Potentially the helper wants a memory clobber for the (r13) area > and a separate barrier() to ensure the interrupts are masked for the > right code. > Even if both are together in the same helper. Good point. Some of the existing modification helpers don't seem to have clobbers for modifying the r13->soft_enabled memory itself, but they do have the memory clobber where a critical section barrier is required. The former may not be a problem if the helpers are used very carefully, but probably should be commented at best, if not fixed. So after Madhi's patches, we should make all accesses go via the helper functions, so a clobber for the soft_enabled modification may not be required (this should be commented). I think it may be cleaner to specify the location in the constraints, but maybe that doesn't generate the best code -- something to investigate. Then, I'd like to see barrier()s for interrupt critical sections placed in the callers of these helpers, which will make the code more obvious. Thanks, Nick
Re: Linux 4.8: Reported regressions as of Sunday, 2016-09-18
On Sun, Sep 18, 2016 at 03:20:53PM +0200, Thorsten Leemhuis wrote: > Hi! Here is my fourth regression report for Linux 4.8. It lists 14 > regressions I'm aware of. 5 of them are new; 1 mentioned in last > weeks report got fixed. > > As always: Are you aware of any other regressions? Then please let me > know (simply CC regressi...@leemhuis.info). And pls tell me if there > is anything in the report that shouldn't be there. > > Ciao, Thorsten > > == Current regressions == > > Desc: genirq: Flags mismatch irq 8, 0088 (mmc0) vs. 0080 (rtc0). > mmc0: Failed to request irq 8: -16 > Repo: 2016-08-01 https://bugzilla.kernel.org/show_bug.cgi?id=150881 > Stat: 2016-09-09 https://bugzilla.kernel.org/show_bug.cgi?id=150881#c34 > Note: stalled; root cause somewhere in the main gpio merge for 4.8, but > problematic commit still unknown > > Desc: [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression > Repo: 2016-08-09 http://www.spinics.net/lists/kernel/msg2317052.html > Stat: 2016-09-09 https://marc.info/?t=14734151953=1=2 > Note: looks like post-4.8 material at this point: Mel working on it in his > spare time, but "The progression of this series has been unsatisfactory." Actually, what Mel was working on (mapping lock contention) was not related to the reported XFS regression. The regression was an XFS sub-page write issue introduced by the new iomap infrastructure, and nobody has been able to reproduce it exactly outside of the reaim benchmark. We've reproduced other, similar issues, and the fixes for those are queued for the 4.9 window. Cheers, Dave. -- Dave Chinner da...@fromorbit.com
Re: [PATCH V2 1/7] dt-bindings: Update QorIQ TMU thermal bindings
On 三, 2016-09-14 at 11:40 -0500, Leo Li wrote: > On Wed, Jun 8, 2016 at 2:52 PM, Rob Herringwrote: > > > > On Tue, Jun 07, 2016 at 11:27:34AM +0800, Jia Hongtao wrote: > > > > > > For different types of SoC the sensor id and endianness may vary. > > > "#thermal-sensor-cells" is used to provide sensor id information. > > > "little-endian" property is to tell the endianness of TMU. > > > > > > Signed-off-by: Jia Hongtao > > > --- > > > Changes for V2: > > > * Remove formatting chnages. > > > > > > Documentation/devicetree/bindings/thermal/qoriq-thermal.txt | 7 > > > +++ > > > 1 file changed, 7 insertions(+) > > Acked-by: Rob Herring > Hi Zhang Rui, > > Since you have applied the driver patch, can you also apply the > binding patch? The binding is supposed to go with the driver. > Do you mean I should take both patch 1/7 and 7/7? I can not see the other patches in this patch set. thanks, rui
Re: [PATCH v14 13/15] selftests/powerpc: Add ptrace tests for TM SPR registers
On Wed, Sep 14, 2016 at 03:04:12PM +1000, Cyril Bur wrote: > On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > > From: Anshuman Khandual> > > > This patch adds ptrace interface test for TM SPR registers. This > > also adds ptrace interface based helper functions related to TM > > SPR registers access. > > > > I'm seeing this one fail a lot, it does occasionally succeed but fails > a lot on my test setup. > > I use qemu on a power8 for most of my testing: > qemu-system-ppc64 --enable-kvm -machine pseries,accel=kvm,usb=off -m > 4096 -realtime mlock=off -smp 4,sockets=1,cores=2,threads=2 -nographic > -vga none Hi Cyril, Sorry for the late response. I am just back from a vacation. Strangely the case always succeed in my machine. I will try to reproduce it with your configuration. > > + if ((regs->tm_texasr != TM_SCHED) && (regs->tm_texasr != > > TM_KVM_SCHED)) > > + return TEST_FAIL; > > The above condition fails, should this test try again if this condition > is true, rather than fail? If you have the value of "regs->tm_texasr" in hand, please let me know. Thanks for the elaborated comment in other mails and I will work on that. BR, - Simon
[PATCH] PCI: Add parameter @mmio_force_on to pci_update_resource()
In pci_update_resource(), the PCI device's memory decoding (0x2 in PCI_COMMAND) is disabled when 64-bits memory BAR is updated if the PCI device's memory space wasn't asked to be always on by @pdev-> mmio_always_on. The PF's memory decoding might be disabled when updating its IOV BARs in the following path. Actually, the PF's memory decoding shouldn't be disabled in this scenario as the PF has been started to provide services: sriov_numvfs_store pdev->driver->sriov_configure mlx5_core_sriov_configure pci_enable_sriov sriov_enable pcibios_sriov_enable pnv_pci_sriov_enable pnv_pci_vf_resource_shift pci_update_resource This doesn't change the PF's memory decoding in the path by introducing additional parameter (@mmio_force_on) to pci_update_resource(). Reported-by: Carol SotoSigned-off-by: Gavin Shan Tested-by: Carol Soto --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- drivers/pci/iov.c | 2 +- drivers/pci/pci.c | 2 +- drivers/pci/setup-res.c | 9 + include/linux/pci.h | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index bc0c91e..2d6a2b7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -999,7 +999,7 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) dev_info(>dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", i, , res, (offset > 0) ? 
"En" : "Dis", num_vfs, offset); - pci_update_resource(dev, i + PCI_IOV_RESOURCES); + pci_update_resource(dev, i + PCI_IOV_RESOURCES, true); } return 0; } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 2194b44..117aae6 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -511,7 +511,7 @@ static void sriov_restore_state(struct pci_dev *dev) return; for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) - pci_update_resource(dev, i); + pci_update_resource(dev, i, false); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); pci_iov_set_numvfs(dev, iov->num_VFs); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index aab9d51..87a33c0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -545,7 +545,7 @@ static void pci_restore_bars(struct pci_dev *dev) return; for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) - pci_update_resource(dev, i); + pci_update_resource(dev, i, false); } static const struct pci_platform_pm_ops *pci_platform_pm; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 66c4d8f..e8a50ff 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,7 +26,7 @@ #include "pci.h" -void pci_update_resource(struct pci_dev *dev, int resno) +void pci_update_resource(struct pci_dev *dev, int resno, bool mmio_force_on) { struct pci_bus_region region; bool disable; @@ -81,7 +81,8 @@ void pci_update_resource(struct pci_dev *dev, int resno) * disable decoding so that a half-updated BAR won't conflict * with another device. 
*/ - disable = (res->flags & IORESOURCE_MEM_64) && !dev->mmio_always_on; + disable = (res->flags & IORESOURCE_MEM_64) && + !mmio_force_on && !dev->mmio_always_on; if (disable) { pci_read_config_word(dev, PCI_COMMAND, ); pci_write_config_word(dev, PCI_COMMAND, @@ -310,7 +311,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) res->flags &= ~IORESOURCE_STARTALIGN; dev_info(>dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) - pci_update_resource(dev, resno); + pci_update_resource(dev, resno, false); return 0; } @@ -350,7 +351,7 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsiz dev_info(>dev, "BAR %d: reassigned %pR (expanded by %#llx)\n", resno, res, (unsigned long long) addsize); if (resno < PCI_BRIDGE_RESOURCES) - pci_update_resource(dev, resno); + pci_update_resource(dev, resno, false); return 0; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 0ab8359..99231d1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1039,7 +1039,7 @@ int pci_try_reset_bus(struct pci_bus *bus); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_reset_bridge_secondary_bus(struct pci_dev *dev); -void pci_update_resource(struct pci_dev *dev, int resno); +void
Re: [PATHC v2 0/9] ima: carry the measurement list across kexec
Am Samstag, 17 September 2016, 00:17:37 schrieb Eric W. Biederman: > Thiago Jung Bauermannwrites: > > Hello Eric, > > > > Am Freitag, 16 September 2016, 14:47:13 schrieb Eric W. Biederman: > >> I can see tracking to see if the list has changed at some > >> point and causing a reboot(LINUX_REBOOT_CMD_KEXEC) to fail. > > > > Yes, that is an interesting feature that I can add using the checksum- > > verifying part of my code. I can submit a patch for that if there's > > interest, adding a reboot notifier that verifies the checksum and causes > > a regular reboot instead of a kexec reboot if the checksum fails. > > I was thinking an early failure instead of getting all of the way down > into a kernel an discovering the tpm/ima subsystem would not > initialized. But where that falls in the reboot pathway I don't expect > there is much value in it. I'm not sure I understand. What I described doesn't involve the tpm or ima. I'm suggesting that if I take the parts of patch 4/5 in the kexec hand-over buffer series that verify the image checksum, I can submit a separate patch that checks the integrity of the kexec image early in kernel_kexec() and reverts to a regular reboot if the check fails. This would be orthogonal to ima carrying its measurement list across kexec. I think there is value in that, because if the kexec image is corrupted the machine will just get stuck in the purgatory and (unless it's a platform where the purgatory can print to the console) without even an error message explaining what is going on. Whereas if we notice the corruption before jumping into the purgatory we can switch to a regular reboot and the machine will boot successfully. To have an early failure, when would the checksum verification be done? What I can think of is to have kexec_file_load accept a new flag KEXEC_FILE_VERIFY_IMAGE, which userspace could use to request an integrity check when it's about to start the reboot procedure. 
Then it can decide to either reload the kernel or use a regular reboot if the image is corrupted. Is this what you had in mind? > >> At least the common bootloader cases that I know of using kexec are > >> very > >> minimal distributions that live in a ramdisk and as such it should be > >> very straight forward to measure what is needed at or before > >> sys_kexec_load. But that was completely dismissed as unrealistic so I > >> don't have a clue what actual problem you are trying to solve. > > > > We are interested in solving the problem in a general way because it > > will be useful to us in the future for the case of an arbitrary number > > of kexecs (and thus not only a bootloader but also multiple full-blown > > distros may be involved in the chain). > > > > But you are right that for the use case for which we currently need this > > feature it's feasible to measure everything upfront. We can cross the > > other bridge when we get there. > > Then let's start there. Passing the measurment list is something that > should not be controversial. Great! > >> If there is anyway we can start small and not with this big scary > >> infrastructure change I would very much prefer it. > > > > Sounds good. If we pre-measure everything then the following patches > > from my buffer hand-over series are enough: > > > > [PATCH v5 2/5] kexec_file: Add buffer hand-over support for the next > > kernel [PATCH v5 3/5] powerpc: kexec_file: Add buffer hand-over support > > for the next kernel > > > > Would you consider including those two? > > > > And like I mentioned in the cover letter, patch 1/5 is an interesting > > improvement that is worth considering. > > So from 10,000 feet I think that is correct. > > I am not quite certain why a new mechanism is being invented. We have > other information that is already passed (much of it architecture > specific) like the flattened device tree. 
If you remove the need to > update the information can you just append this information to the > flattened device tree without a new special mechanism to pass the data? > > I am just reluctant to invent a new mechanism when there is an existing > mechanism that looks like it should work without problems. Michael Ellerman suggested putting the buffer contents inside the device tree itself, but the s390 people are also planning to implement this feature. That architecture doesn't use device trees, so a solution that depends on DTs won't help them. With this mechanism each architecture will still need its own way of communicating to the next kernel where the buffer is, but I think it's easier to pass a base address and length than to pass a whole buffer. I suppose we could piggyback the ima measurements buffer at the end of one of the other segments such as the kernel or, in the case of powerpc, the dtb but it looks hackish to me. I think it's cleaner to put it in its own segment. -- []'s Thiago Jung Bauermann IBM Linux Technology Center
Linux 4.8: Reported regressions as of Sunday, 2016-09-18
Hi! Here is my fourth regression report for Linux 4.8. It lists 14 regressions I'm aware of. 5 of them are new; 1 mentioned in last weeks report got fixed. As always: Are you aware of any other regressions? Then please let me know (simply CC regressi...@leemhuis.info). And pls tell me if there is anything in the report that shouldn't be there. Ciao, Thorsten == Current regressions == Desc: genirq: Flags mismatch irq 8, 0088 (mmc0) vs. 0080 (rtc0). mmc0: Failed to request irq 8: -16 Repo: 2016-08-01 https://bugzilla.kernel.org/show_bug.cgi?id=150881 Stat: 2016-09-09 https://bugzilla.kernel.org/show_bug.cgi?id=150881#c34 Note: stalled; root cause somewhere in the main gpio merge for 4.8, but problematic commit still unknown Desc: [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression Repo: 2016-08-09 http://www.spinics.net/lists/kernel/msg2317052.html Stat: 2016-09-09 https://marc.info/?t=14734151953=1=2 Note: looks like post-4.8 material at this point: Mel working on it in his spare time, but "The progression of this series has been unsatisfactory." Desc: scsi host6: runtime PM trying to activate child device host6 but parent (2-2:1.0) is not active Repo: 2016-08-15 https://bugzilla.kernel.org/show_bug.cgi?id=153171 Stat: 2016-09-14 https://bugzilla.kernel.org/show_bug.cgi?id=153171#c5 Note: patch available; mkp "I would like the change to get a bit of soak time before we backport it." 
Desc: DT/OCTEON driver probing broken Repo: 2016-08-16 http://www.spinics.net/lists/devicetree/msg138990.html Stat: 2016-08-30 http://www.spinics.net/lists/devicetree/msg140682.html Note: stalled, poked Rob a week ago, got no reply Desc: gpio-leds broken on OCTEON Repo: 2016-08-23 http://www.spinics.net/lists/devicetree/msg139863.html Stat: 2016-09-12 http://www.spinics.net/lists/devicetree/msg140179.html Note: patch in Linux MIPS patchwork https://patchwork.linux-mips.org/patch/14091/ Desc: Skylake graphics regression: projector failure with 4.8-rc3 Repo: 2016-08-26 http://www.spinics.net/lists/intel-gfx/msg105478.html Stat: 2016-09-01 https://lkml.org/lkml/2016/8/31/946 Note: stalled, poked lists Desc: lk 4.8 + !CONFIG_SHMEM + shmat() = oops Repo: 2016-08-30 http://www.spinics.net/lists/linux-mm/msg112920.html Stat: 2016-09-07 http://www.spinics.net/lists/linux-mm/msg113177.html Note: just like last week: patch "ipc/shm: fix crash if CONFIG_SHMEM is not set" is going to fix this, but has not hit mainline yet Desc: regression in re-read operation by iozone ~10% Repo: 2016-09-02 https://bugzilla.kernel.org/show_bug.cgi?id=155821 Stat: n/a Note: stalled; told reporter he might be better off posting about the issue to some mailing list Desc: brcmfmac is preventing suspend (Dell XPS 13 9350 / Skylake) Repo: 2016-09-13 https://bugzilla.kernel.org/show_bug.cgi?id=156631 Stat: n/a Note: used to be https://bugzilla.kernel.org/show_bug.cgi?id=156361 in last week's report Desc: CPU speed set very low Repo: 2016-09-09 https://lkml.org/lkml/2016/9/9/608 Stat: 2016-09-14 https://lkml.org/lkml/2016/9/14/588 Note: Larry: "Testing continues." Desc: pinctrl: qcom: Clear all function selection bits introduced a regression by not properly masking the calculated value. 
Repo: 2016-09-12 http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1229055.html Stat: n/a Note: Patch available in initial report Desc: WARN in vb2_dma_sg_alloc when makes dvbstream fail with AverMedia Hybrid+FM Repo: 2016-09-13 https://bugzilla.kernel.org/show_bug.cgi?id=156751 Stat: n/a Note: told reporter he might be better off posting the issue to linux-media Desc: it is now common that machine needs re-run of xrandr after resume Repo: 2016-09-13 http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1230782.html Stat: 2016-09-15 http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1232834.html http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1231104.html Note: Seems Martin isn't seeing the problem anymore; Pavel's issue might be a different problem anyway; and he now has a userland-problem that complicates testing Desc: usb: gadget: udc: atmel: fix endpoint name Repo: 2016-09-15 http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1232309.html http://www.mail-archive.com/linux-usb@vger.kernel.org/msg80645.html Stat: n/a Note: Patch available: "Felipe, Greg, It is clearly a regression and material for 4.8-fixes." == Fixed since last report == Desc: ath9k: bring back direction setting in ath9k_{start_stop} Repo: 2016-09-01 https://marc.info/?l=linux-wireless=147292415030585=2 Fix: https://git.kernel.org/torvalds/c/e34f2ff40e0339f6a379e1ecf49e8f2759056453
[PATCH] Removed dependency on IDE_GD_ATA if ADB_PMU_LED_DISK is selected.
We can use the front led of powerbooks/ibooks as a disk activity indication without the deprecated IDE_GD_ATA. Signed-off-by: Elimar Riesebieter --- drivers/macintosh/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index d28690f..5d80810 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -102,7 +102,6 @@ config ADB_PMU_LED_DISK bool "Use front LED as DISK LED by default" depends on ADB_PMU_LED depends on LEDS_CLASS - depends on IDE_GD_ATA select LEDS_TRIGGERS select LEDS_TRIGGER_DISK help -- 2.9.3 -- "Talking much about oneself can also be a means to conceal oneself." -Friedrich Nietzsche