[PATCH 08/14] x86/entry: Optimize local_db_save() for virt
Because DRn access is 'difficult' with virt; but the DR7 read is cheaper than a cacheline miss on native, add a virt specific fast path to local_db_save(), such that when breakpoints are not in use we avoid touching DRn entirely. Suggested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/debugreg.h |7 ++- arch/x86/kernel/hw_breakpoint.c | 26 ++ arch/x86/kvm/vmx/nested.c |2 +- 3 files changed, 29 insertions(+), 6 deletions(-) --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -85,8 +85,8 @@ static inline void hw_breakpoint_disable set_debugreg(0UL, 3); } -static inline int hw_breakpoint_active(void) +static inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } @@ -117,6 +119,9 @@ static __always_inline unsigned long loc { unsigned long dr7; + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + get_debugreg(dr7, 7); dr7 &= ~0x400; /* architecturally set bit */ if (dr7) --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -99,6 +99,8 @@ int arch_install_hw_breakpoint(struct pe unsigned long *dr7; int i; + lockdep_assert_irqs_disabled(); + for (i = 0; i < HBP_NUM; i++) { struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); @@ -117,6 +119,12 @@ int arch_install_hw_breakpoint(struct pe dr7 = this_cpu_ptr(&cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); + /* +* Ensure we first write cpu_dr7 before we set the DR7 register. +* This ensures an NMI never see cpu_dr7 0 when DR7 is not. +*/ + barrier(); + set_debugreg(*dr7, 7); if (info->mask) set_dr_addr_mask(info->mask, i); @@ -136,9 +144,11 @@ int arch_install_hw_breakpoint(struct pe void arch_uninstall_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; + unsigned long dr7; int i; + lockdep_assert_irqs_disabled(); + for (i = 0; i < HBP_NUM; i++) { struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); @@ -151,12 +161,20 @@ void arch_uninstall_hw_breakpoint(struct if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = this_cpu_ptr(&cpu_dr7); - *dr7 &= ~__encode_dr7(i, info->len, info->type); + dr7 = this_cpu_read(cpu_dr7); + dr7 &= ~__encode_dr7(i, info->len, info->type); - set_debugreg(*dr7, 7); + set_debugreg(dr7, 7); if (info->mask) set_dr_addr_mask(0, i); + + /* +* Ensure the write to cpu_dr7 is after we've set the DR7 register. +* This ensures an NMI never see cpu_dr7 0 when DR7 is not. +*/ + barrier(); + + this_cpu_write(cpu_dr7, dr7); } static int arch_bp_generic_len(int x86_len) --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3028,9 +3028,9 @@ static int nested_vmx_check_vmentry_hw(s /* * VMExit clears RFLAGS.IF and DR7, even on a consistency check. */ - local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + local_irq_enable(); preempt_enable(); /*
Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")
On Fri, May 29, 2020 at 03:21:35PM -0500, Bjorn Helgaas wrote: > Yeah, that makes sense. I can't remember the details, but I'm pretty > sure there's a reason why we ask for the whole set of things. Seems > like it solved some problem. I think Matthew Garrett might have been > involved in that. This was https://bugzilla.redhat.com/show_bug.cgi?id=638912 - some firmware misbehaves unless you pass the same set of supported functionality as Windows does. -- Matthew Garrett | mj...@srcf.ucam.org
[RFC][PATCH] ASoC: qcom: q6asm-dai: kCFI fix
Fixes the following kCFI crash seen on db845c, caused by the function prototypes not matching the callback function prototype. [ 82.585661] Unable to handle kernel NULL pointer dereference at virtual address 0001 [ 82.595387] Mem abort info: [ 82.599463] ESR = 0x9605 [ 82.602658] EC = 0x25: DABT (current EL), IL = 32 bits [ 82.608177] SET = 0, FnV = 0 [ 82.611829] EA = 0, S1PTW = 0 [ 82.615369] Data abort info: [ 82.618751] ISV = 0, ISS = 0x0005 [ 82.622641] CM = 0, WnR = 0 [ 82.625774] user pgtable: 4k pages, 39-bit VAs, pgdp=000174259000 [ 82.632292] [0001] pgd=, pud= [ 82.639167] Internal error: Oops: 9605 [#1] PREEMPT SMP [ 82.644795] Modules linked in: hci_uart btqca xhci_plat_hcd xhci_pci_renesas xhci_pci xhci_hcd wcn36xx wcnss_ctrl wcd934x vctrl_regulator ufs_qcom syscon_reboot_e [ 82.644927] qcom_apcs_ipc_mailbox q6asm_dai q6routing q6asm q6afe_dai q6adm q6afe q6core q6dsp_common pm8941_pwrkey pm8916_wdt platform_mhu pinctrl_spmi_mpp pine [ 82.812982] CPU: 3 PID: 240 Comm: kworker/u16:4 Tainted: GW 5.6.0-rc7-mainline-00960-g0c34353d11b9-dirty #1 [ 82.824201] Hardware name: Thundercomm Dragonboard 845c (DT) [ 82.829937] Workqueue: qcom_apr_rx apr_rxwq [apr] [ 82.834698] pstate: 80c5 (Nzcv daif +PAN +UAO) [ 82.839553] pc : __cfi_check_fail+0x4/0x1c [q6asm_dai] [ 82.844754] lr : __cfi_check+0x3a8/0x3b0 [q6asm_dai] [ 82.849767] sp : ffc0105f3c20 [ 82.853123] x29: ffc0105f3c30 x28: 0020 [ 82.858489] x27: ff80f4588400 x26: ff80f458ec94 [ 82.863854] x25: ff80f458ece8 x24: ffe3670c7000 [ 82.869220] x23: ff8094bb7b34 x22: ffe367137000 [ 82.874585] x21: bd07909b332eada6 x20: 0001 [ 82.879950] x19: ffe36713863c x18: ff80f8df4430 [ 82.885316] x17: 0001 x16: ffe39d15e660 [ 82.890681] x15: 0001 x14: 0027 [ 82.896047] x13: x12: ffe39e6465a0 [ 82.901413] x11: 0051 x10: [ 82.906779] x9 : 000ffe366c19 x8 : c3c5f18762d1ceef [ 82.912145] x7 : x6 : ffc010877698 [ 82.917511] x5 : ffc0105f3c00 x4 : [ 82.922877] x3 : x2 : 0001 [ 82.928243] x1 : ffe36713863c x0 : 0001 [ 82.933610] Call trace: [ 82.936099] __cfi_check_fail+0x4/0x1c [q6asm_dai] [ 82.940955] q6asm_srvc_callback+0x22c/0x618 [q6asm] [ 82.945973] apr_rxwq+0x1a8/0x27c [apr] [ 82.949861] process_one_work+0x2e8/0x54c [ 82.953919] worker_thread+0x27c/0x4d4 [ 82.957715] kthread+0x144/0x154 [ 82.960985] ret_from_fork+0x10/0x18 [ 82.964603] Code: a8c37bfd f85f8e5e d65f03c0 b4a0 (3948) [ 82.970762] ---[ end trace 410accb839617143 ]--- [ 82.975429] Kernel panic - not syncing: Fatal exception Cc: Patrick Lai Cc: Banajit Goswami Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Srinivas Kandagatla Cc: Vinod Koul Cc: Kuninori Morimoto Cc: Stephan Gerhold Cc: Sami Tolvanen Cc: Todd Kjos Cc: Alistair Delva Cc: Amit Pundir Cc: Sumit Semwal Cc: alsa-de...@alsa-project.org Signed-off-by: John Stultz --- sound/soc/qcom/qdsp6/q6asm-dai.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 125af00bba53..4640804aab7f 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -176,7 +176,7 @@ static const struct snd_compr_codec_caps q6asm_compr_caps = { }; static void event_handler(uint32_t opcode, uint32_t token, - uint32_t *payload, void *priv) + void *payload, void *priv) { struct q6asm_dai_rtd *prtd = priv; struct snd_pcm_substream *substream = prtd->substream; @@ -490,7 +490,7 @@ static int q6asm_dai_hw_params(struct snd_soc_component *component, } static void compress_event_handler(uint32_t opcode, uint32_t token, - uint32_t *payload, void *priv) + void *payload, void *priv) { struct q6asm_dai_rtd *prtd = priv; struct snd_compr_stream *substream = prtd->cstream; -- 2.17.1
Re: [PATCH] ARM: omap2: fix omap5_realtime_timer_init definition
On Fri, May 29, 2020 at 11:14 PM Tony Lindgren wrote: > * Arnd Bergmann [200529 21:09]: > > > > #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER > > extern void omap5_realtime_timer_init(void); > > #else > > static inline void omap5_realtime_timer_init(void) > > { > > } > > #endif > > > > In fact, the inline stub is what that caused the regression, > > so I think it's ok with my patch. > > To me it seems not having SOC_HAS_REALTIME_COUNTER will > cause omap5_realtime_timer_init() not get called? Correct, this looked to me like it was the intention of that symbol. Unfortunately there is no help text but it is user selectable: config SOC_HAS_REALTIME_COUNTER bool "Real time free running counter" depends on SOC_OMAP5 || SOC_DRA7XX default y > That initializes clocks and calls timer_probe(). So this > will result in non-booting system AFAIK, the header > file stub should no rely CONFIG_SOC_HAS_REALTIME_COUNTER > also, but rather ! CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX. > > Also the Makefile change at least seems wrong, that > can't rely on CONFIG_SOC_HAS_REALTIME_COUNTER. How about just removing the prompt on CONFIG_SOC_HAS_REALTIME_COUNTER but keeping the rest of my patch? That way it's just always enabled when there is a chip that needs it enabled in the kernel config. The only other usage of the symbol is #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER void set_cntfreq(void); #else static inline void set_cntfreq(void) { } #endif Alternatively, we could just remove the Kconfig symbol altogether and rely on (SOC_OMAP5 || SOC_DRA7XX) everywhere, but that seems a little more fragile in case there is going to be another chip that needs it. Arnd
Re: [PATCH] refperf: work around 64-bit division
On Fri, May 29, 2020 at 10:15:51PM +0200, Arnd Bergmann wrote: > A 64-bit division was introduced in refperf, breaking compilation > on all 32-bit architectures: > > kernel/rcu/refperf.o: in function `main_func': > refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod' > > Work it by using div_u64 to mark the expensive operation. > > Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds") > Signed-off-by: Arnd Bergmann Many thanks, Arnd! I queued this and reverted my earlier 64-bit-only restriction, pushing the result out on the -rcu tree's "dev" branch. I also added a couple of Reported-by entries. Thanx, Paul > --- > kernel/rcu/refperf.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c > index 47df72c492b3..c2366648981d 100644 > --- a/kernel/rcu/refperf.c > +++ b/kernel/rcu/refperf.c > @@ -386,7 +386,7 @@ static int main_func(void *arg) > if (torture_must_stop()) > goto end; > > - result_avg[exp] = 1000 * process_durations(nreaders) / > (nreaders * loops); > + result_avg[exp] = div_u64(1000 * process_durations(nreaders), > nreaders * loops); > } > > // Print the average of all experiments > @@ -397,9 +397,14 @@ static int main_func(void *arg) > strcat(buf, "Threads\tTime(ns)\n"); > > for (exp = 0; exp < nruns; exp++) { > + u64 avg; > + u32 rem; > + > if (errexit) > break; > - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / > 1000, (int)(result_avg[exp] % 1000)); > + > + avg = div_s64_rem(result_avg[exp], 1000, &rem); > + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem); > strcat(buf, buf1); > } > > -- > 2.26.2 >
[PATCH v3 4/5] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables
Currently all IRQ-tracking state is in task_struct, this means that task_struct needs to be defined before we use it. Especially for lockdep_assert_irq*() this can lead to header-hell. Move the hardirq state into per-cpu variables to avoid the task_struct dependency. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/irqflags.h | 19 --- include/linux/lockdep.h | 34 ++ include/linux/sched.h|2 -- kernel/fork.c|4 +--- kernel/locking/lockdep.c | 30 +++--- kernel/softirq.c |6 ++ 6 files changed, 52 insertions(+), 43 deletions(-) --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -14,6 +14,7 @@ #include #include +#include /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING @@ -31,18 +32,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); + extern void trace_hardirqs_on_prepare(void); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p)((p)->hardirq_context) +# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p)((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) -# define lockdep_hardirq_enter() \ -do { \ - if (!current->hardirq_context++)\ - current->hardirq_threaded = 0; \ +# define lockdep_hardirq_enter() \ +do { \ + if (this_cpu_inc_return(hardirq_context) == 1) \ + current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded()\ do { \ @@ -50,7 +55,7 @@ do { \ } while (0) # define lockdep_hardirq_exit()\ do { \ - current->hardirq_context--; \ + this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -19,6 +19,7 @@ extern int lock_stat; #define MAX_LOCKDEP_SUBCLASSES 8UL +#include #include enum lockdep_wait_type { @@ -703,28 +704,29 @@ do { \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -#define lockdep_assert_irqs_enabled() do {\ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirqs_enabled, \ - "IRQs not enabled as expected\n");\ - } while (0) +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); -#define lockdep_assert_irqs_disabled() do {\ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirqs_enabled,\ - "IRQs not disabled as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_enabled() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ +} while (0) -#define lockdep_assert_in_irq() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirq_context,\ - "Not in hardirq as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_disabled() \ +do { \ + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ +} while (0) + +#define lockdep_assert_in_irq() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ +} while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do {
[PATCH v3 3/5] s390: Break cyclic percpu include
In order to use in irqflags.h, we need to make sure asm/percpu.h does not itself depend on irqflags.h Signed-off-by: Peter Zijlstra (Intel) --- arch/s390/include/asm/smp.h |1 + arch/s390/include/asm/thread_info.h |1 - 2 files changed, 1 insertion(+), 1 deletion(-) --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -10,6 +10,7 @@ #include #include +#include #define raw_smp_processor_id() (S390_lowcore.cpu_nr) --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -24,7 +24,6 @@ #ifndef __ASSEMBLY__ #include #include -#include #define STACK_INIT_OFFSET \ (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
[PATCH v3 5/5] lockdep: Remove lockdep_hardirq{s_enabled,_context}() argument
Now that the macros use per-cpu data, we no longer need the argument. Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/entry/common.c|2 +- include/linux/irqflags.h |8 include/linux/lockdep.h|2 +- kernel/locking/lockdep.c | 30 +++--- kernel/softirq.c |2 +- tools/include/linux/irqflags.h |4 ++-- 6 files changed, 24 insertions(+), 24 deletions(-) --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -689,7 +689,7 @@ noinstr void idtentry_exit_user(struct p noinstr bool idtentry_enter_nmi(struct pt_regs *regs) { - bool irq_state = lockdep_hardirqs_enabled(current); + bool irq_state = lockdep_hardirqs_enabled(); __nmi_enter(); lockdep_hardirqs_off(CALLER_ADDR0); --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -40,9 +40,9 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context)) +# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p)((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) +# define lockdep_hardirqs_enabled()(this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ @@ -109,9 +109,9 @@ do {\ # define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context(p)0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p)0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled()0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_threaded()do { } while (0) --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -736,7 +736,7 @@ do { \ # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - lockdep_hardirq_context(current) && \ + lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_str pr_warn("-\n"); pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), curr->softirqs_enabled); print_lock(next); @@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), lockdep_softirqs_enabled(curr)); print_lock(this); @@ -3655,7 +3655,7 @@ void lockdep_hardirqs_on_prepare(unsigne if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (unlikely(lockdep_hardirqs_enabled(current))) { + if (unlikely(lockdep_hardirqs_enabled())) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3683,7 +3683,7 @@ void lockdep_hardirqs_on_prepare(unsigne * Can't allow enabling interrupts while in an interrupt handler, * that's general bad form and such. Recursion, limited stack etc.. */ - if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current))) + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())) return; curren
[RFC][PATCH v3 1/5] sparc64: Fix asm/percpu.h build error
In order to break a header dependency between lockdep and task_struct, I need per-cpu stuff from lockdep. Including from lockdep.h gives a build error, this patch cures that, but results in the following warning: ../arch/sparc/include/asm/percpu_64.h:7:24: warning: call-clobbered register used for global register variable register unsigned long __local_per_cpu_offset asm("g5"); But i've no idea how to fix that :/ but it does build. Not-Signed-off-by: Peter Zijlstra (Intel) --- arch/sparc/include/asm/trap_block.h |2 ++ 1 file changed, 2 insertions(+) --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -2,6 +2,8 @@ #ifndef _SPARC_TRAP_BLOCK_H #define _SPARC_TRAP_BLOCK_H +#include + #include #include
Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro
On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris wrote: > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko > wrote: > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote: > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot wrote: ... > > > Taking the example statement (in my patch) where compilation warning > > > is getting reported: > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0); > > > > > > 'nbits' is of type 'unsigned long'. > > > In above, the sanity check is comparing '0' with unsigned value. And > > > unsigned value can't be less than '0' ever, hence the warning. > > > But this warning will occur whenever there will be '0' as one of the > > > 'argument' and an unsigned variable as another 'argument' for GENMASK. > > Proper fix is to fix GENMASK(), but allowed workaround is to use > > (BIT(nbits) - 1) > > instead. > When I used BIT macro (earlier), I had faced a problem. I want to tell > you about that. > > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or > clump size) is BITS_PER_LONG, unexpected calculation happens. > > Explanation: > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer), > (BIT(nbits) - 1) > gives a value of zero and when this zero is ANDed with any value, it > makes it full zero. This is unexpected and incorrect calculation happening. > > What actually happens is in the macro expansion of BIT(64), that is 1 > << 64, the '1' overflows from leftmost bit position (most significant > bit) and re-enters at the rightmost bit position (least significant > bit), therefore 1 << 64 becomes '0x1', and when another '1' is > subtracted from this, the final result becomes 0. > > Since this macro is being used in both bitmap_get_value and > bitmap_set_value functions, it will give unexpected results when nbits or > clump > size is BITS_PER_LONG (32 or 64 depending on arch). I see, something like https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139 should be done. But yes, let's try to fix GENMASK(). So, if we modify the following #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ __builtin_constant_p((l) > (h)), (l) > (h), 0))) to be #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0))) would it work? > William also knows about this issue: > "This is undefined behavior in the C standard (section 6.5.7 in the N1124)" I think it is about 6.5.7.3 here, 1U << 31 (or 63) is okay. -- With Best Regards, Andy Shevchenko
RE: [EXTERNAL] Re: [PATCH v4] perf inject --jit: Remove //anon mmap events
>/tmp/perf/perf record -k 1 -e cycles:u -o /tmp/perf.data java >-agentpath:/tmp/perf/libperf-jvmti.so -XX:+PreserveFramePointer >-XX:InitialCodeCacheSize=20M -XX:ReservedCodeCacheSize=1G -jar >dacapo-9.12-bach.jar jython Its also possible the `InitialCodeCacheSize=20M` argument is masking the problem. Something like 4K would make the problem much easier to see. I see the problem every time .NET grows the cache across a 64K page boundary. 20M may mean you are allocating huge pages or something which might mask the problem. We may be allocating code pages 64K at a time.
Re: [PATCH v2 1/4] dt-bindings: pinctrl: Document 7211 compatible for brcm, bcm2835-gpio.txt
On Fri, 29 May 2020 12:15:19 -0700, Florian Fainelli wrote: > Document the brcm,bcm7211-gpio compatible string in the > brcm,bcm2835-gpio.txt document. > > Signed-off-by: Florian Fainelli > --- > Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt | 1 + > 1 file changed, 1 insertion(+) > Acked-by: Rob Herring
[PATCH v3 0/5] lockdep: Change IRQ state tracking to use per-cpu variables
Ahmed and Sebastian wanted additional lockdep_assert*() macros and ran into header hell. Move the IRQ state into per-cpu variables, which removes the dependency on task_struct, which is what generated the header-hell. These patches are intended to go on top of: https://lkml.kernel.org/r/20200529212728.795169...@infradead.org but should apply on current tip/master without much difficulty. There are a few build fixes for Sparc64, PowerPC64 and s390. Especially the Sparc one I'm not sure about.
[PATCH v3 2/5] powerpc64: Break asm/percpu.h vs spinlock_types.h dependency
In order to use in lockdep.h, we need to make sure asm/percpu.h does not itself depend on lockdep. The below seems to make that so and builds powerpc64-defconfig + PROVE_LOCKING. Signed-off-by: Peter Zijlstra (Intel) --- arch/powerpc/include/asm/dtl.h | 52 + arch/powerpc/include/asm/lppaca.h | 44 --- arch/powerpc/kernel/time.c |1 arch/powerpc/kvm/book3s_hv.c |1 arch/powerpc/platforms/pseries/dtl.c |1 arch/powerpc/platforms/pseries/lpar.c |1 arch/powerpc/platforms/pseries/setup.c |1 arch/powerpc/platforms/pseries/svm.c |1 8 files changed, 58 insertions(+), 44 deletions(-) --- /dev/null +++ b/arch/powerpc/include/asm/dtl.h @@ -0,0 +1,52 @@ +#ifndef _ASM_POWERPC_DTL_H +#define _ASM_POWERPC_DTL_H + +#include +#include + +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. + */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + __be16 processor_id; + __be32 enqueue_to_dispatch_time; + __be32 ready_to_enqueue_time; + __be32 waiting_to_ready_time; + __be64 timebase; + __be64 fault_addr; + __be64 srr0; + __be64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + +/* + * Dispatch trace log event enable mask: + * 0x1: voluntary virtual processor waits + * 0x2: time-slice preempts + * 0x4: virtual partition memory page faults + */ +#define DTL_LOG_CEDE 0x1 +#define DTL_LOG_PREEMPT0x2 +#define DTL_LOG_FAULT 0x4 +#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) + +extern struct kmem_cache *dtl_cache; +extern rwlock_t dtl_access_lock; + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + +extern void register_dtl_buffer(int cpu); +extern void alloc_dtl_buffers(unsigned long *time_limit); +extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); + +#endif /* _ASM_POWERPC_DTL_H */ --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -42,7 +42,6 @@ */ #include #include -#include #include #include #include @@ -146,49 +145,6 @@ struct slb_shadow { } save_area[SLB_NUM_BOLTED]; } cacheline_aligned; -/* - * Layout of entries in the hypervisor's dispatch trace log buffer. - */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - __be16 processor_id; - __be32 enqueue_to_dispatch_time; - __be32 ready_to_enqueue_time; - __be32 waiting_to_ready_time; - __be64 timebase; - __be64 fault_addr; - __be64 srr0; - __be64 srr1; -}; - -#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */ -#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) - -/* - * Dispatch trace log event enable mask: - * 0x1: voluntary virtual processor waits - * 0x2: time-slice preempts - * 0x4: virtual partition memory page faults - */ -#define DTL_LOG_CEDE 0x1 -#define DTL_LOG_PREEMPT0x2 -#define DTL_LOG_FAULT 0x4 -#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) - -extern struct kmem_cache *dtl_cache; -extern rwlock_t dtl_access_lock; - -/* - * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls - * reading from the dispatch trace log. If other code wants to consume - * DTL entries, it can set this pointer to a function that will get - * called once for each DTL entry that gets processed. - */ -extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); - -extern void register_dtl_buffer(int cpu); -extern void alloc_dtl_buffers(unsigned long *time_limit); extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); #endif /* CONFIG_PPC_BOOK3S */ --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -69,6 +69,7 @@ #include #include #include +#include /* powerpc clocksource/clockevent code */ --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -74,6 +74,7 @@ #include #include #include +#include #include "book3s.h" --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "pseries.h" --- a/ar
Re: [RFC PATCH v4 07/10] vfio/pci: introduce a new irq type VFIO_IRQ_TYPE_REMAP_BAR_REGION
On Sun, 17 May 2020 22:52:45 -0400 Yan Zhao wrote: > This is a virtual irq type. > vendor driver triggers this irq when it wants to notify userspace to > remap PCI BARs. > > 1. vendor driver triggers this irq and packs the target bar number in >the ctx count. i.e. "1 << bar_number". >if a bit is set, the corresponding bar is to be remapped. > > 2. userspace requery the specified PCI BAR from kernel and if flags of > the bar regions are changed, it removes the old subregions and attaches > subregions according to the new flags. > > 3. userspace notifies back to kernel by writing one to the eventfd of > this irq. > > Please check the corresponding qemu implementation from the reply of this > patch, and a sample usage in vendor driver in patch [10/10]. > > Cc: Kevin Tian > Signed-off-by: Yan Zhao > --- > include/uapi/linux/vfio.h | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index 2d0d85c7c4d4..55895f75d720 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -704,6 +704,17 @@ struct vfio_irq_info_cap_type { > __u32 subtype; /* type specific */ > }; > > +/* Bar Region Query IRQ TYPE */ > +#define VFIO_IRQ_TYPE_REMAP_BAR_REGION (1) > + > +/* sub-types for VFIO_IRQ_TYPE_REMAP_BAR_REGION */ > +/* > + * This irq notifies userspace to re-query BAR region and remaps the > + * subregions. > + */ > +#define VFIO_IRQ_SUBTYPE_REMAP_BAR_REGION(0) Hi Yan, How do we do this in a way that's backwards compatible? Or maybe, how do we perform a handshake between the vendor driver and userspace to indicate this support? Would the vendor driver refuse to change device_state in the migration region if the user has not enabled this IRQ? Everything you've described in the commit log needs to be in this header, we can't have the usage protocol buried in a commit log. It also seems like this is unnecessarily PCI specific. Can't the count bitmap simply indicate the region index to re-evaluate? Maybe you were worried about running out of bits in the ctx count? An IRQ per region could resolve that, but maybe we could also just add another IRQ for the next bitmap of regions. I assume that the bitmap can indicate multiple regions to re-evaluate, but that should be documented. Also, what sort of service requirements does this imply? Would the vendor driver send this IRQ when the user tries to set the device_state to _SAVING and therefore we'd require the user to accept, implement the mapping change, and acknowledge the IRQ all while waiting for the write to device_state to return? That implies quite a lot of asynchronous support in the userspace driver. Thanks, Alex > + > + > /** > * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct > vfio_irq_set) > *
[PATCH v4 kunit-next 0/2] kunit: extend kunit resources API
A recent RFC patch set [1] suggests some additional functionality may be needed around kunit resources. It seems to require 1. support for resources without allocation 2. support for lookup of such resources 3. support for access to resources across multiple kernel threads The proposed changes here are designed to address these needs. The idea is we first generalize the API to support adding resources with static data; then from there we support named resources. The latter support is needed because if we are in a different thread context and only have the "struct kunit *" to work with, we need a way to identify a resource in lookup. [1] https://lkml.org/lkml/2020/2/26/1286 Changes since v3: - removed unused "init" field from "struct kunit_resources" (Brendan) Changes since v2: - moved a few functions relating to resource retrieval in patches 1 and 2 into include/kunit/test.h and defined as "static inline"; this allows built-in consumers to use these functions when KUnit is built as a module Changes since v1: - reformatted longer parameter lists to have one parameter per-line (Brendan, patch 1) - fixed phrasing in various comments to clarify allocation of memory and added comment to kunit resource tests to clarify why kunit_put_resource() is used there (Brendan, patch 1) - changed #define to static inline function (Brendan, patch 2) - simplified kunit_add_named_resource() to use more of existing code for non-named resource (Brendan, patch 2) Alan Maguire (2): kunit: generalize kunit_resource API beyond allocated resources kunit: add support for named resources Alan Maguire (2): kunit: generalize kunit_resource API beyond allocated resources kunit: add support for named resources include/kunit/test.h | 210 +++--- lib/kunit/kunit-test.c| 111 +++- lib/kunit/string-stream.c | 14 ++-- lib/kunit/test.c | 171 ++--- 4 files changed, 380 insertions(+), 126 deletions(-) -- 1.8.3.1
[PATCH v4 kunit-next 1/2] kunit: generalize kunit_resource API beyond allocated resources
In its original form, the kunit resources API - consisting the struct kunit_resource and associated functions - was focused on adding allocated resources during test operation that would be automatically cleaned up on test completion. The recent RFC patch proposing converting KASAN tests to KUnit [1] showed another potential model - where outside of test context, but with a pointer to the test state, we wish to access/update test-related data, but expressly want to avoid allocations. It turns out we can generalize the kunit_resource to support static resources where the struct kunit_resource * is passed in and initialized for us. As part of this work, we also change the "allocation" field to the more general "data" name, as instead of associating an allocation, we can associate a pointer to static data. Static data is distinguished by a NULL free functions. A test is added to cover using kunit_add_resource() with a static resource and data. Finally we also make use of the kernel's krefcount interfaces to manage reference counting of KUnit resources. The motivation for this is simple; if we have kernel threads accessing and using resources (say via kunit_find_resource()) we need to ensure we do not remove said resources (or indeed free them if they were dynamically allocated) until the reference count reaches zero. A new function - kunit_put_resource() - is added to handle this, and it should be called after a thread using kunit_find_resource() is finished with the retrieved resource. We ensure that the functions needed to look up, use and drop reference count are "static inline"-defined so that they can be used by builtin code as well as modules in the case that KUnit is built as a module. A cosmetic change here also; I've tried moving to kunit_[action]_resource() as the format of function names for consistency and readability. [1] https://lkml.org/lkml/2020/2/26/1286 Signed-off-by: Alan Maguire Reviewed-by: Brendan Higgins --- include/kunit/test.h | 156 +- lib/kunit/kunit-test.c| 74 -- lib/kunit/string-stream.c | 14 ++--- lib/kunit/test.c | 153 - 4 files changed, 268 insertions(+), 129 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 47e61e1..f9b914e 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -15,6 +15,7 @@ #include #include #include +#include struct kunit_resource; @@ -23,13 +24,19 @@ /** * struct kunit_resource - represents a *test managed resource* - * @allocation: for the user to store arbitrary data. + * @data: for the user to store arbitrary data. * @free: a user supplied function to free the resource. Populated by - * kunit_alloc_resource(). + * kunit_resource_alloc(). * * Represents a *test managed resource*, a resource which will automatically be * cleaned up at the end of a test case. * + * Resources are reference counted so if a resource is retrieved via + * kunit_alloc_and_get_resource() or kunit_find_resource(), we need + * to call kunit_put_resource() to reduce the resource reference count + * when finished with it. Note that kunit_alloc_resource() does not require a + * kunit_resource_put() because it does not retrieve the resource itself. + * * Example: * * .. code-block:: c @@ -42,9 +49,9 @@ * static int kunit_kmalloc_init(struct kunit_resource *res, void *context) * { * struct kunit_kmalloc_params *params = context; - * res->allocation = kmalloc(params->size, params->gfp); + * res->data = kmalloc(params->size, params->gfp); * - * if (!res->allocation) + * if (!res->data) * return -ENOMEM; * * return 0; @@ -52,30 +59,26 @@ * * static void kunit_kmalloc_free(struct kunit_resource *res) * { - * kfree(res->allocation); + * kfree(res->data); * } * * void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) * { * struct kunit_kmalloc_params params; - * struct kunit_resource *res; * * params.size = size; * params.gfp = gfp; * - * res = kunit_alloc_resource(test, kunit_kmalloc_init, + * return kunit_alloc_resource(test, kunit_kmalloc_init, * kunit_kmalloc_free, ¶ms); - * if (res) - * return res->allocation; - * - * return NULL; * } */ struct kunit_resource { - void *allocation; - kunit_resource_free_t free; + void *data; /* private: internal use only. */ + kunit_resource_free_t free; + struct kref refcount; struct list_head node; }; @@ -284,6 +287,64 @@ struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test, void *context);
Re: [PATCH] ARM: omap2: fix omap5_realtime_timer_init definition
* Arnd Bergmann [200529 21:41]: > On Fri, May 29, 2020 at 11:14 PM Tony Lindgren wrote: > > * Arnd Bergmann [200529 21:09]: > > > > > > #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER > > > extern void omap5_realtime_timer_init(void); > > > #else > > > static inline void omap5_realtime_timer_init(void) > > > { > > > } > > > #endif > > > > > > In fact, the inline stub is what that caused the regression, > > > so I think it's ok with my patch. > > > > To me it seems not having SOC_HAS_REALTIME_COUNTER will > > cause omap5_realtime_timer_init() not get called? > > Correct, this looked to me like it was the intention of that > symbol. Unfortunately there is no help text but it is user > selectable: > > config SOC_HAS_REALTIME_COUNTER > bool "Real time free running counter" > depends on SOC_OMAP5 || SOC_DRA7XX > default y Maybe this is a legacy Kconfig option since Santosh got the cpuidle coupled to switch things to using the always on timers for idle modes years ago already. > > That initializes clocks and calls timer_probe(). So this > > will result in non-booting system AFAIK, the header > > file stub should no rely CONFIG_SOC_HAS_REALTIME_COUNTER > > also, but rather ! CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX. > > > > Also the Makefile change at least seems wrong, that > > can't rely on CONFIG_SOC_HAS_REALTIME_COUNTER. > > How about just removing the prompt on > CONFIG_SOC_HAS_REALTIME_COUNTER but keeping the > rest of my patch? That way it's just always enabled when > there is a chip that needs it enabled in the kernel config. > > The only other usage of the symbol is > > #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER > void set_cntfreq(void); > #else > static inline void set_cntfreq(void) > { > } > #endif Yeah it's already default y, so I'd say let's just get rid of the option. > Alternatively, we could just remove the Kconfig symbol > altogether and rely on (SOC_OMAP5 || SOC_DRA7XX) > everywhere, but that seems a little more fragile in case > there is going to be another chip that needs it. Sounds like we can just remove CONFIG_SOC_HAS_REALTIME_COUNTER and rely on (SOC_OMAP5 || SOC_DRA7XX). Regards, Tony
[PATCH v4 kunit-next 2/2] kunit: add support for named resources
The kunit resources API allows for custom initialization and cleanup code (init/fini); here a new resource add function sets the "struct kunit_resource" "name" field, and calls the standard add function. Having a simple way to name resources is useful in cases such as multithreaded tests where a set of resources are shared among threads; a pointer to the "struct kunit *" test state then is all that is needed to retrieve and use named resources. Support is provided to add, find and destroy named resources; the latter two are simply wrappers that use a "match-by-name" callback. If an attempt to add a resource with a name that already exists is made kunit_add_named_resource() will return -EEXIST. Signed-off-by: Alan Maguire Reviewed-by: Brendan Higgins --- include/kunit/test.h | 54 ++ lib/kunit/kunit-test.c | 37 ++ lib/kunit/test.c | 24 ++ 3 files changed, 115 insertions(+) diff --git a/include/kunit/test.h b/include/kunit/test.h index f9b914e..59f3144 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -72,9 +72,15 @@ * return kunit_alloc_resource(test, kunit_kmalloc_init, * kunit_kmalloc_free, ¶ms); * } + * + * Resources can also be named, with lookup/removal done on a name + * basis also. kunit_add_named_resource(), kunit_find_named_resource() + * and kunit_destroy_named_resource(). Resource names must be + * unique within the test instance. */ struct kunit_resource { void *data; + const char *name; /* optional name */ /* private: internal use only. */ kunit_resource_free_t free; @@ -344,6 +350,21 @@ int kunit_add_resource(struct kunit *test, kunit_resource_free_t free, struct kunit_resource *res, void *data); + +/** + * kunit_add_named_resource() - Add a named *test managed resource*. + * @test: The test context object. + * @init: a user-supplied function to initialize the resource data, if needed. + * @free: a user-supplied function to free the resource data, if needed. + * @name_data: name and data to be set for resource. + */ +int kunit_add_named_resource(struct kunit *test, +kunit_resource_init_t init, +kunit_resource_free_t free, +struct kunit_resource *res, +const char *name, +void *data); + /** * kunit_alloc_resource() - Allocates a *test managed resource*. * @test: The test context object. @@ -399,6 +420,19 @@ static inline bool kunit_resource_instance_match(struct kunit *test, } /** + * kunit_resource_name_match() - Match a resource with the same name. + * @test: Test case to which the resource belongs. + * @res: The resource. + * @match_name: The name to match against. + */ +static inline bool kunit_resource_name_match(struct kunit *test, +struct kunit_resource *res, +void *match_name) +{ + return res->name && strcmp(res->name, match_name) == 0; +} + +/** * kunit_find_resource() - Find a resource using match function/data. * @test: Test case to which the resource belongs. * @match: match function to be applied to resources/match data. @@ -427,6 +461,19 @@ static inline bool kunit_resource_instance_match(struct kunit *test, } /** + * kunit_find_named_resource() - Find a resource using match name. + * @test: Test case to which the resource belongs. + * @name: match name. + */ +static inline struct kunit_resource * +kunit_find_named_resource(struct kunit *test, + const char *name) +{ + return kunit_find_resource(test, kunit_resource_name_match, + (void *)name); +} + +/** * kunit_destroy_resource() - Find a kunit_resource and destroy it. * @test: Test case to which the resource belongs. * @match: Match function. Returns whether a given resource matches @match_data. @@ -439,6 +486,13 @@ int kunit_destroy_resource(struct kunit *test, kunit_resource_match_t match, void *match_data); +static inline int kunit_destroy_named_resource(struct kunit *test, + const char *name) +{ + return kunit_destroy_resource(test, kunit_resource_name_match, + (void *)name); +} + /** * kunit_remove_resource: remove resource from resource list associated with * test. diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 03f3eca..69f9024 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -325,6 +325,42 @@ static void kunit_resource_test_static(struct kunit *test) KUNIT_EXPECT_TRUE(test, list_empty(&test->resources));
[PATCH v9 4/5] ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods
Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used to communicate the PDSM request via member 'nd_cmd_pkg.nd_command' and size of payload that need to be sent/received for servicing the PDSM. A new function is_cmd_valid() is implemented that reads the args to papr_scm_ndctl() and performs sanity tests on them. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Signed-off-by: Vaibhav Jain --- Changelog: v8..v9: * Reduced the usage of term SCM replacing it with appropriate replacement [ Dan Williams, Aneesh ] * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h' * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'. * Minor update to patch description. v7..v8: * Removed the 'payload_offset' field from 'struct nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ] * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg', 'reserved' field of 10-bytes is introduced. [ Aneesh ] * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h [ Ira ] Resend: * None v6..v7 : * Removed the re-definitions of __packed macro from papr_scm_pdsm.h [Mpe]. * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe]. * Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h [Mpe]. * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe] v5..v6 : * Changed the usage of the term DSM to PDSM to distinguish it from the ACPI term [ Dan Williams ] * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct to reflect the new terminology. * Updated the patch description and title to reflect the new terminology. * Squashed patch to introduce new command family in 'ndctl.h' with this patch [ Dan Williams ] * Updated the papr_scm_pdsm method starting index from 0x1 to 0x0 [ Dan Williams ] * Removed redundant license text from the papr_scm_psdm.h file. [ Dan Williams ] * s/envelop/envelope/ at various places [ Dan Williams ] * Added '__packed' attribute to command package header to gaurd against different compiler adding paddings between the fields. [ Dan Williams] * Converted various pr_debug to dev_debug [ Dan Williams ] v4..v5 : * None v3..v4 : * None v2..v3 : * Updated the patch prefix to 'ndctl/uapi' [Aneesh] v1..v2 : * None --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 136 ++ arch/powerpc/platforms/pseries/papr_scm.c | 101 +++- include/uapi/linux/ndctl.h| 1 + 3 files changed, 232 insertions(+), 6 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index ..6407fefcc007 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL transfers data between user-space and kernel via + * envelope which consists of a header and user-defined payload sections. + * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a + * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field. + * There is reserved field that can used to introduce new fields to the + * structure in future. It also tries to ensure that 'nd_pdsm_cmd_pkg.payload' + * lies at a 8-byte boundary. + * + * +-+-+---+ + * | 64-Bytes | 16-Bytes | Max 176-Bytes | + * +-+-+---+ + * | nd_pdsm_cmd_pkg | | + * |-+ | | + * | nd_cmd_pkg | | | + * +-+-+---+ + * | nd_family | | | + * | nd_size
[PATCH v9 0/5] powerpc/papr_scm: Add support for reporting nvdimm health
Changes since v8 [1]: * Updated proposed changes to remove usage of term 'SCM' due to ambiguity with 'PMEM' and 'NVDIMM'. [ Dan Williams ] * Replaced the usage of term 'SCM' with 'PMEM' in most contexts. [ Aneesh ] * Renamed NVDIMM health defines from PAPR_SCM_DIMM_* to PAPR_PMEM_* . * Updates to various newly introduced identifiers in 'papr_scm.c' removing the 'SCM' prefix from their names. * Renamed NVDIMM_FAMILY_PAPR_SCM to NVDIMM_FAMILY_PAPR * Renamed PAPR_SCM_PDSM_HEALTH PAPR_PDSM_HEALTH * Renamed uapi header 'papr_scm_pdsm.h' to 'papr_pdsm.h'. * Renamed sysfs ABI document from sysfs-bus-papr-scm to sysfs-bus-papr-pmem. * No behavioural changes from v8 [1]. [1] https://lore.kernel.org/linux-nvdimm/20200527041244.37821-1-vaib...@linux.ibm.com/ --- The PAPR standard[2][4] provides mechanisms to query the health and performance stats of an NVDIMM via various hcalls as described in Ref[3]. Until now these stats were never available nor exposed to the user-space tools like 'ndctl'. This is partly due to PAPR platform not having support for ACPI and NFIT. Hence 'ndctl' is unable to query and report the dimm health status and a user had no way to determine the current health status of a NDVIMM. To overcome this limitation, this patch-set updates papr_scm kernel module to query and fetch NVDIMM health stats using hcalls described in Ref[3]. This health and performance stats are then exposed to userspace via sysfs and PAPR-NVDIMM-Specific-Methods(PDSM) issued by libndctl. These changes coupled with proposed ndtcl changes located at Ref[5] should provide a way for the user to retrieve NVDIMM health status using ndtcl. Below is a sample output using proposed kernel + ndctl for PAPR NVDIMM in a emulation environment: # ndctl list -DH [ { "dev":"nmem0", "health":{ "health_state":"fatal", "shutdown_state":"dirty" } } ] Dimm health report output on a pseries guest lpar with vPMEM or HMS based NVDIMMs that are in perfectly healthy conditions: # ndctl list -d nmem0 -H [ { "dev":"nmem0", "health":{ "health_state":"ok", "shutdown_state":"clean" } } ] PAPR NVDIMM-Specific-Methods(PDSM) == PDSM requests are issued by vendor specific code in libndctl to execute certain operations or fetch information from NVDIMMS. PDSMs requests can be sent to papr_scm module via libndctl(userspace) and libnvdimm (kernel) using the ND_CMD_CALL ioctl command which can be handled in the dimm control function papr_scm_ndctl(). Current patchset proposes a single PDSM to retrieve NVDIMM health, defined in the newly introduced uapi header named 'papr_pdsm.h'. Support for more PDSMs will be added in future. Structure of the patch-set == The patch-set starts with a doc patch documenting details of hcall H_SCM_HEALTH. Second patch exports kernel symbol seq_buf_printf() thats used in subsequent patches to generate sysfs attribute content. Third patch implements support for fetching NVDIMM health information from PHYP and partially exposing it to user-space via a NVDIMM sysfs flag. Fourth patches deal with implementing support for servicing PDSM commands in papr_scm module. Finally the last patch implements support for servicing PDSM 'PAPR_PDSM_HEALTH' that returns the NVDIMM health information to libndctl. References: [2] "Power Architecture Platform Reference" https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference [3] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") [4] "Linux on Power Architecture Platform Reference" https://members.openpowerfoundation.org/document/dl/469 [5] https://github.com/vaibhav92/ndctl/tree/papr_scm_health_v9 --- Vaibhav Jain (5): powerpc: Document details on H_SCM_HEALTH hcall seq_buf: Export seq_buf_printf powerpc/papr_scm: Fetch nvdimm health information from PHYP ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH Documentation/ABI/testing/sysfs-bus-papr-pmem | 27 ++ Documentation/powerpc/papr_hcalls.rst | 46 ++- arch/powerpc/include/uapi/asm/papr_pdsm.h | 175 + arch/powerpc/platforms/pseries/papr_scm.c | 363 +- include/uapi/linux/ndctl.h| 1 + lib/seq_buf.c | 1 + 6 files changed, 600 insertions(+), 13 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h -- 2.26.2
[PATCH v9 1/5] powerpc: Document details on H_SCM_HEALTH hcall
Add documentation to 'papr_hcalls.rst' describing the bitmap flags that are returned from H_SCM_HEALTH hcall as per the PAPR-SCM specification. Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Signed-off-by: Vaibhav Jain --- Changelog: v8..v9: * s/SCM/PMEM device. [ Dan Williams, Aneesh ] v7..v8: * Added a clarification on bit-ordering of Health Bitmap Resend: * None v6..v7: * None v5..v6: * New patch in the series --- Documentation/powerpc/papr_hcalls.rst | 46 --- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst index 3493631a60f8..48fcf1255a33 100644 --- a/Documentation/powerpc/papr_hcalls.rst +++ b/Documentation/powerpc/papr_hcalls.rst @@ -220,13 +220,51 @@ from the LPAR memory. **H_SCM_HEALTH** | Input: drcIndex -| Out: *health-bitmap, health-bit-valid-bitmap* +| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)* | Return Value: *H_Success, H_Parameter, H_Hardware* Given a DRC Index return the info on predictive failure and overall health of -the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive -failure and health-bit-valid-bitmap indicate which bits in health-bitmap are -valid. +the PMEM device. The asserted bits in the health-bitmap indicate one or more states +(described in table below) of the PMEM device and health-bit-valid-bitmap indicate +which bits in health-bitmap are valid. The bits are reported in +reverse bit ordering for example a value of 0xC400 +indicates bits 0, 1, and 5 are valid. + +Health Bitmap Flags: + ++--+---+ +| Bit | Definition | ++==+===+ +| 00 | PMEM device is unable to persist memory contents. | +| | If the system is powered down, nothing will be saved. | ++--+---+ +| 01 | PMEM device failed to persist memory contents. Either contents were | +| | not saved successfully on power down or were not restored properly on | +| | power up. | ++--+---+ +| 02 | PMEM device contents are persisted from previous IPL. The data from | +| | the last boot were successfully restored. | ++--+---+ +| 03 | PMEM device contents are not persisted from previous IPL. There was no| +| | data to restore from the last boot. | ++--+---+ +| 04 | PMEM device memory life remaining is critically low | ++--+---+ +| 05 | PMEM device will be garded off next IPL due to failure | ++--+---+ +| 06 | PMEM device contents cannot persist due to current platform health | +| | status. A hardware failure may prevent data from being saved or | +| | restored. | ++--+---+ +| 07 | PMEM device is unable to persist memory contents in certain conditions| ++--+---+ +| 08 | PMEM device is encrypted | ++--+---+ +| 09 | PMEM device has successfully completed a requested erase or secure | +| | erase procedure. | ++--+---+ +|10:63 | Reserved / Unused | ++--+---+ **H_SCM_PERFORMANCE_STATS** -- 2.26.2
[PATCH v9 3/5] powerpc/papr_scm: Fetch nvdimm health information from PHYP
Implement support for fetching nvdimm health information via H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of 64-bit bitmap, bitwise-and of which is then stored in 'struct papr_scm_priv' and subsequently partially exposed to user-space via newly introduced dimm specific attribute 'papr/flags'. Since the hcall is costly, the health information is cached and only re-queried, 60s after the previous successful hcall. The patch also adds a documentation text describing flags reported by the the new sysfs attribute 'papr/flags' is also introduced at Documentation/ABI/testing/sysfs-bus-papr-pmem. [1] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Signed-off-by: Vaibhav Jain --- Changelog: v8..v9: * Rename some variables and defines to reduce usage of term SCM replacing it with PMEM [Dan Williams, Aneesh] * s/PAPR_SCM_DIMM/PAPR_PMEM/g * s/papr_scm_nd_attributes/papr_nd_attributes/g * s/papr_scm_nd_attribute_group/papr_nd_attribute_group/g * s/papr_scm_dimm_attr_groups/papr_nd_attribute_groups/g * Renamed file sysfs-bus-papr-scm to sysfs-bus-papr-pmem v7..v8: * Update type of variable 'rc' in __drc_pmem_query_health() and drc_pmem_query_health() to long and int respectively. [ Ira ] * Updated the patch description to s/64 bit Big Endian Number/64-bit bitmap/ [ Ira, Aneesh ]. Resend: * None v6..v7 : * Used the exported buf_seq_printf() function to generate content for 'papr/flags' * Moved the PAPR_SCM_DIMM_* bit-flags macro definitions to papr_scm.c and removed the papr_scm.h file [Mpe] * Some minor consistency issued in sysfs-bus-papr-scm documentation. [Mpe] * s/dimm_mutex/health_mutex/g [Mpe] * Split drc_pmem_query_health() into two function one of which takes care of caching and locking. [Mpe] * Fixed a local copy creation of dimm health information using READ_ONCE(). [Mpe] v5..v6 : * Change the flags sysfs attribute from 'papr_flags' to 'papr/flags' [Dan Williams] * Include documentation for 'papr/flags' attr [Dan Williams] * Change flag 'save_fail' to 'flush_fail' [Dan Williams] * Caching of health bitmap to reduce expensive hcalls [Dan Williams] * Removed usage of PPC_BIT from 'papr-scm.h' header [Mpe] * Replaced two __be64 integers from papr_scm_priv to a single u64 integer [Mpe] * Updated patch description to reflect the changes made in this version. * Removed avoidable usage of 'papr_scm_priv.dimm_mutex' from flags_show() [Dan Williams] v4..v5 : * None v3..v4 : * None v2..v3 : * Removed PAPR_SCM_DIMM_HEALTH_NON_CRITICAL as a condition for NVDIMM unarmed [Aneesh] v1..v2 : * New patch in the series. --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 27 +++ arch/powerpc/platforms/pseries/papr_scm.c | 169 +- 2 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem new file mode 100644 index ..5b10d036a8d4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -0,0 +1,27 @@ +What: /sys/bus/nd/devices/nmemX/papr/flags +Date: Apr, 2020 +KernelVersion: v5.8 +Contact: linuxppc-dev , linux-nvd...@lists.01.org, +Description: + (RO) Report flags indicating various states of a + papr-pmem NVDIMM device. Each flag maps to a one or + more bits set in the dimm-health-bitmap retrieved in + response to H_SCM_HEALTH hcall. The details of the bit + flags returned in response to this hcall is available + at 'Documentation/powerpc/papr_hcalls.rst' . Below are + the flags reported in this sysfs file: + + * "not_armed" : Indicates that NVDIMM contents will not + survive a power cycle. + * "flush_fail" : Indicates that NVDIMM contents + couldn't be flushed during last + shut-down event. + * "restore_fail": Indicates that NVDIMM contents + couldn't be restored during NVDIMM + initialization. + * "encrypted" : NVDIMM contents are encrypted. + * "smart_notify": There is health event for the NVDIMM. + * "scrubbed": Indicating that contents of the + NVDIMM have been scrubbed. + * "locked" : Indicating that NVDIMM contents cant + be modified until next power cycle. diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..149431594839 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_
Re: [PATCH v1] auxdisplay: charlcd: Reuse hex_to_bin() instead of custom code
Hi Andy, On Mon, May 18, 2020 at 9:36 PM Andy Shevchenko wrote: > > hex_to_bin() may be used to convert hexdecimal digit to its binary > representation. > > Signed-off-by: Andy Shevchenko > --- Looks fine to me and the logic is simpler for the `esc` increase too. Thanks for the cleanup! Were you able to test it, by any chance? I will queue it up for -rc1. (Sidenote: it would be nice if `git patch` allowed for a full-to-full comparison for patches like this since the unified format is harder to read; I couldn't find the option from a quick look...). Cheers, Miguel
[PATCH v9 2/5] seq_buf: Export seq_buf_printf
'seq_buf' provides a very useful abstraction for writing to a string buffer without needing to worry about it over-flowing. However even though the API has been stable for couple of years now its still not exported to kernel loadable modules limiting its usage. Hence this patch proposes update to 'seq_buf.c' to mark seq_buf_printf() which is part of the seq_buf API to be exported to kernel loadable GPL modules. This symbol will be used in later parts of this patch-set to simplify content creation for a sysfs attribute. Cc: Piotr Maziarz Cc: Cezary Rojewski Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Borislav Petkov Signed-off-by: Vaibhav Jain --- Changelog: v8..v9: * None v7..v8: * Updated the patch title [ Christoph Hellwig ] * Updated patch description to replace confusing term 'external kernel modules' to 'kernel lodable modules'. Resend: * Added ack from Steven Rostedt v6..v7: * New patch in the series --- lib/seq_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 4e865d42ab03..707453f5d58e 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -91,6 +91,7 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) return ret; } +EXPORT_SYMBOL_GPL(seq_buf_printf); #ifdef CONFIG_BINARY_PRINTF /** -- 2.26.2
[PATCH v9 5/5] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH
This patch implements support for PDSM request 'PAPR_PDSM_HEALTH' that returns a newly introduced 'struct nd_papr_pdsm_health' instance containing dimm health information back to user space in response to ND_CMD_CALL. This functionality is implemented in newly introduced papr_pdsm_health() that queries the nvdimm health information and then copies this information to the package payload whose layout is defined by 'struct nd_papr_pdsm_health'. The patch also introduces a new member 'struct papr_scm_priv.health' thats an instance of 'struct nd_papr_pdsm_health' to cache the health information of a nvdimm. As a result functions drc_pmem_query_health() and flags_show() are updated to populate and use this new struct instead of a u64 integer that was earlier used. Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Signed-off-by: Vaibhav Jain --- Changelog: v8..v9: * s/PAPR_SCM_PDSM_HEALTH/PAPR_PDSM_HEALTH/g [ Dan , Aneesh ] * s/PAPR_SCM_PSDM_DIMM_*/PAPR_PDSM_DIMM_*/g * Renamed papr_scm_get_health() to papr_psdm_health() * Updated patch description to replace papr-scm dimm with nvdimm. v7..v8: * None Resend: * None v6..v7: * Updated flags_show() to use seq_buf_printf(). [Mpe] * Updated papr_scm_get_health() to use newly introduced __drc_pmem_query_health() bypassing the cache [Mpe]. v5..v6: * Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to gaurd against possibility of different compilers adding different paddings to the struct [ Dan Williams ] * Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of 'bool' and also updated drc_pmem_query_health() to take this into account. [ Dan Williams ] v4..v5: * None v3..v4: * Call the DSM_PAPR_SCM_HEALTH service function from papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh] v2..v3: * Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types as its exported to the userspace [Aneesh] * Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health from enum to #defines [Aneesh] v1..v2: * New patch in the series --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 39 +++ arch/powerpc/platforms/pseries/papr_scm.c | 125 +++--- 2 files changed, 147 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 6407fefcc007..411725a91591 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -115,6 +115,7 @@ struct nd_pdsm_cmd_pkg { */ enum papr_pdsm { PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, PAPR_PDSM_MAX, }; @@ -133,4 +134,42 @@ static inline void *pdsm_cmd_to_payload(struct nd_pdsm_cmd_pkg *pcmd) return (void *)(pcmd->payload); } +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * dimm_unarmed: Dimm not armed. So contents wont persist. + * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore: Contents from previous shutdown werent restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm cant be modified until CEC reboot + * dimm_encrypted : Contents of dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_ + */ +struct nd_papr_pdsm_health_v1 { + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; +} __packed; + +/* + * Typedef the current struct for dimm_health so that any application + * or kernel recompiled after introducing a new version automatically + * supports the new version. + */ +#define nd_papr_pdsm_health nd_papr_pdsm_health_v1 + +/* Current version number for the dimm health struct */ +#define ND_PAPR_PDSM_HEALTH_VERSION 1 + #endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 5e2237e7ec08..c0606c0c659c 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -88,7 +88,7 @@ struct papr_scm_priv { unsigned long lasthealth_jiffies; /* Health information for the dimm */ - u64 health_bitmap; + struct nd_papr_pdsm_health health; }; static int drc_pmem_bind(struct papr_scm_priv *p) @@ -201,6 +201,7 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p) static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 health; long rc;
Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro
On Sat, May 30, 2020 at 3:01 AM William Breathitt Gray wrote: > > On Sat, May 30, 2020 at 01:32:44AM +0530, Syed Nayyar Waris wrote: > > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko > > wrote: > > > > > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote: > > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot > > > > wrote: > > > > > > ... > > > > > > > >579 static inline unsigned long bitmap_get_value(const unsigned > > > > > long *map, > > > > >580unsigned long > > > > > start, > > > > >581unsigned long > > > > > nbits) > > > > >582 { > > > > >583 const size_t index = BIT_WORD(start); > > > > >584 const unsigned long offset = start % BITS_PER_LONG; > > > > >585 const unsigned long ceiling = roundup(start + 1, > > > > > BITS_PER_LONG); > > > > >586 const unsigned long space = ceiling - start; > > > > >587 unsigned long value_low, value_high; > > > > >588 > > > > >589 if (space >= nbits) > > > > > > 590 return (map[index] >> offset) & GENMASK(nbits > > > > > - 1, 0); > > > > >591 else { > > > > >592 value_low = map[index] & > > > > > BITMAP_FIRST_WORD_MASK(start); > > > > >593 value_high = map[index + 1] & > > > > > BITMAP_LAST_WORD_MASK(start + nbits); > > > > >594 return (value_low >> offset) | (value_high << > > > > > space); > > > > >595 } > > > > >596 } > > > > > > > Regarding the above compilation warnings. All the warnings are because > > > > of GENMASK usage in my patch. > > > > The warnings are coming because of sanity checks present for 'GENMASK' > > > > macro in include/linux/bits.h. > > > > > > > > Taking the example statement (in my patch) where compilation warning > > > > is getting reported: > > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0); > > > > > > > > 'nbits' is of type 'unsigned long'. > > > > In above, the sanity check is comparing '0' with unsigned value. And > > > > unsigned value can't be less than '0' ever, hence the warning. > > > > But this warning will occur whenever there will be '0' as one of the > > > > 'argument' and an unsigned variable as another 'argument' for GENMASK. > > > > > > > > This warning is getting cleared if I cast the 'nbits' to 'long'. > > > > > > > > Let me know if I should submit a next patch with the casts applied. > > > > What do you guys think? > > > > > > Proper fix is to fix GENMASK(), but allowed workaround is to use > > > (BIT(nbits) - 1) > > > instead. > > > > > > -- > > > With Best Regards, > > > Andy Shevchenko > > > > > > > Hi Andy. Thank You for your comment. > > > > When I used BIT macro (earlier), I had faced a problem. I want to tell > > you about that. > > > > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or > > clump size) is BITS_PER_LONG, unexpected calculation happens. > > > > Explanation: > > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer), > > (BIT(nbits) - 1) > > gives a value of zero and when this zero is ANDed with any value, it > > makes it full zero. This is unexpected and incorrect calculation happening. > > > > What actually happens is in the macro expansion of BIT(64), that is 1 > > << 64, the '1' overflows from leftmost bit position (most significant > > bit) and re-enters at the rightmost bit position (least significant > > bit), therefore 1 << 64 becomes '0x1', and when another '1' is > > subtracted from this, the final result becomes 0. > > > > Since this macro is being used in both bitmap_get_value and > > bitmap_set_value functions, it will give unexpected results when nbits or > > clump > > size is BITS_PER_LONG (32 or 64 depending on arch). > > > > William also knows about this issue: > > > > "This is undefined behavior in the C standard (section 6.5.7 in the N1124)" > > > > Andy, William, > > Let me know what do you think ? > > > > Regards > > Syed Nayyar Waris > > We can't use BIT here because nbits could be equal to BITS_PER_LONG in > some cases. Casting to long should be fine because the nbits will never > be greater than BITS_PER_LONG, so long should be safe to use. > > However, I agree with Andy that the proper solution is to fix GENMASK so > that this warning does not come up. What's the actual line of code in > the GENMASK macro that is throwing this warning? I'd like to understand > better the logic of this sanity check. > > William Breathitt Gray Here is the code snippet: #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ __builtin_constant_p((l) > (h)), (l) > (h), 0))) Above you can see the comparisons are being done in the last line. And because of these comparisons, those compilation warnings are generated. For full code related to GENMASK macro: h
Re: [PATCH v2] arm: dts: Move am33xx and am43xx mmc nodes to sdhci-omap driver
* Tony Lindgren [200527 16:06]: > * Tony Lindgren [200519 09:05]: > > * Tomi Valkeinen [200519 15:55]: > > > (Dropping DT from cc) > > > > > > On 19/05/2020 18:48, Tony Lindgren wrote: > > > > > > > > > Suspend/resume on am43xx-gpevm is broken right now in mainline and > > > > > > the regression looks > > > > > > like it is caused by the display subsystem. I have reported this to > > > > > > Tomi and > > > > > > its being investigated. > > > > > > > > > > > > Meanwhile I have tested this patch with display configs disabled > > > > > > and Keerthy's > > > > > > suspend/resume tests pass on both am3 and am4. > > > > > > > > OK great thanks for checking it. Do you have the display subsystem > > > > related commit that broke PM? I'm wondering if my recent DSS platform > > > > data removal changes might have caused the regression. > > > > > > I spent a bit time looking at this, but unfortunately I wasn't even able > > > to > > > resume my AM4 evm from suspend. I tried with rtcwake and with plain > > > console > > > (with no_console_suspend). I did not have DSS loaded. > > > > My test-bbb-suspend script seems to have: > > > > sudo modprobe wkup_m3_ipc > > sudo modprobe pm33xx > > sudo modprobe rtc-omap > > rtcwake -m mem -s 5 > > > > I think the same should work for am437x. But some boards do not support > > deep sleep like am437x-idk. > > > > > Anyone have quick hints on how to debug why resume doesn't seem to happen? > > > > You might get some info with no_console_suspend, but that might also > > cause other issues. > > To me it seems we may have some dss clock missing with the ti-sysc dts > changes that makes resume fail. Or else there is some ordering issue > between the dss components now on resume, I'll try to debug more. Looks like we now set the CLOCKATIVITY bit while earlier we did not set it except for HWMOD_SET_DEFAULT_CLOCKACT cases. I've also found few other minor issues, but I'm still seeing resume fail for DSS. The clock and sysconfig registers look just fine, so getting closer. Regards, Tony
Re: [PATCH 1/2] dt-bindings: chrome: Add cros-ec-typec mux props
On Tue, May 12, 2020 at 7:42 AM Heikki Krogerus wrote: > > Hi guys, > > On Mon, May 11, 2020 at 01:46:35PM -0700, Prashant Malani wrote: > > Hi Rob, > > > > Thank you for reviewing the patch. Kindly see my comments inline: > > > > On Mon, May 11, 2020 at 02:28:00PM -0500, Rob Herring wrote: > > > On Wed, Apr 22, 2020 at 03:22:39PM -0700, Prashant Malani wrote: > > > > Add properties for mode, orientation and USB data role switches for > > > > Type C connectors. When available, these will allow the Type C connector > > > > class port driver to configure the various switches according to USB PD > > > > information (like orientation, alt mode etc.) provided by the Chrome OS > > > > EC controller. > > > > > > > > Signed-off-by: Prashant Malani > > > > --- > > > > .../bindings/chrome/google,cros-ec-typec.yaml | 27 ++- > > > > 1 file changed, 26 insertions(+), 1 deletion(-) > > > > > > > > diff --git > > > > a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml > > > > b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml > > > > index 6d7396ab8bee..b5814640aa32 100644 > > > > --- a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml > > > > +++ b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml > > > > @@ -21,7 +21,21 @@ properties: > > > > const: google,cros-ec-typec > > > > > > > >connector: > > > > -$ref: /schemas/connector/usb-connector.yaml# > > > > +allOf: > > > > + - $ref: /schemas/connector/usb-connector.yaml# > > > > + - type: object > > > > +properties: > > > > > > These don't seem CrOS EC specific, so why document them as such. > > > > Are you referring to the "mode-switch", "orientation-switch" and > > "usb-role-switch" properties? If so, then yes, they aren't Cros EC > > specific. The Type C connector class framework requires the nodes to be > > named like this, and the cros-ec-typec driver uses this framework, hence > > the description here (the Type C connector class framework doesn't have > > any bindings). > > > > Would it be better to add in the description string that Type Connector > > class expects these switches to be named this way? : > > > > " Reference to a DT node for the USB Type C Multiplexer controlling the > > data lines routing for this connector. This switch is assumed registered > > with the Type C connector class framework, which requires it to be named > > this way." > > > > > > > + mode-switch: > > > > +description: Reference to a DT node for the USB Type C > > > > Multiplexer > > > > + controlling the data lines routing for this connector. > > > > > > This is for alternate mode muxing I presume. > > > > Yes, that's right. > > > > > > We already have a mux-control binding. Why not use that here? > > > > Heikki might be able to offer more insight into why this is the case, > > since the connector class framework seems to expect a phandle and for > > the device driver to implement a "set" command. Heikki, would you happen to > > know? > > The mode-switch here would actually represent the "consumer" part in > the mux-control bindings. So the mux-controls would describe the > relationship between the "mode-switch" and the mux controller(s), > while the mode-switch property describes the relationship between > something like USB Type-C Port Manager (or this cros_ec function) and > the "mux consumer". The "USB Type-C Port Manager" is not just the parent node in your case? Can you point me to what you expect your DT to look like showing the mode switch node, the connector, the USB host(s), and the DP/HDMI bridge/output? Rob
[PATCH 00/27] afs: Improvements
Here's a set of patches to make a number of improvements to the AFS driver: (1) Improve callback (ie. third party change notification) processing by: (a) Relying more on the fact we're doing this under RCU and by using fewer locks. This involves making the inode hash table RCU safe and providing some RCU-safe accessor functions. The search can then be done without taking the inode_hash_lock. Care must be taken because the object may be being deleted and no wait is made. This is also used to improve Ext4's time updating. Konstantin Khlebnikov said "For now, I've plugged this issue with try-lock in ext4 lazy time update. This solution is much better." (b) Moving to keeping volumes in a tree indexed by volume ID rather than a flat list. (c) Making the server and volume records logically part of the cell. This means that a server record now points directly at the cell and the tree of volumes is there. This removes an N:M mapping table, simplifying things. (2) Improve keeping NAT or firewall channels open for the server callbacks to reach the client by actively polling the fileserver on a timed basis, instead of only doing it when we have an operation to process. (3) Improving detection of delayed or lost callbacks by including the parent directory in the list of file IDs to be queried when doing a bulk status fetch from lookup. We can then check to see if our copy of the directory has changed under us without us getting notified. (4) Determine aliasing of cells (such as a cell that is pointed to be a DNS alias). This allows us to avoid having ambiguity due to apparently different cells using the same volume and file servers. (5) Improve the fileserver rotation to do more probing when it detects that all of the addresses to a server are listed as non-responsive. It's possible that an address that previously stopped responding has become responsive again. Beyond that, lay some foundations for making some calls asynchronous: (1) Turn the fileserver cursor struct into a general operation struct and hang the parameters off of that rather than keeping them in local variables and hang results off of that rather than the call struct. (2) Implement some general operation handling code and simplify the callers of operations that affect a volume or a volume component (such as a file). Most of the operation is now done by core code. (3) Operations are supplied with a table of operations to issue different variants of RPCs and to manage the completion, where all the required data is held in the operation object, thereby allowing these to be called from a workqueue. (4) Put the standard "if (begin), while(select), call op, end" sequence into a canned function that just emulates the current behaviour for now. There are also some fixes interspersed: (1) Don't let the EACCES from ICMP6 mapping reach the user as such, since it's confusing as to whether it's a filesystem error. Convert it to EHOSTUNREACH. (2) Don't use the epoch value acquired through probing a server. If we have two servers with the same UUID but in different cells, it's hard to draw conclusions from them having different epoch values. (3) Don't interpret the argument to the CB.ProbeUuid RPC as a fileserver UUID and look up a fileserver from it. (4) Deal with servers in different cells having the same UUIDs. In the event that a CB.InitCallBackState3 RPC is received, we have to break the callback promises for every server record matching that UUID. (5) Don't let afs_statfs return values that go below 0. (6) Don't use running fileserver probe state to make server selection and address selection decisions on. Only make decisions on final state as the running state is cleared at the start of probing. The patches are here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=afs-next David --- David Howells (1): afs: Adjust the fileserver rotation algorithm to reprobe/retry more quickly fs/afs/Makefile|2 + fs/afs/afs.h |3 +- fs/afs/afs_vl.h|1 + fs/afs/callback.c | 345 fs/afs/cell.c | 10 +- fs/afs/cmservice.c | 67 +-- fs/afs/dir.c | 1253 -- fs/afs/dir_silly.c | 190 +++ fs/afs/dynroot.c | 93 fs/afs/file.c | 62 ++- fs/afs/flock.c | 114 ++-- fs/afs/fs_operation.c | 239 fs/afs/fs_probe.c | 339 +--- fs/afs/fsclient.c | 1295 +--- fs/afs/inode.c | 491 - fs/afs/internal.h | 523
[PATCH 01/27] vfs, afs, ext4: Make the inode hash table RCU searchable
Make the inode hash table RCU searchable so that searches that want to access or modify an inode without taking a ref on that inode can do so without taking the inode hash table lock. The main thing this requires is some RCU annotation on the list manipulation operations. Inodes are already freed by RCU in most cases. Users of this interface must take care as the inode may be still under construction or may be being torn down around them. There are at least two instances where this can be of use: (1) Ext4 date stamp updating. (2) AFS callback breaking. Signed-off-by: David Howells Acked-by: Konstantin Khlebnikov cc: linux-e...@vger.kernel.org cc: linux-...@lists.infradead.org --- fs/afs/callback.c | 12 +++- fs/ext4/inode.c| 44 +++-- fs/inode.c | 173 include/linux/fs.h |3 + 4 files changed, 179 insertions(+), 53 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 2dca8df1a18d..0dcbd40732d1 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -252,6 +252,7 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_vnode *vnode; struct inode *inode; + rcu_read_lock(); read_lock(&server->cb_break_lock); hlist_for_each_entry(vi, &server->cb_volumes, srv_link) { if (vi->vid < fid->vid) @@ -287,12 +288,16 @@ static void afs_break_one_callback(struct afs_server *server, } else { data.volume = NULL; data.fid = *fid; - inode = ilookup5_nowait(cbi->sb, fid->vnode, - afs_iget5_test, &data); + + /* See if we can find a matching inode - even an I_NEW +* inode needs to be marked as it can have its callback +* broken before we finish setting up the local inode. +*/ + inode = find_inode_rcu(cbi->sb, fid->vnode, + afs_iget5_test, &data); if (inode) { vnode = AFS_FS_I(inode); afs_break_callback(vnode, afs_cb_break_for_callback); - iput(inode); } else { trace_afs_cb_miss(fid, afs_cb_break_for_callback); } @@ -301,6 +306,7 @@ static void afs_break_one_callback(struct afs_server *server, out: read_unlock(&server->cb_break_lock); + rcu_read_unlock(); } /* diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a4aae6acdcb..2bbb55d05bb7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4860,21 +4860,22 @@ static int ext4_inode_blocks_set(handle_t *handle, return 0; } -struct other_inode { - unsigned long orig_ino; - struct ext4_inode *raw_inode; -}; - -static int other_inode_match(struct inode * inode, unsigned long ino, -void *data) +static void __ext4_update_other_inode_time(struct super_block *sb, + unsigned long orig_ino, + unsigned long ino, + struct ext4_inode *raw_inode) { - struct other_inode *oi = (struct other_inode *) data; + struct inode *inode; + + inode = find_inode_by_ino_rcu(sb, ino); + if (!inode) + return; - if ((inode->i_ino != ino) || - (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | + if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | I_DIRTY_INODE)) || ((inode->i_state & I_DIRTY_TIME) == 0)) - return 0; + return; + spin_lock(&inode->i_lock); if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | I_DIRTY_INODE)) == 0) && @@ -4885,16 +4886,15 @@ static int other_inode_match(struct inode * inode, unsigned long ino, spin_unlock(&inode->i_lock); spin_lock(&ei->i_raw_lock); - EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); - EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); - EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); - ext4_inode_csum_set(inode, oi->raw_inode, ei); + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + ext4_inode_csum_set(inode, raw_inode, ei); spin_unlock(&ei->i_raw_lock); - trace_ext4_other_inode_update_time(inode, oi->orig_ino); - return -1; + trace_ext4_other_inode_update_time(inode, orig_ino);
[PATCH 03/27] rxrpc: Adjust /proc/net/rxrpc/calls to display call->debug_id not user_ID
The user ID value isn't actually much use - and leaks a kernel pointer or a userspace value - so replace it with the call debug ID, which appears in trace points. Signed-off-by: David Howells --- net/rxrpc/proc.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 8b179e3c802a..543afd9bd664 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -68,7 +68,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) "Proto Local " " Remote " " SvID ConnID CallID End Use StateAbort " -" UserID TxSeqTW RxSeqRW RxSerial RxTimo\n"); +" DebugId TxSeqTW RxSeqRW RxSerial RxTimo\n"); return 0; } @@ -100,7 +100,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rx_hard_ack = READ_ONCE(call->rx_hard_ack); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" - " %-8.8s %08x %lx %08x %02x %08x %02x %08x %06lx\n", + " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n", lbuff, rbuff, call->service_id, @@ -110,7 +110,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) atomic_read(&call->usage), rxrpc_call_states[call->state], call->abort_code, - call->user_call_ID, + call->debug_id, tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack, rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack, call->rx_serial,
[PATCH 02/27] rxrpc: Map the EACCES error produced by some ICMP6 to EHOSTUNREACH
Map the EACCES error that is produced by some ICMP6 packets to EHOSTUNREACH when we get them as EACCES has other meanings within a filesystem context. Signed-off-by: David Howells --- net/rxrpc/peer_event.c |3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index b1449d971883..112e490ebbcd 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -271,6 +271,9 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; case SO_EE_ORIGIN_ICMP6: + if (err == EACCES) + err = EHOSTUNREACH; + /* Fall through */ default: _proto("Rx Received error report { orig=%u }", ee->ee_origin); break;
[PATCH 06/27] afs: Split the usage count on struct afs_server
Split the usage count on the afs_server struct to have an active count that registers who's actually using it separately from the reference count on the object. This allows a future patch to dispatch polling probes without advancing the "unuse" time into the future each time we emit a probe, which would otherwise prevent unused server records from expiring. Included in this: (1) The latter part of afs_destroy_server() in which the RCU destruction of afs_server objects is invoked and the outstanding server count is decremented is split out into __afs_put_server(). (2) afs_put_server() now calls __afs_put_server() rather then setting the management timer. (3) The calls begun by afs_fs_give_up_all_callbacks() and afs_fs_get_capabilities() can now take a ref on the server record, so afs_destroy_server() can just drop its ref and needn't wait for the completion of these calls. They'll put the ref when they're done. (4) Because of (3), afs_fs_probe_done() no longer needs to wake up afs_destroy_server() with server->probe_outstanding. (5) afs_gc_servers can be simplified. It only needs to check if server->active is 0 rather than playing games with the refcount. (6) afs_manage_servers() can propose a server for gc if usage == 0 rather than if ref == 1. The gc is effected by (5). Signed-off-by: David Howells --- fs/afs/cmservice.c |4 + fs/afs/fs_probe.c |1 fs/afs/fsclient.c |5 + fs/afs/internal.h |8 ++ fs/afs/proc.c |9 +-- fs/afs/rxrpc.c |2 - fs/afs/server.c| 151 +--- fs/afs/server_list.c |4 + include/trace/events/afs.h | 18 +++-- 9 files changed, 131 insertions(+), 71 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 380ad5ace7cf..7dcbca3bf828 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -268,7 +268,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * to maintain cache coherency. */ if (call->server) { - trace_afs_server(call->server, atomic_read(&call->server->usage), + trace_afs_server(call->server, +atomic_read(&call->server->ref), +atomic_read(&call->server->active), afs_server_trace_callback); afs_break_callbacks(call->server, call->count, call->request); } diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 37d1bba57b00..d37d78eb84bd 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -16,7 +16,6 @@ static bool afs_fs_probe_done(struct afs_server *server) if (!atomic_dec_and_test(&server->probe_outstanding)) return false; - wake_up_var(&server->probe_outstanding); clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags); wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING); return true; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d2b3798c1932..3854d16e14b1 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1842,7 +1842,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, bp = call->request; *bp++ = htonl(FSGIVEUPALLCALLBACKS); - /* Can't take a ref on server */ + call->server = afs_use_server(server, afs_server_trace_give_up_cb); afs_make_call(ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, ac); } @@ -1924,7 +1924,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, return ERR_PTR(-ENOMEM); call->key = key; - call->server = afs_get_server(server, afs_server_trace_get_caps); + call->server = afs_use_server(server, afs_server_trace_get_caps); call->server_index = server_index; call->upgrade = true; call->async = true; @@ -1934,7 +1934,6 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, bp = call->request; *bp++ = htonl(FSGETCAPABILITIES); - /* Can't take a ref on server */ trace_afs_make_fs_call(call, NULL); afs_make_call(ac, call, GFP_NOFS); return call; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ee17c868ad2c..cb70e1c234cc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -498,7 +498,7 @@ struct afs_server { struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ struct afs_server *gc_next; /* Next server in manager's list */ - time64_tput_time; /* Time at which last put */ + time64_tunuse_time; /* Time at which last unused */ unsigned long flags; #define AFS_SERVER_FL_NOT_READY1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_F
[PATCH 04/27] afs: Always include dir in bulk status fetch from afs_do_lookup()
When a lookup is done in an AFS directory, the filesystem will speculate and fetch up to 49 other statuses for files in the same directory and fetch those as well, turning them into inodes or updating inodes that already exist. However, occasionally, a callback break might go missing due to NAT timing out, but the afs filesystem doesn't then realise that the directory is not up to date. Alleviate this by using one of the status slots to check the directory in which the lookup is being done. Reported-by: Dave Botsch Suggested-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/dir.c |9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index d1e1caa23c8b..3c486340b220 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -658,7 +658,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->ctx.actor = afs_lookup_filldir; cookie->name = dentry->d_name; - cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ + cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want + * and slot 1 for the directory */ read_seqlock_excl(&dvnode->cb_lock); dcbi = rcu_dereference_protected(dvnode->cb_interest, @@ -709,7 +710,11 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, if (!cookie->inodes) goto out_s; - for (i = 1; i < cookie->nr_fids; i++) { + cookie->fids[1] = dvnode->fid; + cookie->statuses[1].cb_break = afs_calc_vnode_cb_break(dvnode); + cookie->inodes[1] = igrab(&dvnode->vfs_inode); + + for (i = 2; i < cookie->nr_fids; i++) { scb = &cookie->statuses[i]; /* Find any inodes that already exist and get their
[PATCH 07/27] afs: Actively poll fileservers to maintain NAT or firewall openings
When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch Signed-off-by: David Howells --- fs/afs/cmservice.c |2 fs/afs/fs_probe.c | 277 +--- fs/afs/fsclient.c | 21 ++- fs/afs/internal.h | 41 +-- fs/afs/main.c |5 + fs/afs/rotate.c|7 - fs/afs/server.c| 19 +-- fs/afs/volume.c| 24 ++-- include/trace/events/afs.h |4 + 9 files changed, 281 insertions(+), 119 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7dcbca3bf828..7ae88958051f 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -157,7 +157,7 @@ static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) _enter(""); if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) && - !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) { + !afs_is_probing_server(server)) { if (server->cm_epoch == call->epoch) return 0; diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index d37d78eb84bd..442b5e7944ff 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* AFS fileserver probing * - * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowe...@redhat.com) */ @@ -11,14 +11,83 @@ #include "internal.h" #include "protocol_yfs.h" -static bool afs_fs_probe_done(struct afs_server *server) +static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; +static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; + +/* + * Start the probe polling timer. We have to supply it with an inc on the + * outstanding server count. + */ +static void afs_schedule_fs_probe(struct afs_net *net, + struct afs_server *server, bool fast) +{ + unsigned long atj; + + if (!net->live) + return; + + atj = server->probed_at; + atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; + + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_probe_timer, atj)) + afs_dec_servers_outstanding(net); +} + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_fs_probe(struct afs_net *net, struct afs_
[PATCH 09/27] afs: Make callback processing more efficient.
afs_vol_interest objects represent the volume IDs currently being accessed from a fileserver. These hold lists of afs_cb_interest objects that repesent the superblocks using that volume ID on that server. When a callback notification from the server telling of a modification by another client arrives, the volume ID specified in the notification is looked up in the server's afs_vol_interest list. Through the afs_cb_interest list, the relevant superblocks can be iterated over and the specific inode looked up and marked in each one. Make the following efficiency improvements: (1) Hold rcu_read_lock() over the entire processing rather than locking it each time. (2) Do all the callbacks for each vid together rather than individually. Each volume then only needs to be looked up once. (3) afs_vol_interest objects are now stored in an rb_tree rather than a flat list to reduce the lookup step count. (4) afs_vol_interest lookup is now done with RCU, but because it's in an rb_tree which may rotate under us, a seqlock is used so that if it changes during the walk, we repeat the walk with a lock held. With this and the preceding patch which adds RCU-based lookups in the inode cache, target volumes/vnodes can be taken without the need to take any locks, except on the target itself. Signed-off-by: David Howells --- fs/afs/callback.c | 150 ++--- fs/afs/internal.h |6 +- fs/afs/server.c |4 + 3 files changed, 100 insertions(+), 60 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 0dcbd40732d1..b16781e1683e 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -28,7 +28,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server, { struct afs_vol_interest *new_vi, *vi; struct afs_cb_interest *new; - struct hlist_node **pp; + struct rb_node *parent, **pp; new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL); if (!new_vi) @@ -42,7 +42,6 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server, new_vi->usage = 1; new_vi->vid = vnode->volume->vid; - INIT_HLIST_NODE(&new_vi->srv_link); INIT_HLIST_HEAD(&new_vi->cb_interests); refcount_set(&new->usage, 1); @@ -51,31 +50,31 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server, new->server = afs_get_server(server, afs_server_trace_get_new_cbi); INIT_HLIST_NODE(&new->cb_vlink); - write_lock(&server->cb_break_lock); + write_seqlock(&server->cb_break_lock); - for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) { - vi = hlist_entry(*pp, struct afs_vol_interest, srv_link); - if (vi->vid < new_vi->vid) - continue; - if (vi->vid > new_vi->vid) - break; - vi->usage++; - goto found_vi; + pp = &server->cb_volumes.rb_node; + while ((parent = *pp)) { + vi = rb_entry(parent, struct afs_vol_interest, srv_node); + if (vi->vid < new_vi->vid) { + pp = &(*pp)->rb_left; + } else if (vi->vid > new_vi->vid) { + pp = &(*pp)->rb_right; + } else { + vi->usage++; + goto found_vi; + } } - new_vi->srv_link.pprev = pp; - new_vi->srv_link.next = *pp; - if (*pp) - (*pp)->pprev = &new_vi->srv_link.next; - *pp = &new_vi->srv_link; vi = new_vi; new_vi = NULL; -found_vi: + rb_link_node_rcu(&vi->srv_node, parent, pp); + rb_insert_color(&vi->srv_node, &server->cb_volumes); +found_vi: new->vol_interest = vi; hlist_add_head(&new->cb_vlink, &vi->cb_interests); - write_unlock(&server->cb_break_lock); + write_sequnlock(&server->cb_break_lock); kfree(new_vi); return new; } @@ -182,17 +181,17 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) if (cbi && refcount_dec_and_test(&cbi->usage)) { if (!hlist_unhashed(&cbi->cb_vlink)) { - write_lock(&cbi->server->cb_break_lock); + write_seqlock(&cbi->server->cb_break_lock); hlist_del_init(&cbi->cb_vlink); vi = cbi->vol_interest; cbi->vol_interest = NULL; if (--vi->usage == 0) - hlist_del(&vi->srv_link); + rb_erase(&vi->srv_node, &cbi->server->cb_volumes); else vi = NULL; - write_unlock(&cbi->server->cb_break_lock); + write_sequnlock(&cbi->server->cb_break_lock); if
[PATCH] [v2] ARM: omap2: fix omap5_realtime_timer_init definition
There is one more regression introduced by the last build fix: arch/arm/mach-omap2/timer.c:170:6: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] void __init omap5_realtime_timer_init(void) ^ arch/arm/mach-omap2/common.h:118:20: note: previous definition is here static inline void omap5_realtime_timer_init(void) ^ arch/arm/mach-omap2/timer.c:170:13: error: redefinition of 'omap5_realtime_timer_init' void __init omap5_realtime_timer_init(void) ^ arch/arm/mach-omap2/common.h:118:20: note: previous definition is here static inline void omap5_realtime_timer_init(void) As it turns out, the CONFIG_SOC_HAS_REALTIME_COUNTER option should never be disabled for OMAP5 as we realy on this to initialize the clocks and the timer. Just remove it here and make it the default. Removing the guard around the set_cntfreq() definition, I noticed that this is not properly namespaced, so fix that as well. Fixes: d86ad463d670 ("ARM: OMAP2+: Fix regression for using local timer on non-SMP SoCs") Signed-off-by: Arnd Bergmann --- I'll let this run randconfig builds overnight to be sure it doesn't cause another regression. --- arch/arm/mach-omap2/Kconfig | 5 - arch/arm/mach-omap2/common.h | 6 -- arch/arm/mach-omap2/omap-secure.h | 9 + arch/arm/mach-omap2/omap-smp.c| 2 +- arch/arm/mach-omap2/timer.c | 14 ++ 5 files changed, 4 insertions(+), 32 deletions(-) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ca74473f01df..87fb4df4cf72 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -145,11 +145,6 @@ config ARCH_OMAP2PLUS_TYPICAL config SOC_HAS_OMAP2_SDRC bool "OMAP2 SDRAM Controller support" -config SOC_HAS_REALTIME_COUNTER - bool "Real time free running counter" - depends on SOC_OMAP5 || SOC_DRA7XX - default y - config POWER_AVS_OMAP bool "AVS(Adaptive Voltage Scaling) support for OMAP IP versions 1&2" depends on POWER_AVS && (ARCH_OMAP3 || ARCH_OMAP4) && PM diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 49926eced5f1..70e3d6df4cb6 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -112,13 +112,7 @@ static inline int omap_l2_cache_init(void) #define omap4_l2c310_write_sec NULL #endif -#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER extern void omap5_realtime_timer_init(void); -#else -static inline void omap5_realtime_timer_init(void) -{ -} -#endif void omap2420_init_early(void); void omap2430_init_early(void); diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index 4aaa95706d39..fbc02bb639c4 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -81,14 +81,7 @@ extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag); extern bool optee_available; void omap_secure_init(void); - -#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER -void set_cntfreq(void); -#else -static inline void set_cntfreq(void) -{ -} -#endif +void omap5_set_cntfreq(void); #endif /* __ASSEMBLER__ */ #endif /* OMAP_ARCH_OMAP_SECURE_H */ diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 570a987e6d1a..f35d5642122a 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -162,7 +162,7 @@ static void omap4_secondary_init(unsigned int cpu) * Configure the CNTFRQ register for the secondary cpu's which * indicates the frequency of the cpu local timers. */ - set_cntfreq(); + omap5_set_cntfreq(); /* Configure ACR to disable streaming WA for 801819 */ omap5_erratum_workaround_801819(); /* Enable ACR to allow for ICUALLU workaround */ diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index c1737e737a94..9b7b1240de81 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -39,11 +39,9 @@ #define INCREMENTER_DENUMERATOR_RELOAD_OFFSET 0x14 #define NUMERATOR_DENUMERATOR_MASK 0xf000 -#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER - static unsigned long arch_timer_freq; -void set_cntfreq(void) +void omap5_set_cntfreq(void) { omap_smc1(OMAP5_DRA7_MON_SET_CNTFRQ_INDEX, arch_timer_freq); } @@ -154,19 +152,11 @@ static void __init realtime_counter_init(void) writel_relaxed(reg, base + INCREMENTER_DENUMERATOR_RELOAD_OFFSET); arch_timer_freq = DIV_ROUND_UP_ULL(rate * num, den); - set_cntfreq(); + omap5_set_cntfreq(); iounmap(base); } -#else - -static inline void realtime_counter_init(void) -{ -} - -#endif /* CONFIG_SOC_HAS_REALTIME_COUNTER */ - void __init omap5_realtime_timer_init(void) { omap_clk_init(); -- 2.26.2
[PATCH 10/27] afs: Set error flag rather than return error from file status decode
Set a flag in the call struct to indicate an unmarshalling error rather than return and handle an error from the decoding of file statuses. This flag is checked on a successful return from the delivery function. Signed-off-by: David Howells --- fs/afs/fsclient.c | 88 +++- fs/afs/internal.h |1 + fs/afs/rxrpc.c |4 ++ fs/afs/yfsclient.c | 85 +++--- 4 files changed, 55 insertions(+), 123 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 401de063996c..b1d8d8f780d2 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -56,16 +56,15 @@ static void xdr_dump_bad(const __be32 *bp) /* * decode an AFSFetchStatus block */ -static int xdr_decode_AFSFetchStatus(const __be32 **_bp, -struct afs_call *call, -struct afs_status_cb *scb) +static void xdr_decode_AFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; - int ret; abort_code = ntohl(xdr->abort_code); @@ -79,7 +78,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - goto good; + goto advance; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -89,7 +88,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; scb->have_error = true; - goto good; + goto advance; } type = ntohl(xdr->type); @@ -125,15 +124,13 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; -good: - ret = 0; advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return ret; + return; bad: xdr_dump_bad(*_bp); - ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); goto advance; } @@ -254,9 +251,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -419,9 +414,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -577,12 +570,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->out_fid); - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -691,9 +680,7 @@ static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -784,12 +771,8 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->
[PATCH 05/27] afs: Use the serverUnique field in the UVLDB record to reduce rpc ops
The U-version VLDB volume record retrieved by the VL.GetEntryByNameU rpc op carries a change counter (the serverUnique field) for each fileserver listed in the record as backing that volume. This is incremented whenever the registration details for a fileserver change (such as its address list). Note that the same value will be seen in all UVLDB records that refer to that fileserver. This should be checked before calling the VL server to re-query the address list for a fileserver. If it's the same, there's no point doing the query. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/internal.h|5 +++-- fs/afs/server.c | 26 ++ fs/afs/server_list.c |3 ++- fs/afs/vlclient.c|1 + 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 80255513e230..ee17c868ad2c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -471,6 +471,7 @@ struct afs_vldb_entry { #define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ uuid_t fs_server[AFS_NMAXNSERVERS]; + u32 addr_version[AFS_NMAXNSERVERS]; /* Registration change counters */ u8 fs_mask[AFS_NMAXNSERVERS]; #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ @@ -498,7 +499,6 @@ struct afs_server { struct hlist_node proc_link; /* Link in net->fs_proc */ struct afs_server *gc_next; /* Next server in manager's list */ time64_tput_time; /* Time at which last put */ - time64_tupdate_at; /* Time at which to next update the record */ unsigned long flags; #define AFS_SERVER_FL_NOT_READY1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_FOUND2 /* VL server says no such server */ @@ -511,6 +511,7 @@ struct afs_server { #define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ #define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ #define AFS_SERVER_FL_HAVE_EPOCH 11/* ->epoch is valid */ +#define AFS_SERVER_FL_NEEDS_UPDATE 12 /* Fileserver address list is out of date */ atomic_tusage; u32 addr_version; /* Address list version */ u32 cm_epoch; /* Server RxRPC epoch */ @@ -1241,7 +1242,7 @@ extern spinlock_t afs_server_peer_lock; extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); -extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32); extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); extern void afs_manage_servers(struct work_struct *); diff --git a/fs/afs/server.c b/fs/afs/server.c index 11b90ac7ea30..9e50ccde5d37 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -12,7 +12,6 @@ #include "protocol_yfs.h" static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ -static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ static atomic_t afs_server_debug_id; static void afs_inc_servers_outstanding(struct afs_net *net) @@ -218,7 +217,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; - server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); rwlock_init(&server->cb_break_lock); @@ -264,7 +262,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, * Get or create a fileserver record. */ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, -const uuid_t *uuid) +const uuid_t *uuid, u32 addr_version) { struct afs_addr_list *alist; struct afs_server *server, *candidate; @@ -272,8 +270,11 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, _enter("%p,%pU", cell->net, uuid); server = afs_find_server_by_uuid(cell->net, uuid); - if (server) + if (server) { + if (server->addr_version != addr_version) +
[PATCH 11/27] afs: Remove the error argument from afs_protocol_error()
Remove the error argument from afs_protocol_error() as it's always -EBADMSG. Signed-off-by: David Howells --- fs/afs/cmservice.c |9 +++-- fs/afs/fsclient.c | 17 ++--- fs/afs/inode.c |4 ++-- fs/afs/internal.h |2 +- fs/afs/rxrpc.c |6 +++--- fs/afs/vlclient.c | 34 ++ fs/afs/yfsclient.c | 17 ++--- include/trace/events/afs.h | 10 -- 8 files changed, 39 insertions(+), 60 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7ae88958051f..ed0fb34d77dd 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -307,8 +307,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_fid_count); + return afs_protocol_error(call, afs_eproto_cb_fid_count); call->buffer = kmalloc(array3_size(call->count, 3, 4), GFP_KERNEL); @@ -353,8 +352,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_count); + return afs_protocol_error(call, afs_eproto_cb_count); call->iter = &call->def_iter; iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4); call->unmarshall++; @@ -674,8 +672,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > YFSCBMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_fid_count); + return afs_protocol_error(call, afs_eproto_cb_fid_count); size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid)); call->buffer = kmalloc(size, GFP_KERNEL); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index b1d8d8f780d2..7d4503174dd1 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -130,7 +130,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, bad: xdr_dump_bad(*_bp); - afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(call, afs_eproto_bad_status); goto advance; } @@ -1470,8 +1470,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_volname_len); + return afs_protocol_error(call, afs_eproto_volname_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; @@ -1500,8 +1499,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_offline_msg_len); + return afs_protocol_error(call, afs_eproto_offline_msg_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; @@ -1531,8 +1529,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_motd_len); + return afs_protocol_error(call, afs_eproto_motd_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; @@ -2012,8 +2009,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) tmp = ntohl(call->tmp); _debug("status count: %u/%u", tmp, call->count2); if (tmp != call->count2) -
[PATCH 08/27] afs: Show more information in /proc/net/afs/servers
Show more information in /proc/net/afs/servers to make it easier to see what's going on with the server probing. Signed-off-by: David Howells --- fs/afs/proc.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 9bce7898cd7d..1d21465a4108 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -378,21 +378,22 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) int i; if (v == SEQ_START_TOKEN) { - seq_puts(m, "UUID REF ACT ADDR\n"); + seq_puts(m, "UUID REF ACT\n"); return 0; } server = list_entry(v, struct afs_server, proc_link); alist = rcu_dereference(server->addresses); - seq_printf(m, "%pU %3d %3d %pISpc%s\n", + seq_printf(m, "%pU %3d %3d\n", &server->uuid, atomic_read(&server->ref), - atomic_read(&server->active), - &alist->addrs[0].transport, - alist->preferred == 0 ? "*" : ""); - for (i = 1; i < alist->nr_addrs; i++) - seq_printf(m, " %pISpc%s\n", - &alist->addrs[i].transport, + atomic_read(&server->active)); + seq_printf(m, " - ALIST v=%u osp=%u r=%lx f=%lx\n", + alist->version, atomic_read(&server->probe_outstanding), + alist->responded, alist->failed); + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, "[%x] %pISpc%s\n", + i, &alist->addrs[i].transport, alist->preferred == i ? "*" : ""); return 0; }
[PATCH 14/27] afs: Don't get epoch from a server because it may be ambiguous
Don't get the epoch from a server, particularly one that we're looking up by UUID, as UUIDs may be ambiguous and may map to more than one server - so we can't draw any conclusions from it. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/cmservice.c | 49 ++--- fs/afs/internal.h |7 --- 2 files changed, 2 insertions(+), 54 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ed0fb34d77dd..954030ae7a0f 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -118,8 +118,6 @@ bool afs_cm_incoming_call(struct afs_call *call) { _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); - call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall); - switch (call->operation_ID) { case CBCallBack: call->type = &afs_SRXCBCallBack; @@ -149,49 +147,6 @@ bool afs_cm_incoming_call(struct afs_call *call) } } -/* - * Record a probe to the cache manager from a server. - */ -static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) -{ - _enter(""); - - if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) && - !afs_is_probing_server(server)) { - if (server->cm_epoch == call->epoch) - return 0; - - if (!server->probe.said_rebooted) { - pr_notice("kAFS: FS rebooted %pU\n", &server->uuid); - server->probe.said_rebooted = true; - } - } - - spin_lock(&server->probe_lock); - - if (!test_and_set_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { - server->cm_epoch = call->epoch; - server->probe.cm_epoch = call->epoch; - goto out; - } - - if (server->probe.cm_probed && - call->epoch != server->probe.cm_epoch && - !server->probe.said_inconsistent) { - pr_notice("kAFS: FS endpoints inconsistent %pU\n", - &server->uuid); - server->probe.said_inconsistent = true; - } - - if (!server->probe.cm_probed || call->epoch == server->cm_epoch) - server->probe.cm_epoch = server->cm_epoch; - -out: - server->probe.cm_probed = true; - spin_unlock(&server->probe_lock); - return 0; -} - /* * Find the server record by peer address and record a probe to the cache * manager from a server. @@ -210,7 +165,7 @@ static int afs_find_cm_server_by_peer(struct afs_call *call) } call->server = server; - return afs_record_cm_probe(call, server); + return 0; } /* @@ -231,7 +186,7 @@ static int afs_find_cm_server_by_uuid(struct afs_call *call, } call->server = server; - return afs_record_cm_probe(call, server); + return 0; } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 4b8ac049fc17..9f024c1bd650 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -124,7 +124,6 @@ struct afs_call { spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ - u32 epoch; unsigned intmax_lifespan; /* Maximum lifespan to set if not 0 */ unsignedrequest_size; /* size of request data */ unsignedreply_max; /* maximum size of reply */ @@ -491,12 +490,10 @@ struct afs_server { #define AFS_SERVER_FL_MAY_HAVE_CB 8/* May have callbacks on this fileserver */ #define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ #define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ -#define AFS_SERVER_FL_HAVE_EPOCH 11/* ->epoch is valid */ #define AFS_SERVER_FL_NEEDS_UPDATE 12 /* Fileserver address list is out of date */ atomic_tref;/* Object refcount */ atomic_tactive; /* Active user count */ u32 addr_version; /* Address list version */ - u32 cm_epoch; /* Server RxRPC epoch */ unsigned intdebug_id; /* Debugging ID for traces */ /* file service access */ @@ -515,15 +512,11 @@ struct afs_server { struct { unsigned intrtt;/* RTT as ktime/64 */ u32 abort_code; - u32 cm_epoch; short error; boolresponded:1; boolis_yfs:1; boolnot_yfs:1; boollocal_failure:1; - boolcm_probed:1; - boolsaid_rebooted:1; - boolsaid_inconsistent:1
[PATCH 12/27] afs: Rename struct afs_fs_cursor to afs_operation
As a prelude to implementing asynchronous fileserver operations in the afs filesystem, rename struct afs_fs_cursor to afs_operation. This struct is going to form the core of the operation management and is going to acquire more members in later. Signed-off-by: David Howells --- fs/afs/dir.c | 22 ++-- fs/afs/dir_silly.c |4 - fs/afs/file.c |2 fs/afs/flock.c |6 + fs/afs/fsclient.c | 42 --- fs/afs/inode.c | 10 +- fs/afs/internal.h | 112 ++-- fs/afs/rotate.c| 292 ++-- fs/afs/server.c|8 + fs/afs/super.c |4 - fs/afs/volume.c|4 - fs/afs/write.c |2 fs/afs/xattr.c |8 + fs/afs/yfsclient.c | 40 --- 14 files changed, 278 insertions(+), 278 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 3c486340b220..ff421db40cf2 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -643,7 +643,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_status_cb *scb; struct afs_iget_data iget_data; - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_server *server; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct inode *inode = NULL, *ti; @@ -1220,7 +1220,7 @@ void afs_d_release(struct dentry *dentry) /* * Create a new inode for create/mkdir/symlink */ -static void afs_vnode_new_inode(struct afs_fs_cursor *fc, +static void afs_vnode_new_inode(struct afs_operation *fc, struct dentry *new_dentry, struct afs_iget_data *new_data, struct afs_status_cb *new_scb) @@ -1248,7 +1248,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, d_instantiate(new_dentry, inode); } -static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, +static void afs_prep_for_new_inode(struct afs_operation *fc, struct afs_iget_data *iget_data) { iget_data->volume = fc->vnode->volume; @@ -1261,7 +1261,7 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, * number derived from the result of the operation. It doesn't matter if * d_fsdata goes backwards as we'll just revalidate. */ -static void afs_update_dentry_version(struct afs_fs_cursor *fc, +static void afs_update_dentry_version(struct afs_operation *fc, struct dentry *dentry, struct afs_status_cb *scb) { @@ -1277,7 +1277,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct afs_iget_data iget_data; struct afs_status_cb *scb; - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; afs_dataversion_t data_version; @@ -1367,7 +1367,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) static int afs_rmdir(struct inode *dir, struct dentry *dentry) { struct afs_status_cb *scb; - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; afs_dataversion_t data_version; @@ -1483,7 +1483,7 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry, */ static int afs_unlink(struct inode *dir, struct dentry *dentry) { - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); @@ -1588,7 +1588,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct afs_iget_data iget_data; - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; @@ -1666,7 +1666,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); @@ -1755,7 +1755,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, const char *content) { struct afs_iget_data iget_data; - struct afs_fs_cursor fc; + struct afs_operation fc; struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; @@ -1837,7 +1837,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
[PATCH 15/27] afs: Fix handling of CB.ProbeUuid cache manager op
The AFS filesystem driver is handling the CB.ProbeUuid request incorrectly. The UUID presented in the request is that of the cache manager, not the fileserver, so afs_deliver_cb_probe_uuid() shouldn't be using that UUID to look up the server. Fix this by looking up the server by address instead. Signed-off-by: David Howells --- fs/afs/cmservice.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 954030ae7a0f..bef413818af7 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -464,7 +464,8 @@ static int afs_deliver_cb_probe(struct afs_call *call) } /* - * allow the fileserver to quickly find out if the fileserver has been rebooted + * Allow the fileserver to quickly find out if the cache manager has been + * rebooted. */ static void SRXAFSCB_ProbeUuid(struct work_struct *work) { @@ -536,7 +537,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - return afs_find_cm_server_by_uuid(call, call->request); + return afs_find_cm_server_by_peer(call); } /*
[PATCH 19/27] afs: Detect cell aliases 2 - Cells with no root volumes
Implement the second phase of cell alias detection. This part handles alias detection for cells that don't have root.cell volumes and so we have to find some other volume or fileserver to query. We take the first volume from each such cell and attempt to look it up in the new cell. If found, we compare the records, if they are the same, we judge the cell names to be aliases. Signed-off-by: David Howells --- fs/afs/vl_alias.c | 90 - 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index d1d91a25fbe0..76bfa4dde4a4 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -180,6 +180,94 @@ static int afs_compare_cell_roots(struct afs_cell *cell) return 1; } +/* + * Query the new cell for a volume from a cell we're already using. + */ +static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key, + struct afs_cell *p) +{ + struct afs_volume *volume, *pvol = NULL; + int ret; + + /* Arbitrarily pick the first volume in the list. */ + read_lock(&p->proc_lock); + if (!list_empty(&p->proc_volumes)) + pvol = afs_get_volume(list_first_entry(&p->proc_volumes, + struct afs_volume, proc_link)); + read_unlock(&p->proc_lock); + if (!pvol) + return 0; + + _enter("%s:%s", cell->name, pvol->name); + + /* And see if it's in the new cell. */ + volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len); + if (IS_ERR(volume)) { + afs_put_volume(cell->net, pvol); + if (PTR_ERR(volume) != -ENOMEDIUM) + return PTR_ERR(volume); + /* That volume is not in the new cell, so not an alias */ + return 0; + } + + /* The new cell has a like-named volume also - compare volume ID, +* server and address lists. +*/ + ret = 0; + if (pvol->vid == volume->vid) { + rcu_read_lock(); + if (afs_compare_volume_slists(volume, pvol)) + ret = 1; + rcu_read_unlock(); + } + + afs_put_volume(cell->net, volume); + afs_put_volume(cell->net, pvol); + return ret; +} + +/* + * Query the new cell for volumes we know exist in cells we're already using. + */ +static int afs_query_for_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *p; + + _enter("%s", cell->name); + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) + return -ERESTARTSYS; + + hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) { + if (p == cell || p->alias_of) + continue; + if (list_empty(&p->proc_volumes)) + continue; + if (p->root_volume) + continue; /* Ignore cells that have a root.cell volume. */ + afs_get_cell(p); + mutex_unlock(&cell->net->proc_cells_lock); + + if (afs_query_for_alias_one(cell, key, p) != 0) + goto is_alias; + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { + afs_put_cell(cell->net, p); + return -ERESTARTSYS; + } + + afs_put_cell(cell->net, p); + } + + mutex_unlock(&cell->net->proc_cells_lock); + _leave(" = 0"); + return 0; + +is_alias: + cell->alias_of = p; /* Transfer our ref */ + return 1; +} + static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key) { struct afs_volume *root_volume; @@ -199,7 +287,7 @@ static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key) /* Okay, this cell doesn't have an root.cell volume. We need to * locate some other random volume and use that to check. */ - return -ENOMEDIUM; + return afs_query_for_alias(cell, key); } /*
[PATCH 20/27] afs: Detect cell aliases 3 - YFS Cells with a canonical cell name op
YFS Volume Location servers have an operation by which the cell name may be queried. Use this to find out what a YFS server thinks the canonical cell name should be. Signed-off-by: David Howells --- fs/afs/vl_alias.c | 60 fs/afs/vl_rotate.c |4 +++ 2 files changed, 64 insertions(+) diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 76bfa4dde4a4..1fcb63c65ba9 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -268,12 +268,72 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key) return 1; } +/* + * Look up a VLDB record for a volume. + */ +static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_vl_cursor vc; + char *cell_name = ERR_PTR(-EDESTADDRREQ); + bool skipped = false, not_skipped = false; + int ret; + + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); + + while (afs_select_vlserver(&vc)) { + if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) { + vc.ac.error = -EOPNOTSUPP; + skipped = true; + continue; + } + not_skipped = true; + cell_name = afs_yfsvl_get_cell_name(&vc); + } + + ret = afs_end_vlserver_operation(&vc); + if (skipped && !not_skipped) + ret = -EOPNOTSUPP; + return ret < 0 ? ERR_PTR(ret) : cell_name; +} + +static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *master; + char *cell_name; + + cell_name = afs_vl_get_cell_name(cell, key); + if (IS_ERR(cell_name)) + return PTR_ERR(cell_name); + + if (strcmp(cell_name, cell->name) == 0) { + kfree(cell_name); + return 0; + } + + master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name), +NULL, false); + kfree(cell_name); + if (IS_ERR(master)) { + kfree(cell_name); + return PTR_ERR(master); + } + + cell->alias_of = master; /* Transfer our ref */ + return 1; +} + static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key) { struct afs_volume *root_volume; + int ret; _enter("%s", cell->name); + ret = yfs_check_canonical_cell_name(cell, key); + if (ret != -EOPNOTSUPP) + return ret; + /* Try and get the root.cell volume for comparison with other cells */ root_volume = afs_sample_volume(cell, key, "root.cell", 9); if (!IS_ERR(root_volume)) { diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 72eacc14e6e1..f405ca8b240a 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -151,6 +151,10 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) vc->error = error; vc->flags |= AFS_VL_CURSOR_RETRY; goto next_server; + + case -EOPNOTSUPP: + _debug("notsupp"); + goto next_server; } restart_from_beginning:
[PATCH 18/27] afs: Detect cell aliases 1 - Cells with root volumes
Put in the first phase of cell alias detection. This part handles alias detection for cells that have root.cell volumes (which is expected to be likely). When a cell becomes newly active, it is probed for its root.cell volume, and if it has one, this volume is compared against other root.cell volumes to find out if the list of fileserver UUIDs have any in common - and if that's the case, do the address lists of those fileservers have any addresses in common. If they do, the new cell is adjudged to be an alias of the old cell and the old cell is used instead. Comparing is aided by the server list in struct afs_server_list being sorted in UUID order and the addresses in the fileserver address lists being sorted in address order. The cell then retains the afs_volume object for the root.cell volume, even if it's not mounted for future alias checking. This necessary because: (1) Whilst fileservers have UUIDs that are meant to be globally unique, in practice they are not because cells get cloned without changing the UUIDs - so afs_server records need to be per cell. (2) Sometimes the DNS is used to make cell aliases - but if we don't know they're the same, we may end up with multiple superblocks and multiple afs_server records for the same thing, impairing our ability to deliver callback notifications of third party changes (3) The fileserver RPC API doesn't contain the cell name, so it can't tell us which cell it's notifying and can't see that a change made to to one cell should notify the same client that's also accessed as the other cell. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/Makefile |1 fs/afs/cell.c |3 + fs/afs/internal.h | 17 +++- fs/afs/main.c |1 fs/afs/proc.c |5 + fs/afs/super.c| 18 fs/afs/vl_alias.c | 235 + fs/afs/volume.c |7 +- 8 files changed, 279 insertions(+), 8 deletions(-) create mode 100644 fs/afs/vl_alias.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 924f02e9d7e7..75c4e4043d1d 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -31,6 +31,7 @@ kafs-y := \ server_list.o \ super.o \ vlclient.o \ + vl_alias.o \ vl_list.o \ vl_probe.o \ vl_rotate.o \ diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 78ba5f932287..212098514ebf 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -164,6 +164,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_servers_lock); + cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); /* Provide a VL server list, filling it in if we were given a list of * addresses to use. @@ -481,7 +482,9 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); + afs_put_volume(cell->net, cell->root_volume); afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); + afs_put_cell(cell->net, cell->alias_of); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3606cfa50832..a3ef97d560ca 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -269,6 +269,7 @@ struct afs_net { struct timer_list cells_timer; atomic_tcells_outstanding; seqlock_t cells_lock; + struct mutexcells_alias_lock; struct mutexproc_cells_lock; struct hlist_head proc_cells; @@ -342,8 +343,10 @@ enum afs_cell_state { * for authentication and encryption. The cell name is not typically used in * the protocol. * - * There is no easy way to determine if two cells are aliases or one is a - * subset of another. + * Two cells are determined to be aliases if they have an explicit alias (YFS + * only), share any VL servers in common or have at least one volume in common. + * "In common" means that the address list of the VL servers or the fileservers + * share at least one endpoint. */ struct afs_cell { union { @@ -351,6 +354,8 @@ struct afs_cell { struct rb_node net_node; /* Node in net->cells */ }; struct afs_net *net; + struct afs_cell *alias_of; /* The cell this is an alias of */ + struct afs_volume *root_volume; /* The root.cell volume if there is one */ struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager;/* Manager for init/deinit/dns */ struct hlist_node proc_link; /* /proc cell list link */ @@ -363,6 +368,7 @@ struct afs_cell { unsigned long flags; #define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
[PATCH 16/27] afs: Retain more of the VLDB record for alias detection
Save more bits from the volume location database record obtained for a server so that we can use this information in cell alias detection. Signed-off-by: David Howells --- fs/afs/internal.h|3 ++- fs/afs/server_list.c |3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9f024c1bd650..dce03e068cab 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -546,7 +546,7 @@ struct afs_cb_interest { }; /* - * Replaceable server list. + * Replaceable volume server list. */ struct afs_server_entry { struct afs_server *server; @@ -554,6 +554,7 @@ struct afs_server_entry { }; struct afs_server_list { + afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ refcount_t usage; unsigned char nr_servers; unsigned char preferred; /* Preferred server */ diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index b77e50f62459..a35f6951a74a 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -46,6 +46,9 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, refcount_set(&slist->usage, 1); rwlock_init(&slist->lock); + for (i = 0; i < AFS_MAXTYPES; i++) + slist->vids[i] = vldb->vid[i]; + /* Make sure a records exists for each server in the list. */ for (i = 0; i < vldb->nr_servers; i++) { if (!(vldb->fs_mask[i] & type_mask))
[PATCH 17/27] afs: Implement client support for the YFSVL.GetCellName RPC op
Implement client support for the YFSVL.GetCellName RPC operation by which YFS permits the canonical cell name to be queried from a VL server. Signed-off-by: David Howells --- fs/afs/afs.h |2 - fs/afs/afs_vl.h|1 fs/afs/internal.h |2 + fs/afs/protocol_yfs.h |2 - fs/afs/vlclient.c | 111 include/trace/events/afs.h |4 ++ 6 files changed, 120 insertions(+), 2 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index f8e34406243e..432cb4b23961 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -10,7 +10,7 @@ #include -#define AFS_MAXCELLNAME64 /* Maximum length of a cell name */ +#define AFS_MAXCELLNAME256 /* Maximum length of a cell name */ #define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ #define AFS_MAXNSERVERS8 /* Maximum servers in a basic volume record */ #define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index e9b8029920ec..9c65ffb8a523 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -22,6 +22,7 @@ enum AFSVL_Operations { VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */ + YVLGETCELLNAME = 64014, /* YFS Get actual cell name */ VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */ }; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index dce03e068cab..3606cfa50832 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -116,6 +116,7 @@ struct afs_call { longret0; /* Value to reply with instead of 0 */ struct afs_addr_list*ret_alist; struct afs_vldb_entry *ret_vldb; + char*ret_str; }; struct afs_operation*op; unsigned intserver_index; @@ -1373,6 +1374,7 @@ extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uu extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *, struct afs_vlserver *, unsigned int); extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); +extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *); /* * vl_probe.c diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index 32be9c698348..b5bd03b1d3c7 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -8,7 +8,7 @@ #define YFS_FS_SERVICE 2500 #define YFS_CM_SERVICE 2501 -#define YFSCBMAX 1024 +#define YFSCBMAX 1024 enum YFS_CM_Operations { YFSCBProbe = 206, /* probe client */ diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index d0c85623ce8f..fd82850cd424 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -645,3 +645,114 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, afs_make_call(&vc->ac, call, GFP_KERNEL); return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac); } + +/* + * Deliver reply data to a YFSVL.GetCellName operation. + */ +static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call) +{ + char *cell_name; + u32 namesz, paddedsz; + int ret; + + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->iter), call->count); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* Fall through - and extract the cell name length */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + namesz = ntohl(call->tmp); + if (namesz > AFS_MAXCELLNAME) + return afs_protocol_error(call, afs_eproto_cellname_len); + paddedsz = (namesz + 3) & ~3; + call->count = namesz; + call->count2 = paddedsz - namesz; + + cell_name = kmalloc(namesz + 1, GFP_KERNEL); + if (!cell_name) + return -ENOMEM; + cell_name[namesz] = 0; + call->ret_str = cell_name; + + afs_extract_begin(call, cell_name, namesz); + call->unmarshall++; + + /* Fall through - and extract cell name */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_discard(call, call->count2); + call->unmarshall++; + + /* Fall through -
[PATCH 21/27] afs: Add a tracepoint to track the lifetime of the afs_volume struct
Add a tracepoint to track the lifetime of the afs_volume struct. Signed-off-by: David Howells --- fs/afs/cell.c |2 +- fs/afs/fs_operation.c |4 ++- fs/afs/internal.h | 10 ++-- fs/afs/super.c | 10 +--- fs/afs/vl_alias.c |9 --- fs/afs/volume.c| 27 ++--- include/trace/events/afs.h | 56 7 files changed, 95 insertions(+), 23 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 212098514ebf..8bfc8a05fd46 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -482,7 +482,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_volume(cell->net, cell->root_volume); + afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); afs_put_cell(cell->net, cell->alias_of); key_put(cell->anonymous_key); diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index f7a768d12141..f57efd9d2db0 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -36,7 +36,7 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo } op->key = key; - op->volume = afs_get_volume(volume); + op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op); op->net = volume->cell->net; op->cb_v_break = volume->cb_v_break; op->debug_id= atomic_inc_return(&afs_operation_debug_counter); @@ -233,7 +233,7 @@ int afs_put_operation(struct afs_operation *op) afs_end_cursor(&op->ac); afs_put_cb_interest(op->net, op->cbi); afs_put_serverlist(op->net, op->server_list); - afs_put_volume(op->net, op->volume); + afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op); kfree(op); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a3ef97d560ca..e084936066b0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1429,17 +1429,11 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, /* * volume.c */ -static inline struct afs_volume *afs_get_volume(struct afs_volume *volume) -{ - if (volume) - atomic_inc(&volume->usage); - return volume; -} - extern struct afs_volume *afs_create_volume(struct afs_fs_context *); extern void afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); -extern void afs_put_volume(struct afs_net *, struct afs_volume *); +extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace); +extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace); extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); /* diff --git a/fs/afs/super.c b/fs/afs/super.c index 1bb69159956f..45e937dcb80a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -376,7 +376,8 @@ static int afs_validate_fc(struct fs_context *fc) ctx->key = key; if (ctx->volume) { - afs_put_volume(ctx->net, ctx->volume); + afs_put_volume(ctx->net, ctx->volume, + afs_volume_trace_put_validate_fc); ctx->volume = NULL; } @@ -507,7 +508,8 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) as->dyn_root = true; } else { as->cell = afs_get_cell(ctx->cell); - as->volume = afs_get_volume(ctx->volume); + as->volume = afs_get_volume(ctx->volume, + afs_volume_trace_get_alloc_sbi); } } return as; @@ -517,7 +519,7 @@ static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { struct afs_net *net = afs_net(as->net_ns); - afs_put_volume(net, as->volume); + afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi); afs_put_cell(net, as->cell); put_net(as->net_ns); kfree(as); @@ -604,7 +606,7 @@ static void afs_free_fc(struct fs_context *fc) struct afs_fs_context *ctx = fc->fs_private; afs_destroy_sbi(fc->s_fs_info); - afs_put_volume(ctx->net, ctx->volume); + afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc); afs_put_cell(ctx->net, ctx->cell); key_put(ctx->key); kfree(ctx); diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 1fcb63c65ba9..1cf9584bb51d 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -193,7 +193,8 @@ static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key, read
Re: clean up kernel_{read,write} & friends v2
On 5/29/2020 12:19 PM, Linus Torvalds wrote: > On Fri, May 29, 2020 at 6:08 AM David Laight wrote: >> A wide monitor is for looking at lots of files. > Not necessarily. > > Excessive line breaks are BAD. They cause real and every-day problems. > > They cause problems for things like "grep" both in the patterns and in > the output, since grep (and a lot of other very basic unix utilities) > is fundamentally line-based. > > So the fact is, many of us have long long since skipped the whole > "80-column terminal" model, for the same reason that we have many more > lines than 25 lines visible at a time. > > And honestly, I don't want to see patches that make the kernel reading > experience worse for me and likely for the vast majority of people, > based on the argument that some odd people have small terminal > windows. > > If you or Christoph have 80 character lines, you'll get possibly ugly > wrapped output. Tough. That's _your_ choice. Your hardware limitations > shouldn't be a pain for the rest of us. > > Longer lines are fundamentally useful. My monitor is not only a lot > wider than it is tall, my fonts are universally narrower than they are > tall. Long lines are natural. > > When I tile my terminal windows on my display, I can have 6 terminals > visible at one time, and that's because I have them three wide. And I > could still fit 80% of a fourth one side-by-side. > > And guess what? That's with my default "100x50" terminal window (go to > your gnome terminal settings, you'll find that the 80x25 thing is just > an initial default that you can change), not with some 80x25 one. And > that's with a font that has anti-aliasing and isn't some pixelated > mess. > > And most of my terminals actually end up being dragged wider and > taller than that. I checked, and my main one is 142x76 characters > right now, because it turns out that wider (and taller) terminals are > useful not just for source code. > > Have you looked at "ps ax" output lately? Or used "top"? Or done "git > diff --stat" or any number of things where it turns out that 80x25 is > really really limiting, and is simply NO LONGER RELEVANT to most of > us. > > So no. I do not care about somebody with a 80x25 terminal window > getting line wrapping. The first law of good C programming is: "Thou shalt adhere to the One True Brace Style" It extrapolates into indentation, line width, and a number of other things. Since Linux is a trademark of Linus Torvalds, you get to define what the "One Truth" is. Time to resize my terminals. > > For exactly the same reason I find it completely irrelevant if > somebody says that their kernel compile takes 10 hours because they > are doing kernel development on a Raspberry PI with 4GB of RAM. > > People with restrictive hardware shouldn't make it more inconvenient > for people who have better resources. Yes, we'll accommodate things to > within reasonable limits. But no, 80-column terminals in 2020 isn't > "reasonable" any more as far as I'm concerned. People commonly used > 132-column terminals even back in the 80's, for chrissake, don't try > to make 80 columns some immovable standard. > > If you choose to use a 80-column terminal, you can live with the line > wrapping. It's just that simple. > > And longer lines are simply useful. Part of that is that we aren't > programming in the 80's any more, and our source code is fundamentally > wider as a result. > > Yes, local iteration variables are still called 'i', because more > context just isn't helpful for some anonymous counter. Being concise > is still a good thing, and overly verbose names are not inherently > better. > > But still - it's entirely reasonable to have variable names that are > 10-15 characters and it makes the code more legible. Writing things > out instead of using abbreviations etc. > > And yes, we do use wide tabs, because that makes indentation something > you can visually see in the structure at a glance and on a > whole-function basis, rather than something you have to try to > visually "line up" things for or count spaces. > > So we have lots of fairly fundamental issues that fairly easily make > for longer lines in many circumstances. > > And yes, we do line breaks at some point. But there really isn't any > reason to make that point be 80 columns any more. > > Linus
[PATCH 24/27] afs: Fix afs_statfs() to not let the values go below zero
Fix afs_statfs() so that the value for f_bavail and f_bfree don't go "negative" if the number of blocks in use by a volume exceeds the max quota for that volume. Signed-off-by: David Howells --- fs/afs/super.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/afs/super.c b/fs/afs/super.c index c77b11b31233..b552357b1d13 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -729,7 +729,10 @@ static void afs_get_volume_status_success(struct afs_operation *op) buf->f_blocks = vs->part_max_blocks; else buf->f_blocks = vs->max_quota; - buf->f_bavail = buf->f_bfree = buf->f_blocks - vs->blocks_in_use; + + if (buf->f_blocks > vs->blocks_in_use) + buf->f_bavail = buf->f_bfree = + buf->f_blocks - vs->blocks_in_use; } static const struct afs_operation_ops afs_get_volume_status_operation = {
[PATCH 23/27] afs: Fix the by-UUID server tree to allow servers with the same UUID
Whilst it shouldn't happen, it is possible for multiple fileservers to share a UUID, particularly if an entire cell has been duplicated, UUIDs and all. In such a case, it's not necessarily possible to map the effect of the CB.InitCallBackState3 incoming RPC to a specific server unambiguously by UUID and thus to a specific cell. Indeed, there's a problem whereby multiple server records may need to occupy the same spot in the rb_tree rooted in the afs_net struct. Fix this by allowing servers to form a list, with the head of the list in the tree. When the front entry in the list is removed, the second in the list just replaces it. afs_init_callback_state() then just goes down the line, poking each server in the list. This means that some servers will be unnecessarily poked, unfortunately. An alternative would be to route by call parameters. Reported-by: Jeffrey Altman Signed-off-by: David Howells Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") --- fs/afs/callback.c | 10 - fs/afs/internal.h |4 +++- fs/afs/server.c | 56 + 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index b4cb9bb63f0a..7d9b23d981bf 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -21,11 +21,17 @@ #include "internal.h" /* - * allow the fileserver to request callback state (re-)initialisation + * Allow the fileserver to request callback state (re-)initialisation. + * Unfortunately, UUIDs are not guaranteed unique. */ void afs_init_callback_state(struct afs_server *server) { - server->cb_s_break++; + rcu_read_lock(); + do { + server->cb_s_break++; + server = rcu_dereference(server->uuid_next); + } while (0); + rcu_read_unlock(); } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c64c2b47ece7..e0dc14d4d8b9 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -486,7 +486,9 @@ struct afs_server { struct afs_addr_list__rcu *addresses; struct afs_cell *cell; /* Cell to which belongs (pins ref) */ - struct rb_node uuid_rb;/* Link in cell->fs_servers */ + struct rb_node uuid_rb;/* Link in net->fs_servers */ + struct afs_server __rcu *uuid_next; /* Next server with same UUID */ + struct afs_server *uuid_prev; /* Previous server with same UUID */ struct list_headprobe_link; /* Link in net->fs_probe_list */ struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ diff --git a/fs/afs/server.c b/fs/afs/server.c index c51039a077cd..88593ffcb54e 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -130,13 +130,15 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu } /* - * Install a server record in the namespace tree + * Install a server record in the namespace tree. If there's a clash, we stick + * it into a list anchored on whichever afs_server struct is actually in the + * tree. */ static struct afs_server *afs_install_server(struct afs_cell *cell, struct afs_server *candidate) { const struct afs_addr_list *alist; - struct afs_server *server; + struct afs_server *server, *next; struct afs_net *net = cell->net; struct rb_node **pp, *p; int diff; @@ -153,12 +155,30 @@ static struct afs_server *afs_install_server(struct afs_cell *cell, _debug("- consider %p", p); server = rb_entry(p, struct afs_server, uuid_rb); diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); - if (diff < 0) + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto exists; + } else { + if (server->cell == cell) + goto exists; + + /* We have the same UUID representing servers in +* different cells. Append the new server to the list. +*/ + for (;;) { + next = rcu_dereference_protected( + server->uuid_next, + lockdep_is_held(&net->fs_lock.lock)); + if (!next) + break; + server = next; + } + rcu_assign_pointer(server->uuid_next, candidate); + candidate->uuid_prev = serv
[PATCH 26/27] afs: Show more a bit more server state in /proc/net/afs/servers
Display more information about the state of a server record, including the flags, rtt and break counter plus the probe state for each server in /proc/net/afs/servers. Rearrange the server flags a bit to make them easier to read at a glance in the proc file. Signed-off-by: David Howells --- fs/afs/internal.h | 16 fs/afs/proc.c | 10 +++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a4fe5d1a8b53..af0b7fca87db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -497,15 +497,15 @@ struct afs_server { time64_tunuse_time; /* Time at which last unused */ unsigned long flags; #define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ -#define AFS_SERVER_FL_NOT_READY1 /* The record is not ready for use */ -#define AFS_SERVER_FL_NOT_FOUND2 /* VL server says no such server */ -#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ -#define AFS_SERVER_FL_UPDATING 4 -#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_UPDATING 1 +#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */ +#define AFS_SERVER_FL_NOT_READY4 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND5 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */ #define AFS_SERVER_FL_MAY_HAVE_CB 8/* May have callbacks on this fileserver */ -#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ -#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ -#define AFS_SERVER_FL_NEEDS_UPDATE 12 /* Fileserver address list is out of date */ +#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */ +#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */ atomic_tref;/* Object refcount */ atomic_tactive; /* Active user count */ u32 addr_version; /* Address list version */ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 309a7b578255..22d00cf1913d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -386,9 +386,13 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) &server->uuid, atomic_read(&server->ref), atomic_read(&server->active)); - seq_printf(m, " - ALIST v=%u osp=%u r=%lx f=%lx\n", - alist->version, atomic_read(&server->probe_outstanding), - alist->responded, alist->failed); + seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n", + server->flags, server->rtt, server->cb_s_break); + seq_printf(m, " - probe: last=%d out=%d\n", + (int)(jiffies - server->probed_at) / HZ, + atomic_read(&server->probe_outstanding)); + seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n", + alist->version, alist->responded, alist->failed); for (i = 0; i < alist->nr_addrs; i++) seq_printf(m, "[%x] %pISpc%s\n", i, &alist->addrs[i].transport,
[PATCH 27/27] afs: Adjust the fileserver rotation algorithm to reprobe/retry more quickly
Adjust the fileserver rotation algorithm so that if we've tried all the addresses on a server (cumulatively over multiple operations) until we've run out of untried addresses, immediately reprobe all that server's interfaces and retry the op at least once before we move onto the next server. Signed-off-by: David Howells --- fs/afs/fs_probe.c | 47 +++ fs/afs/internal.h | 24 ++-- fs/afs/rotate.c | 29 +++-- 3 files changed, 88 insertions(+), 12 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c41cf3b2ab89..b34f74b0f319 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -338,6 +338,18 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server afs_put_server(net, server, afs_server_trace_put_probe); } +/* + * Probe a server immediately without waiting for its due time to come + * round. This is used when all of the addresses have been tried. + */ +void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) +{ + write_seqlock(&net->fs_lock); + if (!list_empty(&server->probe_link)) + return afs_dispatch_fs_probe(net, server, true); + write_sequnlock(&net->fs_lock); +} + /* * Probe dispatcher to regularly dispatch probes to keep NAT alive. */ @@ -411,3 +423,38 @@ void afs_fs_probe_dispatcher(struct work_struct *work) _leave(" [quiesce]"); } } + +/* + * Wait for a probe on a particular fileserver to complete for 2s. + */ +int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) +{ + struct wait_queue_entry wait; + unsigned long timo = 2 * HZ; + + if (atomic_read(&server->probe_outstanding) == 0) + goto dont_wait; + + init_wait_entry(&wait, 0); + for (;;) { + prepare_to_wait_event(&server->probe_wq, &wait, + is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + if (timo == 0 || + server->probe.responded || + atomic_read(&server->probe_outstanding) == 0 || + (is_intr && signal_pending(current))) + break; + timo = schedule_timeout(timo); + } + + finish_wait(&server->probe_wq, &wait); + +dont_wait: + if (server->probe.responded) + return 0; + if (is_intr && signal_pending(current)) + return -ERESTARTSYS; + if (timo == 0) + return -ETIME; + return -EDESTADDRREQ; +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h index af0b7fca87db..e1621b0670cc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -826,16 +826,18 @@ struct afs_operation { unsigned short nr_iterations; /* Number of server iterations */ unsigned intflags; -#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */ -#define AFS_OPERATION_VBUSY0x0002 /* Set if seen VBUSY */ -#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */ -#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */ -#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ -#define AFS_OPERATION_NO_VSLEEP0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ -#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */ -#define AFS_OPERATION_DOWNGRADE0x0080 /* Set to retry with downgraded opcode */ -#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */ -#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */ +#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */ +#define AFS_OPERATION_VBUSY0x0002 /* Set if seen VBUSY */ +#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_OPERATION_NO_VSLEEP0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ +#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */ +#define AFS_OPERATION_DOWNGRADE0x0080 /* Set to retry with downgraded opcode */ +#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */ +#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */ +#define AFS_OPERATION_TRIED_ALL0x0400 /* Set if we've tried all the fileservers */ +#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */ }; /* @@ -1055,7 +1057,9 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n, extern void afs_fileserver_probe_result(s
[PATCH 25/27] afs: Don't use probe running state to make decisions outside probe code
Don't use the running state for fileserver probes to make decisions about which server to use as the state is cleared at the start of a probe and also intermediate values might be misleading. Instead, add a separate 'latest known' rtt in the afs_server struct and a flag to indicate if the server is known to be responding and update these as and when we know what to change them to. Signed-off-by: David Howells --- fs/afs/fs_probe.c | 18 -- fs/afs/internal.h |4 +++- fs/afs/rotate.c |3 ++- fs/afs/server.c |1 + 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 442b5e7944ff..c41cf3b2ab89 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -42,10 +42,13 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server bool responded = server->probe.responded; write_seqlock(&net->fs_lock); - if (responded) + if (responded) { list_add_tail(&server->probe_link, &net->fs_probe_slow); - else + } else { + server->rtt = UINT_MAX; + clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); list_add_tail(&server->probe_link, &net->fs_probe_fast); + } write_sequnlock(&net->fs_lock); afs_schedule_fs_probe(net, server, !responded); @@ -161,12 +164,14 @@ void afs_fileserver_probe_result(struct afs_call *call) rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall); if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; + server->rtt = rtt_us; alist->preferred = index; } smp_wmb(); /* Set rtt before responded. */ server->probe.responded = true; set_bit(index, &alist->responded); + set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); out: spin_unlock(&server->probe_lock); @@ -224,7 +229,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) { struct wait_queue_entry *waits; struct afs_server *server; - unsigned int rtt = UINT_MAX; + unsigned int rtt = UINT_MAX, rtt_s; bool have_responders = false; int pref = -1, i; @@ -280,10 +285,11 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) for (i = 0; i < slist->nr_servers; i++) { if (test_bit(i, &untried)) { server = slist->servers[i].server; - if (server->probe.responded && - server->probe.rtt < rtt) { + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { pref = i; - rtt = server->probe.rtt; + rtt = rtt_s; } remove_wait_queue(&server->probe_wq, &waits[i]); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e0dc14d4d8b9..a4fe5d1a8b53 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -496,6 +496,7 @@ struct afs_server { struct afs_server *gc_next; /* Next server in manager's list */ time64_tunuse_time; /* Time at which last unused */ unsigned long flags; +#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ #define AFS_SERVER_FL_NOT_READY1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_FOUND2 /* VL server says no such server */ #define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ @@ -508,6 +509,7 @@ struct afs_server { atomic_tref;/* Object refcount */ atomic_tactive; /* Active user count */ u32 addr_version; /* Address list version */ + unsigned intrtt;/* Server's current RTT in uS */ unsigned intdebug_id; /* Debugging ID for traces */ /* file service access */ @@ -522,7 +524,7 @@ struct afs_server { atomic_tprobe_outstanding; spinlock_t probe_lock; struct { - unsigned intrtt;/* RTT as ktime/64 */ + unsigned intrtt;/* RTT in uS */ u32 abort_code; short error; boolresponded:1; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 979979e33a77..d1590fb382b6 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -339,7 +339,8 @@ bool afs_select_fileserver(struct afs_operation *op) for (i = 0; i < op->server_list->nr_servers; i++) { struct afs_server *s = op->server_list->servers[i
[PATCH 22/27] afs: Reorganise volume and server trees to be rooted on the cell
Reorganise afs_volume objects such that they're in a tree keyed on volume ID, rooted at on an afs_cell object rather than being in multiple trees, each of which is rooted on an afs_server object. afs_server structs become per-cell and acquire a pointer to the cell. The process of breaking a callback then starts with finding the server by its network address, following that to the cell and then looking up each volume ID in the volume tree. This is simpler than the afs_vol_interest/afs_cb_interest N:M mapping web and allows those structs and the code for maintaining them to be simplified or removed. It does make a couple of things a bit more tricky, though: (1) Operations now start with a volume, not a server, so there can be more than one answer as to whether or not the server we'll end up using supports the FS.InlineBulkStatus RPC. (2) CB RPC operations that specify the server UUID. There's still a tree of servers by UUID on the afs_net struct, but the UUIDs in it aren't guaranteed unique. Signed-off-by: David Howells --- fs/afs/callback.c | 286 +++-- fs/afs/cell.c |7 + fs/afs/dir.c | 45 ++-- fs/afs/fs_operation.c | 11 -- fs/afs/fsclient.c | 12 +- fs/afs/inode.c| 59 -- fs/afs/internal.h | 78 - fs/afs/proc.c | 15 +-- fs/afs/rotate.c | 128 +++--- fs/afs/rxrpc.c|5 - fs/afs/security.c |8 + fs/afs/server.c | 13 +- fs/afs/server_list.c | 30 - fs/afs/super.c|7 - fs/afs/vl_alias.c | 14 +- fs/afs/volume.c | 84 +- fs/afs/yfsclient.c|2 17 files changed, 257 insertions(+), 547 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 282dbac84435..b4cb9bb63f0a 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -20,185 +20,6 @@ #include #include "internal.h" -/* - * Create volume and callback interests on a server. - */ -static struct afs_cb_interest *afs_create_interest(struct afs_server *server, - struct afs_vnode *vnode) -{ - struct afs_vol_interest *new_vi, *vi; - struct afs_cb_interest *new; - struct rb_node *parent, **pp; - - new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL); - if (!new_vi) - return NULL; - - new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); - if (!new) { - kfree(new_vi); - return NULL; - } - - new_vi->usage = 1; - new_vi->vid = vnode->volume->vid; - INIT_HLIST_HEAD(&new_vi->cb_interests); - - refcount_set(&new->usage, 1); - new->sb = vnode->vfs_inode.i_sb; - new->server = afs_get_server(server, afs_server_trace_get_new_cbi); - INIT_HLIST_NODE(&new->cb_vlink); - - write_seqlock(&server->cb_break_lock); - - pp = &server->cb_volumes.rb_node; - while ((parent = *pp)) { - vi = rb_entry(parent, struct afs_vol_interest, srv_node); - if (vi->vid < new_vi->vid) { - pp = &(*pp)->rb_left; - } else if (vi->vid > new_vi->vid) { - pp = &(*pp)->rb_right; - } else { - vi->usage++; - goto found_vi; - } - } - - vi = new_vi; - new_vi = NULL; - rb_link_node_rcu(&vi->srv_node, parent, pp); - rb_insert_color(&vi->srv_node, &server->cb_volumes); - -found_vi: - new->vol_interest = vi; - hlist_add_head(&new->cb_vlink, &vi->cb_interests); - - write_sequnlock(&server->cb_break_lock); - kfree(new_vi); - return new; -} - -/* - * Set up an interest-in-callbacks record for a volume on a server and - * register it with the server. - * - Called with vnode->io_lock held. - */ -int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_server_list *slist, - unsigned int index) -{ - struct afs_server_entry *entry = &slist->servers[index]; - struct afs_cb_interest *cbi, *vcbi, *new, *old; - struct afs_server *server = entry->server; - -again: - vcbi = rcu_dereference_protected(vnode->cb_interest, -lockdep_is_held(&vnode->io_lock)); - if (vcbi && likely(vcbi == entry->cb_interest)) - return 0; - - read_lock(&slist->lock); - cbi = afs_get_cb_interest(entry->cb_interest); - read_unlock(&slist->lock); - - if (vcbi) { - if (vcbi == cbi) { - afs_put_cb_interest(afs_v2net(vnode), cbi); - return 0; - } - - /* Use a new interest in the server list for the same server -* rather than an old one that's still attached to a
Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro
On Sat, May 30, 2020 at 12:42 AM Andy Shevchenko wrote: > On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris > wrote: > > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko > > wrote: ... > > Explanation: > > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer), > > (BIT(nbits) - 1) > > gives a value of zero and when this zero is ANDed with any value, it > > makes it full zero. This is unexpected and incorrect calculation happening. > > > > What actually happens is in the macro expansion of BIT(64), that is 1 > > << 64, the '1' overflows from leftmost bit position (most significant > > bit) and re-enters at the rightmost bit position (least significant > > bit), therefore 1 << 64 becomes '0x1', and when another '1' is > > subtracted from this, the final result becomes 0. > > > > Since this macro is being used in both bitmap_get_value and > > bitmap_set_value functions, it will give unexpected results when nbits or > > clump > > size is BITS_PER_LONG (32 or 64 depending on arch). > > I see, something like > https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139 > should be done. > But yes, let's try to fix GENMASK(). > > So, if we modify the following > > #define GENMASK_INPUT_CHECK(h, l) \ > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > __builtin_constant_p((l) > (h)), (l) > (h), 0))) > > to be > > #define GENMASK_INPUT_CHECK(h, l) \ > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0))) > > would it work? Actually it needs an amendment for signed types... (l) ? (l) > (h) : !((h) > 0) ...but today is Friday night, so, mistakes are warranted :-) -- With Best Regards, Andy Shevchenko
drivers/usb/cdns3/gadget.c:1188:29: sparse: sparse: incorrect type in assignment (different base types)
tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 75caf310d16cc5e2f851c048cd597f5437013368 commit: 70d8b9e5e63d212019ba3f6823c8ec3d2df87645 usb: cdns3: make signed 1 bit bitfields unsigned date: 9 weeks ago config: i386-randconfig-s031-20200529 (attached as .config) compiler: gcc-9 (Debian 9.3.0-13) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.1-243-gc100a7ab-dirty git checkout 70d8b9e5e63d212019ba3f6823c8ec3d2df87645 # save the attached .config to linux build tree make W=1 C=1 ARCH=i386 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' If you fix the issue, kindly add following tag as appropriate Reported-by: kbuild test robot sparse warnings: (new ones prefixed by >>) drivers/usb/cdns3/gadget.c:1157:35: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] control @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1157:35: sparse: expected restricted __le32 [usertype] control drivers/usb/cdns3/gadget.c:1157:35: sparse: got unsigned long drivers/usb/cdns3/gadget.c:1173:29: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] buffer @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1173:29: sparse: expected restricted __le32 [usertype] buffer drivers/usb/cdns3/gadget.c:1173:29: sparse: got unsigned long >> drivers/usb/cdns3/gadget.c:1188:29: sparse: sparse: incorrect type in >> assignment (different base types) @@ expected restricted __le32 >> [usertype] length @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1188:29: sparse: expected restricted __le32 [usertype] length drivers/usb/cdns3/gadget.c:1188:29: sparse: got unsigned long drivers/usb/cdns3/gadget.c:1191:37: sparse: sparse: invalid assignment: |= drivers/usb/cdns3/gadget.c:1191:37: sparse:left side has type restricted __le32 drivers/usb/cdns3/gadget.c:1191:37: sparse:right side has type unsigned long drivers/usb/cdns3/gadget.c:1213:38: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] control @@ got unsigned int [assigned] [usertype] control @@ drivers/usb/cdns3/gadget.c:1213:38: sparse: expected restricted __le32 [usertype] control drivers/usb/cdns3/gadget.c:1213:38: sparse: got unsigned int [assigned] [usertype] control drivers/usb/cdns3/gadget.c:1215:48: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] control @@ got unsigned int [assigned] [usertype] control @@ drivers/usb/cdns3/gadget.c:1215:48: sparse: expected restricted __le32 [usertype] control drivers/usb/cdns3/gadget.c:1215:48: sparse: got unsigned int [assigned] [usertype] control drivers/usb/cdns3/gadget.c:1229:30: sparse: sparse: invalid assignment: |= drivers/usb/cdns3/gadget.c:1229:30: sparse:left side has type restricted __le32 drivers/usb/cdns3/gadget.c:1229:30: sparse:right side has type unsigned long drivers/usb/cdns3/gadget.c:1255:36: sparse: sparse: restricted __le32 degrades to integer drivers/usb/cdns3/gadget.c:1255:30: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] control @@ got unsigned int @@ drivers/usb/cdns3/gadget.c:1255:30: sparse: expected restricted __le32 [usertype] control drivers/usb/cdns3/gadget.c:1255:30: sparse: got unsigned int drivers/usb/cdns3/gadget.c:1010:29: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] buffer @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1010:29: sparse: expected restricted __le32 [usertype] buffer drivers/usb/cdns3/gadget.c:1010:29: sparse: got unsigned long drivers/usb/cdns3/gadget.c:1013:29: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] buffer @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1013:29: sparse: expected restricted __le32 [usertype] buffer drivers/usb/cdns3/gadget.c:1013:29: sparse: got unsigned long drivers/usb/cdns3/gadget.c:1019:21: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] length @@ got unsigned long @@ drivers/usb/cdns3/gadget.c:1019:21: sparse: expected restricted __le32 [usertype] length drivers/usb/cdns3/gadget.c:1019:21: sparse: got unsigned long drivers/usb/cdns3/gadget.c:1029:37: sparse: sparse: invalid assignment: |= drivers/usb/cdns3/gadget.c:1029:37: sparse:left side has type restricted __le32 drivers/usb/cdns3/gadget.c:1029:37: sparse:right side has type unsigned long
Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro
On Sat, May 30, 2020 at 3:13 AM Andy Shevchenko wrote: > > On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris > wrote: > > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko > > wrote: > > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote: > > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot > > > > wrote: > > ... > > > > > Taking the example statement (in my patch) where compilation warning > > > > is getting reported: > > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0); > > > > > > > > 'nbits' is of type 'unsigned long'. > > > > In above, the sanity check is comparing '0' with unsigned value. And > > > > unsigned value can't be less than '0' ever, hence the warning. > > > > But this warning will occur whenever there will be '0' as one of the > > > > 'argument' and an unsigned variable as another 'argument' for GENMASK. > > > > Proper fix is to fix GENMASK(), but allowed workaround is to use > > > (BIT(nbits) - 1) > > > instead. > > > When I used BIT macro (earlier), I had faced a problem. I want to tell > > you about that. > > > > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or > > clump size) is BITS_PER_LONG, unexpected calculation happens. > > > > Explanation: > > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer), > > (BIT(nbits) - 1) > > gives a value of zero and when this zero is ANDed with any value, it > > makes it full zero. This is unexpected and incorrect calculation happening. > > > > What actually happens is in the macro expansion of BIT(64), that is 1 > > << 64, the '1' overflows from leftmost bit position (most significant > > bit) and re-enters at the rightmost bit position (least significant > > bit), therefore 1 << 64 becomes '0x1', and when another '1' is > > subtracted from this, the final result becomes 0. > > > > Since this macro is being used in both bitmap_get_value and > > bitmap_set_value functions, it will give unexpected results when nbits or > > clump > > size is BITS_PER_LONG (32 or 64 depending on arch). > > I see, something like > https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139 > should be done. > But yes, let's try to fix GENMASK(). > > So, if we modify the following > > #define GENMASK_INPUT_CHECK(h, l) \ > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > __builtin_constant_p((l) > (h)), (l) > (h), 0))) > > to be > > #define GENMASK_INPUT_CHECK(h, l) \ > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0))) > > would it work? Sorry Andy it is not working. Actually the warning will be thrown, whenever there will be comparison between 'h' and 'l'. If one of them is '0' and the other is unsigned variable. In above, still there is comparison being done between 'h' and 'l', so the warning is getting thrown. > > > William also knows about this issue: > > "This is undefined behavior in the C standard (section 6.5.7 in the N1124)" > > I think it is about 6.5.7.3 here, 1U << 31 (or 63) is okay. Actually for: (BIT(nbits) - 1) When nbits will be BITS_PER_LONG it will be 1U << 32 (or 64). Isn't it ? The expression, BIT(64) - 1 can become unexpectedly zero (incorrectly). > > -- > With Best Regards, > Andy Shevchenko
Re: [PATCH] [v2] ARM: omap2: fix omap5_realtime_timer_init definition
* Arnd Bergmann [200529 22:01]: > There is one more regression introduced by the last build fix: > > arch/arm/mach-omap2/timer.c:170:6: error: attribute declaration must precede > definition [-Werror,-Wignored-attributes] > void __init omap5_realtime_timer_init(void) > ^ > arch/arm/mach-omap2/common.h:118:20: note: previous definition is here > static inline void omap5_realtime_timer_init(void) >^ > arch/arm/mach-omap2/timer.c:170:13: error: redefinition of > 'omap5_realtime_timer_init' > void __init omap5_realtime_timer_init(void) > ^ > arch/arm/mach-omap2/common.h:118:20: note: previous definition is here > static inline void omap5_realtime_timer_init(void) > > As it turns out, the CONFIG_SOC_HAS_REALTIME_COUNTER option > should never be disabled for OMAP5 as we realy on this to initialize > the clocks and the timer. Just remove it here and make it the default. > > Removing the guard around the set_cntfreq() definition, I noticed that > this is not properly namespaced, so fix that as well. Looks good to me, thanks for fixing it: Acked-by: Tony Lindgren
Re: [PATCH] refperf: work around 64-bit division
On 5/29/20 1:15 PM, Arnd Bergmann wrote: > A 64-bit division was introduced in refperf, breaking compilation > on all 32-bit architectures: > > kernel/rcu/refperf.o: in function `main_func': > refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod' > > Work it by using div_u64 to mark the expensive operation. > > Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds") > Signed-off-by: Arnd Bergmann Acked-by: Randy Dunlap # build-tested Thanks. > --- > kernel/rcu/refperf.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c > index 47df72c492b3..c2366648981d 100644 > --- a/kernel/rcu/refperf.c > +++ b/kernel/rcu/refperf.c > @@ -386,7 +386,7 @@ static int main_func(void *arg) > if (torture_must_stop()) > goto end; > > - result_avg[exp] = 1000 * process_durations(nreaders) / > (nreaders * loops); > + result_avg[exp] = div_u64(1000 * process_durations(nreaders), > nreaders * loops); > } > > // Print the average of all experiments > @@ -397,9 +397,14 @@ static int main_func(void *arg) > strcat(buf, "Threads\tTime(ns)\n"); > > for (exp = 0; exp < nruns; exp++) { > + u64 avg; > + u32 rem; > + > if (errexit) > break; > - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / > 1000, (int)(result_avg[exp] % 1000)); > + > + avg = div_s64_rem(result_avg[exp], 1000, &rem); > + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem); > strcat(buf, buf1); > } > > -- ~Randy
Re: [PATCH v3] regmap: fix alignment issue
On Fri, May 29, 2020 at 09:25:38PM +0200, Jens Thoms Toerring wrote: > pointers with odd addresses. On architectures with strict alignment > requirements this results in a kernel crash for u16 and u32 values. Which architectures out of interest? signature.asc Description: PGP signature
Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking
On Fri, 29 May 2020 23:27:41 +0200 Peter Zijlstra wrote: > There is no reason not to always, accurately, track IRQ state. > > This change also makes IRQ state tracking ignore lockdep_off(). > > Signed-off-by: Peter Zijlstra (Intel) > --- > kernel/locking/lockdep.c | 33 ++--- > 1 file changed, 30 insertions(+), 3 deletions(-) > > --- a/kernel/locking/lockdep.c > +++ b/kernel/locking/lockdep.c > @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v > */ > void lockdep_hardirqs_on_prepare(unsigned long ip) > { > - if (unlikely(!debug_locks || current->lockdep_recursion)) Why remove the check for debug_locks? Isn't that there to disable everything at once to prevent more warnings to be printed? Also, isn't there other ways that we could have recursion besides NMIs? Say we do a printk inside here, or call something that may also enable interrupts? I thought the recursion check was also to prevent lockdep infrastructure calling something that lockdep monitors being a problem? Or am I missing something? -- Steve > + /* > + * NMIs do not (and cannot) track lock dependencies, nothing to do. > + */ > + if (in_nmi()) > + return; > + > + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & > LOCKDEP_RECURSION_MASK)) > return; > > if (unlikely(current->hardirqs_enabled)) { > @@ -3692,7 +3698,24 @@ void noinstr lockdep_hardirqs_on(unsigne > { > struct task_struct *curr = current; > > - if (unlikely(!debug_locks || curr->lockdep_recursion)) > + /* > + * NMIs can happen in the middle of local_irq_{en,dis}able() where the > + * tracking state and hardware state are out of sync. > + * > + * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from, > + * and not rely on hardware state like normal interrupts. > + */ > + if (in_nmi()) { > + /* > + * Skip: > + * - recursion check, because NMI can hit lockdep; > + * - hardware state check, because above; > + * - chain_key check, see lockdep_hardirqs_on_prepare(). > + */ > + goto skip_checks; > + } > + > + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_recursion & > LOCKDEP_RECURSION_MASK)) > return; > > if (curr->hardirqs_enabled) { > @@ -3720,6 +3743,7 @@ void noinstr lockdep_hardirqs_on(unsigne > DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key != > current->curr_chain_key); > > +skip_checks: > /* we'll do an OFF -> ON transition: */ > curr->hardirqs_enabled = 1; > curr->hardirq_enable_ip = ip; > @@ -3735,7 +3759,10 @@ void noinstr lockdep_hardirqs_off(unsign > { > struct task_struct *curr = current; > > - if (unlikely(!debug_locks || curr->lockdep_recursion)) > + /* > + * NMIs can happen in lockdep. > + */ > + if (!in_nmi() && DEBUG_LOCKS_WARN_ON(curr->lockdep_recursion & > LOCKDEP_RECURSION_MASK)) > return; > > /* >
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Tue Apr 14 20, Joerg Roedel wrote: Hi, here is the second version of this patch-set. The first version with some more introductory text can be found here: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/ Changes v1->v2: * Rebased to v5.7-rc1 * Re-wrote the arm-smmu changes as suggested by Robin Murphy * Re-worked the Exynos patches to hopefully not break the driver anymore * Fixed a missing mutex_unlock() reported by Marek Szyprowski, thanks for that. There is also a git-branch available with these patches applied: https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v2 Please review. Thanks, Joerg Joerg Roedel (32): iommu: Move default domain allocation to separate function iommu/amd: Implement iommu_ops->def_domain_type call-back iommu/vt-d: Wire up iommu_ops->def_domain_type iommu/amd: Remove dma_mask check from check_device() iommu/amd: Return -ENODEV in add_device when device is not handled by IOMMU iommu: Add probe_device() and remove_device() call-backs iommu: Move default domain allocation to iommu_probe_device() iommu: Keep a list of allocated groups in __iommu_probe_device() iommu: Move new probe_device path to separate function iommu: Split off default domain allocation from group assignment iommu: Move iommu_group_create_direct_mappings() out of iommu_group_add_device() iommu: Export bus_iommu_probe() and make is safe for re-probing iommu/amd: Remove dev_data->passthrough iommu/amd: Convert to probe/release_device() call-backs iommu/vt-d: Convert to probe/release_device() call-backs iommu/arm-smmu: Convert to probe/release_device() call-backs iommu/pamu: Convert to probe/release_device() call-backs iommu/s390: Convert to probe/release_device() call-backs iommu/virtio: Convert to probe/release_device() call-backs iommu/msm: Convert to probe/release_device() call-backs iommu/mediatek: Convert to probe/release_device() call-backs iommu/mediatek-v1 Convert to probe/release_device() call-backs iommu/qcom: Convert to probe/release_device() call-backs iommu/rockchip: Convert to probe/release_device() call-backs iommu/tegra: Convert to probe/release_device() call-backs iommu/renesas: Convert to probe/release_device() call-backs iommu/omap: Remove orphan_dev tracking iommu/omap: Convert to probe/release_device() call-backs iommu/exynos: Use first SYSMMU in controllers list for IOMMU core iommu/exynos: Convert to probe/release_device() call-backs iommu: Remove add_device()/remove_device() code-paths iommu: Unexport iommu_group_get_for_dev() Sai Praneeth Prakhya (1): iommu: Add def_domain_type() callback in iommu_ops drivers/iommu/amd_iommu.c | 97 drivers/iommu/amd_iommu_types.h | 1 - drivers/iommu/arm-smmu-v3.c | 38 +-- drivers/iommu/arm-smmu.c| 39 ++-- drivers/iommu/exynos-iommu.c| 24 +- drivers/iommu/fsl_pamu_domain.c | 22 +- drivers/iommu/intel-iommu.c | 68 +- drivers/iommu/iommu.c | 393 +--- drivers/iommu/ipmmu-vmsa.c | 60 ++--- drivers/iommu/msm_iommu.c | 34 +-- drivers/iommu/mtk_iommu.c | 24 +- drivers/iommu/mtk_iommu_v1.c| 50 ++-- drivers/iommu/omap-iommu.c | 99 ++-- drivers/iommu/qcom_iommu.c | 24 +- drivers/iommu/rockchip-iommu.c | 26 +-- drivers/iommu/s390-iommu.c | 22 +- drivers/iommu/tegra-gart.c | 24 +- drivers/iommu/tegra-smmu.c | 31 +-- drivers/iommu/virtio-iommu.c| 41 +--- include/linux/iommu.h | 21 +- 20 files changed, 533 insertions(+), 605 deletions(-) -- 2.17.1 ___ iommu mailing list io...@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Hi Joerg, With this patchset, I have an epyc system where if I boot with iommu=nopt and force a dump I will see some io page faults for a nic on the system. The vmcore is harvested and the system reboots. I haven't reproduced it on other systems yet, but without the patchset I don't see the io page faults during the kdump. Regards, Jerry
Re: [PATCH v12 2/4] dt-bindings: power: Convert battery.txt to battery.yaml
On Thu, May 28, 2020 at 05:53:48PM -0500, Ricardo Rivera-Matos wrote: > From: Dan Murphy > > Convert the battery.txt file to yaml and fix up the examples. > > Signed-off-by: Dan Murphy > --- > .../bindings/power/supply/battery.txt | 82 +- > .../bindings/power/supply/battery.yaml| 143 ++ > 2 files changed, 144 insertions(+), 81 deletions(-) > create mode 100644 > Documentation/devicetree/bindings/power/supply/battery.yaml > > diff --git a/Documentation/devicetree/bindings/power/supply/battery.txt > b/Documentation/devicetree/bindings/power/supply/battery.txt > index 3049cf88bdcf..b9a81621ce59 100644 > --- a/Documentation/devicetree/bindings/power/supply/battery.txt > +++ b/Documentation/devicetree/bindings/power/supply/battery.txt > @@ -1,81 +1 @@ > -Battery Characteristics > - > -The devicetree battery node provides static battery characteristics. > -In smart batteries, these are typically stored in non-volatile memory > -on a fuel gauge chip. The battery node should be used where there is > -no appropriate non-volatile memory, or it is unprogrammed/incorrect. > - > -Upstream dts files should not include battery nodes, unless the battery > -represented cannot easily be replaced in the system by one of a > -different type. This prevents unpredictable, potentially harmful, > -behavior should a replacement that changes the battery type occur > -without a corresponding update to the dtb. > - > -Required Properties: > - - compatible: Must be "simple-battery" > - > -Optional Properties: > - - voltage-min-design-microvolt: drained battery voltage > - - voltage-max-design-microvolt: fully charged battery voltage > - - energy-full-design-microwatt-hours: battery design energy > - - charge-full-design-microamp-hours: battery design capacity > - - precharge-current-microamp: current for pre-charge phase > - - charge-term-current-microamp: current for charge termination phase > - - constant-charge-current-max-microamp: maximum constant input current > - - constant-charge-voltage-max-microvolt: maximum constant input voltage > - - factory-internal-resistance-micro-ohms: battery factory internal > resistance > - - ocv-capacity-table-0: An array providing the open circuit voltage (OCV) > - of the battery and corresponding battery capacity percent, which is used > - to look up battery capacity according to current OCV value. And the open > - circuit voltage unit is microvolt. > - - ocv-capacity-table-1: Same as ocv-capacity-table-0 > - .. > - - ocv-capacity-table-n: Same as ocv-capacity-table-0 > - - ocv-capacity-celsius: An array containing the temperature in degree > Celsius, > - for each of the battery capacity lookup table. The first temperature value > - specifies the OCV table 0, and the second temperature value specifies the > - OCV table 1, and so on. > - - resistance-temp-table: An array providing the temperature in degree > Celsius > - and corresponding battery internal resistance percent, which is used to > look > - up the resistance percent according to current temperature to get a > accurate > - batterty internal resistance in different temperatures. > - > -Battery properties are named, where possible, for the corresponding > -elements in enum power_supply_property, defined in > -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/power_supply.h > - > -Batteries must be referenced by chargers and/or fuel-gauges > -using a phandle. The phandle's property should be named > -"monitored-battery". > - > -Example: > - > - bat: battery { > - compatible = "simple-battery"; > - voltage-min-design-microvolt = <320>; > - voltage-max-design-microvolt = <420>; > - energy-full-design-microwatt-hours = <529>; > - charge-full-design-microamp-hours = <143>; > - precharge-current-microamp = <256000>; > - charge-term-current-microamp = <128000>; > - constant-charge-current-max-microamp = <90>; > - constant-charge-voltage-max-microvolt = <420>; > - factory-internal-resistance-micro-ohms = <25>; > - ocv-capacity-celsius = <(-10) 0 10>; > - ocv-capacity-table-0 = <4185000 100>, <4113000 95>, <4066000 > 90>, ...; > - ocv-capacity-table-1 = <420 100>, <4185000 95>, <4113000 > 90>, ...; > - ocv-capacity-table-2 = <425 100>, <420 95>, <4185000 > 90>, ...; > - resistance-temp-table = <20 100>, <10 90>, <0 80>, <(-10) 60>; > - }; > - > - charger: charger@11 { > - > - monitored-battery = <&bat>; > - ... > - }; > - > - fuel_gauge: fuel-gauge@22 { > - > - monitored-battery = <&bat>; > - ... > - }; > +The contents of this file has been moved to battery.yaml > diff --git a/Documentation/devicetree/bindings/p
Re: [PATCH v12 3/4] dt-bindings: power: Add the bindings for the bq2515x family of chargers.
On Thu, May 28, 2020 at 05:53:49PM -0500, Ricardo Rivera-Matos wrote: > The BQ2515X family of devices are highly integrated battery management > ICs that integrate the most common functions for wearable devices > namely a charger, an output voltage rail, ADC for battery and system > monitoring, and a push-button controller. > > Datasheets: > http://www.ti.com/lit/ds/symlink/bq25150.pdf > http://www.ti.com/lit/ds/symlink/bq25155.pdf > > Signed-off-by: Ricardo Rivera-Matos > --- > .../bindings/power/supply/bq2515x.yaml| 91 +++ > 1 file changed, 91 insertions(+) > create mode 100644 > Documentation/devicetree/bindings/power/supply/bq2515x.yaml > > diff --git a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml > b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml > new file mode 100644 > index ..19cb336d581e > --- /dev/null > +++ b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml > @@ -0,0 +1,91 @@ > +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) > +# Copyright (C) 2020 Texas Instruments Incorporated > +%YAML 1.2 > +--- > +$id: "http://devicetree.org/schemas/power/supply/bq2515x.yaml#"; > +$schema: "http://devicetree.org/meta-schemas/core.yaml#"; > + > +title: TI bq2515x 500-mA Linear charger family > + > +maintainers: > + - Dan Murphy > + - Ricardo Rivera-Matos > + > +description: | > + The BQ2515x family is a highly integrated battery charge management IC that > + integrates the most common functions for wearable devices, namely a > charger, > + an output voltage rail, ADC for battery and system monitoring, and > + push-button controller. > + > + Specifications about the charger can be found at: > +http://www.ti.com/lit/ds/symlink/bq25150.pdf > +http://www.ti.com/lit/ds/symlink/bq25155.pdf > + > +properties: > + compatible: > +enum: > + - ti,bq25150 > + - ti,bq25155 > + > + reg: > +maxItems: 1 > +description: I2C address of the charger. > + > + ac-detect-gpios: > +description: | > + GPIO used for connecting the bq2515x device PG (AC Detect) > + pin. > +maxItems: 1 > + > + reset-gpios: > +description: GPIO used for hardware reset. > +maxItems: 1 > + > + powerdown-gpios: > +description: GPIO used for low power mode of IC. > +maxItems: 1 > + > + charge-enable-gpios: > +description: GPIO used to turn on and off charging. > +maxItems: 1 > + > + input-current-limit-microamp: > +$ref: /schemas/types.yaml#/definitions/uint32 > +description: Maximum input current in micro Amps. > +minimum: 5 > +maximum: 50 > + > + monitored-battery: > +$ref: battery.yaml# This doesn't work. It's saying monitored-battery is a node containing all the properties in battery.yaml. > + > +required: > + - compatible > + - reg How is the battery optional? > + > +additionalProperties: false > + > +examples: > + - | > +bat: battery { > + compatible = "simple-battery"; > + constant-charge-current-max-microamp = <5>; > + precharge-current-microamp = <2500>; > + constant-charge-voltage-max-microvolt = <400>; > +}; > +#include > +i2c0 { > + #address-cells = <1>; > + #size-cells = <0>; > + > + bq25150: charger@6b { > +compatible = "ti,bq25150"; > +reg = <0x6b>; > +monitored-battery = <&bat>; > +input-current-limit-microamp = <10>; > + > +ac-detect-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>; > +reset-gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>; > +powerdown-gpios = <&gpio0 15 GPIO_ACTIVE_HIGH>; > +charge-enable-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>; > + }; > +}; > -- > 2.26.2 >
Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro
On Sat, May 30, 2020 at 1:11 AM Syed Nayyar Waris wrote: > On Sat, May 30, 2020 at 3:13 AM Andy Shevchenko > wrote: > > On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris > > wrote: > > > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko > > > wrote: > > > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote: > > > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot > > > > > wrote: > > > > ... > > > > > > > Taking the example statement (in my patch) where compilation warning > > > > > is getting reported: > > > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0); > > > > > > > > > > 'nbits' is of type 'unsigned long'. > > > > > In above, the sanity check is comparing '0' with unsigned value. And > > > > > unsigned value can't be less than '0' ever, hence the warning. > > > > > But this warning will occur whenever there will be '0' as one of the > > > > > 'argument' and an unsigned variable as another 'argument' for GENMASK. > > > > > > Proper fix is to fix GENMASK(), but allowed workaround is to use > > > > (BIT(nbits) - 1) > > > > instead. > > > > > When I used BIT macro (earlier), I had faced a problem. I want to tell > > > you about that. > > > > > > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or > > > clump size) is BITS_PER_LONG, unexpected calculation happens. > > > > > > Explanation: > > > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my > > > computer), > > > (BIT(nbits) - 1) > > > gives a value of zero and when this zero is ANDed with any value, it > > > makes it full zero. This is unexpected and incorrect calculation > > > happening. > > > > > > What actually happens is in the macro expansion of BIT(64), that is 1 > > > << 64, the '1' overflows from leftmost bit position (most significant > > > bit) and re-enters at the rightmost bit position (least significant > > > bit), therefore 1 << 64 becomes '0x1', and when another '1' is > > > subtracted from this, the final result becomes 0. > > > > > > Since this macro is being used in both bitmap_get_value and > > > bitmap_set_value functions, it will give unexpected results when nbits or > > > clump > > > size is BITS_PER_LONG (32 or 64 depending on arch). > > > > I see, something like > > https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139 > > should be done. > > But yes, let's try to fix GENMASK(). > > > > So, if we modify the following > > > > #define GENMASK_INPUT_CHECK(h, l) \ > > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > > __builtin_constant_p((l) > (h)), (l) > (h), 0))) > > > > to be > > > > #define GENMASK_INPUT_CHECK(h, l) \ > > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > > __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0))) > > > > would it work? > > Sorry Andy it is not working. Actually the warning will be thrown, > whenever there will be comparison between 'h' and 'l'. If one of them > is '0' and the other is unsigned variable. > In above, still there is comparison being done between 'h' and 'l', so > the warning is getting thrown. Ah, okay what about (l) && ((l) > (h)) ? > > > William also knows about this issue: > > > "This is undefined behavior in the C standard (section 6.5.7 in the > > > N1124)" > > > > I think it is about 6.5.7.3 here, 1U << 31 (or 63) is okay. > > Actually for: > (BIT(nbits) - 1) > When nbits will be BITS_PER_LONG it will be 1U << 32 (or 64). Isn't it ? > The expression, > BIT(64) - 1 > can become unexpectedly zero (incorrectly). Yes, that's why I pointed out to the paragraph. It's about right operand to be "great than or equal to" the size of type of left operand. -- With Best Regards, Andy Shevchenko
Re: [PATCH] vfio/mdev: Fix reference count leak in add_mdev_supported_type.
On Wed, 27 May 2020 21:01:09 -0500 wu000...@umn.edu wrote: > From: Qiushi Wu > > kobject_init_and_add() takes reference even when it fails. > If this function returns an error, kobject_put() must be called to > properly clean up the memory associated with the object. Thus, > replace kfree() by kobject_put() to fix this issue. Previous > commit "b8eb718348b8" fixed a similar problem. > > Fixes: 7b96953bc640 ("vfio: Mediated device Core driver") > Signed-off-by: Qiushi Wu > --- Applied to vfio next branch for v5.8 with Connie's and Kirti's reviews. Thanks, Alex > drivers/vfio/mdev/mdev_sysfs.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c > index 8ad14e5c02bf..917fd84c1c6f 100644 > --- a/drivers/vfio/mdev/mdev_sysfs.c > +++ b/drivers/vfio/mdev/mdev_sysfs.c > @@ -110,7 +110,7 @@ static struct mdev_type *add_mdev_supported_type(struct > mdev_parent *parent, > "%s-%s", dev_driver_string(parent->dev), > group->name); > if (ret) { > - kfree(type); > + kobject_put(&type->kobj); > return ERR_PTR(ret); > } >
Re: [PATCH v3] regmap: fix alignment issue
On Fri, May 29, 2020 at 09:25:38PM +0200, Jens Thoms Toerring wrote: > The assembly and disassembly of data to be sent to or received from a > device invoke functions (regmap_format_XXX() and regmap_parse_XXX()) > that extract or insert data items from or into a buffer, using > assignments. In some cases those functions are called with buffer This doesn't apply against current code, please check and resend: HEAD is now at 93b929922dba Merge series "regmap: provide simple bitops and use them in a driver" from Bartosz Golaszewski Bartosz Golaszewski : Applying: regmap: fix alignment issue Using index info to reconstruct a base tree... M drivers/base/regmap/regmap.c error: patch failed: drivers/base/regmap/regmap.c:261 error: drivers/base/regmap/regmap.c: patch does not apply error: Did you hand edit your patch? It does not apply to blobs recorded in its index. Patch failed at 0001 regmap: fix alignment issue (it also didn't apply against v5.7-rc1 and git didn't know the blobs it was based on, the most recent change to that code was in 2015 it seems...). signature.asc Description: PGP signature
Re: [PATCH v6 00/16] spi: dw: Add generic DW DMA controller support
On Fri, May 29, 2020 at 03:22:26PM -0600, Rob Herring wrote: > On Fri, May 29, 2020 at 06:33:25PM +0100, Mark Brown wrote: > > Please rebase. TBH I'd not noticed Rob's review so I just left it > > waiting for that, there's such a huge backlog there it didn't occur to > > me that it might've been reviewed. > Hey, I'm down to about 10 patches now. I think I'll take the rest of the > week off. Ah, nice! Good to hear. signature.asc Description: PGP signature
Re: [PATCH] dt-bindings: Merge gpio-usb-b-connector with usb-connector
On Fri, 29 May 2020 20:06:31 +0200, Thierry Reding wrote: > From: Thierry Reding > > The binding for usb-connector is a superset of gpio-usb-b-connector. One > major difference is that gpio-usb-b-connector requires at least one of > the vbus-gpios and id-gpios properties to be specified. Merge the two > bindings by adding the compatible string combination for the GPIO USB-B > variant and an extra conditional for the required properties list to the > usb-connector.yaml file. > > Signed-off-by: Thierry Reding > --- > .../bindings/connector/usb-connector.yaml | 39 +-- > .../devicetree/bindings/usb/usb-conn-gpio.txt | 30 -- > 2 files changed, 35 insertions(+), 34 deletions(-) > delete mode 100644 Documentation/devicetree/bindings/usb/usb-conn-gpio.txt > Applied, thanks!
Re: [PATCH v2 2/4] dt-bindings: pinctrl: Document optional BCM7211 wake-up interrupts
On Fri, 29 May 2020 12:15:20 -0700, Florian Fainelli wrote: > BCM7211 supports wake-up interrupts in the form of optional interrupt > lines, one per bank, plus the "all banks" interrupt line. > > Signed-off-by: Florian Fainelli > --- > .../devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > Acked-by: Rob Herring
[GIT PULL] ARM: SoC fixes for v5.7
The following changes since commit b9bbe6ed63b2b9f2c9ee5cbd0f2c946a2723f4ce: Linux 5.7-rc6 (2020-05-17 16:48:37 -0700) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git tags/armsoc-fixes-v5.7 for you to fetch changes up to 99706d62fb50486eadb4441eaed311491fd7addf: Merge tag 'omap-for-v5.7/cpsw-fixes-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into arm/fixes (2020-05-26 00:18:48 +0200) ARM: SoC fixes for v5.7 This time there is one fix for the error path in the mediatek cmdq driver (used by their video driver) and a couple of devicetree fixes, mostly for 32-bit ARM, and fairly harmless: - On OMAP2 there were a few regressions in the ethernet drivers, one of them leading to an external abort trap - One Raspberry Pi version had a misconfigured LED - Interrupts on Broadcom NSP were slightly misconfigured - One i.MX6q board had issues with graphics mode setting - On mmp3 there are some minor fixes that were submitted for v5.8 with a cc:stable tag, so I ended up picking them up here as well - The Mediatek Video Codec needs to run at a higher frequency than configured originally Signed-off-by: Arnd Bergmann Arnd Bergmann (5): Merge branch 'mmp/fixes' into arm/fixes Merge tag 'imx-fixes-5.7-2' of git://git.kernel.org/.../shawnguo/linux into arm/fixes Merge branch 'v5.7-fixes' of git://git.kernel.org/.../matthias.bgg/linux into arm/fixes Merge tag 'arm-soc/for-5.7/devicetree-fixes-part2-v2' of https://github.com/Broadcom/stblinux into arm/fixes Merge tag 'omap-for-v5.7/cpsw-fixes-signed' of git://git.kernel.org/.../tmlind/linux-omap into arm/fixes Dennis YC Hsieh (1): soc: mediatek: cmdq: return send msg error code Grygorii Strashko (2): ARM: dts: am57xx: fix networking on boards with ksz9031 phy ARM: dts: am437x: fix networking on boards with ksz9031 phy Hamish Martin (1): ARM: dts: bcm: HR2: Fix PPI interrupt types Hsin-Yi Wang (1): arm64: dts: mt8173: fix vcodec-enc clock Lubomir Rintel (3): ARM: dts: mmp3: Use the MMP3 compatible string for /clocks ARM: dts: mmp3-dell-ariel: Fix the SPI devices ARM: dts: mmp3: Drop usb-nop-xceiv from HSIC phy Robert Beckett (1): ARM: dts/imx6q-bx50v3: Set display interface clock parents Tony Lindgren (1): ARM: dts: Fix wrong mdio clock for dm814x Vincent Stehlé (1): ARM: dts: bcm2835-rpi-zero-w: Fix led polarity arch/arm/boot/dts/am437x-gp-evm.dts | 2 +- arch/arm/boot/dts/am437x-idk-evm.dts| 2 +- arch/arm/boot/dts/am437x-sk-evm.dts | 4 ++-- arch/arm/boot/dts/am571x-idk.dts| 4 ++-- arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi | 4 ++-- arch/arm/boot/dts/am57xx-idk-common.dtsi| 4 ++-- arch/arm/boot/dts/bcm-hr2.dtsi | 6 +++--- arch/arm/boot/dts/bcm2835-rpi-zero-w.dts| 2 +- arch/arm/boot/dts/dm814x.dtsi | 2 +- arch/arm/boot/dts/imx6q-b450v3.dts | 7 --- arch/arm/boot/dts/imx6q-b650v3.dts | 7 --- arch/arm/boot/dts/imx6q-b850v3.dts | 11 --- arch/arm/boot/dts/imx6q-bx50v3.dtsi | 15 +++ arch/arm/boot/dts/mmp3-dell-ariel.dts | 12 ++-- arch/arm/boot/dts/mmp3.dtsi | 8 +++- arch/arm64/boot/dts/mediatek/mt8173.dtsi| 4 ++-- drivers/soc/mediatek/mtk-cmdq-helper.c | 4 +++- 17 files changed, 44 insertions(+), 54 deletions(-)
[ANNOUNCE] 5.4.43-rt25
Dear RT Folks, I'm pleased to announce the 5.4.43-rt25 stable release. This release is just an update to the new stable 5.4.43 version and no RT specific changes have been made. You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git branch: v5.4-rt Head SHA1: 2a84edbec2cb7d50c72e51a1021aff9c4c3bb800 Or to build 5.4.43-rt25 directly, the following patches should be applied: http://www.kernel.org/pub/linux/kernel/v5.x/linux-5.4.tar.xz http://www.kernel.org/pub/linux/kernel/v5.x/patch-5.4.43.xz http://www.kernel.org/pub/linux/kernel/projects/rt/5.4/patch-5.4.43-rt25.patch.xz Enjoy, -- Steve
Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking
On Fri, May 29, 2020 at 06:14:01PM -0400, Steven Rostedt wrote: > On Fri, 29 May 2020 23:27:41 +0200 > Peter Zijlstra wrote: > > > There is no reason not to always, accurately, track IRQ state. > > > > This change also makes IRQ state tracking ignore lockdep_off(). > > > > Signed-off-by: Peter Zijlstra (Intel) > > --- > > kernel/locking/lockdep.c | 33 ++--- > > 1 file changed, 30 insertions(+), 3 deletions(-) > > > > --- a/kernel/locking/lockdep.c > > +++ b/kernel/locking/lockdep.c > > @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v > > */ > > void lockdep_hardirqs_on_prepare(unsigned long ip) > > { > > - if (unlikely(!debug_locks || current->lockdep_recursion)) > > Why remove the check for debug_locks? Isn't that there to disable > everything at once to prevent more warnings to be printed? Yeah, maybe. I was thinking we could keep IRQ state running. But you're right, if we mess up the IRQ state itself this might generate a wee mess. > Also, isn't there other ways that we could have recursion besides NMIs? > Say we do a printk inside here, or call something that may also enable > interrupts? I thought the recursion check was also to prevent lockdep > infrastructure calling something that lockdep monitors being a problem? > > Or am I missing something? > > + /* > > +* NMIs do not (and cannot) track lock dependencies, nothing to do. > > +*/ > > + if (in_nmi()) > > + return; > > + > > + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & > > LOCKDEP_RECURSION_MASK)) > > return; ^^ there's your regular recursion check.
Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")
On 29.05.2020 22:59, Matthew Garrett wrote: > On Fri, May 29, 2020 at 03:21:35PM -0500, Bjorn Helgaas wrote: > >> Yeah, that makes sense. I can't remember the details, but I'm pretty >> sure there's a reason why we ask for the whole set of things. Seems >> like it solved some problem. I think Matthew Garrett might have been >> involved in that. > > This was https://bugzilla.redhat.com/show_bug.cgi?id=638912 - some > firmware misbehaves unless you pass the same set of supported > functionality as Windows does. > Current situation means that PME is unusable on all systems where pcie_aspm_support_enabled() returns false, what is basically every system except EXPERT mode is enabled and CONFIG_PCIEASPM is set. So we definitely need to do something. One question is whether the system from the 10yr old bug report actually depends on OSC_PCI_EXPRESS_LTR_CONTROL control, or whether some other change in recent years fixed the issue. Not sure whether the system is still available for re-testing. If worst case we have 10yr old systems breaking with a new kernel then we still would have the workaround to enable CONFIG_PCIEASPM on that system.
Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
On 2020-05-29 13:27, Matthew Wilcox wrote: > On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote: >> There is a function named ilog2() exist which can replace blksize. >> The generated code will be shorter and more efficient on some >> architecture, such as arm64. And ilog2() can be optimized according >> to different architecture. > > We'd get the same benefit from a smaller patch with just: > > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue > *q, unsigned long addr, > return !(addr & alignment) && !(len & alignment); > } > > -/* assumes size > 256 */ > static inline unsigned int blksize_bits(unsigned int size) > { > - unsigned int bits = 8; > - do { > - bits++; > - size >>= 1; > - } while (size > 256); > - return bits; > + return ilog2(size); > } > > static inline unsigned int block_size(struct block_device *bdev) Hi Matthew, I had suggested to change all blksize_bits() calls into ilog2() calls because I think that a single function to calculate the logarithm base 2 is sufficient. Thanks, Bart.
donation
You have a sum donation for Catholic Charities Foundation, kindly contact the Program Coordinator through her email anastasia.mc...@gmail.com for details of claim. Regard Sir. Angelo Tony.
Re: [RFC PATCH 2/2] drm: xlnx: driver for Xilinx DSI TX Subsystem
Hi Laurent, On Wed, 2020-05-27 at 15:45:24 -0700, Laurent Pinchart wrote: > Hi Hyun, > > On Wed, May 27, 2020 at 10:54:35AM -0700, Hyun Kwon wrote: > > On Sat, 2020-05-23 at 20:08:13 -0700, Laurent Pinchart wrote: > > > On Mon, May 04, 2020 at 11:43:48AM -0700, Hyun Kwon wrote: > > >> On Mon, 2020-04-20 at 14:20:56 -0700, Venkateshwar Rao Gannavarapu wrote: > > >>> The Xilinx MIPI DSI Tx Subsystem soft IP is used to display video > > >>> data from AXI-4 stream interface. > > >>> > > >>> It supports upto 4 lanes, optional register interface for the DPHY, > > >> > > >> I don't see the register interface for dphy support. > > > > > > I think the D-PHY should be supported through a PHY driver, as it seems > > > to be shared between different subsystems. > > > > Right, if the logic is shared across subsystems. I can't tell if that's > > the case as the IP comes as a single block. Maybe GVRao can confirm. > > I believe the CSI2-RX subsystem uses the same D-PHY IP core, but a > confirmation would be nice. > > > >>> multiple RGB color formats, command mode and video mode. > > >>> This is a MIPI-DSI host driver and provides DSI bus for panels. > > >>> This driver also helps to communicate with its panel using panel > > >>> framework. > > >>> > > >>> Signed-off-by: Venkateshwar Rao Gannavarapu > > >>> > > >>> --- > > >>> drivers/gpu/drm/xlnx/Kconfig| 11 + > > >>> drivers/gpu/drm/xlnx/Makefile | 2 + > > >>> drivers/gpu/drm/xlnx/xlnx_dsi.c | 755 > > >>> > > > > > > Daniel Vetter has recently expressed his opiion that bridge drivers > > > should go to drivers/gpu/drm/bridge/. It would then be > > > drivers/gpu/drm/bridge/xlnx/. I don't have a strong opinion myself. > > > > > >>> 3 files changed, 768 insertions(+) > > >>> create mode 100644 drivers/gpu/drm/xlnx/xlnx_dsi.c > > >>> > > >>> diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig > > >>> index aa6cd88..73873cf 100644 > > >>> --- a/drivers/gpu/drm/xlnx/Kconfig > > >>> +++ b/drivers/gpu/drm/xlnx/Kconfig > > >>> @@ -11,3 +11,14 @@ config DRM_ZYNQMP_DPSUB > > >>> This is a DRM/KMS driver for ZynqMP DisplayPort controller. > > >>> Choose > > >>> this option if you have a Xilinx ZynqMP SoC with DisplayPort > > >>> subsystem. > > >>> + > > >>> +config DRM_XLNX_DSI > > >>> +tristate "Xilinx DRM DSI Subsystem Driver" > > >>> +select DRM_MIPI_DSI > > >>> +select DRM_PANEL > > >>> +select DRM_PANEL_SIMPLE > > >>> +help > > >>> + This enables support for Xilinx MIPI-DSI. > > >> > > >> This sentence is not needed with below. Could you please rephrase the > > >> whole? > > >> > > >>> + This is a DRM/KMS driver for Xilinx programmable DSI > > >>> controller. > > >>> + Choose this option if you have a Xilinx MIPI DSI-TX controller > > >>> + subsytem. > > >> > > >> These seem incorrectly indented. > > >> > > >>> diff --git a/drivers/gpu/drm/xlnx/Makefile > > >>> b/drivers/gpu/drm/xlnx/Makefile > > >>> index 2b844c6..b7ee6ef 100644 > > >>> --- a/drivers/gpu/drm/xlnx/Makefile > > >>> +++ b/drivers/gpu/drm/xlnx/Makefile > > >>> @@ -1,2 +1,4 @@ > > >>> zynqmp-dpsub-objs += zynqmp_disp.o zynqmp_dpsub.o zynqmp_dp.o > > >>> obj-$(CONFIG_DRM_ZYNQMP_DPSUB) += zynqmp-dpsub.o > > >>> + > > >>> +obj-$(CONFIG_DRM_XLNX_DSI) += xlnx_dsi.o > > >>> diff --git a/drivers/gpu/drm/xlnx/xlnx_dsi.c > > >>> b/drivers/gpu/drm/xlnx/xlnx_dsi.c > > >>> new file mode 100644 > > >>> index 000..b8cae59 > > >>> --- /dev/null > > >>> +++ b/drivers/gpu/drm/xlnx/xlnx_dsi.c > > >>> @@ -0,0 +1,755 @@ > > >>> +// SPDX-License-Identifier: GPL-2.0 > > >>> +/* > > >>> + * Xilinx FPGA MIPI DSI Tx Controller driver > > >>> + * > > >>> + * Copyright (C) 2017 - 2019 Xilinx, Inc. > > >>> + * > > >>> + * Authors: > > >>> + * - Saurabh Sengar > > >>> + * - Venkateshwar Rao Gannavarapu > > >>> > > >>> + */ > > >>> + > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> + > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> + > > >>> +#include > > >>> +#include > > >>> + > > >>> +/* DSI Tx IP registers */ > > >>> +#define XDSI_CCR 0x00 > > >>> +#define XDSI_CCR_COREENB BIT(0) > > >>> +#define XDSI_CCR_SOFTRST BIT(1) > > >>> +#define XDSI_CCR_CRREADY BIT(2) > > >>> +#define XDSI_CCR_CMDMODE BIT(3) > > >>> +#define XDSI_CCR_DFIFORST BIT(4) > > >>> +#define XDSI_CCR_CMDFIFORSTBIT(5) > > >>> +#define XDSI_PCR 0x04 > > > > [snip] > > > > >>> + } > > >>> + > > >>> + ret = clk_prepare_enable(dsi->video_aclk); > > >>> + if (ret) { > > >>
Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking
On Sat, 30 May 2020 00:25:05 +0200 Peter Zijlstra wrote: > > > + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & > > > LOCKDEP_RECURSION_MASK)) > > > return; > > ^^ there's your regular recursion check. Yes, but this is more of a "bug if it happens" than just "ignore it". -- Steve
Re: Regression with PM / wakeup: Show wakeup sources stats in sysfs"
On Fri, May 29, 2020 at 9:51 AM Rafael J. Wysocki wrote: > > On 5/28/2020 10:46 PM, Florian Fainelli wrote: > > Hi, > > > > Commit c8377adfa78103be5380200eb9dab764d7ca890e ("PM / wakeup: Show > > wakeup sources stats in sysfs") is causing some of our tests to fail > > because /sys/class/net/*/device/power/wakeup_count is now 0, despite > > /sys/kernel/debug/wakeup_sources clearly indicating that the Ethernet > > device was responsible for system wake-up. > > > > What's more in looking at /sys/class/wakekup/wakeup2/event_count, we > > have the number of Wake-on-LAN wakeups recorded properly, but > > wakeup_count is desperately 0, why is that? > > I need to look at that commit in detail to find out what is going on. It would be helpful to see the contents of /sys/kernel/debug/wakeup_sources, /sys/class/net/*/device/power/*, and /sys/class/wakekup/* corresponding to the device in question. The values in these files are queried from the same struct wakeup_source. So it's odd if wakeup_count diverges.
Re: [PATCH v3 0/2] dtc: Improve checks for i2c reg properties
On Thu, May 28, 2020 at 2:57 AM Joel Stanley wrote: > > This is to fix a build warning in the Linux kernel caused by dtc > incorrectly warning about I2C_OWN_SLAVE_ADDRESS. > > v3 fixes the 10 bit size check > v2 contains a second patch to check for 10 bit vs 7 bit addresses. > > Joel Stanley (2): > checks: Remove warning for I2C_OWN_SLAVE_ADDRESS > checks: Improve i2c reg property checking Reviewed-by: Rob Herring I'll sync the kernel copy when David applies. Rob
Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"
On 05/29, Chao Yu wrote: > Under heavy fsstress, we may triggle panic while issuing discard, > because __check_sit_bitmap() detects that discard command may earse > valid data blocks, the root cause is as below race stack described, > since we removed lock when flushing quota data, quota data writeback > may race with write_checkpoint(), so that it causes inconsistency in > between cached discard entry and segment bitmap. > > - f2fs_write_checkpoint > - block_operations > - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH) > - f2fs_flush_sit_entries > - add_discard_addrs >- __set_bit_le(i, (void *)de->discard_map); > - f2fs_write_data_pages >- f2fs_write_single_data_page > : inode is quota one, > cp_rwsem won't be locked > - f2fs_do_write_data_page > - f2fs_allocate_data_block > - f2fs_wait_discard_bio > : discard entry has not > been added yet. > - update_sit_entry > - f2fs_clear_prefree_segments > - f2fs_issue_discard > : add discard entry > > This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix quota_sync > failure due to f2fs_lock_op"). > > Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op") The previous patch fixes quota_sync gets EAGAIN all the time. How about this? It seems this works for fsstress test. --- fs/f2fs/segment.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ebbadde6cbced..f67cffc38975e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3095,6 +3095,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct curseg_info *curseg = CURSEG_I(sbi, type); bool put_pin_sem = false; + /* +* We need to wait for node_write to avoid block allocation during +* checkpoint. This can only happen to quota writes which can cause +* the below discard race condition. +*/ + if (IS_DATASEG(type)) + down_write(&sbi->node_write); + if (type == CURSEG_COLD_DATA) { /* GC during CURSEG_COLD_DATA_PINNED allocation */ if (down_read_trylock(&sbi->pin_sem)) { @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (put_pin_sem) up_read(&sbi->pin_sem); + + if (IS_DATASEG(type)) + up_write(&sbi->node_write); } static void update_device_state(struct f2fs_io_info *fio) -- 2.27.0.rc0.183.gde8f92d652-goog
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Fri, May 29, 2020 at 6:31 AM Christian Brauner wrote: > > > > + /* Check if we were woken up by a addfd message */ > > > + addfd = list_first_entry_or_null(&n.addfd, > > > +struct seccomp_kaddfd, list); > > > + if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) { > > > + seccomp_handle_addfd(addfd); > > > + mutex_unlock(&match->notify_lock); > > > + goto wait; > > > + } > > > ret = n.val; > > > err = n.error; > > > flags = n.flags; > > > } > > > > > > + /* If there were any pending addfd calls, clear them out */ > > > + list_for_each_entry_safe(addfd, tmp, &n.addfd, list) { > > > + /* The process went away before we got a chance to handle it > > > */ > > > + addfd->ret = -ESRCH; > > > + list_del_init(&addfd->list); > > > + complete(&addfd->completion); > > > + } > > I forgot to ask this in my first review before, don't you need a > complete(&addfd->completion) call in seccomp_notify_release() before > freeing it? > When complete(&knotif->ready) is called in seccomp_notify_release, subsequently the notifier (seccomp_do_user_notification) will be woken up and it'll fail this check: if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) Falling through to: /* If there were any pending addfd calls, clear them out */ list_for_each_entry_safe(addfd, tmp, &n.addfd, list) { /* The process went away before we got a chance to handle it */ addfd->ret = -ESRCH; list_del_init(&addfd->list); complete(&addfd->completion); } Although ESRCH isn't the "right" response, this fall through behaviour should work.
Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking
On Sat, May 30, 2020 at 12:25:05AM +0200, Peter Zijlstra wrote: > On Fri, May 29, 2020 at 06:14:01PM -0400, Steven Rostedt wrote: > > On Fri, 29 May 2020 23:27:41 +0200 > > Peter Zijlstra wrote: > > > > > There is no reason not to always, accurately, track IRQ state. > > > > > > This change also makes IRQ state tracking ignore lockdep_off(). > > > > > > Signed-off-by: Peter Zijlstra (Intel) > > > --- > > > kernel/locking/lockdep.c | 33 ++--- > > > 1 file changed, 30 insertions(+), 3 deletions(-) > > > > > > --- a/kernel/locking/lockdep.c > > > +++ b/kernel/locking/lockdep.c > > > @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v > > > */ > > > void lockdep_hardirqs_on_prepare(unsigned long ip) > > > { > > > - if (unlikely(!debug_locks || current->lockdep_recursion)) > > > > Why remove the check for debug_locks? Isn't that there to disable > > everything at once to prevent more warnings to be printed? > > Yeah, maybe. I was thinking we could keep IRQ state running. But you're > right, if we mess up the IRQ state itself this might generate a wee > mess. That is, mostly the IRQ state recovers when we mess up. It's only when we then trigger more fail that we crash and burn, and that will likely already give more warnings. But I can put the debug_locks check back.
Re: Regression with PM / wakeup: Show wakeup sources stats in sysfs"
On 5/29/20 3:28 PM, Tri Vo wrote: > On Fri, May 29, 2020 at 9:51 AM Rafael J. Wysocki > wrote: >> >> On 5/28/2020 10:46 PM, Florian Fainelli wrote: >>> Hi, >>> >>> Commit c8377adfa78103be5380200eb9dab764d7ca890e ("PM / wakeup: Show >>> wakeup sources stats in sysfs") is causing some of our tests to fail >>> because /sys/class/net/*/device/power/wakeup_count is now 0, despite >>> /sys/kernel/debug/wakeup_sources clearly indicating that the Ethernet >>> device was responsible for system wake-up. >>> >>> What's more in looking at /sys/class/wakekup/wakeup2/event_count, we >>> have the number of Wake-on-LAN wakeups recorded properly, but >>> wakeup_count is desperately 0, why is that? >> >> I need to look at that commit in detail to find out what is going on. > > It would be helpful to see the contents of > /sys/kernel/debug/wakeup_sources, /sys/class/net/*/device/power/*, and > /sys/class/wakekup/* corresponding to the device in question. The > values in these files are queried from the same struct wakeup_source. > So it's odd if wakeup_count diverges. Most certainly, below is the information you want, the two cat /s/k/d/wakeup_sources were done before Wake-on-LAN and after waking-up from LAN. /sys/class/wakeup/wakeup2 maps to the Ethernet device. The Ethernet device calls pm_wakeup_event() against the struct device that is embedded in the platform_device that it was probed with. I will try to debug this myself over the weekend, time permitting. # ethtool -s eth0 wol g # cat /sys/kernel/debug/wakeup_sources nameactive_countevent_count wakeup_count expire_countactive_sincetotal_time max_timelast_changep revent_suspend_time 47d58.ethernet 0 0 0 0 0 0 0 0 0 alarmtimer 0 0 0 0 0 0 0 0 0 47c408400.waketimer 2 2 0 0 0 0 0 6144 1 0 # pml -w20 [ 3449.937142] brcm-waketimer 47c408400.waketimer: Using sysfs attributes, consider using 'rtcwake' Pass 1 out of 1, mode=none, tp_al[ 3449.952654] PM: suspend entry (shallow) l=1, cycle_tp=, sleep=, [ 3449.959004] Filesystems sync: 0.000 seconds wakeup_time=20 [ 3449.965984] Freezing user space processes ... (elapsed 0.001 seconds) done. [ 3449.974087] OOM killer disabled. [ 3449.977316] Freezing remaining freezable tasks ... (elapsed 0.006 seconds) done. [ 3449.991114] printk: Suspending console(s) (use no_console_suspend to debug) AMS: System is entering S2... [ 3450.022381] bcmgenet 47d58.ethernet eth0: Link is Down [ 3450.048340] Disabling non-boot CPUs ... [ 3450.049344] CPU1: shutdown [ 3450.050393] psci: CPU1 killed (polled 1 ms) [ 3450.051332] Enabling non-boot CPUs ... [ 3450.051712] Detected PIPT I-cache on CPU1 [ 3450.051812] CPU1: Booted secondary processor 0x01 [0x410fd083] [ 3450.052435] CPU1 is up [ 3450.683588] bcmgenet 47d58.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx [ 3450.729677] OOM killer enabled. [ 3450.732908] Restarting tasks ... done. [ 3450.738539] PM: suspend exit -- [ 3450.744239] brcm-waketimer 47c408400.waketimer: Using sysfs attributes, consider using 'rtcwake' # cat /sys/kernel/debug/wakeup_sources nameactive_countevent_count wakeup_count expire_countactive_sincetotal_time max_timelast_changep revent_suspend_time 47d58.ethernet 1 1 0 0 0 0 0 3450 054 0 alarmtimer 0 0 0 0 0 0 0 0 0 47c408400.waketimer 2 2 0 0 0 0 0 6144 1 0 # cat /sys/class/net/*/device/power/* cat: read error: Input/output error auto 0 unsupported 0 enabled 0 0 1 0 0 3450054 0 0 # ls -l /sys/class/net/eth0/device/ driver/ net/ subsystem/ wakeup/ driver_override of_node/ uevent modalias power/ unimac-mdio.0/ # ls -l /sys/class/net/eth0/device/power/wakeup wakeupwakeup_active_count wakeup_last_time_ms wakeup_abort_countwakeup_count wakeup_max_time_ms wakeup_active wakeup_expire_count wakeup_total_time_ms # ls -l /sys/class/net/eth0/device/power/wakeup^C # ls -l /sys/class/wakeup/wakeup wakeup0/ wakeup1/ wakeup2/ # ls -l /sys/class/wakeup/wakeup2/ total 0 -r--r--r--1 root root 4096 Jan 1 00:59 active_count -r--r--r--1 root root 4096 Jan 1 00:59 active_time_ms lrwxrwxrwx1 root root 0 Jan 1 00:59 device -> ../../../47d58.ethernet -r--r--r--1 root root 4096 Jan 1 0
Re: general protection fault in inet_unhash
On 5/29/20 10:32 AM, Eric Dumazet wrote: On 5/28/20 11:32 PM, Andrii Nakryiko wrote: On 5/28/20 11:23 PM, Dmitry Vyukov wrote: On Thu, May 28, 2020 at 11:01 PM 'Andrii Nakryiko' via syzkaller-bugs wrote: On 5/28/20 9:44 AM, syzbot wrote: Hello, syzbot found the following crash on: HEAD commit: dc0f3ed1 net: phy: at803x: add cable diagnostics support f.. git tree: net-next console output: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_log.txt-3Fx-3D17289cd210&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=t1v5ZakZM9Aw_9u_I6FbFZ28U0GFs0e9dMMUOyiDxO4&e= kernel config: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_.config-3Fx-3D7e1bc97341edbea6&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=yeXCTODuJF6ExmCJ-ppqMHsfvMCbCQ9zkmZi3W6NGHo&e= dashboard link: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_bug-3Fextid-3D3610d489778b57cc8031&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=8fAJHh81yojiinnGJzTw6hN4w4A6XRZST4463CWL9Y8&e= compiler: gcc (GCC) 9.0.0 20181231 (experimental) syz repro: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_repro.syz-3Fx-3D15f237aa10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=cPv-hQsGYs0CVz3I26BmauS0hQ8_YTWHeH5p-U5ElWY&e= C reproducer: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_repro.c-3Fx-3D1553834a10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=r6sGJDOgosZDE9sRxqFnVibDNJFt_6IteSWeqEQLbNE&e= The bug was bisected to: commit af6eea57437a830293eab56246b6025cc7d46ee7 Author: Andrii Nakryiko Date: Mon Mar 30 02:59:58 2020 + bpf: Implement bpf_link-based cgroup BPF program attachment bisection log: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_bisect.txt-3Fx-3D1173cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=rJIpYFSAMRfea3349dd7PhmLD_hriVwq8ZtTHcSagBA&e= final crash: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_report.txt-3Fx-3D1373cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=TWpx5JNdxKiKPABUScn8WB7u3fXueCp7BXwQHg4Unz0&e= console output: https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_log.txt-3Fx-3D1573cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=-SMhn-dVZI4W51EZQ8Im0sdThgwt9M6fxUt3_bcYvk8&e= IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+3610d489778b57cc8...@syzkaller.appspotmail.com Fixes: af6eea57437a ("bpf: Implement bpf_link-based cgroup BPF program attachment") general protection fault, probably for non-canonical address 0xdc01: [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0008-0x000f] CPU: 0 PID: 7063 Comm: syz-executor654 Not tainted 5.7.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:inet_unhash+0x11f/0x770 net/ipv4/inet_hashtables.c:600 No idea why it was bisected to bpf_link change. It seems completely struct sock-related. Seems like Hi Andrii, You can always find a detailed explanation of syzbot bisections under the "bisection log" link. Right. Sorry, I didn't mean that bisect went wrong or anything like that. I just don't see how my change has anything to do with invalid socket state. As I just replied in another email, this particular repro is using bpf_link_create() for cgroup attachment, which was added in my patch. So running repro before my patch would always fail to attach BPF program, and thus won't be able to repro the issue (because the bug is somewhere in the interaction between BPF program attachment and socket itself). So it will always bisect to my patch :) L2TP seems to use sk->sk_node to insert sockets into l2tp_ip_table, _and_ uses l2tp_ip_prot.unhash == inet_unhash So if/when BPF_CGROUP_RUN_PROG_INET_SOCK(sk) returns an error and inet_create() calls sk_common_release() bad things happen, because inet_unhash() expects a valid hashinfo pointer. I guess the following patch should fix this. Bug has been there forever, but only BPF_CGROUP_RUN_PROG_INET_SOCK(sk) could trigger it. I knew it! :) Thanks a lot for taking a look, Eric! diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 10cf7c3dcbb3fb1b27657588f3d1ba806cba737f..097c80c0e323777df997a189eb456e3ae6d26888 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@
Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
On 5/29/20 4:27 PM, Bart Van Assche wrote: > On 2020-05-29 13:27, Matthew Wilcox wrote: >> On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote: >>> There is a function named ilog2() exist which can replace blksize. >>> The generated code will be shorter and more efficient on some >>> architecture, such as arm64. And ilog2() can be optimized according >>> to different architecture. >> >> We'd get the same benefit from a smaller patch with just: >> >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue >> *q, unsigned long addr, >> return !(addr & alignment) && !(len & alignment); >> } >> >> -/* assumes size > 256 */ >> static inline unsigned int blksize_bits(unsigned int size) >> { >> -unsigned int bits = 8; >> -do { >> -bits++; >> -size >>= 1; >> -} while (size > 256); >> -return bits; >> +return ilog2(size); >> } >> >> static inline unsigned int block_size(struct block_device *bdev) > > Hi Matthew, > > I had suggested to change all blksize_bits() calls into ilog2() calls > because I think that a single function to calculate the logarithm base 2 > is sufficient. I think this should be a two-parter: 1) Use the inode blkbits where appropriate 2) Then do this change I'm leaning towards just doing it in blksize_bits() instead of updating every caller, unless there aren't that many left once we've gone through patch 1. -- Jens Axboe
Re: [PATCH V2] dt-bindings: regulator: Convert anatop regulator to json-schema
On Fri, 29 May 2020 09:59:11 +0800, Anson Huang wrote: > Convert the anatop regulator binding to DT schema format using json-schema. Applied to https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git for-next Thanks! [1/1] dt-bindings: regulator: Convert anatop regulator to json-schema commit: 81227f49bd272cbcd9bb4650b250519c8aa22065 All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark
Re: [PATCH v3 4/7] watchdog: dw_wdt: Support devices with non-fixed TOP values
On Tue, May 26, 2020 at 06:41:20PM +0300, Serge Semin wrote: > In case if the DW Watchdog IP core is synthesised with > WDT_USE_FIX_TOP == false, the TOP interval indexes make the device > to load a custom periods to the counter. These periods are hardwired > at the IP synthesis stage and can be within [2^8, 2^(WDT_CNT_WIDTH - 1)]. > Alas their values can't be detected at runtime and must be somehow > supplied to the driver so one could properly determine the watchdog > timeout intervals. For this purpose we suggest to have a vendor- > specific dts property "snps,watchdog-tops" utilized, which would > provide an array of sixteen counter values. At device probe stage they > will be used to initialize the watchdog device timeouts determined > from the array values and current clocks source rate. > > In order to have custom TOP values supported the driver must be > altered in the following way. First of all the fixed-top values > ready-to-use array must be determined for compatibility with currently > supported devices, which were synthesised with WDT_USE_FIX_TOP == true. > It will be used if either fixed TOP feature is detected being enabled or > no custom TOPs are fetched from the device dt node. Secondly at the probe > stage we must initialize an array of the watchdog timeouts corresponding > to the detected TOPs list and the reference clock rate. For generality the > procedure of initialization is designed in a way to support the TOPs array > with no limitations on the items order or value. Finally the watchdog > period search methods should be altered to support the new timeouts data > structure. > > Signed-off-by: Serge Semin > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: Rob Herring > Cc: linux-m...@vger.kernel.org > Cc: devicet...@vger.kernel.org Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - Rearrange SoBs. > - Add "ms" suffix to the methods returning msec and convert the methods > with no "ms" suffix to return a timeout in sec. > - Make sure minimum timeout is at least 1 sec. > - Refactor the timeouts calculation procedure to retain the timeouts in > the ascending order. > - Make sure there is no integer overflow in milliseconds calculation. It > is saved in a dedicated uint field of the timeout structure. > --- > drivers/watchdog/dw_wdt.c | 191 +- > 1 file changed, 167 insertions(+), 24 deletions(-) > > diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c > index fba21de2bbad..693c0d1fd796 100644 > --- a/drivers/watchdog/dw_wdt.c > +++ b/drivers/watchdog/dw_wdt.c > @@ -13,6 +13,8 @@ > */ > > #include > +#include > +#include > #include > #include > #include > @@ -34,21 +36,40 @@ > #define WDOG_CURRENT_COUNT_REG_OFFSET0x08 > #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c > #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 > +#define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 > +#define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) > > -/* The maximum TOP (timeout period) value that can be set in the watchdog. */ > -#define DW_WDT_MAX_TOP 15 > +/* There are sixteen TOPs (timeout periods) that can be set in the watchdog. > */ > +#define DW_WDT_NUM_TOPS 16 > +#define DW_WDT_FIX_TOP(_idx) (1U << (16 + _idx)) > > #define DW_WDT_DEFAULT_SECONDS 30 > > +static const u32 dw_wdt_fix_tops[DW_WDT_NUM_TOPS] = { > + DW_WDT_FIX_TOP(0), DW_WDT_FIX_TOP(1), DW_WDT_FIX_TOP(2), > + DW_WDT_FIX_TOP(3), DW_WDT_FIX_TOP(4), DW_WDT_FIX_TOP(5), > + DW_WDT_FIX_TOP(6), DW_WDT_FIX_TOP(7), DW_WDT_FIX_TOP(8), > + DW_WDT_FIX_TOP(9), DW_WDT_FIX_TOP(10), DW_WDT_FIX_TOP(11), > + DW_WDT_FIX_TOP(12), DW_WDT_FIX_TOP(13), DW_WDT_FIX_TOP(14), > + DW_WDT_FIX_TOP(15) > +}; > + > static bool nowayout = WATCHDOG_NOWAYOUT; > module_param(nowayout, bool, 0); > MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " >"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); > > +struct dw_wdt_timeout { > + u32 top_val; > + unsigned int sec; > + unsigned int msec; > +}; > + > struct dw_wdt { > void __iomem*regs; > struct clk *clk; > unsigned long rate; > + struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; > struct watchdog_device wdd; > struct reset_control*rst; > /* Save/restore */ > @@ -64,20 +85,66 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt) > WDOG_CONTROL_REG_WDT_EN_MASK; > } > > -static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top) > +static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt, > + unsigned int timeout, u32 *top_val) > { > + int idx; > + > /* > - * There are 16 possible timeout values in 0..15 where the number of > - * cycles is 2 ^ (16 + i) and the watchdog counts down. > + * Find a TO
Re: [PATCH v3 5/7] watchdog: dw_wdt: Support devices with asynch clocks
On Tue, May 26, 2020 at 06:41:21PM +0300, Serge Semin wrote: > DW Watchdog IP core can be synthesised with asynchronous timer/APB > clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case > separate clock signals are supposed to be used to feed watchdog timer > and APB interface of the device. Currently the driver supports > the synchronous mode only. Since there is no way to determine which > mode was actually activated for device from its registers, we have to > rely on the platform device configuration data. If optional "pclk" > clock source is supplied, we consider the device working in asynchronous > mode, otherwise the driver falls back to the synchronous configuration. > > Signed-off-by: Serge Semin > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: Rob Herring > Cc: linux-m...@vger.kernel.org > Cc: devicet...@vger.kernel.org Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - Rearrange SoBs. > --- > drivers/watchdog/dw_wdt.c | 48 +++ > 1 file changed, 43 insertions(+), 5 deletions(-) > > diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c > index 693c0d1fd796..efbc36872670 100644 > --- a/drivers/watchdog/dw_wdt.c > +++ b/drivers/watchdog/dw_wdt.c > @@ -68,6 +68,7 @@ struct dw_wdt_timeout { > struct dw_wdt { > void __iomem*regs; > struct clk *clk; > + struct clk *pclk; > unsigned long rate; > struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; > struct watchdog_device wdd; > @@ -274,6 +275,7 @@ static int dw_wdt_suspend(struct device *dev) > dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); > dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); > > + clk_disable_unprepare(dw_wdt->pclk); > clk_disable_unprepare(dw_wdt->clk); > > return 0; > @@ -287,6 +289,12 @@ static int dw_wdt_resume(struct device *dev) > if (err) > return err; > > + err = clk_prepare_enable(dw_wdt->pclk); > + if (err) { > + clk_disable_unprepare(dw_wdt->clk); > + return err; > + } > + > writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); > writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); > > @@ -393,9 +401,18 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) > if (IS_ERR(dw_wdt->regs)) > return PTR_ERR(dw_wdt->regs); > > - dw_wdt->clk = devm_clk_get(dev, NULL); > - if (IS_ERR(dw_wdt->clk)) > - return PTR_ERR(dw_wdt->clk); > + /* > + * Try to request the watchdog dedicated timer clock source. It must > + * be supplied if asynchronous mode is enabled. Otherwise fallback > + * to the common timer/bus clocks configuration, in which the very > + * first found clock supply both timer and APB signals. > + */ > + dw_wdt->clk = devm_clk_get(dev, "tclk"); > + if (IS_ERR(dw_wdt->clk)) { > + dw_wdt->clk = devm_clk_get(dev, NULL); > + if (IS_ERR(dw_wdt->clk)) > + return PTR_ERR(dw_wdt->clk); > + } > > ret = clk_prepare_enable(dw_wdt->clk); > if (ret) > @@ -407,10 +424,27 @@ static int dw_wdt_drv_probe(struct platform_device > *pdev) > goto out_disable_clk; > } > > + /* > + * Request APB clock if device is configured with async clocks mode. > + * In this case both tclk and pclk clocks are supposed to be specified. > + * Alas we can't know for sure whether async mode was really activated, > + * so the pclk phandle reference is left optional. If it couldn't be > + * found we consider the device configured in synchronous clocks mode. > + */ > + dw_wdt->pclk = devm_clk_get_optional(dev, "pclk"); > + if (IS_ERR(dw_wdt->pclk)) { > + ret = PTR_ERR(dw_wdt->pclk); > + goto out_disable_clk; > + } > + > + ret = clk_prepare_enable(dw_wdt->pclk); > + if (ret) > + goto out_disable_clk; > + > dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL); > if (IS_ERR(dw_wdt->rst)) { > ret = PTR_ERR(dw_wdt->rst); > - goto out_disable_clk; > + goto out_disable_pclk; > } > > reset_control_deassert(dw_wdt->rst); > @@ -449,10 +483,13 @@ static int dw_wdt_drv_probe(struct platform_device > *pdev) > > ret = watchdog_register_device(wdd); > if (ret) > - goto out_disable_clk; > + goto out_disable_pclk; > > return 0; > > +out_disable_pclk: > + clk_disable_unprepare(dw_wdt->pclk); > + > out_disable_clk: > clk_disable_unprepare(dw_wdt->clk); > return ret; > @@ -464,6 +501,7 @@ static int dw_wdt_drv_remove(struct platform_device *pdev) > > watchdog_unregister_device(&dw_wdt->wdd); > reset_control_assert(dw_wdt->rst); >
[PATCH] perf libdw: Fix off-by 1 relative directory includes
This is currently working due to extra include paths in the build. Before: $ cd tools/perf/arch/arm64/util $ ls -la ../../util/unwind-libdw.h ls: cannot access '../../util/unwind-libdw.h': No such file or directory After: $ ls -la ../../../util/unwind-libdw.h -rw-r- 1 irogers irogers 553 Apr 17 14:31 ../../../util/unwind-libdw.h --- tools/perf/arch/arm64/util/unwind-libdw.c | 6 +++--- tools/perf/arch/powerpc/util/unwind-libdw.c | 6 +++--- tools/perf/arch/x86/util/unwind-libdw.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index 7623d85e77f3..a50941629649 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c index abf2dbc7f829..7b2d96ec28e3 100644 --- a/tools/perf/arch/powerpc/util/unwind-libdw.c +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ static const int special_regs[3][2] = { diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index fda8f4206ee4..eea2bf87232b 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { -- 2.27.0.rc2.251.g90737beb825-goog
Re: [PATCH v3 1/7] dt-bindings: watchdog: Convert DW WDT binding to DT schema
On Tue, May 26, 2020 at 06:41:17PM +0300, Serge Semin wrote: > Modern device tree bindings are supposed to be created as YAML-files > in accordance with dt-schema. This commit replaces the DW Watchdog > legacy bare text bindings with YAML file. As before the binding states > that the corresponding dts node is supposed to have a registers > range, a watchdog timer references clock source, optional reset line and > pre-timeout interrupt. > > Signed-off-by: Serge Semin > Reviewed-by: Rob Herring > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: linux-m...@vger.kernel.org Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - Rearrange SoBs. > - Discard BE copyright header. > - Replace "additionalProperties: false" with "unevaluatedProperties: false" > property. > - Discard interrupts property from the required properties list. > - Remove a label definition from the binding example. > - Move the asynchronous APB3 clock support into a dedicated patch. > --- > .../devicetree/bindings/watchdog/dw_wdt.txt | 24 - > .../bindings/watchdog/snps,dw-wdt.yaml| 50 +++ > 2 files changed, 50 insertions(+), 24 deletions(-) > delete mode 100644 Documentation/devicetree/bindings/watchdog/dw_wdt.txt > create mode 100644 > Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > > diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt > b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt > deleted file mode 100644 > index eb0914420c7c.. > --- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt > +++ /dev/null > @@ -1,24 +0,0 @@ > -Synopsys Designware Watchdog Timer > - > -Required Properties: > - > -- compatible : Should contain "snps,dw-wdt" > -- reg: Base address and size of the watchdog timer registers. > -- clocks : phandle + clock-specifier for the clock that drives the > - watchdog timer. > - > -Optional Properties: > - > -- interrupts : The interrupt used for the watchdog timeout warning. > -- resets : phandle pointing to the system reset controller with > - line index for the watchdog. > - > -Example: > - > - watchdog0: wd@ffd02000 { > - compatible = "snps,dw-wdt"; > - reg = <0xffd02000 0x1000>; > - interrupts = <0 171 4>; > - clocks = <&per_base_clk>; > - resets = <&rst WDT0_RESET>; > - }; > diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > new file mode 100644 > index ..4f6944756ab4 > --- /dev/null > +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > @@ -0,0 +1,50 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +%YAML 1.2 > +--- > +$id: http://devicetree.org/schemas/watchdog/snps,dw-wdt.yaml# > +$schema: http://devicetree.org/meta-schemas/core.yaml# > + > +title: Synopsys Designware Watchdog Timer > + > +allOf: > + - $ref: "watchdog.yaml#" > + > +maintainers: > + - Jamie Iles > + > +properties: > + compatible: > +const: snps,dw-wdt > + > + reg: > +maxItems: 1 > + > + interrupts: > +description: DW Watchdog pre-timeout interrupt > +maxItems: 1 > + > + clocks: > +items: > + - description: Watchdog timer reference clock > + > + resets: > +description: Phandle to the DW Watchdog reset lane > +maxItems: 1 > + > +unevaluatedProperties: false > + > +required: > + - compatible > + - reg > + - clocks > + > +examples: > + - | > +watchdog@ffd02000 { > + compatible = "snps,dw-wdt"; > + reg = <0xffd02000 0x1000>; > + interrupts = <0 171 4>; > + clocks = <&per_base_clk>; > + resets = <&wdt_rst>; > +}; > +...
Re: [PATCH v3 2/7] dt-bindings: watchdog: dw-wdt: Support devices with asynch clocks
On Tue, May 26, 2020 at 06:41:18PM +0300, Serge Semin wrote: > DW Watchdog IP core can be synthesised with asynchronous timer/APB > clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case > separate clock signals are supposed to be used to feed watchdog timer > and APB interface of the device. Let's update the DW Watchdog DT node > schema so it would support the optional APB3 bus clock specified along > with the mandatory watchdog timer reference clock. > > Signed-off-by: Serge Semin > Reviewed-by: Rob Herring > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: linux-m...@vger.kernel.org Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - It's a new patch unpinned from the previous one. > --- > .../devicetree/bindings/watchdog/snps,dw-wdt.yaml | 8 > 1 file changed, 8 insertions(+) > > diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > index 4f6944756ab4..5bf6dc6377f3 100644 > --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > @@ -24,8 +24,16 @@ properties: > maxItems: 1 > >clocks: > +minItems: 1 > items: >- description: Watchdog timer reference clock > + - description: APB3 interface clock > + > + clock-names: > +minItems: 1 > +items: > + - const: tclk > + - const: pclk > >resets: > description: Phandle to the DW Watchdog reset lane
Re: [PATCH v3 3/7] dt-bindings: watchdog: dw-wdt: Add watchdog TOPs array property
On Tue, May 26, 2020 at 06:41:19PM +0300, Serge Semin wrote: > In case if DW Watchdog IP core is built with WDT_USE_FIX_TOP == false, > a custom timeout periods are used to preset the timer counter. In > this case that periods should be specified in a new "snps,watchdog-tops" > property of the DW watchdog dts node. > > Signed-off-by: Serge Semin > Reviewed-by: Rob Herring > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: linux-m...@vger.kernel.org Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - Rearrange SoBs. > - Move $ref to the root level of the "snps,watchdog-tops" property > so does the constraints. > - Add default TOP values array. > - Discard the label definition from the new bindings example. > > Changelog v3: > - Remove items property and move the minItems and maxItems constraints to > the root level of the snps,watchdog-tops property. > --- > .../bindings/watchdog/snps,dw-wdt.yaml| 32 +++ > 1 file changed, 32 insertions(+) > > diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > index 5bf6dc6377f3..d9fc7bb851b1 100644 > --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml > @@ -39,6 +39,23 @@ properties: > description: Phandle to the DW Watchdog reset lane > maxItems: 1 > > + snps,watchdog-tops: > +$ref: /schemas/types.yaml#/definitions/uint32-array > +description: | > + DW APB Watchdog custom timer intervals - Timeout Period ranges (TOPs). > + Each TOP is a number loaded into the watchdog counter at the moment of > + the timer restart. The counter decrementing happens each tick of the > + reference clock. Therefore the TOPs array is equivalent to an array of > + the timer expiration intervals supported by the DW APB Watchdog. Note > + DW APB Watchdog IP-core might be synthesized with fixed TOP values, > + in which case this property is unnecessary with default TOPs utilized. > +default: [0x0001000 0x0002000 0x0004000 0x0008000 > + 0x001 0x002 0x004 0x008 > + 0x010 0x020 0x040 0x080 > + 0x100 0x200 0x400 0x800] > +minItems: 16 > +maxItems: 16 > + > unevaluatedProperties: false > > required: > @@ -55,4 +72,19 @@ examples: >clocks = <&per_base_clk>; >resets = <&wdt_rst>; > }; > + > + - | > +watchdog@ffd02000 { > + compatible = "snps,dw-wdt"; > + reg = <0xffd02000 0x1000>; > + interrupts = <0 171 4>; > + clocks = <&per_base_clk>; > + clock-names = "tclk"; > + snps,watchdog-tops = <0x00FF 0x01FF 0x03FF > +0x07FF 0x 0x0001 > +0x0003 0x0007 0x000F > +0x001F 0x003F 0x007F > +0x00FF 0x01FF 0x03FF > +0x07FF>; > +}; > ...
Re: linux-next: manual merge of the akpm-current tree with the mips tree
Hi Andrew, On Fri, 29 May 2020 14:15:46 -0700 Andrew Morton wrote: > > On Thu, 28 May 2020 19:29:43 +1000 Stephen Rothwell > wrote: > > > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@@ -2752,9 -2748,6 +2749,8 @@@ static vm_fault_t wp_page_copy(struct v > > /* Free the old page.. */ > > new_page = old_page; > > page_copied = 1; > > + } else { > > + update_mmu_tlb(vma, vmf->address, vmf->pte); > > - mem_cgroup_cancel_charge(new_page, memcg, false); > > } > > > > if (new_page) > > We decided that the update_mmu_tlb() call is no longer needed here, so > there is no need to re-add it when resolving this. OK, I have fixed up my merge resolution to remove those 2 lines. -- Cheers, Stephen Rothwell pgpHz7ZVh_Q4Y.pgp Description: OpenPGP digital signature
Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")
On Sat, May 30, 2020 at 12:26:17AM +0200, Heiner Kallweit wrote: > Current situation means that PME is unusable on all systems where > pcie_aspm_support_enabled() returns false, what is basically every > system except EXPERT mode is enabled and CONFIG_PCIEASPM is set. > So we definitely need to do something. CONFIG_PCIEASPM is default y. I don't think there's huge value in adding complexity to deal with it being disabled, given that the kernel is then in a configuration that no vendor is testing against. There are existing runtime mechanisms to disable it at runtime. -- Matthew Garrett | mj...@srcf.ucam.org
Re: [PATCH v3 6/7] watchdog: dw_wdt: Add pre-timeouts support
On Tue, May 26, 2020 at 06:41:22PM +0300, Serge Semin wrote: > DW Watchdog can rise an interrupt in case if IRQ request mode is enabled > and timer reaches the zero value. In this case the IRQ lane is left > pending until either the next watchdog kick event (watchdog restart) or > until the WDT_EOI register is read or the device/system reset. This > interface can be used to implement the pre-timeout functionality > optionally provided by the Linux kernel watchdog devices. > > IRQ mode provides a two stages timeout interface. It means the IRQ is > raised when the counter reaches zero, while the system reset occurs only > after subsequent timeout if the timer restart is not performed. Due to > this peculiarity the pre-timeout value is actually set to the achieved > hardware timeout, while the real watchdog timeout is considered to be > twice as much of it. This applies a significant limitation on the > pre-timeout values, so current implementation supports either zero value, > which disables the pre-timeout events, or non-zero values, which imply > the pre-timeout to be at least half of the current watchdog timeout. > > Note that we ask the interrupt controller to detect the rising-edge > pre-timeout interrupts to prevent the high-level-IRQs flood, since > if the pre-timeout happens, the IRQ lane will be left pending until > it's cleared by the timer restart. > > Signed-off-by: Serge Semin > Cc: Alexey Malahov > Cc: Thomas Bogendoerfer > Cc: Arnd Bergmann > Cc: Rob Herring > Cc: linux-m...@vger.kernel.org > Cc: devicet...@vger.kernel.org Nitpick below, but I don't really know what to do about it, so Reviewed-by: Guenter Roeck > --- > > Changelog v2: > - Rearrange SoBs. > - Make the Pre-timeout IRQ being optionally supported. > --- > drivers/watchdog/dw_wdt.c | 138 +++--- > 1 file changed, 130 insertions(+), 8 deletions(-) > > diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c > index efbc36872670..3cd7c485cd70 100644 > --- a/drivers/watchdog/dw_wdt.c > +++ b/drivers/watchdog/dw_wdt.c > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -36,6 +37,8 @@ > #define WDOG_CURRENT_COUNT_REG_OFFSET0x08 > #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c > #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 > +#define WDOG_INTERRUPT_STATUS_REG_OFFSET0x10 > +#define WDOG_INTERRUPT_CLEAR_REG_OFFSET 0x14 > #define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 > #define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) > > @@ -59,6 +62,11 @@ module_param(nowayout, bool, 0); > MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " >"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); > > +enum dw_wdt_rmod { > + DW_WDT_RMOD_RESET = 1, > + DW_WDT_RMOD_IRQ = 2 > +}; > + > struct dw_wdt_timeout { > u32 top_val; > unsigned int sec; > @@ -70,6 +78,7 @@ struct dw_wdt { > struct clk *clk; > struct clk *pclk; > unsigned long rate; > + enum dw_wdt_rmodrmod; > struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; > struct watchdog_device wdd; > struct reset_control*rst; > @@ -86,6 +95,20 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt) > WDOG_CONTROL_REG_WDT_EN_MASK; > } > > +static void dw_wdt_update_mode(struct dw_wdt *dw_wdt, enum dw_wdt_rmod rmod) > +{ > + u32 val; > + > + val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); > + if (rmod == DW_WDT_RMOD_IRQ) > + val |= WDOG_CONTROL_REG_RESP_MODE_MASK; > + else > + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; > + writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); > + > + dw_wdt->rmod = rmod; > +} > + > static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt, >unsigned int timeout, u32 *top_val) > { > @@ -145,7 +168,11 @@ static unsigned int dw_wdt_get_timeout(struct dw_wdt > *dw_wdt) > break; > } > > - return dw_wdt->timeouts[idx].sec; > + /* > + * In IRQ mode due to the two stages counter, the actual timeout is > + * twice greater than the TOP setting. > + */ > + return dw_wdt->timeouts[idx].sec * dw_wdt->rmod; > } > > static int dw_wdt_ping(struct watchdog_device *wdd) > @@ -164,7 +191,20 @@ static int dw_wdt_set_timeout(struct watchdog_device > *wdd, unsigned int top_s) > unsigned int timeout; > u32 top_val; > > - timeout = dw_wdt_find_best_top(dw_wdt, top_s, &top_val); > + /* > + * Note IRQ mode being enabled means having a non-zero pre-timeout > + * setup. In this case we try to find a TOP as close to the half of the > + * requested timeout as possible since DW Watchdog IRQ mode is designed > + * in two stages way - first timeout rises the pre-timeout interrupt, > + *