[PATCH 08/14] x86/entry: Optimize local_db_save() for virt

2020-05-29 Thread Peter Zijlstra
Because DRn access is 'difficult' with virt; but the DR7 read is
cheaper than a cacheline miss on native, add a virt specific
fast path to local_db_save(), such that when breakpoints are not in
use we avoid touching DRn entirely.

Suggested-by: Andy Lutomirski 
Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/x86/include/asm/debugreg.h |7 ++-
 arch/x86/kernel/hw_breakpoint.c |   26 ++
 arch/x86/kvm/vmx/nested.c   |2 +-
 3 files changed, 29 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -85,8 +85,8 @@ static inline void hw_breakpoint_disable
set_debugreg(0UL, 3);
 }
 
-static inline int hw_breakpoint_active(void)
+static inline bool hw_breakpoint_active(void)
 {
return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
 }
 
@@ -117,6 +119,9 @@ static __always_inline unsigned long loc
 {
unsigned long dr7;
 
+   if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active())
+   return 0;
+
get_debugreg(dr7, 7);
dr7 &= ~0x400; /* architecturally set bit */
if (dr7)
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -99,6 +99,8 @@ int arch_install_hw_breakpoint(struct pe
unsigned long *dr7;
int i;
 
+   lockdep_assert_irqs_disabled();
+
for (i = 0; i < HBP_NUM; i++) {
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
 
@@ -117,6 +119,12 @@ int arch_install_hw_breakpoint(struct pe
dr7 = this_cpu_ptr(&cpu_dr7);
*dr7 |= encode_dr7(i, info->len, info->type);
 
+   /*
+* Ensure we first write cpu_dr7 before we set the DR7 register.
+* This ensures an NMI never see cpu_dr7 0 when DR7 is not.
+*/
+   barrier();
+
set_debugreg(*dr7, 7);
if (info->mask)
set_dr_addr_mask(info->mask, i);
@@ -136,9 +144,11 @@ int arch_install_hw_breakpoint(struct pe
 void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-   unsigned long *dr7;
+   unsigned long dr7;
int i;
 
+   lockdep_assert_irqs_disabled();
+
for (i = 0; i < HBP_NUM; i++) {
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
 
@@ -151,12 +161,20 @@ void arch_uninstall_hw_breakpoint(struct
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
return;
 
-   dr7 = this_cpu_ptr(&cpu_dr7);
-   *dr7 &= ~__encode_dr7(i, info->len, info->type);
+   dr7 = this_cpu_read(cpu_dr7);
+   dr7 &= ~__encode_dr7(i, info->len, info->type);
 
-   set_debugreg(*dr7, 7);
+   set_debugreg(dr7, 7);
if (info->mask)
set_dr_addr_mask(0, i);
+
+   /*
+* Ensure the write to cpu_dr7 is after we've set the DR7 register.
+* This ensures an NMI never see cpu_dr7 0 when DR7 is not.
+*/
+   barrier();
+
+   this_cpu_write(cpu_dr7, dr7);
 }
 
 static int arch_bp_generic_len(int x86_len)
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3028,9 +3028,9 @@ static int nested_vmx_check_vmentry_hw(s
/*
 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
 */
-   local_irq_enable();
if (hw_breakpoint_active())
set_debugreg(__this_cpu_read(cpu_dr7), 7);
+   local_irq_enable();
preempt_enable();
 
/*




Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")

2020-05-29 Thread Matthew Garrett
On Fri, May 29, 2020 at 03:21:35PM -0500, Bjorn Helgaas wrote:

> Yeah, that makes sense.  I can't remember the details, but I'm pretty
> sure there's a reason why we ask for the whole set of things.  Seems
> like it solved some problem.  I think Matthew Garrett might have been
> involved in that.

This was https://bugzilla.redhat.com/show_bug.cgi?id=638912 - some 
firmware misbehaves unless you pass the same set of supported 
functionality as Windows does.

-- 
Matthew Garrett | mj...@srcf.ucam.org


[RFC][PATCH] ASoC: qcom: q6asm-dai: kCFI fix

2020-05-29 Thread John Stultz
Fixes the following kCFI crash seen on db845c, caused
by the function prototypes not matching the callback
function prototype.

[   82.585661] Unable to handle kernel NULL pointer dereference at virtual 
address 0001
[   82.595387] Mem abort info:
[   82.599463]   ESR = 0x9605
[   82.602658]   EC = 0x25: DABT (current EL), IL = 32 bits
[   82.608177]   SET = 0, FnV = 0
[   82.611829]   EA = 0, S1PTW = 0
[   82.615369] Data abort info:
[   82.618751]   ISV = 0, ISS = 0x0005
[   82.622641]   CM = 0, WnR = 0
[   82.625774] user pgtable: 4k pages, 39-bit VAs, pgdp=000174259000
[   82.632292] [0001] pgd=, pud=
[   82.639167] Internal error: Oops: 9605 [#1] PREEMPT SMP
[   82.644795] Modules linked in: hci_uart btqca xhci_plat_hcd xhci_pci_renesas 
xhci_pci xhci_hcd wcn36xx wcnss_ctrl wcd934x vctrl_regulator ufs_qcom 
syscon_reboot_e
[   82.644927]  qcom_apcs_ipc_mailbox q6asm_dai q6routing q6asm q6afe_dai q6adm 
q6afe q6core q6dsp_common pm8941_pwrkey pm8916_wdt platform_mhu 
pinctrl_spmi_mpp pine
[   82.812982] CPU: 3 PID: 240 Comm: kworker/u16:4 Tainted: GW 
5.6.0-rc7-mainline-00960-g0c34353d11b9-dirty #1
[   82.824201] Hardware name: Thundercomm Dragonboard 845c (DT)
[   82.829937] Workqueue: qcom_apr_rx apr_rxwq [apr]
[   82.834698] pstate: 80c5 (Nzcv daif +PAN +UAO)
[   82.839553] pc : __cfi_check_fail+0x4/0x1c [q6asm_dai]
[   82.844754] lr : __cfi_check+0x3a8/0x3b0 [q6asm_dai]
[   82.849767] sp : ffc0105f3c20
[   82.853123] x29: ffc0105f3c30 x28: 0020
[   82.858489] x27: ff80f4588400 x26: ff80f458ec94
[   82.863854] x25: ff80f458ece8 x24: ffe3670c7000
[   82.869220] x23: ff8094bb7b34 x22: ffe367137000
[   82.874585] x21: bd07909b332eada6 x20: 0001
[   82.879950] x19: ffe36713863c x18: ff80f8df4430
[   82.885316] x17: 0001 x16: ffe39d15e660
[   82.890681] x15: 0001 x14: 0027
[   82.896047] x13:  x12: ffe39e6465a0
[   82.901413] x11: 0051 x10: 
[   82.906779] x9 : 000ffe366c19 x8 : c3c5f18762d1ceef
[   82.912145] x7 :  x6 : ffc010877698
[   82.917511] x5 : ffc0105f3c00 x4 : 
[   82.922877] x3 :  x2 : 0001
[   82.928243] x1 : ffe36713863c x0 : 0001
[   82.933610] Call trace:
[   82.936099]  __cfi_check_fail+0x4/0x1c [q6asm_dai]
[   82.940955]  q6asm_srvc_callback+0x22c/0x618 [q6asm]
[   82.945973]  apr_rxwq+0x1a8/0x27c [apr]
[   82.949861]  process_one_work+0x2e8/0x54c
[   82.953919]  worker_thread+0x27c/0x4d4
[   82.957715]  kthread+0x144/0x154
[   82.960985]  ret_from_fork+0x10/0x18
[   82.964603] Code: a8c37bfd f85f8e5e d65f03c0 b4a0 (3948)
[   82.970762] ---[ end trace 410accb839617143 ]---
[   82.975429] Kernel panic - not syncing: Fatal exception

Cc: Patrick Lai 
Cc: Banajit Goswami 
Cc: Liam Girdwood 
Cc: Mark Brown 
Cc: Jaroslav Kysela 
Cc: Takashi Iwai 
Cc: Srinivas Kandagatla 
Cc: Vinod Koul 
Cc: Kuninori Morimoto 
Cc: Stephan Gerhold 
Cc: Sami Tolvanen 
Cc: Todd Kjos 
Cc: Alistair Delva 
Cc: Amit Pundir 
Cc: Sumit Semwal 
Cc: alsa-de...@alsa-project.org
Signed-off-by: John Stultz 
---
 sound/soc/qcom/qdsp6/q6asm-dai.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index 125af00bba53..4640804aab7f 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -176,7 +176,7 @@ static const struct snd_compr_codec_caps q6asm_compr_caps = 
{
 };
 
 static void event_handler(uint32_t opcode, uint32_t token,
- uint32_t *payload, void *priv)
+ void *payload, void *priv)
 {
struct q6asm_dai_rtd *prtd = priv;
struct snd_pcm_substream *substream = prtd->substream;
@@ -490,7 +490,7 @@ static int q6asm_dai_hw_params(struct snd_soc_component 
*component,
 }
 
 static void compress_event_handler(uint32_t opcode, uint32_t token,
-  uint32_t *payload, void *priv)
+  void *payload, void *priv)
 {
struct q6asm_dai_rtd *prtd = priv;
struct snd_compr_stream *substream = prtd->cstream;
-- 
2.17.1



Re: [PATCH] ARM: omap2: fix omap5_realtime_timer_init definition

2020-05-29 Thread Arnd Bergmann
On Fri, May 29, 2020 at 11:14 PM Tony Lindgren  wrote:
> * Arnd Bergmann  [200529 21:09]:
> >
> > #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
> > extern void omap5_realtime_timer_init(void);
> > #else
> > static inline void omap5_realtime_timer_init(void)
> > {
> > }
> > #endif
> >
> > In fact, the inline stub is what that caused the regression,
> > so I think it's ok with my patch.
>
> To me it seems not having SOC_HAS_REALTIME_COUNTER will
> cause omap5_realtime_timer_init() not get called?

Correct, this looked to me like it was the intention of that
symbol. Unfortunately there is no help text but it is user
selectable:

config SOC_HAS_REALTIME_COUNTER
bool "Real time free running counter"
depends on SOC_OMAP5 || SOC_DRA7XX
default y

> That initializes clocks and calls timer_probe(). So this
> will result in non-booting system AFAIK, the header
> file stub should no rely CONFIG_SOC_HAS_REALTIME_COUNTER
> also, but rather ! CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX.
>
> Also the Makefile change at least seems wrong, that
> can't rely on CONFIG_SOC_HAS_REALTIME_COUNTER.

How about just removing the prompt on
CONFIG_SOC_HAS_REALTIME_COUNTER but keeping the
rest of my patch? That way it's just always enabled when
there is a chip that needs it enabled in the kernel config.

The only other usage of the symbol is

#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
void set_cntfreq(void);
#else
static inline void set_cntfreq(void)
{
}
#endif

Alternatively, we could just remove the Kconfig symbol
altogether and rely on (SOC_OMAP5 || SOC_DRA7XX)
everywhere, but that seems a little more fragile in case
there is going to be another chip that needs it.

Arnd


Re: [PATCH] refperf: work around 64-bit division

2020-05-29 Thread Paul E. McKenney
On Fri, May 29, 2020 at 10:15:51PM +0200, Arnd Bergmann wrote:
> A 64-bit division was introduced in refperf, breaking compilation
> on all 32-bit architectures:
> 
> kernel/rcu/refperf.o: in function `main_func':
> refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod'
> 
> Work it by using div_u64 to mark the expensive operation.
> 
> Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds")
> Signed-off-by: Arnd Bergmann 

Many thanks, Arnd!

I queued this and reverted my earlier 64-bit-only restriction, pushing
the result out on the -rcu tree's "dev" branch.  I also added a couple
of Reported-by entries.

Thanx, Paul

> ---
>  kernel/rcu/refperf.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c
> index 47df72c492b3..c2366648981d 100644
> --- a/kernel/rcu/refperf.c
> +++ b/kernel/rcu/refperf.c
> @@ -386,7 +386,7 @@ static int main_func(void *arg)
>   if (torture_must_stop())
>   goto end;
>  
> - result_avg[exp] = 1000 * process_durations(nreaders) / 
> (nreaders * loops);
> + result_avg[exp] = div_u64(1000 * process_durations(nreaders), 
> nreaders * loops);
>   }
>  
>   // Print the average of all experiments
> @@ -397,9 +397,14 @@ static int main_func(void *arg)
>   strcat(buf, "Threads\tTime(ns)\n");
>  
>   for (exp = 0; exp < nruns; exp++) {
> + u64 avg;
> + u32 rem;
> +
>   if (errexit)
>   break;
> - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / 
> 1000, (int)(result_avg[exp] % 1000));
> +
> + avg = div_s64_rem(result_avg[exp], 1000, &rem);
> + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem);
>   strcat(buf, buf1);
>   }
>  
> -- 
> 2.26.2
> 


[PATCH v3 4/5] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

2020-05-29 Thread Peter Zijlstra
Currently all IRQ-tracking state is in task_struct, this means that
task_struct needs to be defined before we use it.

Especially for lockdep_assert_irq*() this can lead to header-hell.

Move the hardirq state into per-cpu variables to avoid the task_struct
dependency.

Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/irqflags.h |   19 ---
 include/linux/lockdep.h  |   34 ++
 include/linux/sched.h|2 --
 kernel/fork.c|4 +---
 kernel/locking/lockdep.c |   30 +++---
 kernel/softirq.c |6 ++
 6 files changed, 52 insertions(+), 43 deletions(-)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@
 
 #include 
 #include 
+#include 
 
 /* Currently lockdep_softirqs_on/off is used only by lockdep */
 #ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,22 @@
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
   extern void trace_hardirqs_on_prepare(void);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p)((p)->hardirq_context)
+# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p)   ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled(p)   (this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)   ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter()   \
-do {   \
-   if (!current->hardirq_context++)\
-   current->hardirq_threaded = 0;  \
+# define lockdep_hardirq_enter()   \
+do {   \
+   if (this_cpu_inc_return(hardirq_context) == 1)  \
+   current->hardirq_threaded = 0;  \
 } while (0)
 # define lockdep_hardirq_threaded()\
 do {   \
@@ -50,7 +55,7 @@ do {  \
 } while (0)
 # define lockdep_hardirq_exit()\
 do {   \
-   current->hardirq_context--; \
+   this_cpu_dec(hardirq_context);  \
 } while (0)
 # define lockdep_softirq_enter()   \
 do {   \
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -19,6 +19,7 @@ extern int lock_stat;
 
 #define MAX_LOCKDEP_SUBCLASSES 8UL
 
+#include 
 #include 
 
 enum lockdep_wait_type {
@@ -703,28 +704,29 @@ do {  
\
lock_release(&(lock)->dep_map, _THIS_IP_);  \
 } while (0)
 
-#define lockdep_assert_irqs_enabled()  do {\
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled,   \
- "IRQs not enabled as expected\n");\
-   } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
 
-#define lockdep_assert_irqs_disabled() do {\
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled,\
- "IRQs not disabled as expected\n");   \
-   } while (0)
+#define lockdep_assert_irqs_enabled()  \
+do {   \
+   WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
+} while (0)
 
-#define lockdep_assert_in_irq() do {   \
-   WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context,\
- "Not in hardirq as expected\n");  \
-   } while (0)
+#define lockdep_assert_irqs_disabled() \
+do {   \
+   WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled));   \
+} while (0)
+
+#define lockdep_assert_in_irq()
\
+do {   \
+   WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context));   \
+} while (0)
 
 #else
 # define might_lock(lock) do { } while (0)
 # define might_lock_read(lock) do { } while (0)
 # define might_lock_nested(lock, subclass) do { } while (0)
+
 # define lockdep_assert_irqs_enabled() do { } while (0)
 # define lockdep_assert_irqs_disabled() do { } while (0)
 # define lockdep_assert_in_irq() do { 

[PATCH v3 3/5] s390: Break cyclic percpu include

2020-05-29 Thread Peter Zijlstra
In order to use  in irqflags.h, we need to make sure
asm/percpu.h does not itself depend on irqflags.h

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/s390/include/asm/smp.h |1 +
 arch/s390/include/asm/thread_info.h |1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include 
 
 #define raw_smp_processor_id() (S390_lowcore.cpu_nr)
 
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
 #ifndef __ASSEMBLY__
 #include 
 #include 
-#include 
 
 #define STACK_INIT_OFFSET \
(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))




[PATCH v3 5/5] lockdep: Remove lockdep_hardirq{s_enabled,_context}() argument

2020-05-29 Thread Peter Zijlstra
Now that the macros use per-cpu data, we no longer need the argument.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/x86/entry/common.c|2 +-
 include/linux/irqflags.h   |8 
 include/linux/lockdep.h|2 +-
 kernel/locking/lockdep.c   |   30 +++---
 kernel/softirq.c   |2 +-
 tools/include/linux/irqflags.h |4 ++--
 6 files changed, 24 insertions(+), 24 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -689,7 +689,7 @@ noinstr void idtentry_exit_user(struct p
 
 noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
 {
-   bool irq_state = lockdep_hardirqs_enabled(current);
+   bool irq_state = lockdep_hardirqs_enabled();
 
__nmi_enter();
lockdep_hardirqs_off(CALLER_ADDR0);
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -40,9 +40,9 @@ DECLARE_PER_CPU(int, hardirq_context);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p)(this_cpu_read(hardirq_context))
+# define lockdep_hardirq_context() (this_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p)   (this_cpu_read(hardirqs_enabled))
+# define lockdep_hardirqs_enabled()(this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)   ((p)->softirqs_enabled)
 # define lockdep_hardirq_enter()   \
 do {   \
@@ -109,9 +109,9 @@ do {\
 # define trace_hardirqs_off_finish()   do { } while (0)
 # define trace_hardirqs_on()   do { } while (0)
 # define trace_hardirqs_off()  do { } while (0)
-# define lockdep_hardirq_context(p)0
+# define lockdep_hardirq_context() 0
 # define lockdep_softirq_context(p)0
-# define lockdep_hardirqs_enabled(p)   0
+# define lockdep_hardirqs_enabled()0
 # define lockdep_softirqs_enabled(p)   0
 # define lockdep_hardirq_enter()   do { } while (0)
 # define lockdep_hardirq_threaded()do { } while (0)
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -736,7 +736,7 @@ do {
\
 
 # define lockdep_assert_RT_in_threaded_ctx() do {  \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- lockdep_hardirq_context(current) &&   \
+ lockdep_hardirq_context() &&  \
  !(current->hardirq_threaded || current->irq_config),  
\
  "Not in threaded context on PREEMPT_RT as 
expected\n");   \
 } while (0)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_str
pr_warn("-\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
-   lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+   lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
-   lockdep_hardirqs_enabled(curr),
+   lockdep_hardirqs_enabled(),
curr->softirqs_enabled);
print_lock(next);
 
@@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr
 
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
-   lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+   lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
-   lockdep_hardirqs_enabled(curr),
+   lockdep_hardirqs_enabled(),
lockdep_softirqs_enabled(curr));
print_lock(this);
 
@@ -3655,7 +3655,7 @@ void lockdep_hardirqs_on_prepare(unsigne
if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & 
LOCKDEP_RECURSION_MASK))
return;
 
-   if (unlikely(lockdep_hardirqs_enabled(current))) {
+   if (unlikely(lockdep_hardirqs_enabled())) {
/*
 * Neither irq nor preemption are disabled here
 * so this is racy by nature but losing one hit
@@ -3683,7 +3683,7 @@ void lockdep_hardirqs_on_prepare(unsigne
 * Can't allow enabling interrupts while in an interrupt handler,
 * that's general bad form and such. Recursion, limited stack etc..
 */
-   if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current)))
+   if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()))
return;
 
curren

[RFC][PATCH v3 1/5] sparc64: Fix asm/percpu.h build error

2020-05-29 Thread Peter Zijlstra
In order to break a header dependency between lockdep and task_struct,
I need per-cpu stuff from lockdep.

Including  from lockdep.h gives a build error, this
patch cures that, but results in the following warning:

../arch/sparc/include/asm/percpu_64.h:7:24: warning: call-clobbered register 
used for global register variable
register unsigned long __local_per_cpu_offset asm("g5");

But i've no idea how to fix that :/ but it does build.

Not-Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/sparc/include/asm/trap_block.h |2 ++
 1 file changed, 2 insertions(+)

--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_TRAP_BLOCK_H
 #define _SPARC_TRAP_BLOCK_H
 
+#include 
+
 #include 
 #include 
 




Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro

2020-05-29 Thread Andy Shevchenko
On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris  wrote:
> On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko
>  wrote:
> > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote:
> > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot  wrote:

...

> > > Taking the example statement (in my patch) where compilation warning
> > > is getting reported:
> > > return (map[index] >> offset) & GENMASK(nbits - 1, 0);
> > >
> > > 'nbits' is of type 'unsigned long'.
> > > In above, the sanity check is comparing '0' with unsigned value. And
> > > unsigned value can't be less than '0' ever, hence the warning.
> > > But this warning will occur whenever there will be '0' as one of the
> > > 'argument' and an unsigned variable as another 'argument' for GENMASK.

> > Proper fix is to fix GENMASK(), but allowed workaround is to use
> > (BIT(nbits) - 1)
> > instead.

> When I used BIT macro (earlier), I had faced a problem. I want to tell
> you about that.
>
> Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or
> clump size) is BITS_PER_LONG, unexpected calculation happens.
>
> Explanation:
> Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer),
> (BIT(nbits) - 1)
> gives a value of zero and when this zero is ANDed with any value, it
> makes it full zero. This is unexpected and incorrect calculation happening.
>
> What actually happens is in the macro expansion of BIT(64), that is 1
> << 64, the '1' overflows from leftmost bit position (most significant
> bit) and re-enters at the rightmost bit position (least significant
> bit), therefore 1 << 64 becomes '0x1', and when another '1' is
> subtracted from this, the final result becomes 0.
>
> Since this macro is being used in both bitmap_get_value and
> bitmap_set_value functions, it will give unexpected results when nbits or 
> clump
> size is BITS_PER_LONG (32 or 64 depending on arch).

I see, something like
https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139
should be done.
But yes, let's try to fix GENMASK().

So, if we modify the following

  #define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
__builtin_constant_p((l) > (h)), (l) > (h), 0)))

to be

  #define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
__builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0)))

would it work?

> William also knows about this issue:
> "This is undefined behavior in the C standard (section 6.5.7 in the N1124)"

I think it is about 6.5.7.3  here, 1U << 31 (or 63) is okay.

--
With Best Regards,
Andy Shevchenko


RE: [EXTERNAL] Re: [PATCH v4] perf inject --jit: Remove //anon mmap events

2020-05-29 Thread Steve MacLean
>/tmp/perf/perf record -k 1 -e cycles:u -o /tmp/perf.data java 
>-agentpath:/tmp/perf/libperf-jvmti.so -XX:+PreserveFramePointer 
>-XX:InitialCodeCacheSize=20M -XX:ReservedCodeCacheSize=1G -jar 
>dacapo-9.12-bach.jar jython 

Its also possible the `InitialCodeCacheSize=20M` argument is masking the 
problem.  Something like 4K would make the problem much easier to see.

I see the problem every time .NET grows the cache across a 64K page boundary.  
20M may mean you are allocating huge pages or something which might mask the 
problem. We may be allocating code pages 64K at a time.



Re: [PATCH v2 1/4] dt-bindings: pinctrl: Document 7211 compatible for brcm, bcm2835-gpio.txt

2020-05-29 Thread Rob Herring
On Fri, 29 May 2020 12:15:19 -0700, Florian Fainelli wrote:
> Document the brcm,bcm7211-gpio compatible string in the
> brcm,bcm2835-gpio.txt document.
> 
> Signed-off-by: Florian Fainelli 
> ---
>  Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt | 1 +
>  1 file changed, 1 insertion(+)
> 

Acked-by: Rob Herring 


[PATCH v3 0/5] lockdep: Change IRQ state tracking to use per-cpu variables

2020-05-29 Thread Peter Zijlstra
Ahmed and Sebastian wanted additional lockdep_assert*() macros and ran
into header hell.

Move the IRQ state into per-cpu variables, which removes the dependency on
task_struct, which is what generated the header-hell.

These patches are intended to go on top of:

 https://lkml.kernel.org/r/20200529212728.795169...@infradead.org

but should apply on current tip/master without much difficulty.

There are a few build fixes for Sparc64, PowerPC64 and s390. Especially the
Sparc one I'm not sure about.



[PATCH v3 2/5] powerpc64: Break asm/percpu.h vs spinlock_types.h dependency

2020-05-29 Thread Peter Zijlstra
In order to use  in lockdep.h, we need to make sure
asm/percpu.h does not itself depend on lockdep.

The below seems to make that so and builds powerpc64-defconfig +
PROVE_LOCKING.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/powerpc/include/asm/dtl.h |   52 +
 arch/powerpc/include/asm/lppaca.h  |   44 ---
 arch/powerpc/kernel/time.c |1 
 arch/powerpc/kvm/book3s_hv.c   |1 
 arch/powerpc/platforms/pseries/dtl.c   |1 
 arch/powerpc/platforms/pseries/lpar.c  |1 
 arch/powerpc/platforms/pseries/setup.c |1 
 arch/powerpc/platforms/pseries/svm.c   |1 
 8 files changed, 58 insertions(+), 44 deletions(-)

--- /dev/null
+++ b/arch/powerpc/include/asm/dtl.h
@@ -0,0 +1,52 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include 
+#include 
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+   u8  dispatch_reason;
+   u8  preempt_reason;
+   __be16  processor_id;
+   __be32  enqueue_to_dispatch_time;
+   __be32  ready_to_enqueue_time;
+   __be32  waiting_to_ready_time;
+   __be64  timebase;
+   __be64  fault_addr;
+   __be64  srr0;
+   __be64  srr1;
+};
+
+#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */
+#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ *   0x1: voluntary virtual processor waits
+ *   0x2: time-slice preempts
+ *   0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE   0x1
+#define DTL_LOG_PREEMPT0x2
+#define DTL_LOG_FAULT  0x4
+#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log.  If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif /* _ASM_POWERPC_DTL_H */
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -42,7 +42,6 @@
  */
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -146,49 +145,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
 } cacheline_aligned;
 
-/*
- * Layout of entries in the hypervisor's dispatch trace log buffer.
- */
-struct dtl_entry {
-   u8  dispatch_reason;
-   u8  preempt_reason;
-   __be16  processor_id;
-   __be32  enqueue_to_dispatch_time;
-   __be32  ready_to_enqueue_time;
-   __be32  waiting_to_ready_time;
-   __be64  timebase;
-   __be64  fault_addr;
-   __be64  srr0;
-   __be64  srr1;
-};
-
-#define DISPATCH_LOG_BYTES 4096/* bytes per cpu */
-#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
-
-/*
- * Dispatch trace log event enable mask:
- *   0x1: voluntary virtual processor waits
- *   0x2: time-slice preempts
- *   0x4: virtual partition memory page faults
- */
-#define DTL_LOG_CEDE   0x1
-#define DTL_LOG_PREEMPT0x2
-#define DTL_LOG_FAULT  0x4
-#define DTL_LOG_ALL(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
-
-extern struct kmem_cache *dtl_cache;
-extern rwlock_t dtl_access_lock;
-
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log.  If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
-extern void register_dtl_buffer(int cpu);
-extern void alloc_dtl_buffers(unsigned long *time_limit);
 extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
 
 #endif /* CONFIG_PPC_BOOK3S */
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -69,6 +69,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* powerpc clocksource/clockevent code */
 
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pseries.h"
 
--- a/ar

Re: [RFC PATCH v4 07/10] vfio/pci: introduce a new irq type VFIO_IRQ_TYPE_REMAP_BAR_REGION

2020-05-29 Thread Alex Williamson
On Sun, 17 May 2020 22:52:45 -0400
Yan Zhao  wrote:

> This is a virtual irq type.
> vendor driver triggers this irq when it wants to notify userspace to
> remap PCI BARs.
> 
> 1. vendor driver triggers this irq and packs the target bar number in
>the ctx count. i.e. "1 << bar_number".
>if a bit is set, the corresponding bar is to be remapped.
> 
> 2. userspace requery the specified PCI BAR from kernel and if flags of
> the bar regions are changed, it removes the old subregions and attaches
> subregions according to the new flags.
> 
> 3. userspace notifies back to kernel by writing one to the eventfd of
> this irq.
> 
> Please check the corresponding qemu implementation from the reply of this
> patch, and a sample usage in vendor driver in patch [10/10].
> 
> Cc: Kevin Tian 
> Signed-off-by: Yan Zhao 
> ---
>  include/uapi/linux/vfio.h | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 2d0d85c7c4d4..55895f75d720 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -704,6 +704,17 @@ struct vfio_irq_info_cap_type {
>   __u32 subtype;  /* type specific */
>  };
>  
> +/* Bar Region Query IRQ TYPE */
> +#define VFIO_IRQ_TYPE_REMAP_BAR_REGION   (1)
> +
> +/* sub-types for VFIO_IRQ_TYPE_REMAP_BAR_REGION */
> +/*
> + * This irq notifies userspace to re-query BAR region and remaps the
> + * subregions.
> + */
> +#define VFIO_IRQ_SUBTYPE_REMAP_BAR_REGION(0)

Hi Yan,

How do we do this in a way that's backwards compatible?  Or maybe, how
do we perform a handshake between the vendor driver and userspace to
indicate this support?  Would the vendor driver refuse to change
device_state in the migration region if the user has not enabled this
IRQ?

Everything you've described in the commit log needs to be in this
header, we can't have the usage protocol buried in a commit log.  It
also seems like this is unnecessarily PCI specific.  Can't the count
bitmap simply indicate the region index to re-evaluate?  Maybe you were
worried about running out of bits in the ctx count?  An IRQ per region
could resolve that, but maybe we could also just add another IRQ for
the next bitmap of regions.  I assume that the bitmap can indicate
multiple regions to re-evaluate, but that should be documented.

Also, what sort of service requirements does this imply?  Would the
vendor driver send this IRQ when the user tries to set the device_state
to _SAVING and therefore we'd require the user to accept, implement the
mapping change, and acknowledge the IRQ all while waiting for the write
to device_state to return?  That implies quite a lot of asynchronous
support in the userspace driver.  Thanks,

Alex

> +
> +
>  /**
>   * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct 
> vfio_irq_set)
>   *



[PATCH v4 kunit-next 0/2] kunit: extend kunit resources API

2020-05-29 Thread Alan Maguire
A recent RFC patch set [1] suggests some additional functionality
may be needed around kunit resources.  It seems to require

1. support for resources without allocation
2. support for lookup of such resources
3. support for access to resources across multiple kernel threads

The proposed changes here are designed to address these needs.
The idea is we first generalize the API to support adding
resources with static data; then from there we support named
resources.  The latter support is needed because if we are
in a different thread context and only have the "struct kunit *"
to work with, we need a way to identify a resource in lookup.

[1] https://lkml.org/lkml/2020/2/26/1286

Changes since v3:
- removed unused "init" field from "struct kunit_resources" (Brendan)

Changes since v2:
 - moved a few functions relating to resource retrieval in patches
   1 and 2 into include/kunit/test.h and defined as "static inline";
   this allows built-in consumers to use these functions when KUnit
   is built as a module

Changes since v1:
 - reformatted longer parameter lists to have one parameter per-line
   (Brendan, patch 1)
 - fixed phrasing in various comments to clarify allocation of memory
   and added comment to kunit resource tests to clarify why
   kunit_put_resource() is used there (Brendan, patch 1)
 - changed #define to static inline function (Brendan, patch 2)
 - simplified kunit_add_named_resource() to use more of existing
   code for non-named resource (Brendan, patch 2)

Alan Maguire (2):
  kunit: generalize kunit_resource API beyond allocated resources
  kunit: add support for named resources

Alan Maguire (2):
  kunit: generalize kunit_resource API beyond allocated resources
  kunit: add support for named resources

 include/kunit/test.h  | 210 +++---
 lib/kunit/kunit-test.c| 111 +++-
 lib/kunit/string-stream.c |  14 ++--
 lib/kunit/test.c  | 171 ++---
 4 files changed, 380 insertions(+), 126 deletions(-)

-- 
1.8.3.1



[PATCH v4 kunit-next 1/2] kunit: generalize kunit_resource API beyond allocated resources

2020-05-29 Thread Alan Maguire
In its original form, the kunit resources API - consisting the
struct kunit_resource and associated functions - was focused on
adding allocated resources during test operation that would be
automatically cleaned up on test completion.

The recent RFC patch proposing converting KASAN tests to KUnit [1]
showed another potential model - where outside of test context,
but with a pointer to the test state, we wish to access/update
test-related data, but expressly want to avoid allocations.

It turns out we can generalize the kunit_resource to support
static resources where the struct kunit_resource * is passed
in and initialized for us. As part of this work, we also
change the "allocation" field to the more general "data" name,
as instead of associating an allocation, we can associate a
pointer to static data.  Static data is distinguished by a NULL
free functions.  A test is added to cover using kunit_add_resource()
with a static resource and data.

Finally we also make use of the kernel's krefcount interfaces
to manage reference counting of KUnit resources.  The motivation
for this is simple; if we have kernel threads accessing and
using resources (say via kunit_find_resource()) we need to
ensure we do not remove said resources (or indeed free them
if they were dynamically allocated) until the reference count
reaches zero.  A new function - kunit_put_resource() - is
added to handle this, and it should be called after a
thread using kunit_find_resource() is finished with the
retrieved resource.

We ensure that the functions needed to look up, use and
drop reference count are "static inline"-defined so that
they can be used by builtin code as well as modules in
the case that KUnit is built as a module.

A cosmetic change here also; I've tried moving to
kunit_[action]_resource() as the format of function names
for consistency and readability.

[1] https://lkml.org/lkml/2020/2/26/1286

Signed-off-by: Alan Maguire 
Reviewed-by: Brendan Higgins 
---
 include/kunit/test.h  | 156 +-
 lib/kunit/kunit-test.c|  74 --
 lib/kunit/string-stream.c |  14 ++---
 lib/kunit/test.c  | 153 -
 4 files changed, 268 insertions(+), 129 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 47e61e1..f9b914e 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct kunit_resource;
 
@@ -23,13 +24,19 @@
 
 /**
  * struct kunit_resource - represents a *test managed resource*
- * @allocation: for the user to store arbitrary data.
+ * @data: for the user to store arbitrary data.
  * @free: a user supplied function to free the resource. Populated by
- * kunit_alloc_resource().
+ * kunit_resource_alloc().
  *
  * Represents a *test managed resource*, a resource which will automatically be
  * cleaned up at the end of a test case.
  *
+ * Resources are reference counted so if a resource is retrieved via
+ * kunit_alloc_and_get_resource() or kunit_find_resource(), we need
+ * to call kunit_put_resource() to reduce the resource reference count
+ * when finished with it.  Note that kunit_alloc_resource() does not require a
+ * kunit_resource_put() because it does not retrieve the resource itself.
+ *
  * Example:
  *
  * .. code-block:: c
@@ -42,9 +49,9 @@
  * static int kunit_kmalloc_init(struct kunit_resource *res, void *context)
  * {
  * struct kunit_kmalloc_params *params = context;
- * res->allocation = kmalloc(params->size, params->gfp);
+ * res->data = kmalloc(params->size, params->gfp);
  *
- * if (!res->allocation)
+ * if (!res->data)
  * return -ENOMEM;
  *
  * return 0;
@@ -52,30 +59,26 @@
  *
  * static void kunit_kmalloc_free(struct kunit_resource *res)
  * {
- * kfree(res->allocation);
+ * kfree(res->data);
  * }
  *
  * void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp)
  * {
  * struct kunit_kmalloc_params params;
- * struct kunit_resource *res;
  *
  * params.size = size;
  * params.gfp = gfp;
  *
- * res = kunit_alloc_resource(test, kunit_kmalloc_init,
+ * return kunit_alloc_resource(test, kunit_kmalloc_init,
  * kunit_kmalloc_free, ¶ms);
- * if (res)
- * return res->allocation;
- *
- * return NULL;
  * }
  */
 struct kunit_resource {
-   void *allocation;
-   kunit_resource_free_t free;
+   void *data;
 
/* private: internal use only. */
+   kunit_resource_free_t free;
+   struct kref refcount;
struct list_head node;
 };
 
@@ -284,6 +287,64 @@ struct kunit_resource *kunit_alloc_and_get_resource(struct 
kunit *test,
void *context);

Re: [PATCH] ARM: omap2: fix omap5_realtime_timer_init definition

2020-05-29 Thread Tony Lindgren
* Arnd Bergmann  [200529 21:41]:
> On Fri, May 29, 2020 at 11:14 PM Tony Lindgren  wrote:
> > * Arnd Bergmann  [200529 21:09]:
> > >
> > > #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
> > > extern void omap5_realtime_timer_init(void);
> > > #else
> > > static inline void omap5_realtime_timer_init(void)
> > > {
> > > }
> > > #endif
> > >
> > > In fact, the inline stub is what that caused the regression,
> > > so I think it's ok with my patch.
> >
> > To me it seems not having SOC_HAS_REALTIME_COUNTER will
> > cause omap5_realtime_timer_init() not get called?
> 
> Correct, this looked to me like it was the intention of that
> symbol. Unfortunately there is no help text but it is user
> selectable:
> 
> config SOC_HAS_REALTIME_COUNTER
> bool "Real time free running counter"
> depends on SOC_OMAP5 || SOC_DRA7XX
> default y

Maybe this is a legacy Kconfig option since Santosh got
the cpuidle coupled to switch things to using the always on
timers for idle modes years ago already.

> > That initializes clocks and calls timer_probe(). So this
> > will result in non-booting system AFAIK, the header
> > file stub should no rely CONFIG_SOC_HAS_REALTIME_COUNTER
> > also, but rather ! CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX.
> >
> > Also the Makefile change at least seems wrong, that
> > can't rely on CONFIG_SOC_HAS_REALTIME_COUNTER.
> 
> How about just removing the prompt on
> CONFIG_SOC_HAS_REALTIME_COUNTER but keeping the
> rest of my patch? That way it's just always enabled when
> there is a chip that needs it enabled in the kernel config.
> 
> The only other usage of the symbol is
> 
> #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
> void set_cntfreq(void);
> #else
> static inline void set_cntfreq(void)
> {
> }
> #endif

Yeah it's already default y, so I'd say let's just get
rid of the option.

> Alternatively, we could just remove the Kconfig symbol
> altogether and rely on (SOC_OMAP5 || SOC_DRA7XX)
> everywhere, but that seems a little more fragile in case
> there is going to be another chip that needs it.

Sounds like we can just remove CONFIG_SOC_HAS_REALTIME_COUNTER
and rely on (SOC_OMAP5 || SOC_DRA7XX).

Regards,

Tony


[PATCH v4 kunit-next 2/2] kunit: add support for named resources

2020-05-29 Thread Alan Maguire
The kunit resources API allows for custom initialization and
cleanup code (init/fini); here a new resource add function sets
the "struct kunit_resource" "name" field, and calls the standard
add function.  Having a simple way to name resources is
useful in cases such as multithreaded tests where a set of
resources are shared among threads; a pointer to the
"struct kunit *" test state then is all that is needed to
retrieve and use named resources.  Support is provided to add,
find and destroy named resources; the latter two are simply
wrappers that use a "match-by-name" callback.

If an attempt to add a resource with a name that already exists
is made kunit_add_named_resource() will return -EEXIST.

Signed-off-by: Alan Maguire 
Reviewed-by: Brendan Higgins 
---
 include/kunit/test.h   | 54 ++
 lib/kunit/kunit-test.c | 37 ++
 lib/kunit/test.c   | 24 ++
 3 files changed, 115 insertions(+)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index f9b914e..59f3144 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -72,9 +72,15 @@
  * return kunit_alloc_resource(test, kunit_kmalloc_init,
  * kunit_kmalloc_free, ¶ms);
  * }
+ *
+ * Resources can also be named, with lookup/removal done on a name
+ * basis also.  kunit_add_named_resource(), kunit_find_named_resource()
+ * and kunit_destroy_named_resource().  Resource names must be
+ * unique within the test instance.
  */
 struct kunit_resource {
void *data;
+   const char *name;   /* optional name */
 
/* private: internal use only. */
kunit_resource_free_t free;
@@ -344,6 +350,21 @@ int kunit_add_resource(struct kunit *test,
   kunit_resource_free_t free,
   struct kunit_resource *res,
   void *data);
+
+/**
+ * kunit_add_named_resource() - Add a named *test managed resource*.
+ * @test: The test context object.
+ * @init: a user-supplied function to initialize the resource data, if needed.
+ * @free: a user-supplied function to free the resource data, if needed.
+ * @name_data: name and data to be set for resource.
+ */
+int kunit_add_named_resource(struct kunit *test,
+kunit_resource_init_t init,
+kunit_resource_free_t free,
+struct kunit_resource *res,
+const char *name,
+void *data);
+
 /**
  * kunit_alloc_resource() - Allocates a *test managed resource*.
  * @test: The test context object.
@@ -399,6 +420,19 @@ static inline bool kunit_resource_instance_match(struct 
kunit *test,
 }
 
 /**
+ * kunit_resource_name_match() - Match a resource with the same name.
+ * @test: Test case to which the resource belongs.
+ * @res: The resource.
+ * @match_name: The name to match against.
+ */
+static inline bool kunit_resource_name_match(struct kunit *test,
+struct kunit_resource *res,
+void *match_name)
+{
+   return res->name && strcmp(res->name, match_name) == 0;
+}
+
+/**
  * kunit_find_resource() - Find a resource using match function/data.
  * @test: Test case to which the resource belongs.
  * @match: match function to be applied to resources/match data.
@@ -427,6 +461,19 @@ static inline bool kunit_resource_instance_match(struct 
kunit *test,
 }
 
 /**
+ * kunit_find_named_resource() - Find a resource using match name.
+ * @test: Test case to which the resource belongs.
+ * @name: match name.
+ */
+static inline struct kunit_resource *
+kunit_find_named_resource(struct kunit *test,
+ const char *name)
+{
+   return kunit_find_resource(test, kunit_resource_name_match,
+  (void *)name);
+}
+
+/**
  * kunit_destroy_resource() - Find a kunit_resource and destroy it.
  * @test: Test case to which the resource belongs.
  * @match: Match function. Returns whether a given resource matches 
@match_data.
@@ -439,6 +486,13 @@ int kunit_destroy_resource(struct kunit *test,
   kunit_resource_match_t match,
   void *match_data);
 
+static inline int kunit_destroy_named_resource(struct kunit *test,
+  const char *name)
+{
+   return kunit_destroy_resource(test, kunit_resource_name_match,
+ (void *)name);
+}
+
 /**
  * kunit_remove_resource: remove resource from resource list associated with
  *   test.
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index 03f3eca..69f9024 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -325,6 +325,42 @@ static void kunit_resource_test_static(struct kunit *test)
KUNIT_EXPECT_TRUE(test, list_empty(&test->resources));

[PATCH v9 4/5] ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods

2020-05-29 Thread Vaibhav Jain
Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
module and add the command family NVDIMM_FAMILY_PAPR to the white list
of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the
nvdimm command mask and implement necessary scaffolding in the module
to handle ND_CMD_CALL ioctl and PDSM requests that we receive.

The layout of the PDSM request as we expect from libnvdimm/libndctl is
described in newly introduced uapi header 'papr_pdsm.h' which
defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
to communicate the PDSM request via member
'nd_cmd_pkg.nd_command' and size of payload that need to be
sent/received for servicing the PDSM.

A new function is_cmd_valid() is implemented that reads the args to
papr_scm_ndctl() and performs sanity tests on them. A new function
papr_scm_service_pdsm() is introduced and is called from
papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
command from libnvdimm.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v8..v9:
* Reduced the usage of term SCM replacing it with appropriate
  replacement [ Dan Williams, Aneesh ]
* Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
* s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
* s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
* Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
* Minor update to patch description.

v7..v8:
* Removed the 'payload_offset' field from 'struct
  nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
  at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
* To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
  'reserved' field of 10-bytes is introduced. [ Aneesh ]
* Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
  [ Ira ]

Resend:
* None

v6..v7 :
* Removed the re-definitions of __packed macro from papr_scm_pdsm.h
  [Mpe].
* Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
* Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
  [Mpe].
* Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]

v5..v6 :
* Changed the usage of the term DSM to PDSM to distinguish it from the
  ACPI term [ Dan Williams ]
* Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
  to reflect the new terminology.
* Updated the patch description and title to reflect the new terminology.
* Squashed patch to introduce new command family in 'ndctl.h' with
  this patch [ Dan Williams ]
* Updated the papr_scm_pdsm method starting index from 0x1 to 0x0
  [ Dan Williams ]
* Removed redundant license text from the papr_scm_psdm.h file.
  [ Dan Williams ]
* s/envelop/envelope/ at various places [ Dan Williams ]
* Added '__packed' attribute to command package header to gaurd
  against different compiler adding paddings between the fields.
  [ Dan Williams]
* Converted various pr_debug to dev_debug [ Dan Williams ]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Updated the patch prefix to 'ndctl/uapi' [Aneesh]

v1..v2 :
* None
---
 arch/powerpc/include/uapi/asm/papr_pdsm.h | 136 ++
 arch/powerpc/platforms/pseries/papr_scm.c | 101 +++-
 include/uapi/linux/ndctl.h|   1 +
 3 files changed, 232 insertions(+), 6 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h

diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h 
b/arch/powerpc/include/uapi/asm/papr_pdsm.h
new file mode 100644
index ..6407fefcc007
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020
+ *
+ * Author: Vaibhav Jain 
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+
+#include 
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
+ * envelope which consists of a header and user-defined payload sections.
+ * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
+ * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field.
+ * There is reserved field that can used to introduce new fields to the
+ * structure in future. It also tries to ensure that 'nd_pdsm_cmd_pkg.payload'
+ * lies at a 8-byte boundary.
+ *
+ *  +-+-+---+
+ *  |   64-Bytes  |   16-Bytes  |   Max 176-Bytes   |
+ *  +-+-+---+
+ *  |   nd_pdsm_cmd_pkg |   |
+ *  |-+ |   |
+ *  |  nd_cmd_pkg | |   |
+ *  +-+-+---+
+ *  | nd_family   | |   |
+ *  | nd_size

[PATCH v9 0/5] powerpc/papr_scm: Add support for reporting nvdimm health

2020-05-29 Thread Vaibhav Jain
Changes since v8 [1]:

* Updated proposed changes to remove usage of term 'SCM' due to
  ambiguity with 'PMEM' and 'NVDIMM'. [ Dan Williams ]
* Replaced the usage of term 'SCM' with 'PMEM' in most contexts.
  [ Aneesh ]
* Renamed NVDIMM health defines from PAPR_SCM_DIMM_* to PAPR_PMEM_* .
* Updates to various newly introduced identifiers in 'papr_scm.c'
  removing the 'SCM' prefix from their names.
* Renamed NVDIMM_FAMILY_PAPR_SCM to NVDIMM_FAMILY_PAPR
* Renamed PAPR_SCM_PDSM_HEALTH PAPR_PDSM_HEALTH
* Renamed uapi header 'papr_scm_pdsm.h' to 'papr_pdsm.h'.
* Renamed sysfs ABI document from sysfs-bus-papr-scm to
  sysfs-bus-papr-pmem.
* No behavioural changes from v8 [1].

[1] 
https://lore.kernel.org/linux-nvdimm/20200527041244.37821-1-vaib...@linux.ibm.com/
---

The PAPR standard[2][4] provides mechanisms to query the health and
performance stats of an NVDIMM via various hcalls as described in
Ref[3].  Until now these stats were never available nor exposed to the
user-space tools like 'ndctl'. This is partly due to PAPR platform not
having support for ACPI and NFIT. Hence 'ndctl' is unable to query and
report the dimm health status and a user had no way to determine the
current health status of a NDVIMM.

To overcome this limitation, this patch-set updates papr_scm kernel
module to query and fetch NVDIMM health stats using hcalls described
in Ref[3].  This health and performance stats are then exposed to
userspace via sysfs and PAPR-NVDIMM-Specific-Methods(PDSM) issued by
libndctl.

These changes coupled with proposed ndtcl changes located at Ref[5]
should provide a way for the user to retrieve NVDIMM health status
using ndtcl.

Below is a sample output using proposed kernel + ndctl for PAPR NVDIMM
in a emulation environment:

 # ndctl list -DH
[
  {
"dev":"nmem0",
"health":{
  "health_state":"fatal",
  "shutdown_state":"dirty"
}
  }
]

Dimm health report output on a pseries guest lpar with vPMEM or HMS
based NVDIMMs that are in perfectly healthy conditions:

 # ndctl list -d nmem0 -H
[
  {
"dev":"nmem0",
"health":{
  "health_state":"ok",
  "shutdown_state":"clean"
}
  }
]

PAPR NVDIMM-Specific-Methods(PDSM)
==

PDSM requests are issued by vendor specific code in libndctl to
execute certain operations or fetch information from NVDIMMS. PDSMs
requests can be sent to papr_scm module via libndctl(userspace) and
libnvdimm (kernel) using the ND_CMD_CALL ioctl command which can be
handled in the dimm control function papr_scm_ndctl(). Current
patchset proposes a single PDSM to retrieve NVDIMM health, defined in
the newly introduced uapi header named 'papr_pdsm.h'. Support for
more PDSMs will be added in future.

Structure of the patch-set
==

The patch-set starts with a doc patch documenting details of hcall
H_SCM_HEALTH. Second patch exports kernel symbol seq_buf_printf()
thats used in subsequent patches to generate sysfs attribute content.

Third patch implements support for fetching NVDIMM health information
from PHYP and partially exposing it to user-space via a NVDIMM sysfs
flag.

Fourth patches deal with implementing support for servicing PDSM
commands in papr_scm module.

Finally the last patch implements support for servicing PDSM
'PAPR_PDSM_HEALTH' that returns the NVDIMM health information to
libndctl.

References:
[2] "Power Architecture Platform Reference"
  https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference
[3] commit 58b278f568f0
 ("powerpc: Provide initial documentation for PAPR hcalls")
[4] "Linux on Power Architecture Platform Reference"
 https://members.openpowerfoundation.org/document/dl/469
[5] https://github.com/vaibhav92/ndctl/tree/papr_scm_health_v9

---

Vaibhav Jain (5):
  powerpc: Document details on H_SCM_HEALTH hcall
  seq_buf: Export seq_buf_printf
  powerpc/papr_scm: Fetch nvdimm health information from PHYP
  ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods
  powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH

 Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 ++
 Documentation/powerpc/papr_hcalls.rst |  46 ++-
 arch/powerpc/include/uapi/asm/papr_pdsm.h | 175 +
 arch/powerpc/platforms/pseries/papr_scm.c | 363 +-
 include/uapi/linux/ndctl.h|   1 +
 lib/seq_buf.c |   1 +
 6 files changed, 600 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem
 create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h

-- 
2.26.2



[PATCH v9 1/5] powerpc: Document details on H_SCM_HEALTH hcall

2020-05-29 Thread Vaibhav Jain
Add documentation to 'papr_hcalls.rst' describing the bitmap flags
that are returned from H_SCM_HEALTH hcall as per the PAPR-SCM
specification.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v8..v9:
* s/SCM/PMEM device. [ Dan Williams, Aneesh ]

v7..v8:
* Added a clarification on bit-ordering of Health Bitmap

Resend:
* None

v6..v7:
* None

v5..v6:
* New patch in the series
---
 Documentation/powerpc/papr_hcalls.rst | 46 ---
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/powerpc/papr_hcalls.rst 
b/Documentation/powerpc/papr_hcalls.rst
index 3493631a60f8..48fcf1255a33 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -220,13 +220,51 @@ from the LPAR memory.
 **H_SCM_HEALTH**
 
 | Input: drcIndex
-| Out: *health-bitmap, health-bit-valid-bitmap*
+| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)*
 | Return Value: *H_Success, H_Parameter, H_Hardware*
 
 Given a DRC Index return the info on predictive failure and overall health of
-the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive
-failure and health-bit-valid-bitmap indicate which bits in health-bitmap are
-valid.
+the PMEM device. The asserted bits in the health-bitmap indicate one or more 
states
+(described in table below) of the PMEM device and health-bit-valid-bitmap 
indicate
+which bits in health-bitmap are valid. The bits are reported in
+reverse bit ordering for example a value of 0xC400
+indicates bits 0, 1, and 5 are valid.
+
+Health Bitmap Flags:
+
++--+---+
+|  Bit |   Definition  
|
++==+===+
+|  00  | PMEM device is unable to persist memory contents. 
|
+|  | If the system is powered down, nothing will be saved. 
|
++--+---+
+|  01  | PMEM device failed to persist memory contents. Either contents were   
|
+|  | not saved successfully on power down or were not restored properly on 
|
+|  | power up. 
|
++--+---+
+|  02  | PMEM device contents are persisted from previous IPL. The data from   
|
+|  | the last boot were successfully restored. 
|
++--+---+
+|  03  | PMEM device contents are not persisted from previous IPL. There was 
no|
+|  | data to restore from the last boot.   
|
++--+---+
+|  04  | PMEM device memory life remaining is critically low   
|
++--+---+
+|  05  | PMEM device will be garded off next IPL due to failure
|
++--+---+
+|  06  | PMEM device contents cannot persist due to current platform health
|
+|  | status. A hardware failure may prevent data from being saved or   
|
+|  | restored. 
|
++--+---+
+|  07  | PMEM device is unable to persist memory contents in certain 
conditions|
++--+---+
+|  08  | PMEM device is encrypted  
|
++--+---+
+|  09  | PMEM device has successfully completed a requested erase or secure
|
+|  | erase procedure.  
|
++--+---+
+|10:63 | Reserved / Unused 
|
++--+---+
 
 **H_SCM_PERFORMANCE_STATS**
 
-- 
2.26.2



[PATCH v9 3/5] powerpc/papr_scm: Fetch nvdimm health information from PHYP

2020-05-29 Thread Vaibhav Jain
Implement support for fetching nvdimm health information via
H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair
of 64-bit bitmap, bitwise-and of which is then stored in
'struct papr_scm_priv' and subsequently partially exposed to
user-space via newly introduced dimm specific attribute
'papr/flags'. Since the hcall is costly, the health information is
cached and only re-queried, 60s after the previous successful hcall.

The patch also adds a  documentation text describing flags reported by
the the new sysfs attribute 'papr/flags' is also introduced at
Documentation/ABI/testing/sysfs-bus-papr-pmem.

[1] commit 58b278f568f0 ("powerpc: Provide initial documentation for
PAPR hcalls")

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v8..v9:
* Rename some variables and defines to reduce usage of term SCM
  replacing it with PMEM [Dan Williams, Aneesh]
* s/PAPR_SCM_DIMM/PAPR_PMEM/g
* s/papr_scm_nd_attributes/papr_nd_attributes/g
* s/papr_scm_nd_attribute_group/papr_nd_attribute_group/g
* s/papr_scm_dimm_attr_groups/papr_nd_attribute_groups/g
* Renamed file sysfs-bus-papr-scm to sysfs-bus-papr-pmem

v7..v8:
* Update type of variable 'rc' in __drc_pmem_query_health() and
  drc_pmem_query_health() to long and int respectively. [ Ira ]
* Updated the patch description to s/64 bit Big Endian Number/64-bit
  bitmap/ [ Ira, Aneesh ].

Resend:
* None

v6..v7 :
* Used the exported buf_seq_printf() function to generate content for
  'papr/flags'
* Moved the PAPR_SCM_DIMM_* bit-flags macro definitions to papr_scm.c
  and removed the papr_scm.h file [Mpe]
* Some minor consistency issued in sysfs-bus-papr-scm
  documentation. [Mpe]
* s/dimm_mutex/health_mutex/g [Mpe]
* Split drc_pmem_query_health() into two function one of which takes
  care of caching and locking. [Mpe]
* Fixed a local copy creation of dimm health information using
  READ_ONCE(). [Mpe]

v5..v6 :
* Change the flags sysfs attribute from 'papr_flags' to 'papr/flags'
  [Dan Williams]
* Include documentation for 'papr/flags' attr [Dan Williams]
* Change flag 'save_fail' to 'flush_fail' [Dan Williams]
* Caching of health bitmap to reduce expensive hcalls [Dan Williams]
* Removed usage of PPC_BIT from 'papr-scm.h' header [Mpe]
* Replaced two __be64 integers from papr_scm_priv to a single u64
  integer [Mpe]
* Updated patch description to reflect the changes made in this
  version.
* Removed avoidable usage of 'papr_scm_priv.dimm_mutex' from
  flags_show() [Dan Williams]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Removed PAPR_SCM_DIMM_HEALTH_NON_CRITICAL as a condition for
 NVDIMM unarmed [Aneesh]

v1..v2 :
* New patch in the series.
---
 Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 169 +-
 2 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem

diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem 
b/Documentation/ABI/testing/sysfs-bus-papr-pmem
new file mode 100644
index ..5b10d036a8d4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -0,0 +1,27 @@
+What:  /sys/bus/nd/devices/nmemX/papr/flags
+Date:  Apr, 2020
+KernelVersion: v5.8
+Contact:   linuxppc-dev , 
linux-nvd...@lists.01.org,
+Description:
+   (RO) Report flags indicating various states of a
+   papr-pmem NVDIMM device. Each flag maps to a one or
+   more bits set in the dimm-health-bitmap retrieved in
+   response to H_SCM_HEALTH hcall. The details of the bit
+   flags returned in response to this hcall is available
+   at 'Documentation/powerpc/papr_hcalls.rst' . Below are
+   the flags reported in this sysfs file:
+
+   * "not_armed"   : Indicates that NVDIMM contents will not
+ survive a power cycle.
+   * "flush_fail"  : Indicates that NVDIMM contents
+ couldn't be flushed during last
+ shut-down event.
+   * "restore_fail": Indicates that NVDIMM contents
+ couldn't be restored during NVDIMM
+ initialization.
+   * "encrypted"   : NVDIMM contents are encrypted.
+   * "smart_notify": There is health event for the NVDIMM.
+   * "scrubbed": Indicating that contents of the
+ NVDIMM have been scrubbed.
+   * "locked"  : Indicating that NVDIMM contents cant
+ be modified until next power cycle.
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380..149431594839 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_

Re: [PATCH v1] auxdisplay: charlcd: Reuse hex_to_bin() instead of custom code

2020-05-29 Thread Miguel Ojeda
Hi Andy,

On Mon, May 18, 2020 at 9:36 PM Andy Shevchenko
 wrote:
>
> hex_to_bin() may be used to convert hexdecimal digit to its binary
> representation.
>
> Signed-off-by: Andy Shevchenko 
> ---

Looks fine to me and the logic is simpler for the `esc` increase too.
Thanks for the cleanup! Were you able to test it, by any chance? I
will queue it up for -rc1.

 (Sidenote: it would be nice if `git patch` allowed for a full-to-full
comparison for patches like this since the unified format is harder to
read; I couldn't find the option from a quick look...).

Cheers,
Miguel


[PATCH v9 2/5] seq_buf: Export seq_buf_printf

2020-05-29 Thread Vaibhav Jain
'seq_buf' provides a very useful abstraction for writing to a string
buffer without needing to worry about it over-flowing. However even
though the API has been stable for couple of years now its still not
exported to kernel loadable modules limiting its usage.

Hence this patch proposes update to 'seq_buf.c' to mark
seq_buf_printf() which is part of the seq_buf API to be exported to
kernel loadable GPL modules. This symbol will be used in later parts
of this patch-set to simplify content creation for a sysfs attribute.

Cc: Piotr Maziarz 
Cc: Cezary Rojewski 
Cc: Christoph Hellwig 
Cc: Steven Rostedt 
Cc: Borislav Petkov 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v8..v9:
* None

v7..v8:
* Updated the patch title [ Christoph Hellwig ]
* Updated patch description to replace confusing term 'external kernel
  modules' to 'kernel lodable modules'.

Resend:
* Added ack from Steven Rostedt

v6..v7:
* New patch in the series
---
 lib/seq_buf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 4e865d42ab03..707453f5d58e 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -91,6 +91,7 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
 
return ret;
 }
+EXPORT_SYMBOL_GPL(seq_buf_printf);
 
 #ifdef CONFIG_BINARY_PRINTF
 /**
-- 
2.26.2



[PATCH v9 5/5] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH

2020-05-29 Thread Vaibhav Jain
This patch implements support for PDSM request 'PAPR_PDSM_HEALTH'
that returns a newly introduced 'struct nd_papr_pdsm_health' instance
containing dimm health information back to user space in response to
ND_CMD_CALL. This functionality is implemented in newly introduced
papr_pdsm_health() that queries the nvdimm health information and
then copies this information to the package payload whose layout is
defined by 'struct nd_papr_pdsm_health'.

The patch also introduces a new member 'struct papr_scm_priv.health'
thats an instance of 'struct nd_papr_pdsm_health' to cache the health
information of a nvdimm. As a result functions drc_pmem_query_health()
and flags_show() are updated to populate and use this new struct
instead of a u64 integer that was earlier used.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v8..v9:
* s/PAPR_SCM_PDSM_HEALTH/PAPR_PDSM_HEALTH/g  [ Dan , Aneesh ]
* s/PAPR_SCM_PSDM_DIMM_*/PAPR_PDSM_DIMM_*/g
* Renamed papr_scm_get_health() to papr_psdm_health()
* Updated patch description to replace papr-scm dimm with nvdimm.

v7..v8:
* None

Resend:
* None

v6..v7:
* Updated flags_show() to use seq_buf_printf(). [Mpe]
* Updated papr_scm_get_health() to use newly introduced
  __drc_pmem_query_health() bypassing the cache [Mpe].

v5..v6:
* Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to
  gaurd against possibility of different compilers adding different
  paddings to the struct [ Dan Williams ]

* Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of
  'bool' and also updated drc_pmem_query_health() to take this into
  account. [ Dan Williams ]

v4..v5:
* None

v3..v4:
* Call the DSM_PAPR_SCM_HEALTH service function from
  papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh]

v2..v3:
* Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types
  as its exported to the userspace [Aneesh]
* Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health
  from enum to #defines [Aneesh]

v1..v2:
* New patch in the series
---
 arch/powerpc/include/uapi/asm/papr_pdsm.h |  39 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 125 +++---
 2 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h 
b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 6407fefcc007..411725a91591 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -115,6 +115,7 @@ struct nd_pdsm_cmd_pkg {
  */
 enum papr_pdsm {
PAPR_PDSM_MIN = 0x0,
+   PAPR_PDSM_HEALTH,
PAPR_PDSM_MAX,
 };
 
@@ -133,4 +134,42 @@ static inline void *pdsm_cmd_to_payload(struct 
nd_pdsm_cmd_pkg *pcmd)
return (void *)(pcmd->payload);
 }
 
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY   0
+#define PAPR_PDSM_DIMM_UNHEALTHY 1
+#define PAPR_PDSM_DIMM_CRITICAL  2
+#define PAPR_PDSM_DIMM_FATAL 3
+
+/*
+ * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * dimm_unarmed: Dimm not armed. So contents wont persist.
+ * dimm_bad_shutdown   : Previous shutdown did not persist contents.
+ * dimm_bad_restore: Contents from previous shutdown werent restored.
+ * dimm_scrubbed   : Contents of the dimm have been scrubbed.
+ * dimm_locked : Contents of the dimm cant be modified until CEC reboot
+ * dimm_encrypted  : Contents of dimm are encrypted.
+ * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_
+ */
+struct nd_papr_pdsm_health_v1 {
+   __u8 dimm_unarmed;
+   __u8 dimm_bad_shutdown;
+   __u8 dimm_bad_restore;
+   __u8 dimm_scrubbed;
+   __u8 dimm_locked;
+   __u8 dimm_encrypted;
+   __u16 dimm_health;
+} __packed;
+
+/*
+ * Typedef the current struct for dimm_health so that any application
+ * or kernel recompiled after introducing a new version automatically
+ * supports the new version.
+ */
+#define nd_papr_pdsm_health nd_papr_pdsm_health_v1
+
+/* Current version number for the dimm health struct */
+#define ND_PAPR_PDSM_HEALTH_VERSION 1
+
 #endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 5e2237e7ec08..c0606c0c659c 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -88,7 +88,7 @@ struct papr_scm_priv {
unsigned long lasthealth_jiffies;
 
/* Health information for the dimm */
-   u64 health_bitmap;
+   struct nd_papr_pdsm_health health;
 };
 
 static int drc_pmem_bind(struct papr_scm_priv *p)
@@ -201,6 +201,7 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
 static int __drc_pmem_query_health(struct papr_scm_priv *p)
 {
unsigned long ret[PLPAR_HCALL_BUFSIZE];
+   u64 health;
long rc;
 
 

Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro

2020-05-29 Thread Syed Nayyar Waris
On Sat, May 30, 2020 at 3:01 AM William Breathitt Gray
 wrote:
>
> On Sat, May 30, 2020 at 01:32:44AM +0530, Syed Nayyar Waris wrote:
> > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko
> >  wrote:
> > >
> > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote:
> > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot  
> > > > wrote:
> > >
> > > ...
> > >
> > > > >579  static inline unsigned long bitmap_get_value(const unsigned 
> > > > > long *map,
> > > > >580unsigned long 
> > > > > start,
> > > > >581unsigned long 
> > > > > nbits)
> > > > >582  {
> > > > >583  const size_t index = BIT_WORD(start);
> > > > >584  const unsigned long offset = start % BITS_PER_LONG;
> > > > >585  const unsigned long ceiling = roundup(start + 1, 
> > > > > BITS_PER_LONG);
> > > > >586  const unsigned long space = ceiling - start;
> > > > >587  unsigned long value_low, value_high;
> > > > >588
> > > > >589  if (space >= nbits)
> > > > >  > 590  return (map[index] >> offset) & GENMASK(nbits 
> > > > > - 1, 0);
> > > > >591  else {
> > > > >592  value_low = map[index] & 
> > > > > BITMAP_FIRST_WORD_MASK(start);
> > > > >593  value_high = map[index + 1] & 
> > > > > BITMAP_LAST_WORD_MASK(start + nbits);
> > > > >594  return (value_low >> offset) | (value_high << 
> > > > > space);
> > > > >595  }
> > > > >596  }
> > >
> > > > Regarding the above compilation warnings. All the warnings are because
> > > > of GENMASK usage in my patch.
> > > > The warnings are coming because of sanity checks present for 'GENMASK'
> > > > macro in include/linux/bits.h.
> > > >
> > > > Taking the example statement (in my patch) where compilation warning
> > > > is getting reported:
> > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0);
> > > >
> > > > 'nbits' is of type 'unsigned long'.
> > > > In above, the sanity check is comparing '0' with unsigned value. And
> > > > unsigned value can't be less than '0' ever, hence the warning.
> > > > But this warning will occur whenever there will be '0' as one of the
> > > > 'argument' and an unsigned variable as another 'argument' for GENMASK.
> > > >
> > > > This warning is getting cleared if I cast the 'nbits' to 'long'.
> > > >
> > > > Let me know if I should submit a next patch with the casts applied.
> > > > What do you guys think?
> > >
> > > Proper fix is to fix GENMASK(), but allowed workaround is to use
> > > (BIT(nbits) - 1)
> > > instead.
> > >
> > > --
> > > With Best Regards,
> > > Andy Shevchenko
> > >
> >
> > Hi Andy. Thank You for your comment.
> >
> > When I used BIT macro (earlier), I had faced a problem. I want to tell
> > you about that.
> >
> > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or
> > clump size) is BITS_PER_LONG, unexpected calculation happens.
> >
> > Explanation:
> > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer),
> > (BIT(nbits) - 1)
> > gives a value of zero and when this zero is ANDed with any value, it
> > makes it full zero. This is unexpected and incorrect calculation happening.
> >
> > What actually happens is in the macro expansion of BIT(64), that is 1
> > << 64, the '1' overflows from leftmost bit position (most significant
> > bit) and re-enters at the rightmost bit position (least significant
> > bit), therefore 1 << 64 becomes '0x1', and when another '1' is
> > subtracted from this, the final result becomes 0.
> >
> > Since this macro is being used in both bitmap_get_value and
> > bitmap_set_value functions, it will give unexpected results when nbits or 
> > clump
> > size is BITS_PER_LONG (32 or 64 depending on arch).
> >
> > William also knows about this issue:
> >
> > "This is undefined behavior in the C standard (section 6.5.7 in the N1124)"
> >
> > Andy, William,
> > Let me know what do you think ?
> >
> > Regards
> > Syed Nayyar Waris
>
> We can't use BIT here because nbits could be equal to BITS_PER_LONG in
> some cases. Casting to long should be fine because the nbits will never
> be greater than BITS_PER_LONG, so long should be safe to use.
>
> However, I agree with Andy that the proper solution is to fix GENMASK so
> that this warning does not come up. What's the actual line of code in
> the GENMASK macro that is throwing this warning? I'd like to understand
> better the logic of this sanity check.
>
> William Breathitt Gray

Here is the code snippet:

#define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
__builtin_constant_p((l) > (h)), (l) > (h), 0)))

Above you can see the comparisons are being done in the last line.
And because of these comparisons, those compilation warnings are generated.

For full code related to GENMASK macro:
h

Re: [PATCH v2] arm: dts: Move am33xx and am43xx mmc nodes to sdhci-omap driver

2020-05-29 Thread Tony Lindgren
* Tony Lindgren  [200527 16:06]:
> * Tony Lindgren  [200519 09:05]:
> > * Tomi Valkeinen  [200519 15:55]:
> > > (Dropping DT from cc)
> > > 
> > > On 19/05/2020 18:48, Tony Lindgren wrote:
> > > 
> > > > > > Suspend/resume on am43xx-gpevm is broken right now in mainline and 
> > > > > > the regression looks
> > > > > > like it is caused by the display subsystem. I have reported this to 
> > > > > > Tomi and
> > > > > > its being investigated.
> > > > > > 
> > > > > > Meanwhile I have tested this patch with display configs disabled 
> > > > > > and Keerthy's
> > > > > > suspend/resume tests pass on both am3 and am4.
> > > > 
> > > > OK great thanks for checking it. Do you have the display subsystem
> > > > related commit that broke PM? I'm wondering if my recent DSS platform
> > > > data removal changes might have caused the regression.
> > > 
> > > I spent a bit time looking at this, but unfortunately I wasn't even able 
> > > to
> > > resume my AM4 evm from suspend. I tried with rtcwake and with plain 
> > > console
> > > (with no_console_suspend). I did not have DSS loaded.
> > 
> > My test-bbb-suspend script seems to have:
> > 
> > sudo modprobe wkup_m3_ipc
> > sudo modprobe pm33xx
> > sudo modprobe rtc-omap
> > rtcwake -m mem -s 5
> > 
> > I think the same should work for am437x. But some boards do not support
> > deep sleep like am437x-idk.
> > 
> > > Anyone have quick hints on how to debug why resume doesn't seem to happen?
> > 
> > You might get some info with no_console_suspend, but that might also
> > cause other issues.
> 
> To me it seems we may have some dss clock missing with the ti-sysc dts
> changes that makes resume fail. Or else there is some ordering issue
> between the dss components now on resume, I'll try to debug more.

Looks like we now set the CLOCKATIVITY bit while earlier we did not
set it except for HWMOD_SET_DEFAULT_CLOCKACT cases. I've also found
few other minor issues, but I'm still seeing resume fail for DSS.
The clock and sysconfig registers look just fine, so getting closer.

Regards,

Tony


Re: [PATCH 1/2] dt-bindings: chrome: Add cros-ec-typec mux props

2020-05-29 Thread Rob Herring
On Tue, May 12, 2020 at 7:42 AM Heikki Krogerus
 wrote:
>
> Hi guys,
>
> On Mon, May 11, 2020 at 01:46:35PM -0700, Prashant Malani wrote:
> > Hi Rob,
> >
> > Thank you for reviewing the patch. Kindly see my comments inline:
> >
> > On Mon, May 11, 2020 at 02:28:00PM -0500, Rob Herring wrote:
> > > On Wed, Apr 22, 2020 at 03:22:39PM -0700, Prashant Malani wrote:
> > > > Add properties for mode, orientation and USB data role switches for
> > > > Type C connectors. When available, these will allow the Type C connector
> > > > class port driver to configure the various switches according to USB PD
> > > > information (like orientation, alt mode etc.) provided by the Chrome OS
> > > > EC controller.
> > > >
> > > > Signed-off-by: Prashant Malani 
> > > > ---
> > > >  .../bindings/chrome/google,cros-ec-typec.yaml | 27 ++-
> > > >  1 file changed, 26 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git 
> > > > a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml 
> > > > b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
> > > > index 6d7396ab8bee..b5814640aa32 100644
> > > > --- a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
> > > > +++ b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
> > > > @@ -21,7 +21,21 @@ properties:
> > > >  const: google,cros-ec-typec
> > > >
> > > >connector:
> > > > -$ref: /schemas/connector/usb-connector.yaml#
> > > > +allOf:
> > > > +  - $ref: /schemas/connector/usb-connector.yaml#
> > > > +  - type: object
> > > > +properties:
> > >
> > > These don't seem CrOS EC specific, so why document them as such.
> >
> > Are you referring to the "mode-switch", "orientation-switch" and
> > "usb-role-switch" properties? If so, then yes, they aren't Cros EC
> > specific. The Type C connector class framework requires the nodes to be
> > named like this, and the cros-ec-typec driver uses this framework, hence
> > the description here (the Type C connector class framework doesn't have
> > any bindings).
> >
> > Would it be better to add in the description string that Type Connector
> > class expects these switches to be named this way? :
> >
> > " Reference to a DT node for the USB Type C Multiplexer controlling the
> > data lines routing for this connector. This switch is assumed registered
> > with the Type C connector class framework, which requires it to be named
> > this way."
> > >
> > > > +  mode-switch:
> > > > +description: Reference to a DT node for the USB Type C 
> > > > Multiplexer
> > > > +  controlling the data lines routing for this connector.
> > >
> > > This is for alternate mode muxing I presume.
> >
> > Yes, that's right.
> > >
> > > We already have a mux-control binding. Why not use that here?
> >
> > Heikki might be able to offer more insight into why this is the case,
> > since the connector class framework seems to expect a phandle and for
> > the device driver to implement a "set" command. Heikki, would you happen to 
> > know?
>
> The mode-switch here would actually represent the "consumer" part in
> the mux-control bindings. So the mux-controls would describe the
> relationship between the "mode-switch" and the mux controller(s),
> while the mode-switch property describes the relationship between
> something like USB Type-C Port Manager (or this cros_ec function) and
> the "mux consumer".

The "USB Type-C Port Manager" is not just the parent node in your case?

Can you point me to what you expect your DT to look like showing the
mode switch node, the connector, the USB host(s), and the DP/HDMI
bridge/output?

Rob


[PATCH 00/27] afs: Improvements

2020-05-29 Thread David Howells


Here's a set of patches to make a number of improvements to the AFS driver:

 (1) Improve callback (ie. third party change notification) processing by:

 (a) Relying more on the fact we're doing this under RCU and by using
 fewer locks.

 This involves making the inode hash table RCU safe and providing
 some RCU-safe accessor functions.  The search can then be done
 without taking the inode_hash_lock.  Care must be taken because
 the object may be being deleted and no wait is made.

 This is also used to improve Ext4's time updating.  Konstantin
 Khlebnikov said "For now, I've plugged this issue with try-lock in
 ext4 lazy time update.  This solution is much better."

 (b) Moving to keeping volumes in a tree indexed by volume ID rather
 than a flat list.

 (c) Making the server and volume records logically part of the cell.
 This means that a server record now points directly at the cell
 and the tree of volumes is there.  This removes an N:M mapping
 table, simplifying things.

 (2) Improve keeping NAT or firewall channels open for the server callbacks
 to reach the client by actively polling the fileserver on a timed
 basis, instead of only doing it when we have an operation to process.

 (3) Improving detection of delayed or lost callbacks by including the
 parent directory in the list of file IDs to be queried when doing a
 bulk status fetch from lookup.  We can then check to see if our copy
 of the directory has changed under us without us getting notified.

 (4) Determine aliasing of cells (such as a cell that is pointed to be a
 DNS alias).  This allows us to avoid having ambiguity due to
 apparently different cells using the same volume and file servers.

 (5) Improve the fileserver rotation to do more probing when it detects
 that all of the addresses to a server are listed as non-responsive.
 It's possible that an address that previously stopped responding has
 become responsive again.

Beyond that, lay some foundations for making some calls asynchronous:

 (1) Turn the fileserver cursor struct into a general operation struct and
 hang the parameters off of that rather than keeping them in local
 variables and hang results off of that rather than the call struct.

 (2) Implement some general operation handling code and simplify the
 callers of operations that affect a volume or a volume component (such
 as a file).  Most of the operation is now done by core code.

 (3) Operations are supplied with a table of operations to issue different
 variants of RPCs and to manage the completion, where all the required
 data is held in the operation object, thereby allowing these to be
 called from a workqueue.

 (4) Put the standard "if (begin), while(select), call op, end" sequence
 into a canned function that just emulates the current behaviour for
 now.

There are also some fixes interspersed:

 (1) Don't let the EACCES from ICMP6 mapping reach the user as such, since
 it's confusing as to whether it's a filesystem error.  Convert it to
 EHOSTUNREACH.

 (2) Don't use the epoch value acquired through probing a server.  If we
 have two servers with the same UUID but in different cells, it's hard
 to draw conclusions from them having different epoch values.

 (3) Don't interpret the argument to the CB.ProbeUuid RPC as a fileserver
 UUID and look up a fileserver from it.

 (4) Deal with servers in different cells having the same UUIDs.  In the
 event that a CB.InitCallBackState3 RPC is received, we have to break
 the callback promises for every server record matching that UUID.

 (5) Don't let afs_statfs return values that go below 0.

 (6) Don't use running fileserver probe state to make server selection and
 address selection decisions on.  Only make decisions on final state as
 the running state is cleared at the start of probing.

The patches are here:


http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=afs-next

David
---
David Howells (1):
  afs: Adjust the fileserver rotation algorithm to reprobe/retry more 
quickly


 fs/afs/Makefile|2 +
 fs/afs/afs.h   |3 +-
 fs/afs/afs_vl.h|1 +
 fs/afs/callback.c  |  345 
 fs/afs/cell.c  |   10 +-
 fs/afs/cmservice.c |   67 +--
 fs/afs/dir.c   | 1253 --
 fs/afs/dir_silly.c |  190 +++
 fs/afs/dynroot.c   |   93 
 fs/afs/file.c  |   62 ++-
 fs/afs/flock.c |  114 ++--
 fs/afs/fs_operation.c  |  239 
 fs/afs/fs_probe.c  |  339 +---
 fs/afs/fsclient.c  | 1295 +---
 fs/afs/inode.c |  491 -
 fs/afs/internal.h  |  523 

[PATCH 01/27] vfs, afs, ext4: Make the inode hash table RCU searchable

2020-05-29 Thread David Howells
Make the inode hash table RCU searchable so that searches that want to
access or modify an inode without taking a ref on that inode can do so
without taking the inode hash table lock.

The main thing this requires is some RCU annotation on the list
manipulation operations.  Inodes are already freed by RCU in most cases.

Users of this interface must take care as the inode may be still under
construction or may be being torn down around them.

There are at least two instances where this can be of use:

 (1) Ext4 date stamp updating.

 (2) AFS callback breaking.

Signed-off-by: David Howells 
Acked-by: Konstantin Khlebnikov 
cc: linux-e...@vger.kernel.org
cc: linux-...@lists.infradead.org
---

 fs/afs/callback.c  |   12 +++-
 fs/ext4/inode.c|   44 +++--
 fs/inode.c |  173 
 include/linux/fs.h |3 +
 4 files changed, 179 insertions(+), 53 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 2dca8df1a18d..0dcbd40732d1 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -252,6 +252,7 @@ static void afs_break_one_callback(struct afs_server 
*server,
struct afs_vnode *vnode;
struct inode *inode;
 
+   rcu_read_lock();
read_lock(&server->cb_break_lock);
hlist_for_each_entry(vi, &server->cb_volumes, srv_link) {
if (vi->vid < fid->vid)
@@ -287,12 +288,16 @@ static void afs_break_one_callback(struct afs_server 
*server,
} else {
data.volume = NULL;
data.fid = *fid;
-   inode = ilookup5_nowait(cbi->sb, fid->vnode,
-   afs_iget5_test, &data);
+
+   /* See if we can find a matching inode - even an I_NEW
+* inode needs to be marked as it can have its callback
+* broken before we finish setting up the local inode.
+*/
+   inode = find_inode_rcu(cbi->sb, fid->vnode,
+  afs_iget5_test, &data);
if (inode) {
vnode = AFS_FS_I(inode);
afs_break_callback(vnode, 
afs_cb_break_for_callback);
-   iput(inode);
} else {
trace_afs_cb_miss(fid, 
afs_cb_break_for_callback);
}
@@ -301,6 +306,7 @@ static void afs_break_one_callback(struct afs_server 
*server,
 
 out:
read_unlock(&server->cb_break_lock);
+   rcu_read_unlock();
 }
 
 /*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..2bbb55d05bb7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4860,21 +4860,22 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0;
 }
 
-struct other_inode {
-   unsigned long   orig_ino;
-   struct ext4_inode   *raw_inode;
-};
-
-static int other_inode_match(struct inode * inode, unsigned long ino,
-void *data)
+static void __ext4_update_other_inode_time(struct super_block *sb,
+  unsigned long orig_ino,
+  unsigned long ino,
+  struct ext4_inode *raw_inode)
 {
-   struct other_inode *oi = (struct other_inode *) data;
+   struct inode *inode;
+
+   inode = find_inode_by_ino_rcu(sb, ino);
+   if (!inode)
+   return;
 
-   if ((inode->i_ino != ino) ||
-   (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+   if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
   I_DIRTY_INODE)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
-   return 0;
+   return;
+
spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_INODE)) == 0) &&
@@ -4885,16 +4886,15 @@ static int other_inode_match(struct inode * inode, 
unsigned long ino,
spin_unlock(&inode->i_lock);
 
spin_lock(&ei->i_raw_lock);
-   EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
-   EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
-   EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
-   ext4_inode_csum_set(inode, oi->raw_inode, ei);
+   EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+   EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+   EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
+   ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
-   trace_ext4_other_inode_update_time(inode, oi->orig_ino);
-   return -1;
+   trace_ext4_other_inode_update_time(inode, orig_ino);

[PATCH 03/27] rxrpc: Adjust /proc/net/rxrpc/calls to display call->debug_id not user_ID

2020-05-29 Thread David Howells
The user ID value isn't actually much use - and leaks a kernel pointer or a
userspace value - so replace it with the call debug ID, which appears in trace
points.

Signed-off-by: David Howells 
---

 net/rxrpc/proc.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 8b179e3c802a..543afd9bd664 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -68,7 +68,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 "Proto Local  "
 " Remote "
 " SvID ConnID   CallID   End Use StateAbort   "
-" UserID   TxSeqTW RxSeqRW RxSerial 
RxTimo\n");
+" DebugId  TxSeqTW RxSeqRW RxSerial RxTimo\n");
return 0;
}
 
@@ -100,7 +100,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void 
*v)
rx_hard_ack = READ_ONCE(call->rx_hard_ack);
seq_printf(seq,
   "UDP   %-47.47s %-47.47s %4x %08x %08x %s %3u"
-  " %-8.8s %08x %lx %08x %02x %08x %02x %08x %06lx\n",
+  " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
   lbuff,
   rbuff,
   call->service_id,
@@ -110,7 +110,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void 
*v)
   atomic_read(&call->usage),
   rxrpc_call_states[call->state],
   call->abort_code,
-  call->user_call_ID,
+  call->debug_id,
   tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack,
   rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack,
   call->rx_serial,




[PATCH 02/27] rxrpc: Map the EACCES error produced by some ICMP6 to EHOSTUNREACH

2020-05-29 Thread David Howells
Map the EACCES error that is produced by some ICMP6 packets to EHOSTUNREACH
when we get them as EACCES has other meanings within a filesystem context.

Signed-off-by: David Howells 
---

 net/rxrpc/peer_event.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index b1449d971883..112e490ebbcd 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -271,6 +271,9 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
break;
 
case SO_EE_ORIGIN_ICMP6:
+   if (err == EACCES)
+   err = EHOSTUNREACH;
+   /* Fall through */
default:
_proto("Rx Received error report { orig=%u }", ee->ee_origin);
break;




[PATCH 06/27] afs: Split the usage count on struct afs_server

2020-05-29 Thread David Howells
Split the usage count on the afs_server struct to have an active count that
registers who's actually using it separately from the reference count on
the object.

This allows a future patch to dispatch polling probes without advancing the
"unuse" time into the future each time we emit a probe, which would
otherwise prevent unused server records from expiring.

Included in this:

 (1) The latter part of afs_destroy_server() in which the RCU destruction
 of afs_server objects is invoked and the outstanding server count is
 decremented is split out into __afs_put_server().

 (2) afs_put_server() now calls __afs_put_server() rather then setting the
 management timer.

 (3) The calls begun by afs_fs_give_up_all_callbacks() and
 afs_fs_get_capabilities() can now take a ref on the server record, so
 afs_destroy_server() can just drop its ref and needn't wait for the
 completion of these calls.  They'll put the ref when they're done.

 (4) Because of (3), afs_fs_probe_done() no longer needs to wake up
 afs_destroy_server() with server->probe_outstanding.

 (5) afs_gc_servers can be simplified.  It only needs to check if
 server->active is 0 rather than playing games with the refcount.

 (6) afs_manage_servers() can propose a server for gc if usage == 0 rather
 than if ref == 1.  The gc is effected by (5).

Signed-off-by: David Howells 
---

 fs/afs/cmservice.c |4 +
 fs/afs/fs_probe.c  |1 
 fs/afs/fsclient.c  |5 +
 fs/afs/internal.h  |8 ++
 fs/afs/proc.c  |9 +--
 fs/afs/rxrpc.c |2 -
 fs/afs/server.c|  151 +---
 fs/afs/server_list.c   |4 +
 include/trace/events/afs.h |   18 +++--
 9 files changed, 131 insertions(+), 71 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 380ad5ace7cf..7dcbca3bf828 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -268,7 +268,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 * to maintain cache coherency.
 */
if (call->server) {
-   trace_afs_server(call->server, 
atomic_read(&call->server->usage),
+   trace_afs_server(call->server,
+atomic_read(&call->server->ref),
+atomic_read(&call->server->active),
 afs_server_trace_callback);
afs_break_callbacks(call->server, call->count, call->request);
}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 37d1bba57b00..d37d78eb84bd 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -16,7 +16,6 @@ static bool afs_fs_probe_done(struct afs_server *server)
if (!atomic_dec_and_test(&server->probe_outstanding))
return false;
 
-   wake_up_var(&server->probe_outstanding);
clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
return true;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index d2b3798c1932..3854d16e14b1 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1842,7 +1842,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
bp = call->request;
*bp++ = htonl(FSGIVEUPALLCALLBACKS);
 
-   /* Can't take a ref on server */
+   call->server = afs_use_server(server, afs_server_trace_give_up_cb);
afs_make_call(ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, ac);
 }
@@ -1924,7 +1924,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net 
*net,
return ERR_PTR(-ENOMEM);
 
call->key = key;
-   call->server = afs_get_server(server, afs_server_trace_get_caps);
+   call->server = afs_use_server(server, afs_server_trace_get_caps);
call->server_index = server_index;
call->upgrade = true;
call->async = true;
@@ -1934,7 +1934,6 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net 
*net,
bp = call->request;
*bp++ = htonl(FSGETCAPABILITIES);
 
-   /* Can't take a ref on server */
trace_afs_make_fs_call(call, NULL);
afs_make_call(ac, call, GFP_NOFS);
return call;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ee17c868ad2c..cb70e1c234cc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -498,7 +498,7 @@ struct afs_server {
struct hlist_node   addr6_link; /* Link in net->fs_addresses6 */
struct hlist_node   proc_link;  /* Link in net->fs_proc */
struct afs_server   *gc_next;   /* Next server in manager's 
list */
-   time64_tput_time;   /* Time at which last put */
+   time64_tunuse_time; /* Time at which last unused */
unsigned long   flags;
 #define AFS_SERVER_FL_NOT_READY1   /* The record is not 
ready for use */
 #define AFS_SERVER_FL_NOT_F

[PATCH 04/27] afs: Always include dir in bulk status fetch from afs_do_lookup()

2020-05-29 Thread David Howells
When a lookup is done in an AFS directory, the filesystem will speculate
and fetch up to 49 other statuses for files in the same directory and fetch
those as well, turning them into inodes or updating inodes that already
exist.

However, occasionally, a callback break might go missing due to NAT timing
out, but the afs filesystem doesn't then realise that the directory is not
up to date.

Alleviate this by using one of the status slots to check the directory in
which the lookup is being done.

Reported-by: Dave Botsch 
Suggested-by: Jeffrey Altman 
Signed-off-by: David Howells 
---

 fs/afs/dir.c |9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index d1e1caa23c8b..3c486340b220 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -658,7 +658,8 @@ static struct inode *afs_do_lookup(struct inode *dir, 
struct dentry *dentry,
 
cookie->ctx.actor = afs_lookup_filldir;
cookie->name = dentry->d_name;
-   cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
+   cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want
+ * and slot 1 for the directory */
 
read_seqlock_excl(&dvnode->cb_lock);
dcbi = rcu_dereference_protected(dvnode->cb_interest,
@@ -709,7 +710,11 @@ static struct inode *afs_do_lookup(struct inode *dir, 
struct dentry *dentry,
if (!cookie->inodes)
goto out_s;
 
-   for (i = 1; i < cookie->nr_fids; i++) {
+   cookie->fids[1] = dvnode->fid;
+   cookie->statuses[1].cb_break = afs_calc_vnode_cb_break(dvnode);
+   cookie->inodes[1] = igrab(&dvnode->vfs_inode);
+
+   for (i = 2; i < cookie->nr_fids; i++) {
scb = &cookie->statuses[i];
 
/* Find any inodes that already exist and get their




[PATCH 07/27] afs: Actively poll fileservers to maintain NAT or firewall openings

2020-05-29 Thread David Howells
When an AFS client accesses a file, it receives a limited-duration callback
promise that the server will notify it if another client changes a file.
This callback duration can be a few hours in length.

If a client mounts a volume and then an application prevents it from being
unmounted, say by chdir'ing into it, but then does nothing for some time,
the rxrpc_peer record will expire and rxrpc-level keepalive will cease.

If there is NAT or a firewall between the client and the server, the route
back for the server may close after a comparatively short duration, meaning
that attempts by the server to notify the client may then bounce.

The client, however, may (so far as it knows) still have a valid unexpired
promise and will then rely on its cached data and will not see changes made
on the server by a third party until it incidentally rechecks the status or
the promise needs renewal.

To deal with this, the client needs to regularly probe the server.  This
has two effects: firstly, it keeps a route open back for the server, and
secondly, it causes the server to disgorge any notifications that got
queued up because they couldn't be sent.

Fix this by adding a mechanism to emit regular probes.

Two levels of probing are made available: Under normal circumstances the
'slow' queue will be used for a fileserver - this just probes the preferred
address once every 5 mins or so; however, if server fails to respond to any
probes, the server will shift to the 'fast' queue from which all its
interfaces will be probed every 30s.  When it finally responds, the record
will switch back to the slow queue.

Further notes:

 (1) Probing is now no longer driven from the fileserver rotation
 algorithm.

 (2) Probes are dispatched to all interfaces on a fileserver when that an
 afs_server object is set up to record it.

 (3) The afs_server object is removed from the probe queues when we start
 to probe it.  afs_is_probing_server() returns true if it's not listed
 - ie. it's undergoing probing.

 (4) The afs_server object is added back on to the probe queue when the
 final outstanding probe completes, but the probed_at time is set when
 we're about to launch a probe so that it's not dependent on the probe
 duration.

 (5) The timer and the work item added for this must be handed a count on
 net->servers_outstanding, which they hand on or release.  This makes
 sure that network namespace cleanup waits for them.

Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and 
fileserver rotation")
Reported-by: Dave Botsch 
Signed-off-by: David Howells 
---

 fs/afs/cmservice.c |2 
 fs/afs/fs_probe.c  |  277 +---
 fs/afs/fsclient.c  |   21 ++-
 fs/afs/internal.h  |   41 +--
 fs/afs/main.c  |5 +
 fs/afs/rotate.c|7 -
 fs/afs/server.c|   19 +--
 fs/afs/volume.c|   24 ++--
 include/trace/events/afs.h |4 +
 9 files changed, 281 insertions(+), 119 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 7dcbca3bf828..7ae88958051f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -157,7 +157,7 @@ static int afs_record_cm_probe(struct afs_call *call, 
struct afs_server *server)
_enter("");
 
if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
-   !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+   !afs_is_probing_server(server)) {
if (server->cm_epoch == call->epoch)
return 0;
 
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index d37d78eb84bd..442b5e7944ff 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS fileserver probing
  *
- * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowe...@redhat.com)
  */
 
@@ -11,14 +11,83 @@
 #include "internal.h"
 #include "protocol_yfs.h"
 
-static bool afs_fs_probe_done(struct afs_server *server)
+static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
+static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+
+/*
+ * Start the probe polling timer.  We have to supply it with an inc on the
+ * outstanding server count.
+ */
+static void afs_schedule_fs_probe(struct afs_net *net,
+ struct afs_server *server, bool fast)
+{
+   unsigned long atj;
+
+   if (!net->live)
+   return;
+
+   atj = server->probed_at;
+   atj += fast ? afs_fs_probe_fast_poll_interval : 
afs_fs_probe_slow_poll_interval;
+
+   afs_inc_servers_outstanding(net);
+   if (timer_reduce(&net->fs_probe_timer, atj))
+   afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_

[PATCH 09/27] afs: Make callback processing more efficient.

2020-05-29 Thread David Howells
afs_vol_interest objects represent the volume IDs currently being accessed
from a fileserver.  These hold lists of afs_cb_interest objects that
repesent the superblocks using that volume ID on that server.

When a callback notification from the server telling of a modification by
another client arrives, the volume ID specified in the notification is
looked up in the server's afs_vol_interest list.  Through the
afs_cb_interest list, the relevant superblocks can be iterated over and the
specific inode looked up and marked in each one.

Make the following efficiency improvements:

 (1) Hold rcu_read_lock() over the entire processing rather than locking it
 each time.

 (2) Do all the callbacks for each vid together rather than individually.
 Each volume then only needs to be looked up once.

 (3) afs_vol_interest objects are now stored in an rb_tree rather than a
 flat list to reduce the lookup step count.

 (4) afs_vol_interest lookup is now done with RCU, but because it's in an
 rb_tree which may rotate under us, a seqlock is used so that if it
 changes during the walk, we repeat the walk with a lock held.

With this and the preceding patch which adds RCU-based lookups in the inode
cache, target volumes/vnodes can be taken without the need to take any
locks, except on the target itself.

Signed-off-by: David Howells 
---

 fs/afs/callback.c |  150 ++---
 fs/afs/internal.h |6 +-
 fs/afs/server.c   |4 +
 3 files changed, 100 insertions(+), 60 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 0dcbd40732d1..b16781e1683e 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -28,7 +28,7 @@ static struct afs_cb_interest *afs_create_interest(struct 
afs_server *server,
 {
struct afs_vol_interest *new_vi, *vi;
struct afs_cb_interest *new;
-   struct hlist_node **pp;
+   struct rb_node *parent, **pp;
 
new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL);
if (!new_vi)
@@ -42,7 +42,6 @@ static struct afs_cb_interest *afs_create_interest(struct 
afs_server *server,
 
new_vi->usage = 1;
new_vi->vid = vnode->volume->vid;
-   INIT_HLIST_NODE(&new_vi->srv_link);
INIT_HLIST_HEAD(&new_vi->cb_interests);
 
refcount_set(&new->usage, 1);
@@ -51,31 +50,31 @@ static struct afs_cb_interest *afs_create_interest(struct 
afs_server *server,
new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
INIT_HLIST_NODE(&new->cb_vlink);
 
-   write_lock(&server->cb_break_lock);
+   write_seqlock(&server->cb_break_lock);
 
-   for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) {
-   vi = hlist_entry(*pp, struct afs_vol_interest, srv_link);
-   if (vi->vid < new_vi->vid)
-   continue;
-   if (vi->vid > new_vi->vid)
-   break;
-   vi->usage++;
-   goto found_vi;
+   pp = &server->cb_volumes.rb_node;
+   while ((parent = *pp)) {
+   vi = rb_entry(parent, struct afs_vol_interest, srv_node);
+   if (vi->vid < new_vi->vid) {
+   pp = &(*pp)->rb_left;
+   } else if (vi->vid > new_vi->vid) {
+   pp = &(*pp)->rb_right;
+   } else {
+   vi->usage++;
+   goto found_vi;
+   }
}
 
-   new_vi->srv_link.pprev = pp;
-   new_vi->srv_link.next = *pp;
-   if (*pp)
-   (*pp)->pprev = &new_vi->srv_link.next;
-   *pp = &new_vi->srv_link;
vi = new_vi;
new_vi = NULL;
-found_vi:
+   rb_link_node_rcu(&vi->srv_node, parent, pp);
+   rb_insert_color(&vi->srv_node, &server->cb_volumes);
 
+found_vi:
new->vol_interest = vi;
hlist_add_head(&new->cb_vlink, &vi->cb_interests);
 
-   write_unlock(&server->cb_break_lock);
+   write_sequnlock(&server->cb_break_lock);
kfree(new_vi);
return new;
 }
@@ -182,17 +181,17 @@ void afs_put_cb_interest(struct afs_net *net, struct 
afs_cb_interest *cbi)
 
if (cbi && refcount_dec_and_test(&cbi->usage)) {
if (!hlist_unhashed(&cbi->cb_vlink)) {
-   write_lock(&cbi->server->cb_break_lock);
+   write_seqlock(&cbi->server->cb_break_lock);
 
hlist_del_init(&cbi->cb_vlink);
vi = cbi->vol_interest;
cbi->vol_interest = NULL;
if (--vi->usage == 0)
-   hlist_del(&vi->srv_link);
+   rb_erase(&vi->srv_node, 
&cbi->server->cb_volumes);
else
vi = NULL;
 
-   write_unlock(&cbi->server->cb_break_lock);
+   write_sequnlock(&cbi->server->cb_break_lock);
if 

[PATCH] [v2] ARM: omap2: fix omap5_realtime_timer_init definition

2020-05-29 Thread Arnd Bergmann
There is one more regression introduced by the last build fix:

arch/arm/mach-omap2/timer.c:170:6: error: attribute declaration must precede 
definition [-Werror,-Wignored-attributes]
void __init omap5_realtime_timer_init(void)
 ^
arch/arm/mach-omap2/common.h:118:20: note: previous definition is here
static inline void omap5_realtime_timer_init(void)
   ^
arch/arm/mach-omap2/timer.c:170:13: error: redefinition of 
'omap5_realtime_timer_init'
void __init omap5_realtime_timer_init(void)
^
arch/arm/mach-omap2/common.h:118:20: note: previous definition is here
static inline void omap5_realtime_timer_init(void)

As it turns out, the CONFIG_SOC_HAS_REALTIME_COUNTER option
should never be disabled for OMAP5 as we realy on this to initialize
the clocks and the timer. Just remove it here and make it the default.

Removing the guard around the set_cntfreq() definition, I noticed that
this is not properly namespaced, so fix that as well.

Fixes: d86ad463d670 ("ARM: OMAP2+: Fix regression for using local timer on 
non-SMP SoCs")
Signed-off-by: Arnd Bergmann 
---
I'll let this run randconfig builds overnight to be sure it doesn't
cause another regression.
---
 arch/arm/mach-omap2/Kconfig   |  5 -
 arch/arm/mach-omap2/common.h  |  6 --
 arch/arm/mach-omap2/omap-secure.h |  9 +
 arch/arm/mach-omap2/omap-smp.c|  2 +-
 arch/arm/mach-omap2/timer.c   | 14 ++
 5 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index ca74473f01df..87fb4df4cf72 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -145,11 +145,6 @@ config ARCH_OMAP2PLUS_TYPICAL
 config SOC_HAS_OMAP2_SDRC
bool "OMAP2 SDRAM Controller support"
 
-config SOC_HAS_REALTIME_COUNTER
-   bool "Real time free running counter"
-   depends on SOC_OMAP5 || SOC_DRA7XX
-   default y
-
 config POWER_AVS_OMAP
bool "AVS(Adaptive Voltage Scaling) support for OMAP IP versions 1&2"
depends on POWER_AVS && (ARCH_OMAP3 || ARCH_OMAP4) && PM
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 49926eced5f1..70e3d6df4cb6 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -112,13 +112,7 @@ static inline int omap_l2_cache_init(void)
 #define omap4_l2c310_write_sec NULL
 #endif
 
-#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
 extern void omap5_realtime_timer_init(void);
-#else
-static inline void omap5_realtime_timer_init(void)
-{
-}
-#endif
 
 void omap2420_init_early(void);
 void omap2430_init_early(void);
diff --git a/arch/arm/mach-omap2/omap-secure.h 
b/arch/arm/mach-omap2/omap-secure.h
index 4aaa95706d39..fbc02bb639c4 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -81,14 +81,7 @@ extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 
flag);
 
 extern bool optee_available;
 void omap_secure_init(void);
-
-#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
-void set_cntfreq(void);
-#else
-static inline void set_cntfreq(void)
-{
-}
-#endif
+void omap5_set_cntfreq(void);
 
 #endif /* __ASSEMBLER__ */
 #endif /* OMAP_ARCH_OMAP_SECURE_H */
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 570a987e6d1a..f35d5642122a 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -162,7 +162,7 @@ static void omap4_secondary_init(unsigned int cpu)
 * Configure the CNTFRQ register for the secondary cpu's which
 * indicates the frequency of the cpu local timers.
 */
-   set_cntfreq();
+   omap5_set_cntfreq();
/* Configure ACR to disable streaming WA for 801819 */
omap5_erratum_workaround_801819();
/* Enable ACR to allow for ICUALLU workaround */
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index c1737e737a94..9b7b1240de81 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -39,11 +39,9 @@
 #define INCREMENTER_DENUMERATOR_RELOAD_OFFSET  0x14
 #define NUMERATOR_DENUMERATOR_MASK 0xf000
 
-#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
-
 static unsigned long arch_timer_freq;
 
-void set_cntfreq(void)
+void omap5_set_cntfreq(void)
 {
omap_smc1(OMAP5_DRA7_MON_SET_CNTFRQ_INDEX, arch_timer_freq);
 }
@@ -154,19 +152,11 @@ static void __init realtime_counter_init(void)
writel_relaxed(reg, base + INCREMENTER_DENUMERATOR_RELOAD_OFFSET);
 
arch_timer_freq = DIV_ROUND_UP_ULL(rate * num, den);
-   set_cntfreq();
+   omap5_set_cntfreq();
 
iounmap(base);
 }
 
-#else
-
-static inline void realtime_counter_init(void)
-{
-}
-
-#endif /* CONFIG_SOC_HAS_REALTIME_COUNTER */
-
 void __init omap5_realtime_timer_init(void)
 {
omap_clk_init();
-- 
2.26.2



[PATCH 10/27] afs: Set error flag rather than return error from file status decode

2020-05-29 Thread David Howells
Set a flag in the call struct to indicate an unmarshalling error rather
than return and handle an error from the decoding of file statuses.  This
flag is checked on a successful return from the delivery function.

Signed-off-by: David Howells 
---

 fs/afs/fsclient.c  |   88 +++-
 fs/afs/internal.h  |1 +
 fs/afs/rxrpc.c |4 ++
 fs/afs/yfsclient.c |   85 +++---
 4 files changed, 55 insertions(+), 123 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 401de063996c..b1d8d8f780d2 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -56,16 +56,15 @@ static void xdr_dump_bad(const __be32 *bp)
 /*
  * decode an AFSFetchStatus block
  */
-static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
-struct afs_call *call,
-struct afs_status_cb *scb)
+static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
 {
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
struct afs_file_status *status = &scb->status;
bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
-   int ret;
 
abort_code = ntohl(xdr->abort_code);
 
@@ -79,7 +78,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
 */
status->abort_code = abort_code;
scb->have_error = true;
-   goto good;
+   goto advance;
}
 
pr_warn("Unknown AFSFetchStatus version %u\n", 
ntohl(xdr->if_version));
@@ -89,7 +88,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
if (abort_code != 0 && inline_error) {
status->abort_code = abort_code;
scb->have_error = true;
-   goto good;
+   goto advance;
}
 
type = ntohl(xdr->type);
@@ -125,15 +124,13 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
status->data_version = data_version;
scb->have_status = true;
-good:
-   ret = 0;
 advance:
*_bp = (const void *)*_bp + sizeof(*xdr);
-   return ret;
+   return;
 
 bad:
xdr_dump_bad(*_bp);
-   ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+   afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
goto advance;
 }
 
@@ -254,9 +251,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct 
afs_call *call)
 
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
-   ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
-   if (ret < 0)
-   return ret;
+   xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
 
@@ -419,9 +414,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
return ret;
 
bp = call->buffer;
-   ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
-   if (ret < 0)
-   return ret;
+   xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
 
@@ -577,12 +570,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call 
*call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->out_fid);
-   ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
-   if (ret < 0)
-   return ret;
-   ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
-   if (ret < 0)
-   return ret;
+   xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+   xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
 
@@ -691,9 +680,7 @@ static int afs_deliver_fs_dir_status_and_vol(struct 
afs_call *call)
 
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
-   ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
-   if (ret < 0)
-   return ret;
+   xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
 
_leave(" = 0 [done]");
@@ -784,12 +771,8 @@ static int afs_deliver_fs_link(struct afs_call *call)
 
/* unmarshall the reply once we've received all of it */
bp = call->

[PATCH 05/27] afs: Use the serverUnique field in the UVLDB record to reduce rpc ops

2020-05-29 Thread David Howells
The U-version VLDB volume record retrieved by the VL.GetEntryByNameU rpc op
carries a change counter (the serverUnique field) for each fileserver
listed in the record as backing that volume.  This is incremented whenever
the registration details for a fileserver change (such as its address
list).  Note that the same value will be seen in all UVLDB records that
refer to that fileserver.

This should be checked before calling the VL server to re-query the address
list for a fileserver.  If it's the same, there's no point doing the query.

Reported-by: Jeffrey Altman 
Signed-off-by: David Howells 
---

 fs/afs/internal.h|5 +++--
 fs/afs/server.c  |   26 ++
 fs/afs/server_list.c |3 ++-
 fs/afs/vlclient.c|1 +
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 80255513e230..ee17c868ad2c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -471,6 +471,7 @@ struct afs_vldb_entry {
 #define AFS_VLDB_QUERY_ERROR   4   /* - VL server returned error */
 
uuid_t  fs_server[AFS_NMAXNSERVERS];
+   u32 addr_version[AFS_NMAXNSERVERS]; /* Registration 
change counters */
u8  fs_mask[AFS_NMAXNSERVERS];
 #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this 
server) */
 #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this 
server) */
@@ -498,7 +499,6 @@ struct afs_server {
struct hlist_node   proc_link;  /* Link in net->fs_proc */
struct afs_server   *gc_next;   /* Next server in manager's 
list */
time64_tput_time;   /* Time at which last put */
-   time64_tupdate_at;  /* Time at which to next update 
the record */
unsigned long   flags;
 #define AFS_SERVER_FL_NOT_READY1   /* The record is not 
ready for use */
 #define AFS_SERVER_FL_NOT_FOUND2   /* VL server says no 
such server */
@@ -511,6 +511,7 @@ struct afs_server {
 #define AFS_SERVER_FL_IS_YFS   9   /* Server is YFS not AFS */
 #define AFS_SERVER_FL_NO_RM2   10  /* Fileserver doesn't support 
YFS.RemoveFile2 */
 #define AFS_SERVER_FL_HAVE_EPOCH 11/* ->epoch is valid */
+#define AFS_SERVER_FL_NEEDS_UPDATE 12  /* Fileserver address list is 
out of date */
atomic_tusage;
u32 addr_version;   /* Address list version */
u32 cm_epoch;   /* Server RxRPC epoch */
@@ -1241,7 +1242,7 @@ extern spinlock_t afs_server_peer_lock;
 extern struct afs_server *afs_find_server(struct afs_net *,
  const struct sockaddr_rxrpc *);
 extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const 
uuid_t *);
-extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, 
const uuid_t *);
+extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, 
const uuid_t *, u32);
 extern struct afs_server *afs_get_server(struct afs_server *, enum 
afs_server_trace);
 extern void afs_put_server(struct afs_net *, struct afs_server *, enum 
afs_server_trace);
 extern void afs_manage_servers(struct work_struct *);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 11b90ac7ea30..9e50ccde5d37 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -12,7 +12,6 @@
 #include "protocol_yfs.h"
 
 static unsigned afs_server_gc_delay = 10;  /* Server record timeout in 
seconds */
-static unsigned afs_server_update_delay = 30;  /* Time till VLDB recheck in 
secs */
 static atomic_t afs_server_debug_id;
 
 static void afs_inc_servers_outstanding(struct afs_net *net)
@@ -218,7 +217,6 @@ static struct afs_server *afs_alloc_server(struct afs_net 
*net,
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
-   server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
rwlock_init(&server->cb_break_lock);
@@ -264,7 +262,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct 
afs_cell *cell,
  * Get or create a fileserver record.
  */
 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
-const uuid_t *uuid)
+const uuid_t *uuid, u32 addr_version)
 {
struct afs_addr_list *alist;
struct afs_server *server, *candidate;
@@ -272,8 +270,11 @@ struct afs_server *afs_lookup_server(struct afs_cell 
*cell, struct key *key,
_enter("%p,%pU", cell->net, uuid);
 
server = afs_find_server_by_uuid(cell->net, uuid);
-   if (server)
+   if (server) {
+   if (server->addr_version != addr_version)
+ 

[PATCH 11/27] afs: Remove the error argument from afs_protocol_error()

2020-05-29 Thread David Howells
Remove the error argument from afs_protocol_error() as it's always
-EBADMSG.

Signed-off-by: David Howells 
---

 fs/afs/cmservice.c |9 +++--
 fs/afs/fsclient.c  |   17 ++---
 fs/afs/inode.c |4 ++--
 fs/afs/internal.h  |2 +-
 fs/afs/rxrpc.c |6 +++---
 fs/afs/vlclient.c  |   34 ++
 fs/afs/yfsclient.c |   17 ++---
 include/trace/events/afs.h |   10 --
 8 files changed, 39 insertions(+), 60 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 7ae88958051f..ed0fb34d77dd 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -307,8 +307,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->count = ntohl(call->tmp);
_debug("FID count: %u", call->count);
if (call->count > AFSCBMAX)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_cb_fid_count);
+   return afs_protocol_error(call, 
afs_eproto_cb_fid_count);
 
call->buffer = kmalloc(array3_size(call->count, 3, 4),
   GFP_KERNEL);
@@ -353,8 +352,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->count2 = ntohl(call->tmp);
_debug("CB count: %u", call->count2);
if (call->count2 != call->count && call->count2 != 0)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_cb_count);
+   return afs_protocol_error(call, afs_eproto_cb_count);
call->iter = &call->def_iter;
iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4);
call->unmarshall++;
@@ -674,8 +672,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call 
*call)
call->count = ntohl(call->tmp);
_debug("FID count: %u", call->count);
if (call->count > YFSCBMAX)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_cb_fid_count);
+   return afs_protocol_error(call, 
afs_eproto_cb_fid_count);
 
size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
call->buffer = kmalloc(size, GFP_KERNEL);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index b1d8d8f780d2..7d4503174dd1 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -130,7 +130,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 
 bad:
xdr_dump_bad(*_bp);
-   afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+   afs_protocol_error(call, afs_eproto_bad_status);
goto advance;
 }
 
@@ -1470,8 +1470,7 @@ static int afs_deliver_fs_get_volume_status(struct 
afs_call *call)
call->count = ntohl(call->tmp);
_debug("volname length: %u", call->count);
if (call->count >= AFSNAMEMAX)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_volname_len);
+   return afs_protocol_error(call, afs_eproto_volname_len);
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
@@ -1500,8 +1499,7 @@ static int afs_deliver_fs_get_volume_status(struct 
afs_call *call)
call->count = ntohl(call->tmp);
_debug("offline msg length: %u", call->count);
if (call->count >= AFSNAMEMAX)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_offline_msg_len);
+   return afs_protocol_error(call, 
afs_eproto_offline_msg_len);
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
@@ -1531,8 +1529,7 @@ static int afs_deliver_fs_get_volume_status(struct 
afs_call *call)
call->count = ntohl(call->tmp);
_debug("motd length: %u", call->count);
if (call->count >= AFSNAMEMAX)
-   return afs_protocol_error(call, -EBADMSG,
- afs_eproto_motd_len);
+   return afs_protocol_error(call, afs_eproto_motd_len);
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
@@ -2012,8 +2009,7 @@ static int afs_deliver_fs_inline_bulk_status(struct 
afs_call *call)
tmp = ntohl(call->tmp);
_debug("status count: %u/%u", tmp, call->count2);
if (tmp != call->count2)
-

[PATCH 08/27] afs: Show more information in /proc/net/afs/servers

2020-05-29 Thread David Howells
Show more information in /proc/net/afs/servers to make it easier to see
what's going on with the server probing.

Signed-off-by: David Howells 
---

 fs/afs/proc.c |   17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9bce7898cd7d..1d21465a4108 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -378,21 +378,22 @@ static int afs_proc_servers_show(struct seq_file *m, void 
*v)
int i;
 
if (v == SEQ_START_TOKEN) {
-   seq_puts(m, "UUID REF ACT 
ADDR\n");
+   seq_puts(m, "UUID REF ACT\n");
return 0;
}
 
server = list_entry(v, struct afs_server, proc_link);
alist = rcu_dereference(server->addresses);
-   seq_printf(m, "%pU %3d %3d %pISpc%s\n",
+   seq_printf(m, "%pU %3d %3d\n",
   &server->uuid,
   atomic_read(&server->ref),
-  atomic_read(&server->active),
-  &alist->addrs[0].transport,
-  alist->preferred == 0 ? "*" : "");
-   for (i = 1; i < alist->nr_addrs; i++)
-   seq_printf(m, " 
%pISpc%s\n",
-  &alist->addrs[i].transport,
+  atomic_read(&server->active));
+   seq_printf(m, "  - ALIST v=%u osp=%u r=%lx f=%lx\n",
+  alist->version, atomic_read(&server->probe_outstanding),
+  alist->responded, alist->failed);
+   for (i = 0; i < alist->nr_addrs; i++)
+   seq_printf(m, "[%x] %pISpc%s\n",
+  i, &alist->addrs[i].transport,
   alist->preferred == i ? "*" : "");
return 0;
 }




[PATCH 14/27] afs: Don't get epoch from a server because it may be ambiguous

2020-05-29 Thread David Howells
Don't get the epoch from a server, particularly one that we're looking up
by UUID, as UUIDs may be ambiguous and may map to more than one server - so
we can't draw any conclusions from it.

Reported-by: Jeffrey Altman 
Signed-off-by: David Howells 
---

 fs/afs/cmservice.c |   49 ++---
 fs/afs/internal.h  |7 ---
 2 files changed, 2 insertions(+), 54 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index ed0fb34d77dd..954030ae7a0f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -118,8 +118,6 @@ bool afs_cm_incoming_call(struct afs_call *call)
 {
_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
 
-   call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
-
switch (call->operation_ID) {
case CBCallBack:
call->type = &afs_SRXCBCallBack;
@@ -149,49 +147,6 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
 }
 
-/*
- * Record a probe to the cache manager from a server.
- */
-static int afs_record_cm_probe(struct afs_call *call, struct afs_server 
*server)
-{
-   _enter("");
-
-   if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
-   !afs_is_probing_server(server)) {
-   if (server->cm_epoch == call->epoch)
-   return 0;
-
-   if (!server->probe.said_rebooted) {
-   pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
-   server->probe.said_rebooted = true;
-   }
-   }
-
-   spin_lock(&server->probe_lock);
-
-   if (!test_and_set_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
-   server->cm_epoch = call->epoch;
-   server->probe.cm_epoch = call->epoch;
-   goto out;
-   }
-
-   if (server->probe.cm_probed &&
-   call->epoch != server->probe.cm_epoch &&
-   !server->probe.said_inconsistent) {
-   pr_notice("kAFS: FS endpoints inconsistent %pU\n",
- &server->uuid);
-   server->probe.said_inconsistent = true;
-   }
-
-   if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
-   server->probe.cm_epoch = server->cm_epoch;
-
-out:
-   server->probe.cm_probed = true;
-   spin_unlock(&server->probe_lock);
-   return 0;
-}
-
 /*
  * Find the server record by peer address and record a probe to the cache
  * manager from a server.
@@ -210,7 +165,7 @@ static int afs_find_cm_server_by_peer(struct afs_call *call)
}
 
call->server = server;
-   return afs_record_cm_probe(call, server);
+   return 0;
 }
 
 /*
@@ -231,7 +186,7 @@ static int afs_find_cm_server_by_uuid(struct afs_call *call,
}
 
call->server = server;
-   return afs_record_cm_probe(call, server);
+   return 0;
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 4b8ac049fc17..9f024c1bd650 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -124,7 +124,6 @@ struct afs_call {
spinlock_t  state_lock;
int error;  /* error code */
u32 abort_code; /* Remote abort ID or 0 */
-   u32 epoch;
unsigned intmax_lifespan;   /* Maximum lifespan to set if 
not 0 */
unsignedrequest_size;   /* size of request data */
unsignedreply_max;  /* maximum size of reply */
@@ -491,12 +490,10 @@ struct afs_server {
 #define AFS_SERVER_FL_MAY_HAVE_CB 8/* May have callbacks on this 
fileserver */
 #define AFS_SERVER_FL_IS_YFS   9   /* Server is YFS not AFS */
 #define AFS_SERVER_FL_NO_RM2   10  /* Fileserver doesn't support 
YFS.RemoveFile2 */
-#define AFS_SERVER_FL_HAVE_EPOCH 11/* ->epoch is valid */
 #define AFS_SERVER_FL_NEEDS_UPDATE 12  /* Fileserver address list is 
out of date */
atomic_tref;/* Object refcount */
atomic_tactive; /* Active user count */
u32 addr_version;   /* Address list version */
-   u32 cm_epoch;   /* Server RxRPC epoch */
unsigned intdebug_id;   /* Debugging ID for traces */
 
/* file service access */
@@ -515,15 +512,11 @@ struct afs_server {
struct {
unsigned intrtt;/* RTT as ktime/64 */
u32 abort_code;
-   u32 cm_epoch;
short   error;
boolresponded:1;
boolis_yfs:1;
boolnot_yfs:1;
boollocal_failure:1;
-   boolcm_probed:1;
-   boolsaid_rebooted:1;
-   boolsaid_inconsistent:1

[PATCH 12/27] afs: Rename struct afs_fs_cursor to afs_operation

2020-05-29 Thread David Howells
As a prelude to implementing asynchronous fileserver operations in the afs
filesystem, rename struct afs_fs_cursor to afs_operation.

This struct is going to form the core of the operation management and is
going to acquire more members in later.

Signed-off-by: David Howells 
---

 fs/afs/dir.c   |   22 ++--
 fs/afs/dir_silly.c |4 -
 fs/afs/file.c  |2 
 fs/afs/flock.c |6 +
 fs/afs/fsclient.c  |   42 ---
 fs/afs/inode.c |   10 +-
 fs/afs/internal.h  |  112 ++--
 fs/afs/rotate.c|  292 ++--
 fs/afs/server.c|8 +
 fs/afs/super.c |4 -
 fs/afs/volume.c|4 -
 fs/afs/write.c |2 
 fs/afs/xattr.c |8 +
 fs/afs/yfsclient.c |   40 ---
 14 files changed, 278 insertions(+), 278 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 3c486340b220..ff421db40cf2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -643,7 +643,7 @@ static struct inode *afs_do_lookup(struct inode *dir, 
struct dentry *dentry,
struct afs_super_info *as = dir->i_sb->s_fs_info;
struct afs_status_cb *scb;
struct afs_iget_data iget_data;
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_server *server;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
@@ -1220,7 +1220,7 @@ void afs_d_release(struct dentry *dentry)
 /*
  * Create a new inode for create/mkdir/symlink
  */
-static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
+static void afs_vnode_new_inode(struct afs_operation *fc,
struct dentry *new_dentry,
struct afs_iget_data *new_data,
struct afs_status_cb *new_scb)
@@ -1248,7 +1248,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
d_instantiate(new_dentry, inode);
 }
 
-static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
+static void afs_prep_for_new_inode(struct afs_operation *fc,
   struct afs_iget_data *iget_data)
 {
iget_data->volume = fc->vnode->volume;
@@ -1261,7 +1261,7 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor 
*fc,
  * number derived from the result of the operation.  It doesn't matter if
  * d_fsdata goes backwards as we'll just revalidate.
  */
-static void afs_update_dentry_version(struct afs_fs_cursor *fc,
+static void afs_update_dentry_version(struct afs_operation *fc,
  struct dentry *dentry,
  struct afs_status_cb *scb)
 {
@@ -1277,7 +1277,7 @@ static int afs_mkdir(struct inode *dir, struct dentry 
*dentry, umode_t mode)
 {
struct afs_iget_data iget_data;
struct afs_status_cb *scb;
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
afs_dataversion_t data_version;
@@ -1367,7 +1367,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
struct afs_status_cb *scb;
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
afs_dataversion_t data_version;
@@ -1483,7 +1483,7 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, 
struct dentry *dentry,
  */
 static int afs_unlink(struct inode *dir, struct dentry *dentry)
 {
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
@@ -1588,7 +1588,7 @@ static int afs_create(struct inode *dir, struct dentry 
*dentry, umode_t mode,
  bool excl)
 {
struct afs_iget_data iget_data;
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
@@ -1666,7 +1666,7 @@ static int afs_create(struct inode *dir, struct dentry 
*dentry, umode_t mode,
 static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry)
 {
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
@@ -1755,7 +1755,7 @@ static int afs_symlink(struct inode *dir, struct dentry 
*dentry,
   const char *content)
 {
struct afs_iget_data iget_data;
-   struct afs_fs_cursor fc;
+   struct afs_operation fc;
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
@@ -1837,7 +1837,7 @@ static int afs_rename(struct inode *old_dir, struct 
dentry *old_dentry,

[PATCH 15/27] afs: Fix handling of CB.ProbeUuid cache manager op

2020-05-29 Thread David Howells
The AFS filesystem driver is handling the CB.ProbeUuid request incorrectly.
The UUID presented in the request is that of the cache manager, not the
fileserver, so afs_deliver_cb_probe_uuid() shouldn't be using that UUID to
look up the server.

Fix this by looking up the server by address instead.

Signed-off-by: David Howells 
---

 fs/afs/cmservice.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 954030ae7a0f..bef413818af7 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -464,7 +464,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 }
 
 /*
- * allow the fileserver to quickly find out if the fileserver has been rebooted
+ * Allow the fileserver to quickly find out if the cache manager has been
+ * rebooted.
  */
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
@@ -536,7 +537,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-   return afs_find_cm_server_by_uuid(call, call->request);
+   return afs_find_cm_server_by_peer(call);
 }
 
 /*




[PATCH 19/27] afs: Detect cell aliases 2 - Cells with no root volumes

2020-05-29 Thread David Howells
Implement the second phase of cell alias detection.  This part handles
alias detection for cells that don't have root.cell volumes and so we have
to find some other volume or fileserver to query.

We take the first volume from each such cell and attempt to look it up in
the new cell.  If found, we compare the records, if they are the same, we
judge the cell names to be aliases.

Signed-off-by: David Howells 
---

 fs/afs/vl_alias.c |   90 -
 1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index d1d91a25fbe0..76bfa4dde4a4 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -180,6 +180,94 @@ static int afs_compare_cell_roots(struct afs_cell *cell)
return 1;
 }
 
+/*
+ * Query the new cell for a volume from a cell we're already using.
+ */
+static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
+  struct afs_cell *p)
+{
+   struct afs_volume *volume, *pvol = NULL;
+   int ret;
+
+   /* Arbitrarily pick the first volume in the list. */
+   read_lock(&p->proc_lock);
+   if (!list_empty(&p->proc_volumes))
+   pvol = afs_get_volume(list_first_entry(&p->proc_volumes,
+  struct afs_volume, 
proc_link));
+   read_unlock(&p->proc_lock);
+   if (!pvol)
+   return 0;
+
+   _enter("%s:%s", cell->name, pvol->name);
+
+   /* And see if it's in the new cell. */
+   volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len);
+   if (IS_ERR(volume)) {
+   afs_put_volume(cell->net, pvol);
+   if (PTR_ERR(volume) != -ENOMEDIUM)
+   return PTR_ERR(volume);
+   /* That volume is not in the new cell, so not an alias */
+   return 0;
+   }
+
+   /* The new cell has a like-named volume also - compare volume ID,
+* server and address lists.
+*/
+   ret = 0;
+   if (pvol->vid == volume->vid) {
+   rcu_read_lock();
+   if (afs_compare_volume_slists(volume, pvol))
+   ret = 1;
+   rcu_read_unlock();
+   }
+
+   afs_put_volume(cell->net, volume);
+   afs_put_volume(cell->net, pvol);
+   return ret;
+}
+
+/*
+ * Query the new cell for volumes we know exist in cells we're already using.
+ */
+static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
+{
+   struct afs_cell *p;
+
+   _enter("%s", cell->name);
+
+   if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0)
+   return -ERESTARTSYS;
+
+   hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) {
+   if (p == cell || p->alias_of)
+   continue;
+   if (list_empty(&p->proc_volumes))
+   continue;
+   if (p->root_volume)
+   continue; /* Ignore cells that have a root.cell volume. 
*/
+   afs_get_cell(p);
+   mutex_unlock(&cell->net->proc_cells_lock);
+
+   if (afs_query_for_alias_one(cell, key, p) != 0)
+   goto is_alias;
+
+   if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
+   afs_put_cell(cell->net, p);
+   return -ERESTARTSYS;
+   }
+
+   afs_put_cell(cell->net, p);
+   }
+
+   mutex_unlock(&cell->net->proc_cells_lock);
+   _leave(" = 0");
+   return 0;
+
+is_alias:
+   cell->alias_of = p; /* Transfer our ref */
+   return 1;
+}
+
 static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
 {
struct afs_volume *root_volume;
@@ -199,7 +287,7 @@ static int afs_do_cell_detect_alias(struct afs_cell *cell, 
struct key *key)
/* Okay, this cell doesn't have an root.cell volume.  We need to
 * locate some other random volume and use that to check.
 */
-   return -ENOMEDIUM;
+   return afs_query_for_alias(cell, key);
 }
 
 /*




[PATCH 20/27] afs: Detect cell aliases 3 - YFS Cells with a canonical cell name op

2020-05-29 Thread David Howells
YFS Volume Location servers have an operation by which the cell name may be
queried.  Use this to find out what a YFS server thinks the canonical cell
name should be.

Signed-off-by: David Howells 
---

 fs/afs/vl_alias.c  |   60 
 fs/afs/vl_rotate.c |4 +++
 2 files changed, 64 insertions(+)

diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index 76bfa4dde4a4..1fcb63c65ba9 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -268,12 +268,72 @@ static int afs_query_for_alias(struct afs_cell *cell, 
struct key *key)
return 1;
 }
 
+/*
+ * Look up a VLDB record for a volume.
+ */
+static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
+{
+   struct afs_vl_cursor vc;
+   char *cell_name = ERR_PTR(-EDESTADDRREQ);
+   bool skipped = false, not_skipped = false;
+   int ret;
+
+   if (!afs_begin_vlserver_operation(&vc, cell, key))
+   return ERR_PTR(-ERESTARTSYS);
+
+   while (afs_select_vlserver(&vc)) {
+   if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
+   vc.ac.error = -EOPNOTSUPP;
+   skipped = true;
+   continue;
+   }
+   not_skipped = true;
+   cell_name = afs_yfsvl_get_cell_name(&vc);
+   }
+
+   ret = afs_end_vlserver_operation(&vc);
+   if (skipped && !not_skipped)
+   ret = -EOPNOTSUPP;
+   return ret < 0 ? ERR_PTR(ret) : cell_name;
+}
+
+static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key 
*key)
+{
+   struct afs_cell *master;
+   char *cell_name;
+
+   cell_name = afs_vl_get_cell_name(cell, key);
+   if (IS_ERR(cell_name))
+   return PTR_ERR(cell_name);
+
+   if (strcmp(cell_name, cell->name) == 0) {
+   kfree(cell_name);
+   return 0;
+   }
+
+   master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name),
+NULL, false);
+   kfree(cell_name);
+   if (IS_ERR(master)) {
+   kfree(cell_name);
+   return PTR_ERR(master);
+   }
+
+   cell->alias_of = master; /* Transfer our ref */
+   return 1;
+}
+
 static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
 {
struct afs_volume *root_volume;
+   int ret;
 
_enter("%s", cell->name);
 
+   ret = yfs_check_canonical_cell_name(cell, key);
+   if (ret != -EOPNOTSUPP)
+   return ret;
+
/* Try and get the root.cell volume for comparison with other cells */
root_volume = afs_sample_volume(cell, key, "root.cell", 9);
if (!IS_ERR(root_volume)) {
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 72eacc14e6e1..f405ca8b240a 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -151,6 +151,10 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
vc->error = error;
vc->flags |= AFS_VL_CURSOR_RETRY;
goto next_server;
+
+   case -EOPNOTSUPP:
+   _debug("notsupp");
+   goto next_server;
}
 
 restart_from_beginning:




[PATCH 18/27] afs: Detect cell aliases 1 - Cells with root volumes

2020-05-29 Thread David Howells
Put in the first phase of cell alias detection.  This part handles alias
detection for cells that have root.cell volumes (which is expected to be
likely).

When a cell becomes newly active, it is probed for its root.cell volume,
and if it has one, this volume is compared against other root.cell volumes
to find out if the list of fileserver UUIDs have any in common - and if
that's the case, do the address lists of those fileservers have any
addresses in common.  If they do, the new cell is adjudged to be an alias
of the old cell and the old cell is used instead.

Comparing is aided by the server list in struct afs_server_list being
sorted in UUID order and the addresses in the fileserver address lists
being sorted in address order.

The cell then retains the afs_volume object for the root.cell volume, even
if it's not mounted for future alias checking.

This necessary because:

 (1) Whilst fileservers have UUIDs that are meant to be globally unique, in
 practice they are not because cells get cloned without changing the
 UUIDs - so afs_server records need to be per cell.

 (2) Sometimes the DNS is used to make cell aliases - but if we don't know
 they're the same, we may end up with multiple superblocks and multiple
 afs_server records for the same thing, impairing our ability to
 deliver callback notifications of third party changes

 (3) The fileserver RPC API doesn't contain the cell name, so it can't tell
 us which cell it's notifying and can't see that a change made to to
 one cell should notify the same client that's also accessed as the
 other cell.

Reported-by: Jeffrey Altman 
Signed-off-by: David Howells 
---

 fs/afs/Makefile   |1 
 fs/afs/cell.c |3 +
 fs/afs/internal.h |   17 +++-
 fs/afs/main.c |1 
 fs/afs/proc.c |5 +
 fs/afs/super.c|   18 
 fs/afs/vl_alias.c |  235 +
 fs/afs/volume.c   |7 +-
 8 files changed, 279 insertions(+), 8 deletions(-)
 create mode 100644 fs/afs/vl_alias.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 924f02e9d7e7..75c4e4043d1d 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -31,6 +31,7 @@ kafs-y := \
server_list.o \
super.o \
vlclient.o \
+   vl_alias.o \
vl_list.o \
vl_probe.o \
vl_rotate.o \
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 78ba5f932287..212098514ebf 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -164,6 +164,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
INIT_LIST_HEAD(&cell->proc_volumes);
rwlock_init(&cell->proc_lock);
rwlock_init(&cell->vl_servers_lock);
+   cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
 
/* Provide a VL server list, filling it in if we were given a list of
 * addresses to use.
@@ -481,7 +482,9 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
+   afs_put_volume(cell->net, cell->root_volume);
afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
+   afs_put_cell(cell->net, cell->alias_of);
key_put(cell->anonymous_key);
kfree(cell);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3606cfa50832..a3ef97d560ca 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -269,6 +269,7 @@ struct afs_net {
struct timer_list   cells_timer;
atomic_tcells_outstanding;
seqlock_t   cells_lock;
+   struct mutexcells_alias_lock;
 
struct mutexproc_cells_lock;
struct hlist_head   proc_cells;
@@ -342,8 +343,10 @@ enum afs_cell_state {
  * for authentication and encryption.  The cell name is not typically used in
  * the protocol.
  *
- * There is no easy way to determine if two cells are aliases or one is a
- * subset of another.
+ * Two cells are determined to be aliases if they have an explicit alias (YFS
+ * only), share any VL servers in common or have at least one volume in common.
+ * "In common" means that the address list of the VL servers or the fileservers
+ * share at least one endpoint.
  */
 struct afs_cell {
union {
@@ -351,6 +354,8 @@ struct afs_cell {
struct rb_node  net_node;   /* Node in net->cells */
};
struct afs_net  *net;
+   struct afs_cell *alias_of;  /* The cell this is an alias of 
*/
+   struct afs_volume   *root_volume;   /* The root.cell volume if 
there is one */
struct key  *anonymous_key; /* anonymous user key for this 
cell */
struct work_struct  manager;/* Manager for init/deinit/dns 
*/
struct hlist_node   proc_link;  /* /proc cell list link */
@@ -363,6 +368,7 @@ struct afs_cell {
unsigned long   flags;
 #define AFS_CELL_FL_NO_GC  0   /* The cell was added manually, 
don't auto-gc */

[PATCH 16/27] afs: Retain more of the VLDB record for alias detection

2020-05-29 Thread David Howells
Save more bits from the volume location database record obtained for a
server so that we can use this information in cell alias detection.

Signed-off-by: David Howells 
---

 fs/afs/internal.h|3 ++-
 fs/afs/server_list.c |3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9f024c1bd650..dce03e068cab 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -546,7 +546,7 @@ struct afs_cb_interest {
 };
 
 /*
- * Replaceable server list.
+ * Replaceable volume server list.
  */
 struct afs_server_entry {
struct afs_server   *server;
@@ -554,6 +554,7 @@ struct afs_server_entry {
 };
 
 struct afs_server_list {
+   afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */
refcount_t  usage;
unsigned char   nr_servers;
unsigned char   preferred;  /* Preferred server */
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index b77e50f62459..a35f6951a74a 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -46,6 +46,9 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell 
*cell,
refcount_set(&slist->usage, 1);
rwlock_init(&slist->lock);
 
+   for (i = 0; i < AFS_MAXTYPES; i++)
+   slist->vids[i] = vldb->vid[i];
+
/* Make sure a records exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
if (!(vldb->fs_mask[i] & type_mask))




[PATCH 17/27] afs: Implement client support for the YFSVL.GetCellName RPC op

2020-05-29 Thread David Howells
Implement client support for the YFSVL.GetCellName RPC operation by which
YFS permits the canonical cell name to be queried from a VL server.

Signed-off-by: David Howells 
---

 fs/afs/afs.h   |2 -
 fs/afs/afs_vl.h|1 
 fs/afs/internal.h  |2 +
 fs/afs/protocol_yfs.h  |2 -
 fs/afs/vlclient.c  |  111 
 include/trace/events/afs.h |4 ++
 6 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index f8e34406243e..432cb4b23961 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -10,7 +10,7 @@
 
 #include 
 
-#define AFS_MAXCELLNAME64  /* Maximum length of a cell 
name */
+#define AFS_MAXCELLNAME256 /* Maximum length of a cell 
name */
 #define AFS_MAXVOLNAME 64  /* Maximum length of a volume name */
 #define AFS_MAXNSERVERS8   /* Maximum servers in a basic 
volume record */
 #define AFS_NMAXNSERVERS   13  /* Maximum servers in a N/U-class 
volume record */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index e9b8029920ec..9c65ffb8a523 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -22,6 +22,7 @@ enum AFSVL_Operations {
VLGETENTRYBYNAMEU   = 527,  /* AFS Get VLDB entry by name 
(UUID-variant) */
VLGETADDRSU = 533,  /* AFS Get addrs for fileserver */
YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume 
server */
+   YVLGETCELLNAME  = 64014, /* YFS Get actual cell name */
VLGETCAPABILITIES   = 65537, /* AFS Get server capabilities */
 };
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index dce03e068cab..3606cfa50832 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -116,6 +116,7 @@ struct afs_call {
longret0;   /* Value to reply with instead 
of 0 */
struct afs_addr_list*ret_alist;
struct afs_vldb_entry   *ret_vldb;
+   char*ret_str;
};
struct afs_operation*op;
unsigned intserver_index;
@@ -1373,6 +1374,7 @@ extern struct afs_addr_list *afs_vl_get_addrs_u(struct 
afs_vl_cursor *, const uu
 extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct 
afs_addr_cursor *,
struct key *, struct 
afs_vlserver *, unsigned int);
 extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, 
const uuid_t *);
+extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
 
 /*
  * vl_probe.c
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 32be9c698348..b5bd03b1d3c7 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -8,7 +8,7 @@
 #define YFS_FS_SERVICE 2500
 #define YFS_CM_SERVICE 2501
 
-#define YFSCBMAX 1024
+#define YFSCBMAX   1024
 
 enum YFS_CM_Operations {
YFSCBProbe  = 206,  /* probe client */
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d0c85623ce8f..fd82850cd424 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -645,3 +645,114 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct 
afs_vl_cursor *vc,
afs_make_call(&vc->ac, call, GFP_KERNEL);
return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, 
&vc->ac);
 }
+
+/*
+ * Deliver reply data to a YFSVL.GetCellName operation.
+ */
+static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
+{
+   char *cell_name;
+   u32 namesz, paddedsz;
+   int ret;
+
+   _enter("{%u,%zu/%u}",
+  call->unmarshall, iov_iter_count(call->iter), call->count);
+
+   switch (call->unmarshall) {
+   case 0:
+   afs_extract_to_tmp(call);
+   call->unmarshall++;
+
+   /* Fall through - and extract the cell name length */
+   case 1:
+   ret = afs_extract_data(call, true);
+   if (ret < 0)
+   return ret;
+
+   namesz = ntohl(call->tmp);
+   if (namesz > AFS_MAXCELLNAME)
+   return afs_protocol_error(call, 
afs_eproto_cellname_len);
+   paddedsz = (namesz + 3) & ~3;
+   call->count = namesz;
+   call->count2 = paddedsz - namesz;
+
+   cell_name = kmalloc(namesz + 1, GFP_KERNEL);
+   if (!cell_name)
+   return -ENOMEM;
+   cell_name[namesz] = 0;
+   call->ret_str = cell_name;
+
+   afs_extract_begin(call, cell_name, namesz);
+   call->unmarshall++;
+
+   /* Fall through - and extract cell name */
+   case 2:
+   ret = afs_extract_data(call, true);
+   if (ret < 0)
+   return ret;
+
+   afs_extract_discard(call, call->count2);
+   call->unmarshall++;
+
+   /* Fall through - 

[PATCH 21/27] afs: Add a tracepoint to track the lifetime of the afs_volume struct

2020-05-29 Thread David Howells
Add a tracepoint to track the lifetime of the afs_volume struct.

Signed-off-by: David Howells 
---

 fs/afs/cell.c  |2 +-
 fs/afs/fs_operation.c  |4 ++-
 fs/afs/internal.h  |   10 ++--
 fs/afs/super.c |   10 +---
 fs/afs/vl_alias.c  |9 ---
 fs/afs/volume.c|   27 ++---
 include/trace/events/afs.h |   56 
 7 files changed, 95 insertions(+), 23 deletions(-)

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 212098514ebf..8bfc8a05fd46 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -482,7 +482,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
-   afs_put_volume(cell->net, cell->root_volume);
+   afs_put_volume(cell->net, cell->root_volume, 
afs_volume_trace_put_cell_root);
afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
afs_put_cell(cell->net, cell->alias_of);
key_put(cell->anonymous_key);
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index f7a768d12141..f57efd9d2db0 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -36,7 +36,7 @@ struct afs_operation *afs_alloc_operation(struct key *key, 
struct afs_volume *vo
}
 
op->key = key;
-   op->volume  = afs_get_volume(volume);
+   op->volume  = afs_get_volume(volume, afs_volume_trace_get_new_op);
op->net = volume->cell->net;
op->cb_v_break  = volume->cb_v_break;
op->debug_id= atomic_inc_return(&afs_operation_debug_counter);
@@ -233,7 +233,7 @@ int afs_put_operation(struct afs_operation *op)
afs_end_cursor(&op->ac);
afs_put_cb_interest(op->net, op->cbi);
afs_put_serverlist(op->net, op->server_list);
-   afs_put_volume(op->net, op->volume);
+   afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
kfree(op);
return ret;
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a3ef97d560ca..e084936066b0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1429,17 +1429,11 @@ extern struct afs_vlserver_list 
*afs_extract_vlserver_list(struct afs_cell *,
 /*
  * volume.c
  */
-static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
-{
-   if (volume)
-   atomic_inc(&volume->usage);
-   return volume;
-}
-
 extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
 extern void afs_activate_volume(struct afs_volume *);
 extern void afs_deactivate_volume(struct afs_volume *);
-extern void afs_put_volume(struct afs_net *, struct afs_volume *);
+extern struct afs_volume *afs_get_volume(struct afs_volume *, enum 
afs_volume_trace);
+extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum 
afs_volume_trace);
 extern int afs_check_volume_status(struct afs_volume *, struct afs_operation 
*);
 
 /*
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 1bb69159956f..45e937dcb80a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -376,7 +376,8 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->key = key;
 
if (ctx->volume) {
-   afs_put_volume(ctx->net, ctx->volume);
+   afs_put_volume(ctx->net, ctx->volume,
+  afs_volume_trace_put_validate_fc);
ctx->volume = NULL;
}
 
@@ -507,7 +508,8 @@ static struct afs_super_info *afs_alloc_sbi(struct 
fs_context *fc)
as->dyn_root = true;
} else {
as->cell = afs_get_cell(ctx->cell);
-   as->volume = afs_get_volume(ctx->volume);
+   as->volume = afs_get_volume(ctx->volume,
+   
afs_volume_trace_get_alloc_sbi);
}
}
return as;
@@ -517,7 +519,7 @@ static void afs_destroy_sbi(struct afs_super_info *as)
 {
if (as) {
struct afs_net *net = afs_net(as->net_ns);
-   afs_put_volume(net, as->volume);
+   afs_put_volume(net, as->volume, 
afs_volume_trace_put_destroy_sbi);
afs_put_cell(net, as->cell);
put_net(as->net_ns);
kfree(as);
@@ -604,7 +606,7 @@ static void afs_free_fc(struct fs_context *fc)
struct afs_fs_context *ctx = fc->fs_private;
 
afs_destroy_sbi(fc->s_fs_info);
-   afs_put_volume(ctx->net, ctx->volume);
+   afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc);
afs_put_cell(ctx->net, ctx->cell);
key_put(ctx->key);
kfree(ctx);
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
index 1fcb63c65ba9..1cf9584bb51d 100644
--- a/fs/afs/vl_alias.c
+++ b/fs/afs/vl_alias.c
@@ -193,7 +193,8 @@ static int afs_query_for_alias_one(struct afs_cell *cell, 
struct key *key,
read

Re: clean up kernel_{read,write} & friends v2

2020-05-29 Thread Casey Schaufler
On 5/29/2020 12:19 PM, Linus Torvalds wrote:
> On Fri, May 29, 2020 at 6:08 AM David Laight  wrote:
>> A wide monitor is for looking at lots of files.
> Not necessarily.
>
> Excessive line breaks are BAD. They cause real and every-day problems.
>
> They cause problems for things like "grep" both in the patterns and in
> the output, since grep (and a lot of other very basic unix utilities)
> is fundamentally line-based.
>
> So the fact is, many of us have long long since skipped the whole
> "80-column terminal" model, for the same reason that we have many more
> lines than 25 lines visible at a time.
>
> And honestly, I don't want to see patches that make the kernel reading
> experience worse for me and likely for the vast majority of people,
> based on the argument that some odd people have small terminal
> windows.
>
> If you or Christoph have 80 character lines, you'll get possibly ugly
> wrapped output. Tough. That's _your_ choice. Your hardware limitations
> shouldn't be a pain for the rest of us.
>
> Longer lines are fundamentally useful. My monitor is not only a lot
> wider than it is tall, my fonts are universally narrower than they are
> tall. Long lines are natural.
>
> When I tile my terminal windows on my display, I can have 6 terminals
> visible at one time, and that's because I have them three wide. And I
> could still fit 80% of a fourth one side-by-side.
>
> And guess what? That's with my default "100x50" terminal window (go to
> your gnome terminal settings, you'll find that the 80x25 thing is just
> an initial default that you can change), not with some 80x25 one. And
> that's with a font that has anti-aliasing and isn't some pixelated
> mess.
>
> And most of my terminals actually end up being dragged wider and
> taller than that. I checked, and my main one is 142x76 characters
> right now, because it turns out that wider (and taller) terminals are
> useful not just for source code.
>
> Have you looked at "ps ax" output lately? Or used "top"? Or done "git
> diff --stat" or any number of things where it turns out that 80x25 is
> really really limiting, and is simply NO LONGER RELEVANT to most of
> us.
>
> So no. I do not care about somebody with a 80x25 terminal window
> getting line wrapping.

The first law of good C programming is:
"Thou shalt adhere to the One True Brace Style"

It extrapolates into indentation, line width, and a number of
other things. Since Linux is a trademark of Linus Torvalds,
you get to define what the "One Truth" is. Time to resize my
terminals.

>
> For exactly the same reason I find it completely irrelevant if
> somebody says that their kernel compile takes 10 hours because they
> are doing kernel development on a Raspberry PI with 4GB of RAM.
>
> People with restrictive hardware shouldn't make it more inconvenient
> for people who have better resources. Yes, we'll accommodate things to
> within reasonable limits. But no, 80-column terminals in 2020 isn't
> "reasonable" any more as far as I'm concerned. People commonly used
> 132-column terminals even back in the 80's, for chrissake, don't try
> to make 80 columns some immovable standard.
>
> If you choose to use a 80-column terminal, you can live with the line
> wrapping. It's just that simple.
>
> And longer lines are simply useful. Part of that is that we aren't
> programming in the 80's any more, and our source code is fundamentally
> wider as a result.
>
> Yes, local iteration variables are still called 'i', because more
> context just isn't helpful for some anonymous counter. Being concise
> is still a good thing, and overly verbose names are not inherently
> better.
>
> But still - it's entirely reasonable to have variable names that are
> 10-15 characters and it makes the code more legible. Writing things
> out instead of using abbreviations etc.
>
> And yes, we do use wide tabs, because that makes indentation something
> you can visually see in the structure at a glance and on a
> whole-function basis, rather than something you have to try to
> visually "line up" things for or count spaces.
>
> So we have lots of fairly fundamental issues that fairly easily make
> for longer lines in many circumstances.
>
> And yes, we do line breaks at some point. But there really isn't any
> reason to make that point be 80 columns any more.
>
>   Linus



[PATCH 24/27] afs: Fix afs_statfs() to not let the values go below zero

2020-05-29 Thread David Howells
Fix afs_statfs() so that the value for f_bavail and f_bfree don't go
"negative" if the number of blocks in use by a volume exceeds the max quota
for that volume.

Signed-off-by: David Howells 
---

 fs/afs/super.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/afs/super.c b/fs/afs/super.c
index c77b11b31233..b552357b1d13 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -729,7 +729,10 @@ static void afs_get_volume_status_success(struct 
afs_operation *op)
buf->f_blocks = vs->part_max_blocks;
else
buf->f_blocks = vs->max_quota;
-   buf->f_bavail = buf->f_bfree = buf->f_blocks - vs->blocks_in_use;
+
+   if (buf->f_blocks > vs->blocks_in_use)
+   buf->f_bavail = buf->f_bfree =
+   buf->f_blocks - vs->blocks_in_use;
 }
 
 static const struct afs_operation_ops afs_get_volume_status_operation = {




[PATCH 23/27] afs: Fix the by-UUID server tree to allow servers with the same UUID

2020-05-29 Thread David Howells
Whilst it shouldn't happen, it is possible for multiple fileservers to
share a UUID, particularly if an entire cell has been duplicated, UUIDs and
all.  In such a case, it's not necessarily possible to map the effect of
the CB.InitCallBackState3 incoming RPC to a specific server unambiguously
by UUID and thus to a specific cell.

Indeed, there's a problem whereby multiple server records may need to
occupy the same spot in the rb_tree rooted in the afs_net struct.

Fix this by allowing servers to form a list, with the head of the list in
the tree.  When the front entry in the list is removed, the second in the
list just replaces it.  afs_init_callback_state() then just goes down the
line, poking each server in the list.

This means that some servers will be unnecessarily poked, unfortunately.
An alternative would be to route by call parameters.

Reported-by: Jeffrey Altman 
Signed-off-by: David Howells 
Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and 
fileserver rotation")
---

 fs/afs/callback.c |   10 -
 fs/afs/internal.h |4 +++-
 fs/afs/server.c   |   56 +
 3 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b4cb9bb63f0a..7d9b23d981bf 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -21,11 +21,17 @@
 #include "internal.h"
 
 /*
- * allow the fileserver to request callback state (re-)initialisation
+ * Allow the fileserver to request callback state (re-)initialisation.
+ * Unfortunately, UUIDs are not guaranteed unique.
  */
 void afs_init_callback_state(struct afs_server *server)
 {
-   server->cb_s_break++;
+   rcu_read_lock();
+   do {
+   server->cb_s_break++;
+   server = rcu_dereference(server->uuid_next);
+   } while (0);
+   rcu_read_unlock();
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c64c2b47ece7..e0dc14d4d8b9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -486,7 +486,9 @@ struct afs_server {
 
struct afs_addr_list__rcu *addresses;
struct afs_cell *cell;  /* Cell to which belongs (pins 
ref) */
-   struct rb_node  uuid_rb;/* Link in cell->fs_servers */
+   struct rb_node  uuid_rb;/* Link in net->fs_servers */
+   struct afs_server __rcu *uuid_next; /* Next server with same UUID */
+   struct afs_server   *uuid_prev; /* Previous server with same 
UUID */
struct list_headprobe_link; /* Link in net->fs_probe_list */
struct hlist_node   addr4_link; /* Link in net->fs_addresses4 */
struct hlist_node   addr6_link; /* Link in net->fs_addresses6 */
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c51039a077cd..88593ffcb54e 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -130,13 +130,15 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net 
*net, const uuid_t *uu
 }
 
 /*
- * Install a server record in the namespace tree
+ * Install a server record in the namespace tree.  If there's a clash, we stick
+ * it into a list anchored on whichever afs_server struct is actually in the
+ * tree.
  */
 static struct afs_server *afs_install_server(struct afs_cell *cell,
 struct afs_server *candidate)
 {
const struct afs_addr_list *alist;
-   struct afs_server *server;
+   struct afs_server *server, *next;
struct afs_net *net = cell->net;
struct rb_node **pp, *p;
int diff;
@@ -153,12 +155,30 @@ static struct afs_server *afs_install_server(struct 
afs_cell *cell,
_debug("- consider %p", p);
server = rb_entry(p, struct afs_server, uuid_rb);
diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
-   if (diff < 0)
+   if (diff < 0) {
pp = &(*pp)->rb_left;
-   else if (diff > 0)
+   } else if (diff > 0) {
pp = &(*pp)->rb_right;
-   else
-   goto exists;
+   } else {
+   if (server->cell == cell)
+   goto exists;
+
+   /* We have the same UUID representing servers in
+* different cells.  Append the new server to the list.
+*/
+   for (;;) {
+   next = rcu_dereference_protected(
+   server->uuid_next,
+   lockdep_is_held(&net->fs_lock.lock));
+   if (!next)
+   break;
+   server = next;
+   }
+   rcu_assign_pointer(server->uuid_next, candidate);
+   candidate->uuid_prev = serv

[PATCH 26/27] afs: Show more a bit more server state in /proc/net/afs/servers

2020-05-29 Thread David Howells
Display more information about the state of a server record, including the
flags, rtt and break counter plus the probe state for each server in
/proc/net/afs/servers.

Rearrange the server flags a bit to make them easier to read at a glance in
the proc file.

Signed-off-by: David Howells 
---

 fs/afs/internal.h |   16 
 fs/afs/proc.c |   10 +++---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a4fe5d1a8b53..af0b7fca87db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -497,15 +497,15 @@ struct afs_server {
time64_tunuse_time; /* Time at which last unused */
unsigned long   flags;
 #define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
-#define AFS_SERVER_FL_NOT_READY1   /* The record is not 
ready for use */
-#define AFS_SERVER_FL_NOT_FOUND2   /* VL server says no 
such server */
-#define AFS_SERVER_FL_VL_FAIL  3   /* Failed to access VL server */
-#define AFS_SERVER_FL_UPDATING 4
-#define AFS_SERVER_FL_NO_IBULK 7   /* Fileserver doesn't support 
FS.InlineBulkStatus */
+#define AFS_SERVER_FL_UPDATING 1
+#define AFS_SERVER_FL_NEEDS_UPDATE 2   /* Fileserver address list is 
out of date */
+#define AFS_SERVER_FL_NOT_READY4   /* The record is not 
ready for use */
+#define AFS_SERVER_FL_NOT_FOUND5   /* VL server says no 
such server */
+#define AFS_SERVER_FL_VL_FAIL  6   /* Failed to access VL server */
 #define AFS_SERVER_FL_MAY_HAVE_CB 8/* May have callbacks on this 
fileserver */
-#define AFS_SERVER_FL_IS_YFS   9   /* Server is YFS not AFS */
-#define AFS_SERVER_FL_NO_RM2   10  /* Fileserver doesn't support 
YFS.RemoveFile2 */
-#define AFS_SERVER_FL_NEEDS_UPDATE 12  /* Fileserver address list is 
out of date */
+#define AFS_SERVER_FL_IS_YFS   16  /* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_IBULK 17  /* Fileserver doesn't support 
FS.InlineBulkStatus */
+#define AFS_SERVER_FL_NO_RM2   18  /* Fileserver doesn't support 
YFS.RemoveFile2 */
atomic_tref;/* Object refcount */
atomic_tactive; /* Active user count */
u32 addr_version;   /* Address list version */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 309a7b578255..22d00cf1913d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -386,9 +386,13 @@ static int afs_proc_servers_show(struct seq_file *m, void 
*v)
   &server->uuid,
   atomic_read(&server->ref),
   atomic_read(&server->active));
-   seq_printf(m, "  - ALIST v=%u osp=%u r=%lx f=%lx\n",
-  alist->version, atomic_read(&server->probe_outstanding),
-  alist->responded, alist->failed);
+   seq_printf(m, "  - info: fl=%lx rtt=%u brk=%x\n",
+  server->flags, server->rtt, server->cb_s_break);
+   seq_printf(m, "  - probe: last=%d out=%d\n",
+  (int)(jiffies - server->probed_at) / HZ,
+  atomic_read(&server->probe_outstanding));
+   seq_printf(m, "  - ALIST v=%u rsp=%lx f=%lx\n",
+  alist->version, alist->responded, alist->failed);
for (i = 0; i < alist->nr_addrs; i++)
seq_printf(m, "[%x] %pISpc%s\n",
   i, &alist->addrs[i].transport,




[PATCH 27/27] afs: Adjust the fileserver rotation algorithm to reprobe/retry more quickly

2020-05-29 Thread David Howells
Adjust the fileserver rotation algorithm so that if we've tried all the
addresses on a server (cumulatively over multiple operations) until we've
run out of untried addresses, immediately reprobe all that server's
interfaces and retry the op at least once before we move onto the next
server.

Signed-off-by: David Howells 
---

 fs/afs/fs_probe.c |   47 +++
 fs/afs/internal.h |   24 ++--
 fs/afs/rotate.c   |   29 +++--
 3 files changed, 88 insertions(+), 12 deletions(-)

diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c41cf3b2ab89..b34f74b0f319 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -338,6 +338,18 @@ static void afs_dispatch_fs_probe(struct afs_net *net, 
struct afs_server *server
afs_put_server(net, server, afs_server_trace_put_probe);
 }
 
+/*
+ * Probe a server immediately without waiting for its due time to come
+ * round.  This is used when all of the addresses have been tried.
+ */
+void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
+{
+   write_seqlock(&net->fs_lock);
+   if (!list_empty(&server->probe_link))
+   return afs_dispatch_fs_probe(net, server, true);
+   write_sequnlock(&net->fs_lock);
+}
+
 /*
  * Probe dispatcher to regularly dispatch probes to keep NAT alive.
  */
@@ -411,3 +423,38 @@ void afs_fs_probe_dispatcher(struct work_struct *work)
_leave(" [quiesce]");
}
 }
+
+/*
+ * Wait for a probe on a particular fileserver to complete for 2s.
+ */
+int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+{
+   struct wait_queue_entry wait;
+   unsigned long timo = 2 * HZ;
+
+   if (atomic_read(&server->probe_outstanding) == 0)
+   goto dont_wait;
+
+   init_wait_entry(&wait, 0);
+   for (;;) {
+   prepare_to_wait_event(&server->probe_wq, &wait,
+ is_intr ? TASK_INTERRUPTIBLE : 
TASK_UNINTERRUPTIBLE);
+   if (timo == 0 ||
+   server->probe.responded ||
+   atomic_read(&server->probe_outstanding) == 0 ||
+   (is_intr && signal_pending(current)))
+   break;
+   timo = schedule_timeout(timo);
+   }
+
+   finish_wait(&server->probe_wq, &wait);
+
+dont_wait:
+   if (server->probe.responded)
+   return 0;
+   if (is_intr && signal_pending(current))
+   return -ERESTARTSYS;
+   if (timo == 0)
+   return -ETIME;
+   return -EDESTADDRREQ;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index af0b7fca87db..e1621b0670cc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -826,16 +826,18 @@ struct afs_operation {
unsigned short  nr_iterations;  /* Number of server iterations 
*/
 
unsigned intflags;
-#define AFS_OPERATION_STOP 0x0001  /* Set to cease iteration */
-#define AFS_OPERATION_VBUSY0x0002  /* Set if seen VBUSY */
-#define AFS_OPERATION_VMOVED   0x0004  /* Set if seen VMOVED */
-#define AFS_OPERATION_VNOVOL   0x0008  /* Set if seen VNOVOL */
-#define AFS_OPERATION_CUR_ONLY 0x0010  /* Set if current server only 
(file lock held) */
-#define AFS_OPERATION_NO_VSLEEP0x0020  /* Set to prevent sleep 
on VBUSY, VOFFLINE, ... */
-#define AFS_OPERATION_UNINTR   0x0040  /* Set if op is uninterruptible 
*/
-#define AFS_OPERATION_DOWNGRADE0x0080  /* Set to retry with 
downgraded opcode */
-#define AFS_OPERATION_LOCK_0   0x0100  /* Set if have io_lock on 
file[0] */
-#define AFS_OPERATION_LOCK_1   0x0200  /* Set if have io_lock on 
file[1] */
+#define AFS_OPERATION_STOP 0x0001  /* Set to cease iteration */
+#define AFS_OPERATION_VBUSY0x0002  /* Set if seen VBUSY */
+#define AFS_OPERATION_VMOVED   0x0004  /* Set if seen VMOVED */
+#define AFS_OPERATION_VNOVOL   0x0008  /* Set if seen VNOVOL */
+#define AFS_OPERATION_CUR_ONLY 0x0010  /* Set if current server only 
(file lock held) */
+#define AFS_OPERATION_NO_VSLEEP0x0020  /* Set to prevent sleep 
on VBUSY, VOFFLINE, ... */
+#define AFS_OPERATION_UNINTR   0x0040  /* Set if op is uninterruptible 
*/
+#define AFS_OPERATION_DOWNGRADE0x0080  /* Set to retry with 
downgraded opcode */
+#define AFS_OPERATION_LOCK_0   0x0100  /* Set if have io_lock on 
file[0] */
+#define AFS_OPERATION_LOCK_1   0x0200  /* Set if have io_lock on 
file[1] */
+#define AFS_OPERATION_TRIED_ALL0x0400  /* Set if we've tried 
all the fileservers */
+#define AFS_OPERATION_RETRY_SERVER 0x0800  /* Set if we should retry the 
current server */
 };
 
 /*
@@ -1055,7 +1057,9 @@ static inline void afs_op_set_fid(struct afs_operation 
*op, unsigned int n,
 extern void afs_fileserver_probe_result(s

[PATCH 25/27] afs: Don't use probe running state to make decisions outside probe code

2020-05-29 Thread David Howells
Don't use the running state for fileserver probes to make decisions about
which server to use as the state is cleared at the start of a probe and
also intermediate values might be misleading.

Instead, add a separate 'latest known' rtt in the afs_server struct and a
flag to indicate if the server is known to be responding and update these
as and when we know what to change them to.

Signed-off-by: David Howells 
---

 fs/afs/fs_probe.c |   18 --
 fs/afs/internal.h |4 +++-
 fs/afs/rotate.c   |3 ++-
 fs/afs/server.c   |1 +
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 442b5e7944ff..c41cf3b2ab89 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -42,10 +42,13 @@ static void afs_finished_fs_probe(struct afs_net *net, 
struct afs_server *server
bool responded = server->probe.responded;
 
write_seqlock(&net->fs_lock);
-   if (responded)
+   if (responded) {
list_add_tail(&server->probe_link, &net->fs_probe_slow);
-   else
+   } else {
+   server->rtt = UINT_MAX;
+   clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
list_add_tail(&server->probe_link, &net->fs_probe_fast);
+   }
write_sequnlock(&net->fs_lock);
 
afs_schedule_fs_probe(net, server, !responded);
@@ -161,12 +164,14 @@ void afs_fileserver_probe_result(struct afs_call *call)
rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
+   server->rtt = rtt_us;
alist->preferred = index;
}
 
smp_wmb(); /* Set rtt before responded. */
server->probe.responded = true;
set_bit(index, &alist->responded);
+   set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
 out:
spin_unlock(&server->probe_lock);
 
@@ -224,7 +229,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, 
unsigned long untried)
 {
struct wait_queue_entry *waits;
struct afs_server *server;
-   unsigned int rtt = UINT_MAX;
+   unsigned int rtt = UINT_MAX, rtt_s;
bool have_responders = false;
int pref = -1, i;
 
@@ -280,10 +285,11 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, 
unsigned long untried)
for (i = 0; i < slist->nr_servers; i++) {
if (test_bit(i, &untried)) {
server = slist->servers[i].server;
-   if (server->probe.responded &&
-   server->probe.rtt < rtt) {
+   rtt_s = READ_ONCE(server->rtt);
+   if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) 
&&
+   rtt_s < rtt) {
pref = i;
-   rtt = server->probe.rtt;
+   rtt = rtt_s;
}
 
remove_wait_queue(&server->probe_wq, &waits[i]);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e0dc14d4d8b9..a4fe5d1a8b53 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -496,6 +496,7 @@ struct afs_server {
struct afs_server   *gc_next;   /* Next server in manager's 
list */
time64_tunuse_time; /* Time at which last unused */
unsigned long   flags;
+#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
 #define AFS_SERVER_FL_NOT_READY1   /* The record is not 
ready for use */
 #define AFS_SERVER_FL_NOT_FOUND2   /* VL server says no 
such server */
 #define AFS_SERVER_FL_VL_FAIL  3   /* Failed to access VL server */
@@ -508,6 +509,7 @@ struct afs_server {
atomic_tref;/* Object refcount */
atomic_tactive; /* Active user count */
u32 addr_version;   /* Address list version */
+   unsigned intrtt;/* Server's current RTT in uS */
unsigned intdebug_id;   /* Debugging ID for traces */
 
/* file service access */
@@ -522,7 +524,7 @@ struct afs_server {
atomic_tprobe_outstanding;
spinlock_t  probe_lock;
struct {
-   unsigned intrtt;/* RTT as ktime/64 */
+   unsigned intrtt;/* RTT in uS */
u32 abort_code;
short   error;
boolresponded:1;
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 979979e33a77..d1590fb382b6 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -339,7 +339,8 @@ bool afs_select_fileserver(struct afs_operation *op)
for (i = 0; i < op->server_list->nr_servers; i++) {
struct afs_server *s = op->server_list->servers[i

[PATCH 22/27] afs: Reorganise volume and server trees to be rooted on the cell

2020-05-29 Thread David Howells
Reorganise afs_volume objects such that they're in a tree keyed on volume
ID, rooted at on an afs_cell object rather than being in multiple trees,
each of which is rooted on an afs_server object.

afs_server structs become per-cell and acquire a pointer to the cell.

The process of breaking a callback then starts with finding the server by
its network address, following that to the cell and then looking up each
volume ID in the volume tree.

This is simpler than the afs_vol_interest/afs_cb_interest N:M mapping web
and allows those structs and the code for maintaining them to be simplified
or removed.

It does make a couple of things a bit more tricky, though:

 (1) Operations now start with a volume, not a server, so there can be more
 than one answer as to whether or not the server we'll end up using
 supports the FS.InlineBulkStatus RPC.

 (2) CB RPC operations that specify the server UUID.  There's still a tree
 of servers by UUID on the afs_net struct, but the UUIDs in it aren't
 guaranteed unique.

Signed-off-by: David Howells 
---

 fs/afs/callback.c |  286 +++--
 fs/afs/cell.c |7 +
 fs/afs/dir.c  |   45 ++--
 fs/afs/fs_operation.c |   11 --
 fs/afs/fsclient.c |   12 +-
 fs/afs/inode.c|   59 --
 fs/afs/internal.h |   78 -
 fs/afs/proc.c |   15 +--
 fs/afs/rotate.c   |  128 +++---
 fs/afs/rxrpc.c|5 -
 fs/afs/security.c |8 +
 fs/afs/server.c   |   13 +-
 fs/afs/server_list.c  |   30 -
 fs/afs/super.c|7 -
 fs/afs/vl_alias.c |   14 +-
 fs/afs/volume.c   |   84 +-
 fs/afs/yfsclient.c|2 
 17 files changed, 257 insertions(+), 547 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 282dbac84435..b4cb9bb63f0a 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -20,185 +20,6 @@
 #include 
 #include "internal.h"
 
-/*
- * Create volume and callback interests on a server.
- */
-static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
-  struct afs_vnode *vnode)
-{
-   struct afs_vol_interest *new_vi, *vi;
-   struct afs_cb_interest *new;
-   struct rb_node *parent, **pp;
-
-   new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL);
-   if (!new_vi)
-   return NULL;
-
-   new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
-   if (!new) {
-   kfree(new_vi);
-   return NULL;
-   }
-
-   new_vi->usage = 1;
-   new_vi->vid = vnode->volume->vid;
-   INIT_HLIST_HEAD(&new_vi->cb_interests);
-
-   refcount_set(&new->usage, 1);
-   new->sb = vnode->vfs_inode.i_sb;
-   new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
-   INIT_HLIST_NODE(&new->cb_vlink);
-
-   write_seqlock(&server->cb_break_lock);
-
-   pp = &server->cb_volumes.rb_node;
-   while ((parent = *pp)) {
-   vi = rb_entry(parent, struct afs_vol_interest, srv_node);
-   if (vi->vid < new_vi->vid) {
-   pp = &(*pp)->rb_left;
-   } else if (vi->vid > new_vi->vid) {
-   pp = &(*pp)->rb_right;
-   } else {
-   vi->usage++;
-   goto found_vi;
-   }
-   }
-
-   vi = new_vi;
-   new_vi = NULL;
-   rb_link_node_rcu(&vi->srv_node, parent, pp);
-   rb_insert_color(&vi->srv_node, &server->cb_volumes);
-
-found_vi:
-   new->vol_interest = vi;
-   hlist_add_head(&new->cb_vlink, &vi->cb_interests);
-
-   write_sequnlock(&server->cb_break_lock);
-   kfree(new_vi);
-   return new;
-}
-
-/*
- * Set up an interest-in-callbacks record for a volume on a server and
- * register it with the server.
- * - Called with vnode->io_lock held.
- */
-int afs_register_server_cb_interest(struct afs_vnode *vnode,
-   struct afs_server_list *slist,
-   unsigned int index)
-{
-   struct afs_server_entry *entry = &slist->servers[index];
-   struct afs_cb_interest *cbi, *vcbi, *new, *old;
-   struct afs_server *server = entry->server;
-
-again:
-   vcbi = rcu_dereference_protected(vnode->cb_interest,
-lockdep_is_held(&vnode->io_lock));
-   if (vcbi && likely(vcbi == entry->cb_interest))
-   return 0;
-
-   read_lock(&slist->lock);
-   cbi = afs_get_cb_interest(entry->cb_interest);
-   read_unlock(&slist->lock);
-
-   if (vcbi) {
-   if (vcbi == cbi) {
-   afs_put_cb_interest(afs_v2net(vnode), cbi);
-   return 0;
-   }
-
-   /* Use a new interest in the server list for the same server
-* rather than an old one that's still attached to a 

Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro

2020-05-29 Thread Andy Shevchenko
On Sat, May 30, 2020 at 12:42 AM Andy Shevchenko
 wrote:
> On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris  
> wrote:
> > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko
> >  wrote:

...

> > Explanation:
> > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer),
> > (BIT(nbits) - 1)
> > gives a value of zero and when this zero is ANDed with any value, it
> > makes it full zero. This is unexpected and incorrect calculation happening.
> >
> > What actually happens is in the macro expansion of BIT(64), that is 1
> > << 64, the '1' overflows from leftmost bit position (most significant
> > bit) and re-enters at the rightmost bit position (least significant
> > bit), therefore 1 << 64 becomes '0x1', and when another '1' is
> > subtracted from this, the final result becomes 0.
> >
> > Since this macro is being used in both bitmap_get_value and
> > bitmap_set_value functions, it will give unexpected results when nbits or 
> > clump
> > size is BITS_PER_LONG (32 or 64 depending on arch).
>
> I see, something like
> https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139
> should be done.
> But yes, let's try to fix GENMASK().
>
> So, if we modify the following
>
>   #define GENMASK_INPUT_CHECK(h, l) \
> (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> __builtin_constant_p((l) > (h)), (l) > (h), 0)))
>
> to be
>
>   #define GENMASK_INPUT_CHECK(h, l) \
> (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0)))
>
> would it work?

Actually it needs an amendment for signed types...

(l) ? (l) > (h) : !((h) > 0)

...but today is Friday night, so, mistakes are warranted :-)

-- 
With Best Regards,
Andy Shevchenko


drivers/usb/cdns3/gadget.c:1188:29: sparse: sparse: incorrect type in assignment (different base types)

2020-05-29 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   75caf310d16cc5e2f851c048cd597f5437013368
commit: 70d8b9e5e63d212019ba3f6823c8ec3d2df87645 usb: cdns3: make signed 1 bit 
bitfields unsigned
date:   9 weeks ago
config: i386-randconfig-s031-20200529 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.1-243-gc100a7ab-dirty
git checkout 70d8b9e5e63d212019ba3f6823c8ec3d2df87645
# save the attached .config to linux build tree
make W=1 C=1 ARCH=i386 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 


sparse warnings: (new ones prefixed by >>)

   drivers/usb/cdns3/gadget.c:1157:35: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
control @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1157:35: sparse: expected restricted __le32 
[usertype] control
   drivers/usb/cdns3/gadget.c:1157:35: sparse: got unsigned long
   drivers/usb/cdns3/gadget.c:1173:29: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
buffer @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1173:29: sparse: expected restricted __le32 
[usertype] buffer
   drivers/usb/cdns3/gadget.c:1173:29: sparse: got unsigned long
>> drivers/usb/cdns3/gadget.c:1188:29: sparse: sparse: incorrect type in 
>> assignment (different base types) @@ expected restricted __le32 
>> [usertype] length @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1188:29: sparse: expected restricted __le32 
[usertype] length
   drivers/usb/cdns3/gadget.c:1188:29: sparse: got unsigned long
   drivers/usb/cdns3/gadget.c:1191:37: sparse: sparse: invalid assignment: |=
   drivers/usb/cdns3/gadget.c:1191:37: sparse:left side has type restricted 
__le32
   drivers/usb/cdns3/gadget.c:1191:37: sparse:right side has type unsigned 
long
   drivers/usb/cdns3/gadget.c:1213:38: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
control @@ got unsigned int [assigned] [usertype] control @@
   drivers/usb/cdns3/gadget.c:1213:38: sparse: expected restricted __le32 
[usertype] control
   drivers/usb/cdns3/gadget.c:1213:38: sparse: got unsigned int [assigned] 
[usertype] control
   drivers/usb/cdns3/gadget.c:1215:48: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
control @@ got unsigned int [assigned] [usertype] control @@
   drivers/usb/cdns3/gadget.c:1215:48: sparse: expected restricted __le32 
[usertype] control
   drivers/usb/cdns3/gadget.c:1215:48: sparse: got unsigned int [assigned] 
[usertype] control
   drivers/usb/cdns3/gadget.c:1229:30: sparse: sparse: invalid assignment: |=
   drivers/usb/cdns3/gadget.c:1229:30: sparse:left side has type restricted 
__le32
   drivers/usb/cdns3/gadget.c:1229:30: sparse:right side has type unsigned 
long
   drivers/usb/cdns3/gadget.c:1255:36: sparse: sparse: restricted __le32 
degrades to integer
   drivers/usb/cdns3/gadget.c:1255:30: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
control @@ got unsigned int @@
   drivers/usb/cdns3/gadget.c:1255:30: sparse: expected restricted __le32 
[usertype] control
   drivers/usb/cdns3/gadget.c:1255:30: sparse: got unsigned int
   drivers/usb/cdns3/gadget.c:1010:29: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
buffer @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1010:29: sparse: expected restricted __le32 
[usertype] buffer
   drivers/usb/cdns3/gadget.c:1010:29: sparse: got unsigned long
   drivers/usb/cdns3/gadget.c:1013:29: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
buffer @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1013:29: sparse: expected restricted __le32 
[usertype] buffer
   drivers/usb/cdns3/gadget.c:1013:29: sparse: got unsigned long
   drivers/usb/cdns3/gadget.c:1019:21: sparse: sparse: incorrect type in 
assignment (different base types) @@ expected restricted __le32 [usertype] 
length @@ got unsigned long @@
   drivers/usb/cdns3/gadget.c:1019:21: sparse: expected restricted __le32 
[usertype] length
   drivers/usb/cdns3/gadget.c:1019:21: sparse: got unsigned long
   drivers/usb/cdns3/gadget.c:1029:37: sparse: sparse: invalid assignment: |=
   drivers/usb/cdns3/gadget.c:1029:37: sparse:left side has type restricted 
__le32
   drivers/usb/cdns3/gadget.c:1029:37: sparse:right side has type unsigned 
long
   

Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro

2020-05-29 Thread Syed Nayyar Waris
On Sat, May 30, 2020 at 3:13 AM Andy Shevchenko
 wrote:
>
> On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris  
> wrote:
> > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko
> >  wrote:
> > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote:
> > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot  
> > > > wrote:
>
> ...
>
> > > > Taking the example statement (in my patch) where compilation warning
> > > > is getting reported:
> > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0);
> > > >
> > > > 'nbits' is of type 'unsigned long'.
> > > > In above, the sanity check is comparing '0' with unsigned value. And
> > > > unsigned value can't be less than '0' ever, hence the warning.
> > > > But this warning will occur whenever there will be '0' as one of the
> > > > 'argument' and an unsigned variable as another 'argument' for GENMASK.
>
> > > Proper fix is to fix GENMASK(), but allowed workaround is to use
> > > (BIT(nbits) - 1)
> > > instead.
>
> > When I used BIT macro (earlier), I had faced a problem. I want to tell
> > you about that.
> >
> > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or
> > clump size) is BITS_PER_LONG, unexpected calculation happens.
> >
> > Explanation:
> > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my computer),
> > (BIT(nbits) - 1)
> > gives a value of zero and when this zero is ANDed with any value, it
> > makes it full zero. This is unexpected and incorrect calculation happening.
> >
> > What actually happens is in the macro expansion of BIT(64), that is 1
> > << 64, the '1' overflows from leftmost bit position (most significant
> > bit) and re-enters at the rightmost bit position (least significant
> > bit), therefore 1 << 64 becomes '0x1', and when another '1' is
> > subtracted from this, the final result becomes 0.
> >
> > Since this macro is being used in both bitmap_get_value and
> > bitmap_set_value functions, it will give unexpected results when nbits or 
> > clump
> > size is BITS_PER_LONG (32 or 64 depending on arch).
>
> I see, something like
> https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139
> should be done.
> But yes, let's try to fix GENMASK().
>
> So, if we modify the following
>
>   #define GENMASK_INPUT_CHECK(h, l) \
> (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> __builtin_constant_p((l) > (h)), (l) > (h), 0)))
>
> to be
>
>   #define GENMASK_INPUT_CHECK(h, l) \
> (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0)))
>
> would it work?

Sorry Andy it is not working. Actually the warning will be thrown,
whenever there will be comparison between 'h' and 'l'. If one of them
is '0' and the other is unsigned variable.
In above, still there is comparison being done between 'h' and 'l', so
the warning is getting thrown.

>
> > William also knows about this issue:
> > "This is undefined behavior in the C standard (section 6.5.7 in the N1124)"
>
> I think it is about 6.5.7.3  here, 1U << 31 (or 63) is okay.

Actually for:
(BIT(nbits) - 1)
When nbits will be BITS_PER_LONG it will be 1U << 32 (or 64). Isn't it ?
The expression,
BIT(64) - 1
can become unexpectedly zero (incorrectly).

>
> --
> With Best Regards,
> Andy Shevchenko


Re: [PATCH] [v2] ARM: omap2: fix omap5_realtime_timer_init definition

2020-05-29 Thread Tony Lindgren
* Arnd Bergmann  [200529 22:01]:
> There is one more regression introduced by the last build fix:
> 
> arch/arm/mach-omap2/timer.c:170:6: error: attribute declaration must precede 
> definition [-Werror,-Wignored-attributes]
> void __init omap5_realtime_timer_init(void)
>  ^
> arch/arm/mach-omap2/common.h:118:20: note: previous definition is here
> static inline void omap5_realtime_timer_init(void)
>^
> arch/arm/mach-omap2/timer.c:170:13: error: redefinition of 
> 'omap5_realtime_timer_init'
> void __init omap5_realtime_timer_init(void)
> ^
> arch/arm/mach-omap2/common.h:118:20: note: previous definition is here
> static inline void omap5_realtime_timer_init(void)
> 
> As it turns out, the CONFIG_SOC_HAS_REALTIME_COUNTER option
> should never be disabled for OMAP5 as we realy on this to initialize
> the clocks and the timer. Just remove it here and make it the default.
> 
> Removing the guard around the set_cntfreq() definition, I noticed that
> this is not properly namespaced, so fix that as well.

Looks good to me, thanks for fixing it:

Acked-by: Tony Lindgren 


Re: [PATCH] refperf: work around 64-bit division

2020-05-29 Thread Randy Dunlap
On 5/29/20 1:15 PM, Arnd Bergmann wrote:
> A 64-bit division was introduced in refperf, breaking compilation
> on all 32-bit architectures:
> 
> kernel/rcu/refperf.o: in function `main_func':
> refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod'
> 
> Work it by using div_u64 to mark the expensive operation.
> 
> Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds")
> Signed-off-by: Arnd Bergmann 

Acked-by: Randy Dunlap  # build-tested

Thanks.

> ---
>  kernel/rcu/refperf.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c
> index 47df72c492b3..c2366648981d 100644
> --- a/kernel/rcu/refperf.c
> +++ b/kernel/rcu/refperf.c
> @@ -386,7 +386,7 @@ static int main_func(void *arg)
>   if (torture_must_stop())
>   goto end;
>  
> - result_avg[exp] = 1000 * process_durations(nreaders) / 
> (nreaders * loops);
> + result_avg[exp] = div_u64(1000 * process_durations(nreaders), 
> nreaders * loops);
>   }
>  
>   // Print the average of all experiments
> @@ -397,9 +397,14 @@ static int main_func(void *arg)
>   strcat(buf, "Threads\tTime(ns)\n");
>  
>   for (exp = 0; exp < nruns; exp++) {
> + u64 avg;
> + u32 rem;
> +
>   if (errexit)
>   break;
> - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / 
> 1000, (int)(result_avg[exp] % 1000));
> +
> + avg = div_s64_rem(result_avg[exp], 1000, &rem);
> + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem);
>   strcat(buf, buf1);
>   }
>  
> 


-- 
~Randy


Re: [PATCH v3] regmap: fix alignment issue

2020-05-29 Thread Mark Brown
On Fri, May 29, 2020 at 09:25:38PM +0200, Jens Thoms Toerring wrote:

> pointers with odd addresses. On architectures with strict alignment
> requirements this results in a kernel crash for u16 and u32 values.

Which architectures out of interest?


signature.asc
Description: PGP signature


Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking

2020-05-29 Thread Steven Rostedt
On Fri, 29 May 2020 23:27:41 +0200
Peter Zijlstra  wrote:

> There is no reason not to always, accurately, track IRQ state.
> 
> This change also makes IRQ state tracking ignore lockdep_off().
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  kernel/locking/lockdep.c |   33 ++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v
>   */
>  void lockdep_hardirqs_on_prepare(unsigned long ip)
>  {
> - if (unlikely(!debug_locks || current->lockdep_recursion))

Why remove the check for debug_locks? Isn't that there to disable
everything at once to prevent more warnings to be printed?

Also, isn't there other ways that we could have recursion besides NMIs?
Say we do a printk inside here, or call something that may also enable
interrupts? I thought the recursion check was also to prevent lockdep
infrastructure calling something that lockdep monitors being a problem?

Or am I missing something?

-- Steve


> + /*
> +  * NMIs do not (and cannot) track lock dependencies, nothing to do.
> +  */
> + if (in_nmi())
> + return;
> +
> + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & 
> LOCKDEP_RECURSION_MASK))
>   return;
>  
>   if (unlikely(current->hardirqs_enabled)) {
> @@ -3692,7 +3698,24 @@ void noinstr lockdep_hardirqs_on(unsigne
>  {
>   struct task_struct *curr = current;
>  
> - if (unlikely(!debug_locks || curr->lockdep_recursion))
> + /*
> +  * NMIs can happen in the middle of local_irq_{en,dis}able() where the
> +  * tracking state and hardware state are out of sync.
> +  *
> +  * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
> +  * and not rely on hardware state like normal interrupts.
> +  */
> + if (in_nmi()) {
> + /*
> +  * Skip:
> +  *  - recursion check, because NMI can hit lockdep;
> +  *  - hardware state check, because above;
> +  *  - chain_key check, see lockdep_hardirqs_on_prepare().
> +  */
> + goto skip_checks;
> + }
> +
> + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_recursion & 
> LOCKDEP_RECURSION_MASK))
>   return;
>  
>   if (curr->hardirqs_enabled) {
> @@ -3720,6 +3743,7 @@ void noinstr lockdep_hardirqs_on(unsigne
>   DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
>   current->curr_chain_key);
>  
> +skip_checks:
>   /* we'll do an OFF -> ON transition: */
>   curr->hardirqs_enabled = 1;
>   curr->hardirq_enable_ip = ip;
> @@ -3735,7 +3759,10 @@ void noinstr lockdep_hardirqs_off(unsign
>  {
>   struct task_struct *curr = current;
>  
> - if (unlikely(!debug_locks || curr->lockdep_recursion))
> + /*
> +  * NMIs can happen in lockdep.
> +  */
> + if (!in_nmi() && DEBUG_LOCKS_WARN_ON(curr->lockdep_recursion & 
> LOCKDEP_RECURSION_MASK))
>   return;
>  
>   /*
> 



Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code

2020-05-29 Thread Jerry Snitselaar

On Tue Apr 14 20, Joerg Roedel wrote:

Hi,

here is the second version of this patch-set. The first version with
some more introductory text can be found here:

https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/

Changes v1->v2:

* Rebased to v5.7-rc1

* Re-wrote the arm-smmu changes as suggested by Robin Murphy

* Re-worked the Exynos patches to hopefully not break the
  driver anymore

* Fixed a missing mutex_unlock() reported by Marek Szyprowski,
  thanks for that.

There is also a git-branch available with these patches applied:


https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v2

Please review.

Thanks,

Joerg

Joerg Roedel (32):
 iommu: Move default domain allocation to separate function
 iommu/amd: Implement iommu_ops->def_domain_type call-back
 iommu/vt-d: Wire up iommu_ops->def_domain_type
 iommu/amd: Remove dma_mask check from check_device()
 iommu/amd: Return -ENODEV in add_device when device is not handled by
   IOMMU
 iommu: Add probe_device() and remove_device() call-backs
 iommu: Move default domain allocation to iommu_probe_device()
 iommu: Keep a list of allocated groups in __iommu_probe_device()
 iommu: Move new probe_device path to separate function
 iommu: Split off default domain allocation from group assignment
 iommu: Move iommu_group_create_direct_mappings() out of
   iommu_group_add_device()
 iommu: Export bus_iommu_probe() and make is safe for re-probing
 iommu/amd: Remove dev_data->passthrough
 iommu/amd: Convert to probe/release_device() call-backs
 iommu/vt-d: Convert to probe/release_device() call-backs
 iommu/arm-smmu: Convert to probe/release_device() call-backs
 iommu/pamu: Convert to probe/release_device() call-backs
 iommu/s390: Convert to probe/release_device() call-backs
 iommu/virtio: Convert to probe/release_device() call-backs
 iommu/msm: Convert to probe/release_device() call-backs
 iommu/mediatek: Convert to probe/release_device() call-backs
 iommu/mediatek-v1 Convert to probe/release_device() call-backs
 iommu/qcom: Convert to probe/release_device() call-backs
 iommu/rockchip: Convert to probe/release_device() call-backs
 iommu/tegra: Convert to probe/release_device() call-backs
 iommu/renesas: Convert to probe/release_device() call-backs
 iommu/omap: Remove orphan_dev tracking
 iommu/omap: Convert to probe/release_device() call-backs
 iommu/exynos: Use first SYSMMU in controllers list for IOMMU core
 iommu/exynos: Convert to probe/release_device() call-backs
 iommu: Remove add_device()/remove_device() code-paths
 iommu: Unexport iommu_group_get_for_dev()

Sai Praneeth Prakhya (1):
 iommu: Add def_domain_type() callback in iommu_ops

drivers/iommu/amd_iommu.c   |  97 
drivers/iommu/amd_iommu_types.h |   1 -
drivers/iommu/arm-smmu-v3.c |  38 +--
drivers/iommu/arm-smmu.c|  39 ++--
drivers/iommu/exynos-iommu.c|  24 +-
drivers/iommu/fsl_pamu_domain.c |  22 +-
drivers/iommu/intel-iommu.c |  68 +-
drivers/iommu/iommu.c   | 393 +---
drivers/iommu/ipmmu-vmsa.c  |  60 ++---
drivers/iommu/msm_iommu.c   |  34 +--
drivers/iommu/mtk_iommu.c   |  24 +-
drivers/iommu/mtk_iommu_v1.c|  50 ++--
drivers/iommu/omap-iommu.c  |  99 ++--
drivers/iommu/qcom_iommu.c  |  24 +-
drivers/iommu/rockchip-iommu.c  |  26 +--
drivers/iommu/s390-iommu.c  |  22 +-
drivers/iommu/tegra-gart.c  |  24 +-
drivers/iommu/tegra-smmu.c  |  31 +--
drivers/iommu/virtio-iommu.c|  41 +---
include/linux/iommu.h   |  21 +-
20 files changed, 533 insertions(+), 605 deletions(-)

--
2.17.1

___
iommu mailing list
io...@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu



Hi Joerg,

With this patchset, I have an epyc system where if I boot with
iommu=nopt and force a dump I will see some io page faults for a nic
on the system. The vmcore is harvested and the system reboots. I
haven't reproduced it on other systems yet, but without the patchset I
don't see the io page faults during the kdump.

Regards,
Jerry



Re: [PATCH v12 2/4] dt-bindings: power: Convert battery.txt to battery.yaml

2020-05-29 Thread Rob Herring
On Thu, May 28, 2020 at 05:53:48PM -0500, Ricardo Rivera-Matos wrote:
> From: Dan Murphy 
> 
> Convert the battery.txt file to yaml and fix up the examples.
> 
> Signed-off-by: Dan Murphy 
> ---
>  .../bindings/power/supply/battery.txt |  82 +-
>  .../bindings/power/supply/battery.yaml| 143 ++
>  2 files changed, 144 insertions(+), 81 deletions(-)
>  create mode 100644 
> Documentation/devicetree/bindings/power/supply/battery.yaml
> 
> diff --git a/Documentation/devicetree/bindings/power/supply/battery.txt 
> b/Documentation/devicetree/bindings/power/supply/battery.txt
> index 3049cf88bdcf..b9a81621ce59 100644
> --- a/Documentation/devicetree/bindings/power/supply/battery.txt
> +++ b/Documentation/devicetree/bindings/power/supply/battery.txt
> @@ -1,81 +1 @@
> -Battery Characteristics
> -
> -The devicetree battery node provides static battery characteristics.
> -In smart batteries, these are typically stored in non-volatile memory
> -on a fuel gauge chip. The battery node should be used where there is
> -no appropriate non-volatile memory, or it is unprogrammed/incorrect.
> -
> -Upstream dts files should not include battery nodes, unless the battery
> -represented cannot easily be replaced in the system by one of a
> -different type. This prevents unpredictable, potentially harmful,
> -behavior should a replacement that changes the battery type occur
> -without a corresponding update to the dtb.
> -
> -Required Properties:
> - - compatible: Must be "simple-battery"
> -
> -Optional Properties:
> - - voltage-min-design-microvolt: drained battery voltage
> - - voltage-max-design-microvolt: fully charged battery voltage
> - - energy-full-design-microwatt-hours: battery design energy
> - - charge-full-design-microamp-hours: battery design capacity
> - - precharge-current-microamp: current for pre-charge phase
> - - charge-term-current-microamp: current for charge termination phase
> - - constant-charge-current-max-microamp: maximum constant input current
> - - constant-charge-voltage-max-microvolt: maximum constant input voltage
> - - factory-internal-resistance-micro-ohms: battery factory internal 
> resistance
> - - ocv-capacity-table-0: An array providing the open circuit voltage (OCV)
> -   of the battery and corresponding battery capacity percent, which is used
> -   to look up battery capacity according to current OCV value. And the open
> -   circuit voltage unit is microvolt.
> - - ocv-capacity-table-1: Same as ocv-capacity-table-0
> - ..
> - - ocv-capacity-table-n: Same as ocv-capacity-table-0
> - - ocv-capacity-celsius: An array containing the temperature in degree 
> Celsius,
> -   for each of the battery capacity lookup table. The first temperature value
> -   specifies the OCV table 0, and the second temperature value specifies the
> -   OCV table 1, and so on.
> - - resistance-temp-table: An array providing the temperature in degree 
> Celsius
> -   and corresponding battery internal resistance percent, which is used to 
> look
> -   up the resistance percent according to current temperature to get a 
> accurate
> -   batterty internal resistance in different temperatures.
> -
> -Battery properties are named, where possible, for the corresponding
> -elements in enum power_supply_property, defined in
> -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/power_supply.h
> -
> -Batteries must be referenced by chargers and/or fuel-gauges
> -using a phandle. The phandle's property should be named
> -"monitored-battery".
> -
> -Example:
> -
> - bat: battery {
> - compatible = "simple-battery";
> - voltage-min-design-microvolt = <320>;
> - voltage-max-design-microvolt = <420>;
> - energy-full-design-microwatt-hours = <529>;
> - charge-full-design-microamp-hours = <143>;
> - precharge-current-microamp = <256000>;
> - charge-term-current-microamp = <128000>;
> - constant-charge-current-max-microamp = <90>;
> - constant-charge-voltage-max-microvolt = <420>;
> - factory-internal-resistance-micro-ohms = <25>;
> - ocv-capacity-celsius = <(-10) 0 10>;
> - ocv-capacity-table-0 = <4185000 100>, <4113000 95>, <4066000 
> 90>, ...;
> - ocv-capacity-table-1 = <420 100>, <4185000 95>, <4113000 
> 90>, ...;
> - ocv-capacity-table-2 = <425 100>, <420 95>, <4185000 
> 90>, ...;
> - resistance-temp-table = <20 100>, <10 90>, <0 80>, <(-10) 60>;
> - };
> -
> - charger: charger@11 {
> - 
> - monitored-battery = <&bat>;
> - ...
> - };
> -
> - fuel_gauge: fuel-gauge@22 {
> - 
> - monitored-battery = <&bat>;
> - ...
> - };
> +The contents of this file has been moved to battery.yaml
> diff --git a/Documentation/devicetree/bindings/p

Re: [PATCH v12 3/4] dt-bindings: power: Add the bindings for the bq2515x family of chargers.

2020-05-29 Thread Rob Herring
On Thu, May 28, 2020 at 05:53:49PM -0500, Ricardo Rivera-Matos wrote:
> The BQ2515X family of devices are highly integrated battery management
> ICs that integrate the most common functions for wearable devices
> namely a charger, an output voltage rail, ADC for battery and system
> monitoring, and a push-button controller.
> 
> Datasheets:
> http://www.ti.com/lit/ds/symlink/bq25150.pdf
> http://www.ti.com/lit/ds/symlink/bq25155.pdf
> 
> Signed-off-by: Ricardo Rivera-Matos 
> ---
>  .../bindings/power/supply/bq2515x.yaml| 91 +++
>  1 file changed, 91 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/power/supply/bq2515x.yaml
> 
> diff --git a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml 
> b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml
> new file mode 100644
> index ..19cb336d581e
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml
> @@ -0,0 +1,91 @@
> +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
> +# Copyright (C) 2020 Texas Instruments Incorporated
> +%YAML 1.2
> +---
> +$id: "http://devicetree.org/schemas/power/supply/bq2515x.yaml#";
> +$schema: "http://devicetree.org/meta-schemas/core.yaml#";
> +
> +title: TI bq2515x 500-mA Linear charger family
> +
> +maintainers:
> +  - Dan Murphy 
> +  - Ricardo Rivera-Matos 
> +
> +description: |
> +  The BQ2515x family is a highly integrated battery charge management IC that
> +  integrates the most common functions for wearable devices, namely a 
> charger,
> +  an output voltage rail, ADC for battery and system monitoring, and
> +  push-button controller.
> +
> +  Specifications about the charger can be found at:
> +http://www.ti.com/lit/ds/symlink/bq25150.pdf
> +http://www.ti.com/lit/ds/symlink/bq25155.pdf
> +
> +properties:
> +  compatible:
> +enum:
> +  - ti,bq25150
> +  - ti,bq25155
> +
> +  reg:
> +maxItems: 1
> +description: I2C address of the charger.
> +
> +  ac-detect-gpios:
> +description: |
> +   GPIO used for connecting the bq2515x device PG (AC Detect)
> +   pin.
> +maxItems: 1
> +
> +  reset-gpios:
> +description: GPIO used for hardware reset.
> +maxItems: 1
> +
> +  powerdown-gpios:
> +description: GPIO used for low power mode of IC.
> +maxItems: 1
> +
> +  charge-enable-gpios:
> +description: GPIO used to turn on and off charging.
> +maxItems: 1
> +
> +  input-current-limit-microamp:
> +$ref: /schemas/types.yaml#/definitions/uint32
> +description: Maximum input current in micro Amps.
> +minimum: 5
> +maximum: 50
> +
> +  monitored-battery:
> +$ref: battery.yaml#

This doesn't work. It's saying monitored-battery is a node containing 
all the properties in battery.yaml.

> +
> +required:
> +  - compatible
> +  - reg

How is the battery optional?

> +
> +additionalProperties: false
> +
> +examples:
> +  - |
> +bat: battery {
> +  compatible = "simple-battery";
> +  constant-charge-current-max-microamp = <5>;
> +  precharge-current-microamp = <2500>;
> +  constant-charge-voltage-max-microvolt = <400>;
> +};
> +#include 
> +i2c0 {
> +  #address-cells = <1>;
> +  #size-cells = <0>;
> +
> +  bq25150: charger@6b {
> +compatible = "ti,bq25150";
> +reg = <0x6b>;
> +monitored-battery = <&bat>;
> +input-current-limit-microamp = <10>;
> +
> +ac-detect-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
> +reset-gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>;
> +powerdown-gpios = <&gpio0 15 GPIO_ACTIVE_HIGH>;
> +charge-enable-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
> +  };
> +};
> -- 
> 2.26.2
> 


Re: [PATCH v7 1/4] bitops: Introduce the the for_each_set_clump macro

2020-05-29 Thread Andy Shevchenko
On Sat, May 30, 2020 at 1:11 AM Syed Nayyar Waris  wrote:
> On Sat, May 30, 2020 at 3:13 AM Andy Shevchenko
>  wrote:
> > On Fri, May 29, 2020 at 11:07 PM Syed Nayyar Waris  
> > wrote:
> > > On Sat, May 30, 2020 at 12:08 AM Andy Shevchenko
> > >  wrote:
> > > > On Fri, May 29, 2020 at 11:38:18PM +0530, Syed Nayyar Waris wrote:
> > > > > On Sun, May 24, 2020 at 8:15 PM kbuild test robot  
> > > > > wrote:
> >
> > ...
> >
> > > > > Taking the example statement (in my patch) where compilation warning
> > > > > is getting reported:
> > > > > return (map[index] >> offset) & GENMASK(nbits - 1, 0);
> > > > >
> > > > > 'nbits' is of type 'unsigned long'.
> > > > > In above, the sanity check is comparing '0' with unsigned value. And
> > > > > unsigned value can't be less than '0' ever, hence the warning.
> > > > > But this warning will occur whenever there will be '0' as one of the
> > > > > 'argument' and an unsigned variable as another 'argument' for GENMASK.
> >
> > > > Proper fix is to fix GENMASK(), but allowed workaround is to use
> > > > (BIT(nbits) - 1)
> > > > instead.
> >
> > > When I used BIT macro (earlier), I had faced a problem. I want to tell
> > > you about that.
> > >
> > > Inside functions 'bitmap_set_value' and 'bitmap_get_value' when nbits (or
> > > clump size) is BITS_PER_LONG, unexpected calculation happens.
> > >
> > > Explanation:
> > > Actually when nbits (clump size) is 64 (BITS_PER_LONG is 64 on my 
> > > computer),
> > > (BIT(nbits) - 1)
> > > gives a value of zero and when this zero is ANDed with any value, it
> > > makes it full zero. This is unexpected and incorrect calculation 
> > > happening.
> > >
> > > What actually happens is in the macro expansion of BIT(64), that is 1
> > > << 64, the '1' overflows from leftmost bit position (most significant
> > > bit) and re-enters at the rightmost bit position (least significant
> > > bit), therefore 1 << 64 becomes '0x1', and when another '1' is
> > > subtracted from this, the final result becomes 0.
> > >
> > > Since this macro is being used in both bitmap_get_value and
> > > bitmap_set_value functions, it will give unexpected results when nbits or 
> > > clump
> > > size is BITS_PER_LONG (32 or 64 depending on arch).
> >
> > I see, something like
> > https://elixir.bootlin.com/linux/latest/source/include/linux/dma-mapping.h#L139
> > should be done.
> > But yes, let's try to fix GENMASK().
> >
> > So, if we modify the following
> >
> >   #define GENMASK_INPUT_CHECK(h, l) \
> > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> > __builtin_constant_p((l) > (h)), (l) > (h), 0)))
> >
> > to be
> >
> >   #define GENMASK_INPUT_CHECK(h, l) \
> > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
> > __builtin_constant_p((l) > (h)), (l) ? (l) > (h) : 0, 0)))
> >
> > would it work?
>
> Sorry Andy it is not working. Actually the warning will be thrown,
> whenever there will be comparison between 'h' and 'l'. If one of them
> is '0' and the other is unsigned variable.
> In above, still there is comparison being done between 'h' and 'l', so
> the warning is getting thrown.

Ah, okay

what about (l) && ((l) > (h)) ?

> > > William also knows about this issue:
> > > "This is undefined behavior in the C standard (section 6.5.7 in the 
> > > N1124)"
> >
> > I think it is about 6.5.7.3  here, 1U << 31 (or 63) is okay.
>
> Actually for:
> (BIT(nbits) - 1)
> When nbits will be BITS_PER_LONG it will be 1U << 32 (or 64). Isn't it ?
> The expression,
> BIT(64) - 1
> can become unexpectedly zero (incorrectly).

Yes, that's why I pointed out to the paragraph. It's about right
operand to be "great than or equal to" the size of type of left
operand.

-- 
With Best Regards,
Andy Shevchenko


Re: [PATCH] vfio/mdev: Fix reference count leak in add_mdev_supported_type.

2020-05-29 Thread Alex Williamson
On Wed, 27 May 2020 21:01:09 -0500
wu000...@umn.edu wrote:

> From: Qiushi Wu 
> 
> kobject_init_and_add() takes reference even when it fails.
> If this function returns an error, kobject_put() must be called to
> properly clean up the memory associated with the object. Thus,
> replace kfree() by kobject_put() to fix this issue. Previous
> commit "b8eb718348b8" fixed a similar problem.
> 
> Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> Signed-off-by: Qiushi Wu 
> ---

Applied to vfio next branch for v5.8 with Connie's and Kirti's reviews.
Thanks,

Alex

>  drivers/vfio/mdev/mdev_sysfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
> index 8ad14e5c02bf..917fd84c1c6f 100644
> --- a/drivers/vfio/mdev/mdev_sysfs.c
> +++ b/drivers/vfio/mdev/mdev_sysfs.c
> @@ -110,7 +110,7 @@ static struct mdev_type *add_mdev_supported_type(struct 
> mdev_parent *parent,
>  "%s-%s", dev_driver_string(parent->dev),
>  group->name);
>   if (ret) {
> - kfree(type);
> + kobject_put(&type->kobj);
>   return ERR_PTR(ret);
>   }
>  



Re: [PATCH v3] regmap: fix alignment issue

2020-05-29 Thread Mark Brown
On Fri, May 29, 2020 at 09:25:38PM +0200, Jens Thoms Toerring wrote:
> The assembly and disassembly of data to be sent to or received from a
> device invoke functions (regmap_format_XXX() and regmap_parse_XXX())
> that extract or insert data items from or into a buffer, using
> assignments. In some cases those functions are called with buffer

This doesn't apply against current code, please check and resend:

HEAD is now at 93b929922dba Merge series "regmap: provide simple bitops and use 
them in a driver" from Bartosz Golaszewski  Bartosz Golaszewski 
:
Applying: regmap: fix alignment issue
Using index info to reconstruct a base tree...
M   drivers/base/regmap/regmap.c
error: patch failed: drivers/base/regmap/regmap.c:261
error: drivers/base/regmap/regmap.c: patch does not apply
error: Did you hand edit your patch?
It does not apply to blobs recorded in its index.
Patch failed at 0001 regmap: fix alignment issue

(it also didn't apply against v5.7-rc1 and git didn't know the blobs it
was based on, the most recent change to that code was in 2015 it
seems...).


signature.asc
Description: PGP signature


Re: [PATCH v6 00/16] spi: dw: Add generic DW DMA controller support

2020-05-29 Thread Mark Brown
On Fri, May 29, 2020 at 03:22:26PM -0600, Rob Herring wrote:
> On Fri, May 29, 2020 at 06:33:25PM +0100, Mark Brown wrote:

> > Please rebase.  TBH I'd not noticed Rob's review so I just left it
> > waiting for that, there's such a huge backlog there it didn't occur to
> > me that it might've been reviewed.

> Hey, I'm down to about 10 patches now. I think I'll take the rest of the 
> week off.

Ah, nice!  Good to hear.


signature.asc
Description: PGP signature


Re: [PATCH] dt-bindings: Merge gpio-usb-b-connector with usb-connector

2020-05-29 Thread Rob Herring
On Fri, 29 May 2020 20:06:31 +0200, Thierry Reding wrote:
> From: Thierry Reding 
> 
> The binding for usb-connector is a superset of gpio-usb-b-connector. One
> major difference is that gpio-usb-b-connector requires at least one of
> the vbus-gpios and id-gpios properties to be specified. Merge the two
> bindings by adding the compatible string combination for the GPIO USB-B
> variant and an extra conditional for the required properties list to the
> usb-connector.yaml file.
> 
> Signed-off-by: Thierry Reding 
> ---
>  .../bindings/connector/usb-connector.yaml | 39 +--
>  .../devicetree/bindings/usb/usb-conn-gpio.txt | 30 --
>  2 files changed, 35 insertions(+), 34 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/usb/usb-conn-gpio.txt
> 

Applied, thanks!


Re: [PATCH v2 2/4] dt-bindings: pinctrl: Document optional BCM7211 wake-up interrupts

2020-05-29 Thread Rob Herring
On Fri, 29 May 2020 12:15:20 -0700, Florian Fainelli wrote:
> BCM7211 supports wake-up interrupts in the form of optional interrupt
> lines, one per bank, plus the "all banks" interrupt line.
> 
> Signed-off-by: Florian Fainelli 
> ---
>  .../devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 

Acked-by: Rob Herring 


[GIT PULL] ARM: SoC fixes for v5.7

2020-05-29 Thread Arnd Bergmann
The following changes since commit b9bbe6ed63b2b9f2c9ee5cbd0f2c946a2723f4ce:

  Linux 5.7-rc6 (2020-05-17 16:48:37 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git
tags/armsoc-fixes-v5.7

for you to fetch changes up to 99706d62fb50486eadb4441eaed311491fd7addf:

  Merge tag 'omap-for-v5.7/cpsw-fixes-signed' of
git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into
arm/fixes (2020-05-26 00:18:48 +0200)


ARM: SoC fixes for v5.7

This time there is one fix for the error path in the mediatek cmdq driver
(used by their video driver) and a couple of devicetree fixes, mostly
for 32-bit ARM, and fairly harmless:

- On OMAP2 there were a few regressions in the ethernet drivers,
  one of them leading to an external abort trap

- One Raspberry Pi version had a misconfigured LED

- Interrupts on Broadcom NSP were slightly misconfigured

- One i.MX6q board had issues with graphics mode setting

- On mmp3 there are some minor fixes that were submitted for
  v5.8 with a cc:stable tag, so I ended up picking them up
  here as well

- The Mediatek Video Codec needs to run at a higher frequency
  than configured originally

Signed-off-by: Arnd Bergmann 


Arnd Bergmann (5):
  Merge branch 'mmp/fixes' into arm/fixes
  Merge tag 'imx-fixes-5.7-2' of
git://git.kernel.org/.../shawnguo/linux into arm/fixes
  Merge branch 'v5.7-fixes' of
git://git.kernel.org/.../matthias.bgg/linux into arm/fixes
  Merge tag 'arm-soc/for-5.7/devicetree-fixes-part2-v2' of
https://github.com/Broadcom/stblinux into arm/fixes
  Merge tag 'omap-for-v5.7/cpsw-fixes-signed' of
git://git.kernel.org/.../tmlind/linux-omap into arm/fixes

Dennis YC Hsieh (1):
  soc: mediatek: cmdq: return send msg error code

Grygorii Strashko (2):
  ARM: dts: am57xx: fix networking on boards with ksz9031 phy
  ARM: dts: am437x: fix networking on boards with ksz9031 phy

Hamish Martin (1):
  ARM: dts: bcm: HR2: Fix PPI interrupt types

Hsin-Yi Wang (1):
  arm64: dts: mt8173: fix vcodec-enc clock

Lubomir Rintel (3):
  ARM: dts: mmp3: Use the MMP3 compatible string for /clocks
  ARM: dts: mmp3-dell-ariel: Fix the SPI devices
  ARM: dts: mmp3: Drop usb-nop-xceiv from HSIC phy

Robert Beckett (1):
  ARM: dts/imx6q-bx50v3: Set display interface clock parents

Tony Lindgren (1):
  ARM: dts: Fix wrong mdio clock for dm814x

Vincent Stehlé (1):
  ARM: dts: bcm2835-rpi-zero-w: Fix led polarity

 arch/arm/boot/dts/am437x-gp-evm.dts |  2 +-
 arch/arm/boot/dts/am437x-idk-evm.dts|  2 +-
 arch/arm/boot/dts/am437x-sk-evm.dts |  4 ++--
 arch/arm/boot/dts/am571x-idk.dts|  4 ++--
 arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi |  4 ++--
 arch/arm/boot/dts/am57xx-idk-common.dtsi|  4 ++--
 arch/arm/boot/dts/bcm-hr2.dtsi  |  6 +++---
 arch/arm/boot/dts/bcm2835-rpi-zero-w.dts|  2 +-
 arch/arm/boot/dts/dm814x.dtsi   |  2 +-
 arch/arm/boot/dts/imx6q-b450v3.dts  |  7 ---
 arch/arm/boot/dts/imx6q-b650v3.dts  |  7 ---
 arch/arm/boot/dts/imx6q-b850v3.dts  | 11 ---
 arch/arm/boot/dts/imx6q-bx50v3.dtsi | 15 +++
 arch/arm/boot/dts/mmp3-dell-ariel.dts   | 12 ++--
 arch/arm/boot/dts/mmp3.dtsi |  8 +++-
 arch/arm64/boot/dts/mediatek/mt8173.dtsi|  4 ++--
 drivers/soc/mediatek/mtk-cmdq-helper.c  |  4 +++-
 17 files changed, 44 insertions(+), 54 deletions(-)


[ANNOUNCE] 5.4.43-rt25

2020-05-29 Thread Steven Rostedt


Dear RT Folks,

I'm pleased to announce the 5.4.43-rt25 stable release.


This release is just an update to the new stable 5.4.43 version
and no RT specific changes have been made.


You can get this release via the git tree at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git

  branch: v5.4-rt
  Head SHA1: 2a84edbec2cb7d50c72e51a1021aff9c4c3bb800


Or to build 5.4.43-rt25 directly, the following patches should be applied:

  http://www.kernel.org/pub/linux/kernel/v5.x/linux-5.4.tar.xz

  http://www.kernel.org/pub/linux/kernel/v5.x/patch-5.4.43.xz

  
http://www.kernel.org/pub/linux/kernel/projects/rt/5.4/patch-5.4.43-rt25.patch.xz




Enjoy,

-- Steve



Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking

2020-05-29 Thread Peter Zijlstra
On Fri, May 29, 2020 at 06:14:01PM -0400, Steven Rostedt wrote:
> On Fri, 29 May 2020 23:27:41 +0200
> Peter Zijlstra  wrote:
> 
> > There is no reason not to always, accurately, track IRQ state.
> > 
> > This change also makes IRQ state tracking ignore lockdep_off().
> > 
> > Signed-off-by: Peter Zijlstra (Intel) 
> > ---
> >  kernel/locking/lockdep.c |   33 ++---
> >  1 file changed, 30 insertions(+), 3 deletions(-)
> > 
> > --- a/kernel/locking/lockdep.c
> > +++ b/kernel/locking/lockdep.c
> > @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v
> >   */
> >  void lockdep_hardirqs_on_prepare(unsigned long ip)
> >  {
> > -   if (unlikely(!debug_locks || current->lockdep_recursion))
> 
> Why remove the check for debug_locks? Isn't that there to disable
> everything at once to prevent more warnings to be printed?

Yeah, maybe. I was thinking we could keep IRQ state running. But you're
right, if we mess up the IRQ state itself this might generate a wee
mess.

> Also, isn't there other ways that we could have recursion besides NMIs?
> Say we do a printk inside here, or call something that may also enable
> interrupts? I thought the recursion check was also to prevent lockdep
> infrastructure calling something that lockdep monitors being a problem?
> 
> Or am I missing something?

> > +   /*
> > +* NMIs do not (and cannot) track lock dependencies, nothing to do.
> > +*/
> > +   if (in_nmi())
> > +   return;
> > +
> > +   if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & 
> > LOCKDEP_RECURSION_MASK))
> > return;

^^ there's your regular recursion check.


Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")

2020-05-29 Thread Heiner Kallweit
On 29.05.2020 22:59, Matthew Garrett wrote:
> On Fri, May 29, 2020 at 03:21:35PM -0500, Bjorn Helgaas wrote:
> 
>> Yeah, that makes sense.  I can't remember the details, but I'm pretty
>> sure there's a reason why we ask for the whole set of things.  Seems
>> like it solved some problem.  I think Matthew Garrett might have been
>> involved in that.
> 
> This was https://bugzilla.redhat.com/show_bug.cgi?id=638912 - some 
> firmware misbehaves unless you pass the same set of supported 
> functionality as Windows does.
> 

Current situation means that PME is unusable on all systems where
pcie_aspm_support_enabled() returns false, what is basically every
system except EXPERT mode is enabled and CONFIG_PCIEASPM is set.
So we definitely need to do something.

One question is whether the system from the 10yr old bug report
actually depends on OSC_PCI_EXPRESS_LTR_CONTROL control, or whether
some other change in recent years fixed the issue.
Not sure whether the system is still available for re-testing.

If worst case we have 10yr old systems breaking with a new kernel
then we still would have the workaround to enable CONFIG_PCIEASPM
on that system.


Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()

2020-05-29 Thread Bart Van Assche
On 2020-05-29 13:27, Matthew Wilcox wrote:
> On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote:
>> There is a function named ilog2() exist which can replace blksize.
>> The generated code will be shorter and more efficient on some
>> architecture, such as arm64. And ilog2() can be optimized according
>> to different architecture.
> 
> We'd get the same benefit from a smaller patch with just:
> 
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue 
> *q, unsigned long addr,
>   return !(addr & alignment) && !(len & alignment);
>  }
>  
> -/* assumes size > 256 */
>  static inline unsigned int blksize_bits(unsigned int size)
>  {
> - unsigned int bits = 8;
> - do {
> - bits++;
> - size >>= 1;
> - } while (size > 256);
> - return bits;
> + return ilog2(size);
>  }
>  
>  static inline unsigned int block_size(struct block_device *bdev)

Hi Matthew,

I had suggested to change all blksize_bits() calls into ilog2() calls
because I think that a single function to calculate the logarithm base 2
is sufficient.

Thanks,

Bart.




donation

2020-05-29 Thread Anastasia McDonald
You have a sum donation for Catholic Charities Foundation, kindly contact the 
Program Coordinator through  her email  anastasia.mc...@gmail.com  for details 
of claim.

Regard
Sir. Angelo Tony.


Re: [RFC PATCH 2/2] drm: xlnx: driver for Xilinx DSI TX Subsystem

2020-05-29 Thread Hyun Kwon
Hi Laurent,

On Wed, 2020-05-27 at 15:45:24 -0700, Laurent Pinchart wrote:
> Hi Hyun,
> 
> On Wed, May 27, 2020 at 10:54:35AM -0700, Hyun Kwon wrote:
> > On Sat, 2020-05-23 at 20:08:13 -0700, Laurent Pinchart wrote:
> > > On Mon, May 04, 2020 at 11:43:48AM -0700, Hyun Kwon wrote:
> > >> On Mon, 2020-04-20 at 14:20:56 -0700, Venkateshwar Rao Gannavarapu wrote:
> > >>> The Xilinx MIPI DSI Tx Subsystem soft IP is used to display video
> > >>> data from AXI-4 stream interface.
> > >>> 
> > >>> It supports upto 4 lanes, optional register interface for the DPHY,
> > >> 
> > >> I don't see the register interface for dphy support.
> > > 
> > > I think the D-PHY should be supported through a PHY driver, as it seems
> > > to be shared between different subsystems.
> > 
> > Right, if the logic is shared across subsystems. I can't tell if that's
> > the case as the IP comes as a single block. Maybe GVRao can confirm.
> 
> I believe the CSI2-RX subsystem uses the same D-PHY IP core, but a
> confirmation would be nice.
> 
> > >>> multiple RGB color formats, command mode and video mode.
> > >>> This is a MIPI-DSI host driver and provides DSI bus for panels.
> > >>> This driver also helps to communicate with its panel using panel
> > >>> framework.
> > >>> 
> > >>> Signed-off-by: Venkateshwar Rao Gannavarapu 
> > >>> 
> > >>> ---
> > >>>  drivers/gpu/drm/xlnx/Kconfig|  11 +
> > >>>  drivers/gpu/drm/xlnx/Makefile   |   2 +
> > >>>  drivers/gpu/drm/xlnx/xlnx_dsi.c | 755 
> > >>> 
> > > 
> > > Daniel Vetter has recently expressed his opiion that bridge drivers
> > > should go to drivers/gpu/drm/bridge/. It would then be
> > > drivers/gpu/drm/bridge/xlnx/. I don't have a strong opinion myself.
> > > 
> > >>>  3 files changed, 768 insertions(+)
> > >>>  create mode 100644 drivers/gpu/drm/xlnx/xlnx_dsi.c
> > >>> 
> > >>> diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig
> > >>> index aa6cd88..73873cf 100644
> > >>> --- a/drivers/gpu/drm/xlnx/Kconfig
> > >>> +++ b/drivers/gpu/drm/xlnx/Kconfig
> > >>> @@ -11,3 +11,14 @@ config DRM_ZYNQMP_DPSUB
> > >>>   This is a DRM/KMS driver for ZynqMP DisplayPort controller. 
> > >>> Choose
> > >>>   this option if you have a Xilinx ZynqMP SoC with DisplayPort
> > >>>   subsystem.
> > >>> +
> > >>> +config DRM_XLNX_DSI
> > >>> +tristate "Xilinx DRM DSI Subsystem Driver"
> > >>> +select DRM_MIPI_DSI
> > >>> +select DRM_PANEL
> > >>> +select DRM_PANEL_SIMPLE
> > >>> +help
> > >>> + This enables support for Xilinx MIPI-DSI.
> > >> 
> > >> This sentence is not needed with below. Could you please rephrase the 
> > >> whole?
> > >> 
> > >>> + This is a DRM/KMS driver for Xilinx programmable DSI 
> > >>> controller.
> > >>> + Choose this option if you have a Xilinx MIPI DSI-TX controller
> > >>> + subsytem.
> > >> 
> > >> These seem incorrectly indented.
> > >> 
> > >>> diff --git a/drivers/gpu/drm/xlnx/Makefile 
> > >>> b/drivers/gpu/drm/xlnx/Makefile
> > >>> index 2b844c6..b7ee6ef 100644
> > >>> --- a/drivers/gpu/drm/xlnx/Makefile
> > >>> +++ b/drivers/gpu/drm/xlnx/Makefile
> > >>> @@ -1,2 +1,4 @@
> > >>>  zynqmp-dpsub-objs += zynqmp_disp.o zynqmp_dpsub.o zynqmp_dp.o
> > >>>  obj-$(CONFIG_DRM_ZYNQMP_DPSUB) += zynqmp-dpsub.o
> > >>> +
> > >>> +obj-$(CONFIG_DRM_XLNX_DSI) += xlnx_dsi.o
> > >>> diff --git a/drivers/gpu/drm/xlnx/xlnx_dsi.c 
> > >>> b/drivers/gpu/drm/xlnx/xlnx_dsi.c
> > >>> new file mode 100644
> > >>> index 000..b8cae59
> > >>> --- /dev/null
> > >>> +++ b/drivers/gpu/drm/xlnx/xlnx_dsi.c
> > >>> @@ -0,0 +1,755 @@
> > >>> +// SPDX-License-Identifier: GPL-2.0
> > >>> +/*
> > >>> + * Xilinx FPGA MIPI DSI Tx Controller driver
> > >>> + *
> > >>> + * Copyright (C) 2017 - 2019 Xilinx, Inc.
> > >>> + *
> > >>> + * Authors:
> > >>> + * - Saurabh Sengar 
> > >>> + * - Venkateshwar Rao Gannavarapu 
> > >>> 
> > >>> + */
> > >>> +
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +#include 
> > >>> +
> > >>> +#include 
> > >>> +#include 
> > >>> +
> > >>> +/* DSI Tx IP registers */
> > >>> +#define XDSI_CCR   0x00
> > >>> +#define XDSI_CCR_COREENB   BIT(0)
> > >>> +#define XDSI_CCR_SOFTRST   BIT(1)
> > >>> +#define XDSI_CCR_CRREADY   BIT(2)
> > >>> +#define XDSI_CCR_CMDMODE   BIT(3)
> > >>> +#define XDSI_CCR_DFIFORST  BIT(4)
> > >>> +#define XDSI_CCR_CMDFIFORSTBIT(5)
> > >>> +#define XDSI_PCR   0x04
> > 
> > [snip]
> > 
> > >>> +   }
> > >>> +
> > >>> +   ret = clk_prepare_enable(dsi->video_aclk);
> > >>> +   if (ret) {
> > >>

Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking

2020-05-29 Thread Steven Rostedt
On Sat, 30 May 2020 00:25:05 +0200
Peter Zijlstra  wrote:

> > > + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion & 
> > > LOCKDEP_RECURSION_MASK))
> > >   return;  
> 
> ^^ there's your regular recursion check.

Yes, but this is more of a "bug if it happens" than just "ignore it".

-- Steve


Re: Regression with PM / wakeup: Show wakeup sources stats in sysfs"

2020-05-29 Thread Tri Vo
On Fri, May 29, 2020 at 9:51 AM Rafael J. Wysocki
 wrote:
>
> On 5/28/2020 10:46 PM, Florian Fainelli wrote:
> > Hi,
> >
> > Commit c8377adfa78103be5380200eb9dab764d7ca890e ("PM / wakeup: Show
> > wakeup sources stats in sysfs") is causing some of our tests to fail
> > because /sys/class/net/*/device/power/wakeup_count is now 0, despite
> > /sys/kernel/debug/wakeup_sources clearly indicating that the Ethernet
> > device was responsible for system wake-up.
> >
> > What's more in looking at /sys/class/wakekup/wakeup2/event_count, we
> > have the number of Wake-on-LAN wakeups recorded properly, but
> > wakeup_count is desperately 0, why is that?
>
> I need to look at that commit in detail to find out what is going on.

It would be helpful to see the contents of
/sys/kernel/debug/wakeup_sources, /sys/class/net/*/device/power/*, and
/sys/class/wakekup/* corresponding to the device in question. The
values in these files are queried from the same struct wakeup_source.
So it's odd if wakeup_count diverges.


Re: [PATCH v3 0/2] dtc: Improve checks for i2c reg properties

2020-05-29 Thread Rob Herring
On Thu, May 28, 2020 at 2:57 AM Joel Stanley  wrote:
>
> This is to fix a build warning in the Linux kernel caused by dtc
> incorrectly warning about I2C_OWN_SLAVE_ADDRESS.
>
> v3 fixes the 10 bit size check
> v2 contains a second patch to check for 10 bit vs 7 bit addresses.
>
> Joel Stanley (2):
>   checks: Remove warning for I2C_OWN_SLAVE_ADDRESS
>   checks: Improve i2c reg property checking

Reviewed-by: Rob Herring 

I'll sync the kernel copy when David applies.

Rob


Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"

2020-05-29 Thread Jaegeuk Kim
On 05/29, Chao Yu wrote:
> Under heavy fsstress, we may triggle panic while issuing discard,
> because __check_sit_bitmap() detects that discard command may earse
> valid data blocks, the root cause is as below race stack described,
> since we removed lock when flushing quota data, quota data writeback
> may race with write_checkpoint(), so that it causes inconsistency in
> between cached discard entry and segment bitmap.
> 
> - f2fs_write_checkpoint
>  - block_operations
>   - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH)
>  - f2fs_flush_sit_entries
>   - add_discard_addrs
>- __set_bit_le(i, (void *)de->discard_map);
>   - f2fs_write_data_pages
>- f2fs_write_single_data_page
>  : inode is quota one, 
> cp_rwsem won't be locked
> - f2fs_do_write_data_page
>  - f2fs_allocate_data_block
>   - f2fs_wait_discard_bio
> : discard entry has not 
> been added yet.
>   - update_sit_entry
>  - f2fs_clear_prefree_segments
>   - f2fs_issue_discard
>   : add discard entry
> 
> This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix quota_sync
> failure due to f2fs_lock_op").
> 
> Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op")

The previous patch fixes quota_sync gets EAGAIN all the time.
How about this? It seems this works for fsstress test.

---
 fs/f2fs/segment.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ebbadde6cbced..f67cffc38975e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3095,6 +3095,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
struct page *page,
struct curseg_info *curseg = CURSEG_I(sbi, type);
bool put_pin_sem = false;
 
+   /*
+* We need to wait for node_write to avoid block allocation during
+* checkpoint. This can only happen to quota writes which can cause
+* the below discard race condition.
+*/
+   if (IS_DATASEG(type))
+   down_write(&sbi->node_write);
+
if (type == CURSEG_COLD_DATA) {
/* GC during CURSEG_COLD_DATA_PINNED allocation */
if (down_read_trylock(&sbi->pin_sem)) {
@@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
struct page *page,
 
if (put_pin_sem)
up_read(&sbi->pin_sem);
+
+   if (IS_DATASEG(type))
+   up_write(&sbi->node_write);
 }
 
 static void update_device_state(struct f2fs_io_info *fio)
-- 
2.27.0.rc0.183.gde8f92d652-goog



Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Sargun Dhillon
On Fri, May 29, 2020 at 6:31 AM Christian Brauner
 wrote:
>
> > > +   /* Check if we were woken up by a addfd message */
> > > +   addfd = list_first_entry_or_null(&n.addfd,
> > > +struct seccomp_kaddfd, list);
> > > +   if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) {
> > > +   seccomp_handle_addfd(addfd);
> > > +   mutex_unlock(&match->notify_lock);
> > > +   goto wait;
> > > +   }
> > > ret = n.val;
> > > err = n.error;
> > > flags = n.flags;
> > > }
> > >
> > > +   /* If there were any pending addfd calls, clear them out */
> > > +   list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
> > > +   /* The process went away before we got a chance to handle it 
> > > */
> > > +   addfd->ret = -ESRCH;
> > > +   list_del_init(&addfd->list);
> > > +   complete(&addfd->completion);
> > > +   }
>
> I forgot to ask this in my first review before, don't you need a
> complete(&addfd->completion) call in seccomp_notify_release() before
> freeing it?
>

When complete(&knotif->ready) is called in seccomp_notify_release,
subsequently the notifier (seccomp_do_user_notification) will be woken up and
it'll fail this check:
if (addfd && n.state != SECCOMP_NOTIFY_REPLIED)

Falling through to:
/* If there were any pending addfd calls, clear them out */
list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
/* The process went away before we got a chance to handle it */
addfd->ret = -ESRCH;
list_del_init(&addfd->list);
complete(&addfd->completion);
}

Although ESRCH isn't the "right" response, this fall through behaviour
should work.


Re: [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking

2020-05-29 Thread Peter Zijlstra
On Sat, May 30, 2020 at 12:25:05AM +0200, Peter Zijlstra wrote:
> On Fri, May 29, 2020 at 06:14:01PM -0400, Steven Rostedt wrote:
> > On Fri, 29 May 2020 23:27:41 +0200
> > Peter Zijlstra  wrote:
> > 
> > > There is no reason not to always, accurately, track IRQ state.
> > > 
> > > This change also makes IRQ state tracking ignore lockdep_off().
> > > 
> > > Signed-off-by: Peter Zijlstra (Intel) 
> > > ---
> > >  kernel/locking/lockdep.c |   33 ++---
> > >  1 file changed, 30 insertions(+), 3 deletions(-)
> > > 
> > > --- a/kernel/locking/lockdep.c
> > > +++ b/kernel/locking/lockdep.c
> > > @@ -3646,7 +3646,13 @@ static void __trace_hardirqs_on_caller(v
> > >   */
> > >  void lockdep_hardirqs_on_prepare(unsigned long ip)
> > >  {
> > > - if (unlikely(!debug_locks || current->lockdep_recursion))
> > 
> > Why remove the check for debug_locks? Isn't that there to disable
> > everything at once to prevent more warnings to be printed?
> 
> Yeah, maybe. I was thinking we could keep IRQ state running. But you're
> right, if we mess up the IRQ state itself this might generate a wee
> mess.

That is, mostly the IRQ state recovers when we mess up. It's only when
we then trigger more fail that we crash and burn, and that will likely
already give more warnings.

But I can put the debug_locks check back.


Re: Regression with PM / wakeup: Show wakeup sources stats in sysfs"

2020-05-29 Thread Florian Fainelli
On 5/29/20 3:28 PM, Tri Vo wrote:
> On Fri, May 29, 2020 at 9:51 AM Rafael J. Wysocki
>  wrote:
>>
>> On 5/28/2020 10:46 PM, Florian Fainelli wrote:
>>> Hi,
>>>
>>> Commit c8377adfa78103be5380200eb9dab764d7ca890e ("PM / wakeup: Show
>>> wakeup sources stats in sysfs") is causing some of our tests to fail
>>> because /sys/class/net/*/device/power/wakeup_count is now 0, despite
>>> /sys/kernel/debug/wakeup_sources clearly indicating that the Ethernet
>>> device was responsible for system wake-up.
>>>
>>> What's more in looking at /sys/class/wakekup/wakeup2/event_count, we
>>> have the number of Wake-on-LAN wakeups recorded properly, but
>>> wakeup_count is desperately 0, why is that?
>>
>> I need to look at that commit in detail to find out what is going on.
> 
> It would be helpful to see the contents of
> /sys/kernel/debug/wakeup_sources, /sys/class/net/*/device/power/*, and
> /sys/class/wakekup/* corresponding to the device in question. The
> values in these files are queried from the same struct wakeup_source.
> So it's odd if wakeup_count diverges.

Most certainly, below is the information you want, the two cat
/s/k/d/wakeup_sources were done before Wake-on-LAN and after waking-up
from LAN. /sys/class/wakeup/wakeup2 maps to the Ethernet device.

The Ethernet device calls pm_wakeup_event() against the struct device
that is embedded in the platform_device that it was probed with. I will
try to debug this myself over the weekend, time permitting.


# ethtool -s eth0 wol g
# cat /sys/kernel/debug/wakeup_sources
nameactive_countevent_count wakeup_count
expire_countactive_sincetotal_time  max_timelast_changep
revent_suspend_time
47d58.ethernet  0   0   0
0   0   0   0   0  0
alarmtimer  0   0   0   0
0   0   0   0  0
47c408400.waketimer 2   2   0
0   0   0   0   6144
1   0
# pml -w20
[ 3449.937142] brcm-waketimer 47c408400.waketimer: Using sysfs
attributes, consider using 'rtcwake'
Pass 1 out of 1, mode=none, tp_al[ 3449.952654] PM: suspend entry (shallow)
l=1, cycle_tp=, sleep=, [ 3449.959004] Filesystems sync: 0.000 seconds
wakeup_time=20
[ 3449.965984] Freezing user space processes ... (elapsed 0.001 seconds)
done.
[ 3449.974087] OOM killer disabled.
[ 3449.977316] Freezing remaining freezable tasks ... (elapsed 0.006
seconds) done.
[ 3449.991114] printk: Suspending console(s) (use no_console_suspend to
debug)
AMS: System is entering S2...
[ 3450.022381] bcmgenet 47d58.ethernet eth0: Link is Down
[ 3450.048340] Disabling non-boot CPUs ...
[ 3450.049344] CPU1: shutdown
[ 3450.050393] psci: CPU1 killed (polled 1 ms)
[ 3450.051332] Enabling non-boot CPUs ...
[ 3450.051712] Detected PIPT I-cache on CPU1
[ 3450.051812] CPU1: Booted secondary processor 0x01 [0x410fd083]
[ 3450.052435] CPU1 is up
[ 3450.683588] bcmgenet 47d58.ethernet eth0: Link is Up - 1Gbps/Full
- flow control rx/tx
[ 3450.729677] OOM killer enabled.
[ 3450.732908] Restarting tasks ... done.
[ 3450.738539] PM: suspend exit
--
[ 3450.744239] brcm-waketimer 47c408400.waketimer: Using sysfs
attributes, consider using 'rtcwake'
# cat /sys/kernel/debug/wakeup_sources
nameactive_countevent_count wakeup_count
expire_countactive_sincetotal_time  max_timelast_changep
revent_suspend_time
47d58.ethernet  1   1   0
0   0   0   0   3450
054 0
alarmtimer  0   0   0   0
0   0   0   0  0
47c408400.waketimer 2   2   0
0   0   0   0   6144
1   0
# cat /sys/class/net/*/device/power/*
cat: read error: Input/output error
auto
0
unsupported
0
enabled
0
0
1
0
0
3450054
0
0
# ls -l /sys/class/net/eth0/device/
driver/  net/ subsystem/   wakeup/
driver_override  of_node/ uevent
modalias power/   unimac-mdio.0/
# ls -l /sys/class/net/eth0/device/power/wakeup
wakeupwakeup_active_count   wakeup_last_time_ms
wakeup_abort_countwakeup_count  wakeup_max_time_ms
wakeup_active wakeup_expire_count   wakeup_total_time_ms
# ls -l /sys/class/net/eth0/device/power/wakeup^C
# ls -l /sys/class/wakeup/wakeup
wakeup0/ wakeup1/ wakeup2/
# ls -l /sys/class/wakeup/wakeup2/
total 0
-r--r--r--1 root root  4096 Jan  1 00:59 active_count
-r--r--r--1 root root  4096 Jan  1 00:59 active_time_ms
lrwxrwxrwx1 root root 0 Jan  1 00:59 device ->
../../../47d58.ethernet
-r--r--r--1 root root  4096 Jan  1 0

Re: general protection fault in inet_unhash

2020-05-29 Thread Andrii Nakryiko

On 5/29/20 10:32 AM, Eric Dumazet wrote:



On 5/28/20 11:32 PM, Andrii Nakryiko wrote:

On 5/28/20 11:23 PM, Dmitry Vyukov wrote:

On Thu, May 28, 2020 at 11:01 PM 'Andrii Nakryiko' via syzkaller-bugs
 wrote:


On 5/28/20 9:44 AM, syzbot wrote:

Hello,

syzbot found the following crash on:

HEAD commit:    dc0f3ed1 net: phy: at803x: add cable diagnostics support f..
git tree:   net-next
console output: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_log.txt-3Fx-3D17289cd210&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=t1v5ZakZM9Aw_9u_I6FbFZ28U0GFs0e9dMMUOyiDxO4&e=
kernel config:  
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_.config-3Fx-3D7e1bc97341edbea6&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=yeXCTODuJF6ExmCJ-ppqMHsfvMCbCQ9zkmZi3W6NGHo&e=
dashboard link: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_bug-3Fextid-3D3610d489778b57cc8031&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=8fAJHh81yojiinnGJzTw6hN4w4A6XRZST4463CWL9Y8&e=
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_repro.syz-3Fx-3D15f237aa10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=cPv-hQsGYs0CVz3I26BmauS0hQ8_YTWHeH5p-U5ElWY&e=
C reproducer:   
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_repro.c-3Fx-3D1553834a10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=r6sGJDOgosZDE9sRxqFnVibDNJFt_6IteSWeqEQLbNE&e=

The bug was bisected to:

commit af6eea57437a830293eab56246b6025cc7d46ee7
Author: Andrii Nakryiko 
Date:   Mon Mar 30 02:59:58 2020 +

   bpf: Implement bpf_link-based cgroup BPF program attachment

bisection log:  
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_bisect.txt-3Fx-3D1173cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=rJIpYFSAMRfea3349dd7PhmLD_hriVwq8ZtTHcSagBA&e=
final crash:    
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_report.txt-3Fx-3D1373cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=TWpx5JNdxKiKPABUScn8WB7u3fXueCp7BXwQHg4Unz0&e=
console output: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__syzkaller.appspot.com_x_log.txt-3Fx-3D1573cd7e10&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=vxqvl81C2rT6GOGdPyz8iQ&m=sMAtpavBBjBzFzT0V8c6FcH8cu2M9da3ZozO5Lc8do0&s=-SMhn-dVZI4W51EZQ8Im0sdThgwt9M6fxUt3_bcYvk8&e=

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+3610d489778b57cc8...@syzkaller.appspotmail.com
Fixes: af6eea57437a ("bpf: Implement bpf_link-based cgroup BPF program 
attachment")

general protection fault, probably for non-canonical address 
0xdc01:  [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0008-0x000f]
CPU: 0 PID: 7063 Comm: syz-executor654 Not tainted 5.7.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:inet_unhash+0x11f/0x770 net/ipv4/inet_hashtables.c:600


No idea why it was bisected to bpf_link change. It seems completely
struct sock-related. Seems like


Hi Andrii,

You can always find a detailed explanation of syzbot bisections under
the "bisection log" link.


Right. Sorry, I didn't mean that bisect went wrong or anything like that. I 
just don't see how my change has anything to do with invalid socket state. As I 
just replied in another email, this particular repro is using bpf_link_create() 
for cgroup attachment, which was added in my patch. So running repro before my 
patch would always fail to attach BPF program, and thus won't be able to repro 
the issue (because the bug is somewhere in the interaction between BPF program 
attachment and socket itself). So it will always bisect to my patch :)


L2TP seems to use sk->sk_node to insert sockets into l2tp_ip_table, _and_ uses 
l2tp_ip_prot.unhash == inet_unhash

So if/when BPF_CGROUP_RUN_PROG_INET_SOCK(sk) returns an error and inet_create() 
calls sk_common_release()
bad things happen, because inet_unhash() expects a valid hashinfo pointer.

I guess the following patch should fix this.

Bug has been there forever, but only BPF_CGROUP_RUN_PROG_INET_SOCK(sk) could 
trigger it.


I knew it! :) Thanks a lot for taking a look, Eric!



diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 
10cf7c3dcbb3fb1b27657588f3d1ba806cba737f..097c80c0e323777df997a189eb456e3ae6d26888
 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@

Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()

2020-05-29 Thread Jens Axboe
On 5/29/20 4:27 PM, Bart Van Assche wrote:
> On 2020-05-29 13:27, Matthew Wilcox wrote:
>> On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote:
>>> There is a function named ilog2() exist which can replace blksize.
>>> The generated code will be shorter and more efficient on some
>>> architecture, such as arm64. And ilog2() can be optimized according
>>> to different architecture.
>>
>> We'd get the same benefit from a smaller patch with just:
>>
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue 
>> *q, unsigned long addr,
>>  return !(addr & alignment) && !(len & alignment);
>>  }
>>  
>> -/* assumes size > 256 */
>>  static inline unsigned int blksize_bits(unsigned int size)
>>  {
>> -unsigned int bits = 8;
>> -do {
>> -bits++;
>> -size >>= 1;
>> -} while (size > 256);
>> -return bits;
>> +return ilog2(size);
>>  }
>>  
>>  static inline unsigned int block_size(struct block_device *bdev)
> 
> Hi Matthew,
> 
> I had suggested to change all blksize_bits() calls into ilog2() calls
> because I think that a single function to calculate the logarithm base 2
> is sufficient.

I think this should be a two-parter:

1) Use the inode blkbits where appropriate
2) Then do this change

I'm leaning towards just doing it in blksize_bits() instead of updating
every caller, unless there aren't that many left once we've gone
through patch 1.

-- 
Jens Axboe



Re: [PATCH V2] dt-bindings: regulator: Convert anatop regulator to json-schema

2020-05-29 Thread Mark Brown
On Fri, 29 May 2020 09:59:11 +0800, Anson Huang wrote:
> Convert the anatop regulator binding to DT schema format using json-schema.

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 
for-next

Thanks!

[1/1] dt-bindings: regulator: Convert anatop regulator to json-schema
  commit: 81227f49bd272cbcd9bb4650b250519c8aa22065

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH v3 4/7] watchdog: dw_wdt: Support devices with non-fixed TOP values

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:20PM +0300, Serge Semin wrote:
> In case if the DW Watchdog IP core is synthesised with
> WDT_USE_FIX_TOP == false, the TOP interval indexes make the device
> to load a custom periods to the counter. These periods are hardwired
> at the IP synthesis stage and can be within [2^8, 2^(WDT_CNT_WIDTH - 1)].
> Alas their values can't be detected at runtime and must be somehow
> supplied to the driver so one could properly determine the watchdog
> timeout intervals. For this purpose we suggest to have a vendor-
> specific dts property "snps,watchdog-tops" utilized, which would
> provide an array of sixteen counter values. At device probe stage they
> will be used to initialize the watchdog device timeouts determined
> from the array values and current clocks source rate.
> 
> In order to have custom TOP values supported the driver must be
> altered in the following way. First of all the fixed-top values
> ready-to-use array must be determined for compatibility with currently
> supported devices, which were synthesised with WDT_USE_FIX_TOP == true.
> It will be used if either fixed TOP feature is detected being enabled or
> no custom TOPs are fetched from the device dt node. Secondly at the probe
> stage we must initialize an array of the watchdog timeouts corresponding
> to the detected TOPs list and the reference clock rate. For generality the
> procedure of initialization is designed in a way to support the TOPs array
> with no limitations on the items order or value. Finally the watchdog
> period search methods should be altered to support the new timeouts data
> structure.
> 
> Signed-off-by: Serge Semin 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: Rob Herring 
> Cc: linux-m...@vger.kernel.org
> Cc: devicet...@vger.kernel.org

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - Rearrange SoBs.
> - Add "ms" suffix to the methods returning msec and convert the methods
>   with no "ms" suffix to return a timeout in sec.
> - Make sure minimum timeout is at least 1 sec.
> - Refactor the timeouts calculation procedure to retain the timeouts in
>   the ascending order.
> - Make sure there is no integer overflow in milliseconds calculation. It
>   is saved in a dedicated uint field of the timeout structure.
> ---
>  drivers/watchdog/dw_wdt.c | 191 +-
>  1 file changed, 167 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
> index fba21de2bbad..693c0d1fd796 100644
> --- a/drivers/watchdog/dw_wdt.c
> +++ b/drivers/watchdog/dw_wdt.c
> @@ -13,6 +13,8 @@
>   */
>  
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -34,21 +36,40 @@
>  #define WDOG_CURRENT_COUNT_REG_OFFSET0x08
>  #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c
>  #define WDOG_COUNTER_RESTART_KICK_VALUE  0x76
> +#define WDOG_COMP_PARAMS_1_REG_OFFSET   0xf4
> +#define WDOG_COMP_PARAMS_1_USE_FIX_TOP  BIT(6)
>  
> -/* The maximum TOP (timeout period) value that can be set in the watchdog. */
> -#define DW_WDT_MAX_TOP   15
> +/* There are sixteen TOPs (timeout periods) that can be set in the watchdog. 
> */
> +#define DW_WDT_NUM_TOPS  16
> +#define DW_WDT_FIX_TOP(_idx) (1U << (16 + _idx))
>  
>  #define DW_WDT_DEFAULT_SECONDS   30
>  
> +static const u32 dw_wdt_fix_tops[DW_WDT_NUM_TOPS] = {
> + DW_WDT_FIX_TOP(0), DW_WDT_FIX_TOP(1), DW_WDT_FIX_TOP(2),
> + DW_WDT_FIX_TOP(3), DW_WDT_FIX_TOP(4), DW_WDT_FIX_TOP(5),
> + DW_WDT_FIX_TOP(6), DW_WDT_FIX_TOP(7), DW_WDT_FIX_TOP(8),
> + DW_WDT_FIX_TOP(9), DW_WDT_FIX_TOP(10), DW_WDT_FIX_TOP(11),
> + DW_WDT_FIX_TOP(12), DW_WDT_FIX_TOP(13), DW_WDT_FIX_TOP(14),
> + DW_WDT_FIX_TOP(15)
> +};
> +
>  static bool nowayout = WATCHDOG_NOWAYOUT;
>  module_param(nowayout, bool, 0);
>  MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
>"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
>  
> +struct dw_wdt_timeout {
> + u32 top_val;
> + unsigned int sec;
> + unsigned int msec;
> +};
> +
>  struct dw_wdt {
>   void __iomem*regs;
>   struct clk  *clk;
>   unsigned long   rate;
> + struct dw_wdt_timeout   timeouts[DW_WDT_NUM_TOPS];
>   struct watchdog_device  wdd;
>   struct reset_control*rst;
>   /* Save/restore */
> @@ -64,20 +85,66 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt)
>   WDOG_CONTROL_REG_WDT_EN_MASK;
>  }
>  
> -static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top)
> +static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt,
> +  unsigned int timeout, u32 *top_val)
>  {
> + int idx;
> +
>   /*
> -  * There are 16 possible timeout values in 0..15 where the number of
> -  * cycles is 2 ^ (16 + i) and the watchdog counts down.
> +  * Find a TO

Re: [PATCH v3 5/7] watchdog: dw_wdt: Support devices with asynch clocks

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:21PM +0300, Serge Semin wrote:
> DW Watchdog IP core can be synthesised with asynchronous timer/APB
> clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case
> separate clock signals are supposed to be used to feed watchdog timer
> and APB interface of the device. Currently the driver supports
> the synchronous mode only. Since there is no way to determine which
> mode was actually activated for device from its registers, we have to
> rely on the platform device configuration data. If optional "pclk"
> clock source is supplied, we consider the device working in asynchronous
> mode, otherwise the driver falls back to the synchronous configuration.
> 
> Signed-off-by: Serge Semin 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: Rob Herring 
> Cc: linux-m...@vger.kernel.org
> Cc: devicet...@vger.kernel.org

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - Rearrange SoBs.
> ---
>  drivers/watchdog/dw_wdt.c | 48 +++
>  1 file changed, 43 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
> index 693c0d1fd796..efbc36872670 100644
> --- a/drivers/watchdog/dw_wdt.c
> +++ b/drivers/watchdog/dw_wdt.c
> @@ -68,6 +68,7 @@ struct dw_wdt_timeout {
>  struct dw_wdt {
>   void __iomem*regs;
>   struct clk  *clk;
> + struct clk  *pclk;
>   unsigned long   rate;
>   struct dw_wdt_timeout   timeouts[DW_WDT_NUM_TOPS];
>   struct watchdog_device  wdd;
> @@ -274,6 +275,7 @@ static int dw_wdt_suspend(struct device *dev)
>   dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
>   dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
>  
> + clk_disable_unprepare(dw_wdt->pclk);
>   clk_disable_unprepare(dw_wdt->clk);
>  
>   return 0;
> @@ -287,6 +289,12 @@ static int dw_wdt_resume(struct device *dev)
>   if (err)
>   return err;
>  
> + err = clk_prepare_enable(dw_wdt->pclk);
> + if (err) {
> + clk_disable_unprepare(dw_wdt->clk);
> + return err;
> + }
> +
>   writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
>   writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
>  
> @@ -393,9 +401,18 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
>   if (IS_ERR(dw_wdt->regs))
>   return PTR_ERR(dw_wdt->regs);
>  
> - dw_wdt->clk = devm_clk_get(dev, NULL);
> - if (IS_ERR(dw_wdt->clk))
> - return PTR_ERR(dw_wdt->clk);
> + /*
> +  * Try to request the watchdog dedicated timer clock source. It must
> +  * be supplied if asynchronous mode is enabled. Otherwise fallback
> +  * to the common timer/bus clocks configuration, in which the very
> +  * first found clock supply both timer and APB signals.
> +  */
> + dw_wdt->clk = devm_clk_get(dev, "tclk");
> + if (IS_ERR(dw_wdt->clk)) {
> + dw_wdt->clk = devm_clk_get(dev, NULL);
> + if (IS_ERR(dw_wdt->clk))
> + return PTR_ERR(dw_wdt->clk);
> + }
>  
>   ret = clk_prepare_enable(dw_wdt->clk);
>   if (ret)
> @@ -407,10 +424,27 @@ static int dw_wdt_drv_probe(struct platform_device 
> *pdev)
>   goto out_disable_clk;
>   }
>  
> + /*
> +  * Request APB clock if device is configured with async clocks mode.
> +  * In this case both tclk and pclk clocks are supposed to be specified.
> +  * Alas we can't know for sure whether async mode was really activated,
> +  * so the pclk phandle reference is left optional. If it couldn't be
> +  * found we consider the device configured in synchronous clocks mode.
> +  */
> + dw_wdt->pclk = devm_clk_get_optional(dev, "pclk");
> + if (IS_ERR(dw_wdt->pclk)) {
> + ret = PTR_ERR(dw_wdt->pclk);
> + goto out_disable_clk;
> + }
> +
> + ret = clk_prepare_enable(dw_wdt->pclk);
> + if (ret)
> + goto out_disable_clk;
> +
>   dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
>   if (IS_ERR(dw_wdt->rst)) {
>   ret = PTR_ERR(dw_wdt->rst);
> - goto out_disable_clk;
> + goto out_disable_pclk;
>   }
>  
>   reset_control_deassert(dw_wdt->rst);
> @@ -449,10 +483,13 @@ static int dw_wdt_drv_probe(struct platform_device 
> *pdev)
>  
>   ret = watchdog_register_device(wdd);
>   if (ret)
> - goto out_disable_clk;
> + goto out_disable_pclk;
>  
>   return 0;
>  
> +out_disable_pclk:
> + clk_disable_unprepare(dw_wdt->pclk);
> +
>  out_disable_clk:
>   clk_disable_unprepare(dw_wdt->clk);
>   return ret;
> @@ -464,6 +501,7 @@ static int dw_wdt_drv_remove(struct platform_device *pdev)
>  
>   watchdog_unregister_device(&dw_wdt->wdd);
>   reset_control_assert(dw_wdt->rst);
>

[PATCH] perf libdw: Fix off-by 1 relative directory includes

2020-05-29 Thread Ian Rogers
This is currently working due to extra include paths in the build.

Before:
$ cd tools/perf/arch/arm64/util
$ ls -la ../../util/unwind-libdw.h
ls: cannot access '../../util/unwind-libdw.h': No such file or directory

After:
$ ls -la ../../../util/unwind-libdw.h
-rw-r- 1 irogers irogers 553 Apr 17 14:31 ../../../util/unwind-libdw.h
---
 tools/perf/arch/arm64/util/unwind-libdw.c   | 6 +++---
 tools/perf/arch/powerpc/util/unwind-libdw.c | 6 +++---
 tools/perf/arch/x86/util/unwind-libdw.c | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c 
b/tools/perf/arch/arm64/util/unwind-libdw.c
index 7623d85e77f3..a50941629649 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c 
b/tools/perf/arch/powerpc/util/unwind-libdw.c
index abf2dbc7f829..7b2d96ec28e3 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
 #include 
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
 static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c 
b/tools/perf/arch/x86/util/unwind-libdw.c
index fda8f4206ee4..eea2bf87232b 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
-- 
2.27.0.rc2.251.g90737beb825-goog



Re: [PATCH v3 1/7] dt-bindings: watchdog: Convert DW WDT binding to DT schema

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:17PM +0300, Serge Semin wrote:
> Modern device tree bindings are supposed to be created as YAML-files
> in accordance with dt-schema. This commit replaces the DW Watchdog
> legacy bare text bindings with YAML file. As before the binding states
> that the corresponding dts node is supposed to have a registers
> range, a watchdog timer references clock source, optional reset line and
> pre-timeout interrupt.
> 
> Signed-off-by: Serge Semin 
> Reviewed-by: Rob Herring 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: linux-m...@vger.kernel.org

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - Rearrange SoBs.
> - Discard BE copyright header.
> - Replace "additionalProperties: false" with "unevaluatedProperties: false"
>   property.
> - Discard interrupts property from the required properties list.
> - Remove a label definition from the binding example.
> - Move the asynchronous APB3 clock support into a dedicated patch.
> ---
>  .../devicetree/bindings/watchdog/dw_wdt.txt   | 24 -
>  .../bindings/watchdog/snps,dw-wdt.yaml| 50 +++
>  2 files changed, 50 insertions(+), 24 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/watchdog/dw_wdt.txt
>  create mode 100644 
> Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> 
> diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt 
> b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
> deleted file mode 100644
> index eb0914420c7c..
> --- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
> +++ /dev/null
> @@ -1,24 +0,0 @@
> -Synopsys Designware Watchdog Timer
> -
> -Required Properties:
> -
> -- compatible : Should contain "snps,dw-wdt"
> -- reg: Base address and size of the watchdog timer registers.
> -- clocks : phandle + clock-specifier for the clock that drives the
> - watchdog timer.
> -
> -Optional Properties:
> -
> -- interrupts : The interrupt used for the watchdog timeout warning.
> -- resets : phandle pointing to the system reset controller with
> - line index for the watchdog.
> -
> -Example:
> -
> - watchdog0: wd@ffd02000 {
> - compatible = "snps,dw-wdt";
> - reg = <0xffd02000 0x1000>;
> - interrupts = <0 171 4>;
> - clocks = <&per_base_clk>;
> - resets = <&rst WDT0_RESET>;
> - };
> diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml 
> b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> new file mode 100644
> index ..4f6944756ab4
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> @@ -0,0 +1,50 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/watchdog/snps,dw-wdt.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Synopsys Designware Watchdog Timer
> +
> +allOf:
> +  - $ref: "watchdog.yaml#"
> +
> +maintainers:
> +  - Jamie Iles 
> +
> +properties:
> +  compatible:
> +const: snps,dw-wdt
> +
> +  reg:
> +maxItems: 1
> +
> +  interrupts:
> +description: DW Watchdog pre-timeout interrupt
> +maxItems: 1
> +
> +  clocks:
> +items:
> +  - description: Watchdog timer reference clock
> +
> +  resets:
> +description: Phandle to the DW Watchdog reset lane
> +maxItems: 1
> +
> +unevaluatedProperties: false
> +
> +required:
> +  - compatible
> +  - reg
> +  - clocks
> +
> +examples:
> +  - |
> +watchdog@ffd02000 {
> +  compatible = "snps,dw-wdt";
> +  reg = <0xffd02000 0x1000>;
> +  interrupts = <0 171 4>;
> +  clocks = <&per_base_clk>;
> +  resets = <&wdt_rst>;
> +};
> +...


Re: [PATCH v3 2/7] dt-bindings: watchdog: dw-wdt: Support devices with asynch clocks

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:18PM +0300, Serge Semin wrote:
> DW Watchdog IP core can be synthesised with asynchronous timer/APB
> clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case
> separate clock signals are supposed to be used to feed watchdog timer
> and APB interface of the device. Let's update the DW Watchdog DT node
> schema so it would support the optional APB3 bus clock specified along
> with the mandatory watchdog timer reference clock.
> 
> Signed-off-by: Serge Semin 
> Reviewed-by: Rob Herring 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: linux-m...@vger.kernel.org

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - It's a new patch unpinned from the previous one.
> ---
>  .../devicetree/bindings/watchdog/snps,dw-wdt.yaml | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml 
> b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> index 4f6944756ab4..5bf6dc6377f3 100644
> --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> @@ -24,8 +24,16 @@ properties:
>  maxItems: 1
>  
>clocks:
> +minItems: 1
>  items:
>- description: Watchdog timer reference clock
> +  - description: APB3 interface clock
> +
> +  clock-names:
> +minItems: 1
> +items:
> +  - const: tclk
> +  - const: pclk
>  
>resets:
>  description: Phandle to the DW Watchdog reset lane


Re: [PATCH v3 3/7] dt-bindings: watchdog: dw-wdt: Add watchdog TOPs array property

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:19PM +0300, Serge Semin wrote:
> In case if DW Watchdog IP core is built with WDT_USE_FIX_TOP == false,
> a custom timeout periods are used to preset the timer counter. In
> this case that periods should be specified in a new "snps,watchdog-tops"
> property of the DW watchdog dts node.
> 
> Signed-off-by: Serge Semin 
> Reviewed-by: Rob Herring 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: linux-m...@vger.kernel.org

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - Rearrange SoBs.
> - Move $ref to the root level of the "snps,watchdog-tops" property
>   so does the constraints.
> - Add default TOP values array.
> - Discard the label definition from the new bindings example.
> 
> Changelog v3:
> - Remove items property and move the minItems and maxItems constraints to
>   the root level of the snps,watchdog-tops property.
> ---
>  .../bindings/watchdog/snps,dw-wdt.yaml| 32 +++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml 
> b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> index 5bf6dc6377f3..d9fc7bb851b1 100644
> --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
> @@ -39,6 +39,23 @@ properties:
>  description: Phandle to the DW Watchdog reset lane
>  maxItems: 1
>  
> +  snps,watchdog-tops:
> +$ref: /schemas/types.yaml#/definitions/uint32-array
> +description: |
> +  DW APB Watchdog custom timer intervals - Timeout Period ranges (TOPs).
> +  Each TOP is a number loaded into the watchdog counter at the moment of
> +  the timer restart. The counter decrementing happens each tick of the
> +  reference clock. Therefore the TOPs array is equivalent to an array of
> +  the timer expiration intervals supported by the DW APB Watchdog. Note
> +  DW APB Watchdog IP-core might be synthesized with fixed TOP values,
> +  in which case this property is unnecessary with default TOPs utilized.
> +default: [0x0001000 0x0002000 0x0004000 0x0008000
> +  0x001 0x002 0x004 0x008
> +  0x010 0x020 0x040 0x080
> +  0x100 0x200 0x400 0x800]
> +minItems: 16
> +maxItems: 16
> +
>  unevaluatedProperties: false
>  
>  required:
> @@ -55,4 +72,19 @@ examples:
>clocks = <&per_base_clk>;
>resets = <&wdt_rst>;
>  };
> +
> +  - |
> +watchdog@ffd02000 {
> +  compatible = "snps,dw-wdt";
> +  reg = <0xffd02000 0x1000>;
> +  interrupts = <0 171 4>;
> +  clocks = <&per_base_clk>;
> +  clock-names = "tclk";
> +  snps,watchdog-tops = <0x00FF 0x01FF 0x03FF
> +0x07FF 0x 0x0001
> +0x0003 0x0007 0x000F
> +0x001F 0x003F 0x007F
> +0x00FF 0x01FF 0x03FF
> +0x07FF>;
> +};
>  ...


Re: linux-next: manual merge of the akpm-current tree with the mips tree

2020-05-29 Thread Stephen Rothwell
Hi Andrew,

On Fri, 29 May 2020 14:15:46 -0700 Andrew Morton  
wrote:
>
> On Thu, 28 May 2020 19:29:43 +1000 Stephen Rothwell  
> wrote:
> > 
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@@ -2752,9 -2748,6 +2749,8 @@@ static vm_fault_t wp_page_copy(struct v
> > /* Free the old page.. */
> > new_page = old_page;
> > page_copied = 1;
> >  +  } else {
> >  +  update_mmu_tlb(vma, vmf->address, vmf->pte);
> > -   mem_cgroup_cancel_charge(new_page, memcg, false);
> > }
> >   
> > if (new_page)  
> 
> We decided that the update_mmu_tlb() call is no longer needed here, so
> there is no need to re-add it when resolving this.

OK, I have fixed up my merge resolution to remove those 2 lines.
-- 
Cheers,
Stephen Rothwell


pgpHz7ZVh_Q4Y.pgp
Description: OpenPGP digital signature


Re: Lost PCIe PME after a914ff2d78ce ("PCI/ASPM: Don't select CONFIG_PCIEASPM by default")

2020-05-29 Thread Matthew Garrett
On Sat, May 30, 2020 at 12:26:17AM +0200, Heiner Kallweit wrote:
 
> Current situation means that PME is unusable on all systems where
> pcie_aspm_support_enabled() returns false, what is basically every
> system except EXPERT mode is enabled and CONFIG_PCIEASPM is set.
> So we definitely need to do something.

CONFIG_PCIEASPM is default y. I don't think there's huge value in 
adding complexity to deal with it being disabled, given that the kernel 
is then in a configuration that no vendor is testing against. There are 
existing runtime mechanisms to disable it at runtime.
 
-- 
Matthew Garrett | mj...@srcf.ucam.org


Re: [PATCH v3 6/7] watchdog: dw_wdt: Add pre-timeouts support

2020-05-29 Thread Guenter Roeck
On Tue, May 26, 2020 at 06:41:22PM +0300, Serge Semin wrote:
> DW Watchdog can rise an interrupt in case if IRQ request mode is enabled
> and timer reaches the zero value. In this case the IRQ lane is left
> pending until either the next watchdog kick event (watchdog restart) or
> until the WDT_EOI register is read or the device/system reset. This
> interface can be used to implement the pre-timeout functionality
> optionally provided by the Linux kernel watchdog devices.
> 
> IRQ mode provides a two stages timeout interface. It means the IRQ is
> raised when the counter reaches zero, while the system reset occurs only
> after subsequent timeout if the timer restart is not performed. Due to
> this peculiarity the pre-timeout value is actually set to the achieved
> hardware timeout, while the real watchdog timeout is considered to be
> twice as much of it. This applies a significant limitation on the
> pre-timeout values, so current implementation supports either zero value,
> which disables the pre-timeout events, or non-zero values, which imply
> the pre-timeout to be at least half of the current watchdog timeout.
> 
> Note that we ask the interrupt controller to detect the rising-edge
> pre-timeout interrupts to prevent the high-level-IRQs flood, since
> if the pre-timeout happens, the IRQ lane will be left pending until
> it's cleared by the timer restart.
> 
> Signed-off-by: Serge Semin 
> Cc: Alexey Malahov 
> Cc: Thomas Bogendoerfer 
> Cc: Arnd Bergmann 
> Cc: Rob Herring 
> Cc: linux-m...@vger.kernel.org
> Cc: devicet...@vger.kernel.org

Nitpick below, but I don't really know what to do about it, so

Reviewed-by: Guenter Roeck 

> ---
> 
> Changelog v2:
> - Rearrange SoBs.
> - Make the Pre-timeout IRQ being optionally supported.
> ---
>  drivers/watchdog/dw_wdt.c | 138 +++---
>  1 file changed, 130 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
> index efbc36872670..3cd7c485cd70 100644
> --- a/drivers/watchdog/dw_wdt.c
> +++ b/drivers/watchdog/dw_wdt.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -36,6 +37,8 @@
>  #define WDOG_CURRENT_COUNT_REG_OFFSET0x08
>  #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c
>  #define WDOG_COUNTER_RESTART_KICK_VALUE  0x76
> +#define WDOG_INTERRUPT_STATUS_REG_OFFSET0x10
> +#define WDOG_INTERRUPT_CLEAR_REG_OFFSET 0x14
>  #define WDOG_COMP_PARAMS_1_REG_OFFSET   0xf4
>  #define WDOG_COMP_PARAMS_1_USE_FIX_TOP  BIT(6)
>  
> @@ -59,6 +62,11 @@ module_param(nowayout, bool, 0);
>  MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
>"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
>  
> +enum dw_wdt_rmod {
> + DW_WDT_RMOD_RESET = 1,
> + DW_WDT_RMOD_IRQ = 2
> +};
> +
>  struct dw_wdt_timeout {
>   u32 top_val;
>   unsigned int sec;
> @@ -70,6 +78,7 @@ struct dw_wdt {
>   struct clk  *clk;
>   struct clk  *pclk;
>   unsigned long   rate;
> + enum dw_wdt_rmodrmod;
>   struct dw_wdt_timeout   timeouts[DW_WDT_NUM_TOPS];
>   struct watchdog_device  wdd;
>   struct reset_control*rst;
> @@ -86,6 +95,20 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt)
>   WDOG_CONTROL_REG_WDT_EN_MASK;
>  }
>  
> +static void dw_wdt_update_mode(struct dw_wdt *dw_wdt, enum dw_wdt_rmod rmod)
> +{
> + u32 val;
> +
> + val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
> + if (rmod == DW_WDT_RMOD_IRQ)
> + val |= WDOG_CONTROL_REG_RESP_MODE_MASK;
> + else
> + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK;
> + writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
> +
> + dw_wdt->rmod = rmod;
> +}
> +
>  static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt,
>unsigned int timeout, u32 *top_val)
>  {
> @@ -145,7 +168,11 @@ static unsigned int dw_wdt_get_timeout(struct dw_wdt 
> *dw_wdt)
>   break;
>   }
>  
> - return dw_wdt->timeouts[idx].sec;
> + /*
> +  * In IRQ mode due to the two stages counter, the actual timeout is
> +  * twice greater than the TOP setting.
> +  */
> + return dw_wdt->timeouts[idx].sec * dw_wdt->rmod;
>  }
>  
>  static int dw_wdt_ping(struct watchdog_device *wdd)
> @@ -164,7 +191,20 @@ static int dw_wdt_set_timeout(struct watchdog_device 
> *wdd, unsigned int top_s)
>   unsigned int timeout;
>   u32 top_val;
>  
> - timeout = dw_wdt_find_best_top(dw_wdt, top_s, &top_val);
> + /*
> +  * Note IRQ mode being enabled means having a non-zero pre-timeout
> +  * setup. In this case we try to find a TOP as close to the half of the
> +  * requested timeout as possible since DW Watchdog IRQ mode is designed
> +  * in two stages way - first timeout rises the pre-timeout interrupt,
> +  *

<    7   8   9   10   11   12   13   14   >