[PATCHv11 4/4] watchdog/softlockup: report the most frequent interrupts

2024-02-27 Thread Bitao Hu
When the watchdog determines that the current soft lockup is due
to an interrupt storm based on CPU utilization, reporting the
most frequent interrupts could be good enough for further
troubleshooting.

Below is an example of interrupt storm. The call tree does not
provide useful information, but we can analyze which interrupt
caused the soft lockup by comparing the counts of interrupts.

[  638.870231] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [swapper/9:0]
[  638.870825] CPU#9 Utilization every 4s during lockup:
[  638.871194]  #1:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.871652]  #2:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.872107]  #3:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.872563]  #4:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.873018]  #5:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.873494] CPU#9 Detect HardIRQ Time exceeds 50%. Most frequent HardIRQs:
[  638.873994]  #1: 330945  irq#7
[  638.874236]  #2: 31  irq#82
[  638.874493]  #3: 10  irq#10
[  638.874744]  #4: 2   irq#89
[  638.874992]  #5: 1   irq#102
...
[  638.875313] Call trace:
[  638.875315]  __do_softirq+0xa8/0x364

Signed-off-by: Bitao Hu 
Reviewed-by: Liu Song 
---
 kernel/watchdog.c | 115 --
 1 file changed, 111 insertions(+), 4 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69e72d7e461d..c9d49ae8d045 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -12,22 +12,25 @@
 
 #define pr_fmt(fmt) "watchdog: " fmt
 
-#include 
 #include 
-#include 
 #include 
+#include 
+#include 
 #include 
+#include 
 #include 
+#include 
 #include 
+#include 
+#include 
 #include 
 #include 
+
 #include 
 #include 
 #include 
-#include 
 
 #include 
-#include 
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -417,13 +420,104 @@ static void print_cpustat(void)
}
 }
 
+#define HARDIRQ_PERCENT_THRESH  50
+#define NUM_HARDIRQ_REPORT  5
+struct irq_counts {
+   int irq;
+   u32 counts;
+};
+
+static DEFINE_PER_CPU(bool, snapshot_taken);
+
+/* Tabulate the most frequent interrupts. */
+static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 
counts, int rank)
+{
+   int i;
+   struct irq_counts new_count = {irq, counts};
+
+   for (i = 0; i < rank; i++) {
+   if (counts > irq_counts[i].counts)
+   swap(new_count, irq_counts[i]);
+   }
+}
+
+/*
+ * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period,
+ * then the cause of softlockup might be interrupt storm. In this case, it
+ * would be useful to start interrupt counting.
+ */
+static bool need_counting_irqs(void)
+{
+   u8 util;
+   int tail = __this_cpu_read(cpustat_tail);
+
+   tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT;
+   util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
+   return util > HARDIRQ_PERCENT_THRESH;
+}
+
+static void start_counting_irqs(void)
+{
+   if (!__this_cpu_read(snapshot_taken)) {
+   kstat_snapshot_irqs();
+   __this_cpu_write(snapshot_taken, true);
+   }
+}
+
+static void stop_counting_irqs(void)
+{
+   __this_cpu_write(snapshot_taken, false);
+}
+
+static void print_irq_counts(void)
+{
+   unsigned int i, count;
+   struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = {
+   {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}
+   };
+
+   if (__this_cpu_read(snapshot_taken)) {
+   for_each_active_irq(i) {
+   count = kstat_get_irq_since_snapshot(i);
+   tabulate_irq_count(irq_counts_sorted, i, count, 
NUM_HARDIRQ_REPORT);
+   }
+
+   /*
+* We do not want the "watchdog: " prefix on every line,
+* hence we use "printk" instead of "pr_crit".
+*/
+   printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most 
frequent HardIRQs:\n",
+  smp_processor_id(), HARDIRQ_PERCENT_THRESH);
+
+   for (i = 0; i < NUM_HARDIRQ_REPORT; i++) {
+   if (irq_counts_sorted[i].irq == -1)
+   break;
+
+   printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n",
+  i + 1, irq_counts_sorted[i].counts,
+  irq_counts_sorted[i].irq);
+   }
+
+   /*
+* If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in 
the last
+* sample_period, then we suspect the interrupt storm might be 
subsiding.
+*/
+   if (!need_counting_irqs())
+   stop_counting_irqs();
+   }
+}
+
 static void report_cpu_status(void)
 {
print_cpustat();
+   

[PATCHv11 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-27 Thread Bitao Hu
show_interrupts() unconditionally accumulates the per CPU interrupt
statistics to determine whether an interrupt was ever raised.

This can be avoided for all interrupts which are not strictly per CPU
and not of type NMI because those interrupts provide already an
accumulated counter. The required logic is already implemented in
kstat_irqs().

Split the inner access logic out of kstat_irqs() and use it for
kstat_irqs() and show_interrupts() to avoid the accumulation loop
when possible.

Originally-by: Thomas Gleixner 
Signed-off-by: Bitao Hu 
Reviewed-by: Liu Song 
---
 kernel/irq/internals.h |  2 ++
 kernel/irq/irqdesc.c   | 16 +++-
 kernel/irq/proc.c  |  6 ++
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1d92532c2aae..6c43ef3e7308 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -98,6 +98,8 @@ extern void mask_irq(struct irq_desc *desc);
 extern void unmask_irq(struct irq_desc *desc);
 extern void unmask_threaded_irq(struct irq_desc *desc);
 
+extern unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct 
cpumask *cpumask);
+
 #ifdef CONFIG_SPARSE_IRQ
 static inline void irq_mark_irq(unsigned int irq) { }
 #else
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9cd17080b2d8..65a7f2dcd17b 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -960,24 +960,30 @@ static bool irq_is_nmi(struct irq_desc *desc)
return desc->istate & IRQS_NMI;
 }
 
-static unsigned int kstat_irqs(unsigned int irq)
+unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask 
*cpumask)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
unsigned int sum = 0;
int cpu;
 
-   if (!desc || !desc->kstat_irqs)
-   return 0;
if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
return data_race(desc->tot_count);
 
-   for_each_possible_cpu(cpu)
+   for_each_cpu(cpu, cpumask)
sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu));
return sum;
 }
 
+static unsigned int kstat_irqs(unsigned int irq)
+{
+   struct irq_desc *desc = irq_to_desc(irq);
+
+   if (!desc || !desc->kstat_irqs)
+   return 0;
+   return kstat_irqs_desc(desc, cpu_possible_mask);
+}
+
 void kstat_snapshot_irqs(void)
 {
struct irq_desc *desc;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6954e0a02047..5c320c3f10a7 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -488,10 +488,8 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
 
-   if (desc->kstat_irqs) {
-   for_each_online_cpu(j)
-   any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, 
j));
-   }
+   if (desc->kstat_irqs)
+   any_count = kstat_irqs_desc(desc, cpu_online_mask);
 
if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
goto outsparse;
-- 
2.37.1 (Apple Git-137.1)



[PATCHv11 2/4] genirq: Provide a snapshot mechanism for interrupt statistics

2024-02-27 Thread Bitao Hu
The soft lockup detector lacks a mechanism to identify interrupt storms
as root cause of a lockup. To enable this the detector needs a
mechanism to snapshot the interrupt count statistics on a CPU when the
detector observes a potential lockup scenario and compare that against
the interrupt count when it warns about the lockup later on. The number
of interrupts in that period give a hint whether the lockup might be
caused by an interrupt storm.

Instead of having extra storage in the lockup detector and accessing
the internals of the interrupt descriptor directly, convert the per CPU
irq_desc::kstat_irq member to a data structure which contains the
counter plus a snapshot member and provide interfaces to take a
snapshot of all interrupts on the current CPU and to retrieve the delta
of a specific interrupt later on.

Originally-by: Thomas Gleixner 
Signed-off-by: Bitao Hu 
Reviewed-by: Liu Song 
---
 arch/mips/dec/setup.c|  2 +-
 arch/parisc/kernel/smp.c |  2 +-
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  2 +-
 include/linux/irqdesc.h  | 14 ++--
 include/linux/kernel_stat.h  |  3 +++
 kernel/irq/internals.h   |  2 +-
 kernel/irq/irqdesc.c | 34 ++--
 kernel/irq/proc.c|  5 ++--
 scripts/gdb/linux/interrupts.py  |  6 ++---
 9 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index 6c3704f51d0d..87f0a1436bf9 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -756,7 +756,7 @@ void __init arch_init_irq(void)
NULL))
pr_err("Failed to register fpu interrupt\n");
desc_fpu = irq_to_desc(irq_fpu);
-   fpu_kstat_irq = this_cpu_ptr(desc_fpu->kstat_irqs);
+   fpu_kstat_irq = this_cpu_ptr(_fpu->kstat_irqs->cnt);
}
if (dec_interrupt[DEC_IRQ_CASCADE] >= 0) {
if (request_irq(dec_interrupt[DEC_IRQ_CASCADE], no_action,
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 444154271f23..800eb64e91ad 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -344,7 +344,7 @@ static int smp_boot_one_cpu(int cpuid, struct task_struct 
*idle)
struct irq_desc *desc = irq_to_desc(i);
 
if (desc && desc->kstat_irqs)
-   *per_cpu_ptr(desc->kstat_irqs, cpuid) = 0;
+   *per_cpu_ptr(desc->kstat_irqs, cpuid) = (struct 
irqstat) { };
}
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e42984878503..f2636414d82a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -837,7 +837,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu 
*addr)
  */
 static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
 {
-   this_cpu_inc_rm(desc->kstat_irqs);
+   this_cpu_inc_rm(>kstat_irqs->cnt);
__this_cpu_inc(kstat.irqs_sum);
 }
 
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index d9451d456a73..95d8def84471 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -17,6 +17,16 @@ struct irq_desc;
 struct irq_domain;
 struct pt_regs;
 
+/**
+ * struct irqstat - interrupt statistics
+ * @cnt:   real-time interrupt count
+ * @ref:   snapshot of interrupt count
+ */
+struct irqstat {
+   unsigned intcnt;
+   unsigned intref;
+};
+
 /**
  * struct irq_desc - interrupt descriptor
  * @irq_common_data:   per irq and chip data passed down to chip functions
@@ -55,7 +65,7 @@ struct pt_regs;
 struct irq_desc {
struct irq_common_data  irq_common_data;
struct irq_data irq_data;
-   unsigned int __percpu   *kstat_irqs;
+   struct irqstat __percpu *kstat_irqs;
irq_flow_handler_t  handle_irq;
struct irqaction*action;/* IRQ action list */
unsigned intstatus_use_accessors;
@@ -119,7 +129,7 @@ extern struct irq_desc irq_desc[NR_IRQS];
 static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
  unsigned int cpu)
 {
-   return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
+   return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0;
 }
 
 static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 9935f7ecbfb9..98b3043ea5e6 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,6 +79,9 @@ static inline unsigned int kstat_cpu_softirqs_sum(int cpu)
return sum;
 }
 
+extern void kstat_snapshot_irqs(void);
+extern unsigned int kstat_get_irq_since_snapshot(unsigned int irq);
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
diff --git 

[PATCHv11 1/4] watchdog/softlockup: low-overhead detection of interrupt storm

2024-02-27 Thread Bitao Hu
The following softlockup is caused by interrupt storm, but it cannot be
identified from the call tree. Because the call tree is just a snapshot
and doesn't fully capture the behavior of the CPU during the soft lockup.
  watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921]
  ...
  Call trace:
__do_softirq+0xa0/0x37c
__irq_exit_rcu+0x108/0x140
irq_exit+0x14/0x20
__handle_domain_irq+0x84/0xe0
gic_handle_irq+0x80/0x108
el0_irq_naked+0x50/0x58

Therefore,I think it is necessary to report CPU utilization during the
softlockup_thresh period (report once every sample_period, for a total
of 5 reportings), like this:
  watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921]
  CPU#28 Utilization every 4s during lockup:
#1: 0% system, 0% softirq, 100% hardirq, 0% idle
#2: 0% system, 0% softirq, 100% hardirq, 0% idle
#3: 0% system, 0% softirq, 100% hardirq, 0% idle
#4: 0% system, 0% softirq, 100% hardirq, 0% idle
#5: 0% system, 0% softirq, 100% hardirq, 0% idle
  ...

This would be helpful in determining whether an interrupt storm has
occurred or in identifying the cause of the softlockup. The criteria for
determination are as follows:
  a. If the hardirq utilization is high, then interrupt storm should be
  considered and the root cause cannot be determined from the call tree.
  b. If the softirq utilization is high, then we could analyze the call
  tree but it may cannot reflect the root cause.
  c. If the system utilization is high, then we could analyze the root
  cause from the call tree.

The mechanism requires a considerable amount of global storage space
when configured for the maximum number of CPUs. Therefore, adding a
SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob that defaults to "yes"
if the max number of CPUs is <= 128.

Signed-off-by: Bitao Hu 
Reviewed-by: Douglas Anderson 
Reviewed-by: Liu Song 
---
 kernel/watchdog.c | 98 ++-
 lib/Kconfig.debug | 13 +++
 2 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 81a8862295d6..69e72d7e461d 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -35,6 +37,8 @@ static DEFINE_MUTEX(watchdog_mutex);
 # define WATCHDOG_HARDLOCKUP_DEFAULT   0
 #endif
 
+#define NUM_SAMPLE_PERIODS 5
+
 unsigned long __read_mostly watchdog_enabled;
 int __read_mostly watchdog_user_enabled = 1;
 static int __read_mostly watchdog_hardlockup_user_enabled = 
WATCHDOG_HARDLOCKUP_DEFAULT;
@@ -333,6 +337,95 @@ __setup("watchdog_thresh=", watchdog_thresh_setup);
 
 static void __lockup_detector_cleanup(void);
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM
+enum stats_per_group {
+   STATS_SYSTEM,
+   STATS_SOFTIRQ,
+   STATS_HARDIRQ,
+   STATS_IDLE,
+   NUM_STATS_PER_GROUP,
+};
+
+static const enum cpu_usage_stat tracked_stats[NUM_STATS_PER_GROUP] = {
+   CPUTIME_SYSTEM,
+   CPUTIME_SOFTIRQ,
+   CPUTIME_IRQ,
+   CPUTIME_IDLE,
+};
+
+static DEFINE_PER_CPU(u16, cpustat_old[NUM_STATS_PER_GROUP]);
+static DEFINE_PER_CPU(u8, 
cpustat_util[NUM_SAMPLE_PERIODS][NUM_STATS_PER_GROUP]);
+static DEFINE_PER_CPU(u8, cpustat_tail);
+
+/*
+ * We don't need nanosecond resolution. A granularity of 16ms is
+ * sufficient for our precision, allowing us to use u16 to store
+ * cpustats, which will roll over roughly every ~1000 seconds.
+ * 2^24 ~= 16 * 10^6
+ */
+static u16 get_16bit_precision(u64 data_ns)
+{
+   return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */
+}
+
+static void update_cpustat(void)
+{
+   int i;
+   u8 util;
+   u16 old_stat, new_stat;
+   struct kernel_cpustat kcpustat;
+   u64 *cpustat = kcpustat.cpustat;
+   u8 tail = __this_cpu_read(cpustat_tail);
+   u16 sample_period_16 = get_16bit_precision(sample_period);
+
+   kcpustat_cpu_fetch(, smp_processor_id());
+
+   for (i = 0; i < NUM_STATS_PER_GROUP; i++) {
+   old_stat = __this_cpu_read(cpustat_old[i]);
+   new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
+   util = DIV_ROUND_UP(100 * (new_stat - old_stat), 
sample_period_16);
+   __this_cpu_write(cpustat_util[tail][i], util);
+   __this_cpu_write(cpustat_old[i], new_stat);
+   }
+
+   __this_cpu_write(cpustat_tail, (tail + 1) % NUM_SAMPLE_PERIODS);
+}
+
+static void print_cpustat(void)
+{
+   int i, group;
+   u8 tail = __this_cpu_read(cpustat_tail);
+   u64 sample_period_second = sample_period;
+
+   do_div(sample_period_second, NSEC_PER_SEC);
+
+   /*
+* We do not want the "watchdog: " prefix on every line,
+* hence we use "printk" instead of "pr_crit".
+*/
+   printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n",
+  smp_processor_id(), sample_period_second);
+
+   for (i = 0; i < 

[PATCHv11 0/4] *** Detect interrupt storm in softlockup ***

2024-02-27 Thread Bitao Hu
Hi, guys.
I have implemented a low-overhead method for detecting interrupt
storm in softlockup. Please review it, all comments are welcome.

Changes from v10 to v11:

- Only patch #2 and patch #3 have been changed.

- Add comments to explain each field of 'struct irqstat' in patch #2.

- Split the inner summation logic out of kstat_irqs() and encapsulate
it into kstat_irqs_desc() in patch #3.

- Adopt Thomas's change log for patch #3.

- Add the 'Reviewed-by' tag of Liu Song.

Changes from v9 to v10:

- The two patches related to 'watchdog/softlockup' remain unchanged.

- The majority of the work related to 'genirq' is contributed by
Thomas, indicated by adding 'Originally-by' tag. And I'd like to
express my gratitude for Thomas's contributions and guidance here.

- Adopt Thomas's change log for the snapshot mechanism for interrupt
statistics.

- Split unrelated change in patch #2 into a separate patch #3.

Changes from v8 to v9:

- Patch #1 remains unchanged.

- From Thomas Gleixner, split patch #2 into two patches. Interrupt
infrastructure first and then the actual usage site in the
watchdog code.

Changes from v7 to v8:

- From Thomas Gleixner, implement statistics within the interrupt
core code and provide sensible interfaces for the watchdog code.

- Patch #1 remains unchanged. Patch #2 has significant changes
based on Thomas's suggestions, which is why I have removed
Liu Song and Douglas's Reviewed-by from patch #2. Please review
it again, and all comments are welcome.

Changes from v6 to v7:

- Remove "READ_ONCE" in "start_counting_irqs"

- Replace the hard-coded 5 with "NUM_SAMPLE_PERIODS" macro in
"set_sample_period".

- Add empty lines to help with reading the code.

- Remove the branch that processes IRQs where "counts_diff = 0".

- Add the Reviewed-by of Liu Song and Douglas.

Changes from v5 to v6:

- Use "./scripts/checkpatch.pl --strict" to get a few extra
style nits and fix them.

- Squash patch #3 into patch #1, and wrapp the help text to
80 columns.

- Sort existing headers alphabetically in watchdog.c

- Drop "softlockup_hardirq_cpus", just read "hardirq_counts"
and see if it's non-NULL.

- Store "nr_irqs" in a local variable.

- Simplify the calculation of "cpu_diff".

Changes from v4 to v5:

- Rearranging variable placement to make code look neater.

Changes from v3 to v4:

- Renaming some variable and function names to make the code logic
more readable.

- Change the code location to avoid predeclaring.

- Just swap rather than a double loop in tabulate_irq_count.

- Since nr_irqs has the potential to grow at runtime, bounds-check
logic has been implemented.

- Add SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob.

Changes from v2 to v3:

- From Liu Song, using enum instead of macro for cpu_stats, shortening
the name 'idx_to_stat' to 'stats', adding 'get_16bit_precesion' instead
of using right shift operations, and using 'struct irq_counts'.

- From kernel robot test, using '__this_cpu_read' and '__this_cpu_write'
instead of accessing to an per-cpu array directly, in order to avoid
this warning.
'sparse: incorrect type in initializer (different modifiers)'

Changes from v1 to v2:

- From Douglas, optimize the memory of cpustats. With the maximum number
of CPUs, that's now this.
2 * 8192 * 4 + 1 * 8192 * 5 * 4 + 1 * 8192 = 237,568 bytes.

- From Liu Song, refactor the code format and add necessary comments.

- From Douglas, use interrupt counts instead of interrupt time to
determine the cause of softlockup.

- Remove the cmdline parameter added in PATCHv1.

Bitao Hu (4):
  watchdog/softlockup: low-overhead detection of interrupt storm
  genirq: Provide a snapshot mechanism for interrupt statistics
  genirq: Avoid summation loops for /proc/interrupts
  watchdog/softlockup: report the most frequent interrupts

 arch/mips/dec/setup.c|   2 +-
 arch/parisc/kernel/smp.c |   2 +-
 arch/powerpc/kvm/book3s_hv_rm_xics.c |   2 +-
 include/linux/irqdesc.h  |  14 +-
 include/linux/kernel_stat.h  |   3 +
 kernel/irq/internals.h   |   4 +-
 kernel/irq/irqdesc.c |  50 +--
 kernel/irq/proc.c|   9 +-
 kernel/watchdog.c| 213 ++-
 lib/Kconfig.debug|  13 ++
 scripts/gdb/linux/interrupts.py  |   6 +-
 11 files changed, 286 insertions(+), 32 deletions(-)

-- 
2.37.1 (Apple Git-137.1)



Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-27 Thread Bitao Hu

On 2024/2/27 23:39, Thomas Gleixner wrote:

On Tue, Feb 27 2024 at 19:20, Bitao Hu wrote:

On 2024/2/27 17:26, Thomas Gleixner wrote:


and then let kstat_irqs() and show_interrupts() use it. See?


I have a concern. kstat_irqs() uses for_each_possible_cpu() for
summation. However, show_interrupts() uses for_each_online_cpu(),
which means it only outputs interrupt statistics for online cpus.
If we use for_each_possible_cpu() in show_interrupts() to calculate
'any_count', there could be a problem with the following scenario:
If an interrupt has a count of zero on online cpus but a non-zero
count on possible cpus, then 'any_count' would not be zero, and the
statistics for that interrupt would be output, which is not the
desired behavior for show_interrupts(). Therefore, I think it's not
good to have kstat_irqs() and show_interrupts() both use the same
logic. What do you think?


Good point. But you simply can have

unsigned int kstat_irq_desc(struct irq_desc *desc, const struct cpumask *mask)

and hand in the appropriate cpumask, which still shares the code, no?


Alright, that is a good approach.


Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Michael Ellerman
Christophe Leroy  writes:
> Le 27/02/2024 à 15:00, Uwe Kleine-König a écrit :
>> On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote:
>>> Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit :
 On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote:
> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
>> recently the spi-ppc4xx.c driver suffered from build errors and warnings
>> that were undetected for longer than I expected. I think it would be
>> very beneficial if this driver was enabled in (at least) a powerpc
>> allmodconfig build.
>>
>> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
>> only defined for 4xx (as these select PPC_DCR_NATIVE).
...
>> 
>> The reason I'd like to see it in allmodconfig is that testing
>> allmodconfig on several archs is the check I do for my patch series.
>
> I think for powerpc you should really check ppc32_allmodconfig in 
> addition to allmodconfig

Yeah.

arch/powerpc is really ~7 separate sub architectures.

Building allmodconfig and ppc32_allmodconfig at least covers the bulk of
the code. But it doesn't include 4xx, or several other platforms.

I think the best/easiest option is just to enable this driver in the 44x
defconfig. At least that way the auto builders should catch any problems.

eg.

diff --git a/arch/powerpc/configs/ppc44x_defconfig 
b/arch/powerpc/configs/ppc44x_defconfig
index 8b595f67068c..95a1e7efb79f 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -67,6 +67,8 @@ CONFIG_I2C=m
 CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_GPIO=m
 CONFIG_I2C_IBM_IIC=m
+CONFIG_SPI=y
+CONFIG_SPI_PPC4xx=y
 # CONFIG_HWMON is not set
 CONFIG_FB=m
 CONFIG_USB=m

cheers


Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info

2024-02-27 Thread Edgecombe, Rick P
On Tue, 2024-02-27 at 18:16 +, Christophe Leroy wrote:
> > > Why doing a full init of the struct when all fields are re-
> > > written a few
> > > lines after ?
> > 
> > It's a nice change for robustness and makes future changes easier.
> > It's
> > not actually wasteful since the compiler will throw away all
> > redundant
> > stores.
> 
> Well, I tend to dislike default init at declaration because it often 
> hides missed real init. When a field is not initialized GCC should
> emit 
> a Warning, at least when built with W=2 which sets 
> -Wmissing-field-initializers ?

Sorry, I'm not following where you are going with this. There aren't
any struct vm_unmapped_area_info users that use initializers today, so
that warning won't apply in this case. Meanwhile, designated style
struct initialization (which would zero new members) is very common, as
well as not get anything checked by that warning. Anything with this
many members is probably going to use the designated style.

If we are optimizing to avoid bugs, the way this struct is used today
is not great. It is essentially being used as an argument passer.
Normally when a function signature changes, but a caller is missed, of
course the compiler will notice loudly. But not here. So I think
probably zero initializing it is safer than being setup to pass
garbage.

I'm trying to figure out what to do here. If I changed it so that just
powerpc set the new field manually, then the convention across the
kernel would be for everything to be default zero, and future other new
parameters could have a greater chance of turning into garbage on
powerpc. Since it could be easy to miss that powerpc was special. Would
you prefer it?

Or maybe I could try a new vm_unmapped_area() that takes the extra
argument separately? The old callers could call the old function and
not need any arch updates. It all seems strange though, because
automatic zero initializing struct members is so common in the kernel.
But it also wouldn't add the cleanup Kees was pointing out. Hmm.

Any preference? Or maybe am I missing your point and talking nonsense?



Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Manivannan Sadhasivam
On Tue, Feb 27, 2024 at 12:26:05PM -0500, Frank Li wrote:
> On Tue, Feb 27, 2024 at 06:00:24PM +0530, Manivannan Sadhasivam wrote:
> > On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote:
> > > On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote:
> > > > The PCIe link can go to LINK_DOWN state in one of the following 
> > > > scenarios:
> > > > 
> > > > 1. Fundamental (PERST#)/hot/warm reset
> > > > 2. Link transition from L2/L3 to L0
> > > 
> > > From L0 to L2/l3
> > > 
> > 
> > I don't understand what you mean here. Link down won't happen while moving 
> > from
> > L0 to L2/L3, it is the opposite.
> 
> Strange, why there are not linkdown from L0 to L2/l3. But have linkdown
> from L2/l3 to L0? when linkup happen? Any document show these?
> 

Refer PCIe Spec 5.0, Figure 5-1 Link Power Management State Flow Diagram.

- Mani

> Frank
> 
> > 
> > > > 
> > > > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose 
> > > > the
> > > > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize
> > > > them to function properly once the link comes back again.
> > > > 
> > > > This is not a problem for drivers supporting PERST# IRQ, since they can
> > > > reinitialize the registers in the PERST# IRQ callback. But for the 
> > > > drivers
> > > > not supporting PERST#, there is no way they can reinitialize the 
> > > > registers
> > > > other than relying on LINK_DOWN IRQ received when the link goes down. So
> > > > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the
> > > > non-sticky registers and also notifies the EPF drivers about link going
> > > > down.
> > > > 
> > > > This API can also be used by the drivers supporting PERST# to handle the
> > > > scenario (2) mentioned above.
> > > > 
> > > > Signed-off-by: Manivannan Sadhasivam 
> > > > ---
> > > >  drivers/pci/controller/dwc/pcie-designware-ep.c | 111 
> > > > ++--
> > > >  drivers/pci/controller/dwc/pcie-designware.h|   5 ++
> > > >  2 files changed, 72 insertions(+), 44 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > index 278bdc9b2269..fed4c2936c78 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > @@ -14,14 +14,6 @@
> > > >  #include 
> > > >  #include 
> > > >  
> > > > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> > > > -{
> > > > -   struct pci_epc *epc = ep->epc;
> > > > -
> > > > -   pci_epc_linkup(epc);
> > > > -}
> > > > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup);
> > > > -
> > > 
> > > No sure why git remove this block and add these back.
> > > 
> > 
> > Because, we are adding dw_pcie_ep_linkdown() API way below and it makes 
> > sense to
> > move this API also to keep it ordered. Maybe I should've described it in 
> > commit
> > message.
> > 
> > - Mani
> > 
> > > 
> > > >  void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep)
> > > >  {
> > > > struct pci_epc *epc = ep->epc;
> > > > @@ -603,19 +595,56 @@ static unsigned int 
> > > > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap)
> > > > return 0;
> > > >  }
> > > >  
> > > > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci)
> > > > +{
> > > > +   unsigned int offset, ptm_cap_base;
> > > > +   unsigned int nbars;
> > > > +   u32 reg, i;
> > > > +
> > > > +   offset = dw_pcie_ep_find_ext_capability(pci, 
> > > > PCI_EXT_CAP_ID_REBAR);
> > > > +   ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, 
> > > > PCI_EXT_CAP_ID_PTM);
> > > > +
> > > > +   dw_pcie_dbi_ro_wr_en(pci);
> > > > +
> > > > +   if (offset) {
> > > > +   reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL);
> > > > +   nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >>
> > > > +   PCI_REBAR_CTRL_NBAR_SHIFT;
> > > > +
> > > > +   for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL)
> > > > +   dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 
> > > > 0x0);
> > > > +   }
> > > > +
> > > > +   /*
> > > > +* PTM responder capability can be disabled only after disabling
> > > > +* PTM root capability.
> > > > +*/
> > > > +   if (ptm_cap_base) {
> > > > +   dw_pcie_dbi_ro_wr_en(pci);
> > > > +   reg = dw_pcie_readl_dbi(pci, ptm_cap_base + 
> > > > PCI_PTM_CAP);
> > > > +   reg &= ~PCI_PTM_CAP_ROOT;
> > > > +   dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, 
> > > > reg);
> > > > +
> > > > +   reg = dw_pcie_readl_dbi(pci, ptm_cap_base + 
> > > > PCI_PTM_CAP);
> > > > +   reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK);
> > > > +   dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, 
> > > > reg);
> > > > +   dw_pcie_dbi_ro_wr_dis(pci);
> > > > +   

Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Manivannan Sadhasivam
On Tue, Feb 27, 2024 at 12:34:15PM -0500, Frank Li wrote:
> On Tue, Feb 27, 2024 at 06:02:30PM +0530, Manivannan Sadhasivam wrote:
> > On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote:
> > > On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote:
> > > > Now that the API is available, let's make use of it. It also handles the
> > > > reinitialization of DWC non-sticky registers in addition to sending the
> > > > notification to EPF drivers.
> > > > 
> > > > Signed-off-by: Manivannan Sadhasivam 
> > > > ---
> > > >  drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c 
> > > > b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > > index 2fb8c15e7a91..4e45bc4bca45 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > > @@ -640,7 +640,7 @@ static irqreturn_t 
> > > > qcom_pcie_ep_global_irq_thread(int irq, void *data)
> > > > if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) {
> > > > dev_dbg(dev, "Received Linkdown event\n");
> > > > pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN;
> > > > -   pci_epc_linkdown(pci->ep.epc);
> > > > +   dw_pcie_ep_linkdown(>ep);
> > > 
> > > Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ?
> > > why need direct call dw_pcie_ep_linkdown() here?
> > > 
> > 
> > I've already justified this in the commit message. Here is the excerpt:
> > 
> > "It also handles the reinitialization of DWC non-sticky registers in 
> > addition
> > to sending the notification to EPF drivers."
> 
> API function name is too similar. It is hard to know difference from API
> naming. It'd better to know what function do from function name.
> 

In reality we cannot name a function based on everything it does. The naming is
mostly based on what is the primary motive of the API and here it is handling
Link down event. Maybe dw_pcie_ep_handle_linkdown() would be an apt one, but
that's out of scope of this series (since changing that would also require
changes to other similar APIs).

- Mani

> Frank
> > 
> > - Mani
> > 
> > -- 
> > மணிவண்ணன் சதாசிவம்

-- 
மணிவண்ணன் சதாசிவம்


Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers

2024-02-27 Thread Manivannan Sadhasivam
On Tue, Feb 27, 2024 at 12:28:41PM -0500, Frank Li wrote:
> On Tue, Feb 27, 2024 at 05:51:41PM +0530, Manivannan Sadhasivam wrote:
> > On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote:
> > > On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote:
> > > > Currently, dw_pcie_ep_init_registers() API is directly called by the 
> > > > glue
> > > > drivers requiring active refclk from host. But for the other drivers, 
> > > > it is
> > > > getting called implicitly by dw_pcie_ep_init(). This is due to the fact
> > > > that this API initializes DWC EP specific registers and that requires an
> > > > active refclk (either from host or generated locally by endpoint itsef).
> > > > 
> > > > But, this causes a discrepancy among the glue drivers. So to avoid this
> > > > confusion, let's call this API directly from all glue drivers 
> > > > irrespective
> > > > of refclk dependency. Only difference here is that the drivers requiring
> > > > refclk from host will call this API only after the refclk is received 
> > > > and
> > > > other drivers without refclk dependency will call this API right after
> > > > dw_pcie_ep_init().
> > > > 
> > > > This change will also allow us to remove the "core_init_notifier" flag 
> > > > in
> > > > the later commits.
> > > > 
> > > > Signed-off-by: Manivannan Sadhasivam 
> > > > ---
> > > >  drivers/pci/controller/dwc/pci-dra7xx.c   |  7 +++
> > > >  drivers/pci/controller/dwc/pci-imx6.c |  8 
> > > >  drivers/pci/controller/dwc/pci-keystone.c |  9 +
> > > >  drivers/pci/controller/dwc/pci-layerscape-ep.c|  7 +++
> > > >  drivers/pci/controller/dwc/pcie-designware-ep.c   | 22 
> > > > --
> > > >  drivers/pci/controller/dwc/pcie-designware-plat.c |  9 +
> > > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 12 +++-
> > > >  drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 -
> > > >  8 files changed, 63 insertions(+), 24 deletions(-)
> > 
> > [...]
> > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > index ed1f2afd830a..278bdc9b2269 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > > > struct device *dev = pci->dev;
> > > > struct platform_device *pdev = to_platform_device(dev);
> > > > struct device_node *np = dev->of_node;
> > > > -   const struct pci_epc_features *epc_features;
> > > >  
> > > > INIT_LIST_HEAD(>func_list);
> > > >  
> > > > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > > > goto err_exit_epc_mem;
> > > > }
> > > >  
> > > > -   if (ep->ops->get_features) {
> > > > -   epc_features = ep->ops->get_features(ep);
> > > > -   if (epc_features->core_init_notifier)
> > > > -   return 0;
> > > > -   }
> > > 
> > > why remove this check?
> > > 
> > 
> > There is no point in keeping this check since we are removing the call to
> > dw_pcie_ep_init_registers() below. But I should've described this change in 
> > the
> > commit message.
> 
> Sperated patch will be helpful. This clean up does not related with other
> change.
> 

Well this is not a generic cleanup that could be moved to a separate patch. Due
to the changes in this patch, the use of the flag becomes redundant. So it has
to removed here itself.

- Mani

-- 
மணிவண்ணன் சதாசிவம்


Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info

2024-02-27 Thread Christophe Leroy


Le 27/02/2024 à 19:07, Kees Cook a écrit :
> On Tue, Feb 27, 2024 at 07:02:59AM +, Christophe Leroy wrote:
>>
>>
>> Le 26/02/2024 à 20:09, Rick Edgecombe a écrit :
>>> Future changes will need to add a field to struct vm_unmapped_area_info.
>>> This would cause trouble for any archs that don't initialize the
>>> struct. Currently every user sets each field, so if new fields are
>>> added, the core code parsing the struct will see garbage in the new
>>> field.
>>>
>>> It could be possible to initialize the new field for each arch to 0, but
>>> instead simply inialize the field with a C99 struct inializing syntax.
>>
>> Why doing a full init of the struct when all fields are re-written a few
>> lines after ?
> 
> It's a nice change for robustness and makes future changes easier. It's
> not actually wasteful since the compiler will throw away all redundant
> stores.

Well, I tend to dislike default init at declaration because it often 
hides missed real init. When a field is not initialized GCC should emit 
a Warning, at least when built with W=2 which sets 
-Wmissing-field-initializers ?

> 
>> If I take the exemple of powerpc function slice_find_area_bottomup():
>>
>>  struct vm_unmapped_area_info info;
>>
>>  info.flags = 0;
>>  info.length = len;
>>  info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
>>  info.align_offset = 0;
> 
> But one cleanup that is possible from explicitly zero-initializing the
> whole structure would be dropping all the individual "= 0" assignments.
> :)
> 

Sure if we decide to go that direction all those 0 assignments void.


Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info

2024-02-27 Thread Kees Cook
On Tue, Feb 27, 2024 at 07:02:59AM +, Christophe Leroy wrote:
> 
> 
> Le 26/02/2024 à 20:09, Rick Edgecombe a écrit :
> > Future changes will need to add a field to struct vm_unmapped_area_info.
> > This would cause trouble for any archs that don't initialize the
> > struct. Currently every user sets each field, so if new fields are
> > added, the core code parsing the struct will see garbage in the new
> > field.
> > 
> > It could be possible to initialize the new field for each arch to 0, but
> > instead simply inialize the field with a C99 struct inializing syntax.
> 
> Why doing a full init of the struct when all fields are re-written a few 
> lines after ?

It's a nice change for robustness and makes future changes easier. It's
not actually wasteful since the compiler will throw away all redundant
stores.

> If I take the exemple of powerpc function slice_find_area_bottomup():
> 
>   struct vm_unmapped_area_info info;
> 
>   info.flags = 0;
>   info.length = len;
>   info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
>   info.align_offset = 0;

But one cleanup that is possible from explicitly zero-initializing the
whole structure would be dropping all the individual "= 0" assignments.
:)

-- 
Kees Cook


Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Sachin Sant



> On 27-Feb-2024, at 10:38 PM, Steven Rostedt  wrote:
> 
> On Tue, 27 Feb 2024 11:56:14 -0500
> Steven Rostedt  wrote:
> 
>> On Tue, 27 Feb 2024 22:08:18 +0530
>> Sachin Sant  wrote:
>> 
 Can you apply this, and see if it triggers and if it does, print the line
 that has the max size?
 
>>> 
>>> With this I see following trace
>>> 
>>> [   61.327138] [ cut here ]
>>> [   61.327159] MAX OUT OF RANGE 63492  
>> 
>> Well I guess there you have it ;-)
>> 
>> vsprintf() doesn't like a precision of 63492!
>> 
>> I'll look to see what the best way to deal with this is.
> 
> Does this fix it?
> 
Thank You. Yup this fixes the reported problem.

# ./ftracetest test.d/00basic/trace_marker.tc 
=== Ftrace unit tests ===
[1] Basic tests on writing to trace_marker [PASS]
[2] (instance)  Basic tests on writing to trace_marker [PASS]


# of passed:  2
# of failed:  0
# of unresolved:  0
# of untested:  0
# of unsupported:  0
# of xfailed:  0
# of undefined(test bug):  0
# 

Remaining test also completed without any issues. Based on the test results

Tested-by: Sachin Sant 

Thanks
— Sachin


Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Frank Li
On Tue, Feb 27, 2024 at 06:02:30PM +0530, Manivannan Sadhasivam wrote:
> On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote:
> > On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote:
> > > Now that the API is available, let's make use of it. It also handles the
> > > reinitialization of DWC non-sticky registers in addition to sending the
> > > notification to EPF drivers.
> > > 
> > > Signed-off-by: Manivannan Sadhasivam 
> > > ---
> > >  drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c 
> > > b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > index 2fb8c15e7a91..4e45bc4bca45 100644
> > > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > > @@ -640,7 +640,7 @@ static irqreturn_t qcom_pcie_ep_global_irq_thread(int 
> > > irq, void *data)
> > >   if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) {
> > >   dev_dbg(dev, "Received Linkdown event\n");
> > >   pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN;
> > > - pci_epc_linkdown(pci->ep.epc);
> > > + dw_pcie_ep_linkdown(>ep);
> > 
> > Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ?
> > why need direct call dw_pcie_ep_linkdown() here?
> > 
> 
> I've already justified this in the commit message. Here is the excerpt:
> 
> "It also handles the reinitialization of DWC non-sticky registers in addition
> to sending the notification to EPF drivers."

API function name is too similar. It is hard to know difference from API
naming. It'd better to know what function do from function name.

Frank
> 
> - Mani
> 
> -- 
> மணிவண்ணன் சதாசிவம்


Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers

2024-02-27 Thread Frank Li
On Tue, Feb 27, 2024 at 05:51:41PM +0530, Manivannan Sadhasivam wrote:
> On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote:
> > On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote:
> > > Currently, dw_pcie_ep_init_registers() API is directly called by the glue
> > > drivers requiring active refclk from host. But for the other drivers, it 
> > > is
> > > getting called implicitly by dw_pcie_ep_init(). This is due to the fact
> > > that this API initializes DWC EP specific registers and that requires an
> > > active refclk (either from host or generated locally by endpoint itsef).
> > > 
> > > But, this causes a discrepancy among the glue drivers. So to avoid this
> > > confusion, let's call this API directly from all glue drivers irrespective
> > > of refclk dependency. Only difference here is that the drivers requiring
> > > refclk from host will call this API only after the refclk is received and
> > > other drivers without refclk dependency will call this API right after
> > > dw_pcie_ep_init().
> > > 
> > > This change will also allow us to remove the "core_init_notifier" flag in
> > > the later commits.
> > > 
> > > Signed-off-by: Manivannan Sadhasivam 
> > > ---
> > >  drivers/pci/controller/dwc/pci-dra7xx.c   |  7 +++
> > >  drivers/pci/controller/dwc/pci-imx6.c |  8 
> > >  drivers/pci/controller/dwc/pci-keystone.c |  9 +
> > >  drivers/pci/controller/dwc/pci-layerscape-ep.c|  7 +++
> > >  drivers/pci/controller/dwc/pcie-designware-ep.c   | 22 
> > > --
> > >  drivers/pci/controller/dwc/pcie-designware-plat.c |  9 +
> > >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 12 +++-
> > >  drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 -
> > >  8 files changed, 63 insertions(+), 24 deletions(-)
> 
> [...]
> 
> > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > index ed1f2afd830a..278bdc9b2269 100644
> > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > >   struct device *dev = pci->dev;
> > >   struct platform_device *pdev = to_platform_device(dev);
> > >   struct device_node *np = dev->of_node;
> > > - const struct pci_epc_features *epc_features;
> > >  
> > >   INIT_LIST_HEAD(>func_list);
> > >  
> > > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > >   goto err_exit_epc_mem;
> > >   }
> > >  
> > > - if (ep->ops->get_features) {
> > > - epc_features = ep->ops->get_features(ep);
> > > - if (epc_features->core_init_notifier)
> > > - return 0;
> > > - }
> > 
> > why remove this check?
> > 
> 
> There is no point in keeping this check since we are removing the call to
> dw_pcie_ep_init_registers() below. But I should've described this change in 
> the
> commit message.

Sperated patch will be helpful. This clean up does not related with other
change.

Frank

> 
> - Mani
> 
> -- 
> மணிவண்ணன் சதாசிவம்


Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Frank Li
On Tue, Feb 27, 2024 at 06:00:24PM +0530, Manivannan Sadhasivam wrote:
> On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote:
> > On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote:
> > > The PCIe link can go to LINK_DOWN state in one of the following scenarios:
> > > 
> > > 1. Fundamental (PERST#)/hot/warm reset
> > > 2. Link transition from L2/L3 to L0
> > 
> > From L0 to L2/l3
> > 
> 
> I don't understand what you mean here. Link down won't happen while moving 
> from
> L0 to L2/L3, it is the opposite.

Strange, why there are not linkdown from L0 to L2/l3. But have linkdown
from L2/l3 to L0? when linkup happen? Any document show these?

Frank

> 
> > > 
> > > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose 
> > > the
> > > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize
> > > them to function properly once the link comes back again.
> > > 
> > > This is not a problem for drivers supporting PERST# IRQ, since they can
> > > reinitialize the registers in the PERST# IRQ callback. But for the drivers
> > > not supporting PERST#, there is no way they can reinitialize the registers
> > > other than relying on LINK_DOWN IRQ received when the link goes down. So
> > > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the
> > > non-sticky registers and also notifies the EPF drivers about link going
> > > down.
> > > 
> > > This API can also be used by the drivers supporting PERST# to handle the
> > > scenario (2) mentioned above.
> > > 
> > > Signed-off-by: Manivannan Sadhasivam 
> > > ---
> > >  drivers/pci/controller/dwc/pcie-designware-ep.c | 111 
> > > ++--
> > >  drivers/pci/controller/dwc/pcie-designware.h|   5 ++
> > >  2 files changed, 72 insertions(+), 44 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > index 278bdc9b2269..fed4c2936c78 100644
> > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > > @@ -14,14 +14,6 @@
> > >  #include 
> > >  #include 
> > >  
> > > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> > > -{
> > > - struct pci_epc *epc = ep->epc;
> > > -
> > > - pci_epc_linkup(epc);
> > > -}
> > > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup);
> > > -
> > 
> > No sure why git remove this block and add these back.
> > 
> 
> Because, we are adding dw_pcie_ep_linkdown() API way below and it makes sense 
> to
> move this API also to keep it ordered. Maybe I should've described it in 
> commit
> message.
> 
> - Mani
> 
> > 
> > >  void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep)
> > >  {
> > >   struct pci_epc *epc = ep->epc;
> > > @@ -603,19 +595,56 @@ static unsigned int 
> > > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap)
> > >   return 0;
> > >  }
> > >  
> > > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci)
> > > +{
> > > + unsigned int offset, ptm_cap_base;
> > > + unsigned int nbars;
> > > + u32 reg, i;
> > > +
> > > + offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
> > > + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM);
> > > +
> > > + dw_pcie_dbi_ro_wr_en(pci);
> > > +
> > > + if (offset) {
> > > + reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL);
> > > + nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >>
> > > + PCI_REBAR_CTRL_NBAR_SHIFT;
> > > +
> > > + for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL)
> > > + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0);
> > > + }
> > > +
> > > + /*
> > > +  * PTM responder capability can be disabled only after disabling
> > > +  * PTM root capability.
> > > +  */
> > > + if (ptm_cap_base) {
> > > + dw_pcie_dbi_ro_wr_en(pci);
> > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
> > > + reg &= ~PCI_PTM_CAP_ROOT;
> > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
> > > +
> > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
> > > + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK);
> > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
> > > + dw_pcie_dbi_ro_wr_dis(pci);
> > > + }
> > > +
> > > + dw_pcie_setup(pci);
> > > + dw_pcie_dbi_ro_wr_dis(pci);
> > > +}
> > > +
> > >  int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep)
> > >  {
> > >   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > >   struct dw_pcie_ep_func *ep_func;
> > >   struct device *dev = pci->dev;
> > >   struct pci_epc *epc = ep->epc;
> > > - unsigned int offset, ptm_cap_base;
> > > - unsigned int nbars;
> > >   u8 hdr_type;
> > >   u8 func_no;
> > > - int i, ret;
> > >   void *addr;
> > > - u32 reg;
> > > + int ret;
> > >  
> > >   hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) &
> > >  PCI_HEADER_TYPE_MASK;
> > > @@ -678,38 +707,7 

Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Steven Rostedt
On Tue, 27 Feb 2024 11:56:14 -0500
Steven Rostedt  wrote:

> On Tue, 27 Feb 2024 22:08:18 +0530
> Sachin Sant  wrote:
> 
> > > Can you apply this, and see if it triggers and if it does, print the line
> > > that has the max size?
> > > 
> > 
> > With this I see following trace
> > 
> > [   61.327138] [ cut here ]
> > [   61.327159] MAX OUT OF RANGE 63492  
> 
> Well I guess there you have it ;-)
> 
> vsprintf() doesn't like a precision of 63492!
> 
> I'll look to see what the best way to deal with this is.

Does this fix it?

-- Steve

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ff0b0a999171..e0840b94f1a2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6882,7 +6882,9 @@ tracing_mark_write(struct file *filp, const char __user 
*ubuf,
 /* Used in tracing_mark_raw_write() as well */
 #define FAULTED_STR ""
 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted 
for */
-
+#ifndef SHORT_MAX
+#define SHORT_MAX  ((1<<15) - 1)
+#endif
if (tracing_disabled)
return -EINVAL;
 
@@ -6900,6 +6902,16 @@ tracing_mark_write(struct file *filp, const char __user 
*ubuf,
if (cnt < FAULTED_SIZE)
size += FAULTED_SIZE - cnt;
 
+   /*
+* trace_print_print() uses vsprintf() to determine the size via
+* the precision format "%*.s" which can not be greater than
+* a signed short.
+*/
+   if (size > SHORT_MAX) {
+   cnt -= size - SHORT_MAX;
+   goto again;
+   }
+
if (size > TRACE_SEQ_BUFFER_SIZE) {
cnt -= size - TRACE_SEQ_BUFFER_SIZE;
goto again;



Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Steven Rostedt
On Tue, 27 Feb 2024 22:08:18 +0530
Sachin Sant  wrote:

> > Can you apply this, and see if it triggers and if it does, print the line
> > that has the max size?
> >   
> 
> With this I see following trace
> 
> [   61.327138] [ cut here ]
> [   61.327159] MAX OUT OF RANGE 63492

Well I guess there you have it ;-)

vsprintf() doesn't like a precision of 63492!

I'll look to see what the best way to deal with this is.

-- Steve


Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Sachin Sant



> On 27-Feb-2024, at 9:54 PM, Steven Rostedt  wrote:
> 
> On Tue, 27 Feb 2024 21:38:57 +0530
> Sachin Sant  wrote:
> 
>> This warning was not triggered.
> 
> Interesting.
> 
>> 
>> I have attached .config
> 
> This is what I was looking for:
> 
>> # CONFIG_PPC_4K_PAGES is not set
>> CONFIG_PPC_64K_PAGES=y
>> CONFIG_PAGE_SIZE_64KB=y
>> CONFIG_PPC_PAGE_SHIFT=16
> 
> So the pages are 64K in size. I wonder if this is causing an issue.
> 
> Can you apply this, and see if it triggers and if it does, print the line
> that has the max size?
> 

With this I see following trace

[   61.327138] [ cut here ]
[   61.327159] MAX OUT OF RANGE 63492
[   61.327167] WARNING: CPU: 1 PID: 5180 at kernel/trace/trace_output.c:1592 
trace_print_print+0xf4/0x114
[   61.327178] Modules linked in: rpadlpar_io(E) rpaphp(E) xsk_diag(E) 
nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) 
nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) 
nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) bonding(E) tls(E) 
rfkill(E) ip_set(E) nf_tables(E) nfnetlink(E) sunrpc(E) binfmt_misc(E) 
pseries_rng(E) aes_gcm_p10_crypto(E) drm(E) drm_panel_orientation_quirks(E) 
xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) t10_pi(E) crc64_rocksoft_generic(E) 
cdrom(E) crc64_rocksoft(E) crc64(E) sg(E) ibmvscsi(E) scsi_transport_srp(E) 
ibmveth(E) vmx_crypto(E) fuse(E)
[   61.327232] CPU: 1 PID: 5180 Comm: awk Kdump: loaded Tainted: GE 
 6.8.0-rc5-00329-gab0a97cffa0b-dirty #37
[   61.327238] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf06 
of:IBM,FW1060.00 (NH1060_018) hv:phyp pSeries
[   61.327243] NIP:  c032eb7c LR: c032eb78 CTR: 
[   61.327247] REGS: c000505cf730 TRAP: 0700   Tainted: GE  
 (6.8.0-rc5-00329-gab0a97cffa0b-dirty)
[   61.327253] MSR:  82029033   CR: 48008202  
XER: 0005
[   61.327263] CFAR: c0161e7c IRQMASK: 0 
[   61.327263] GPR00: c032eb78 c000505cf9d0 c1502700 
0016 
[   61.327263] GPR04:  c000505cf7e0 c000505cf7d8 
0003bd8c 
[   61.327263] GPR08: 0027 c003bfa58110 0001 
0001 
[   61.327263] GPR12:  c003b300  
 
[   61.327263] GPR16:    
c0009008 
[   61.327263] GPR20: 064c 00400cc0 c0005e7377c0 
c2a6ed98 
[   61.327263] GPR24: c000900c c000900a0190 c9d50008 
03b54601 
[   61.327263] GPR28: c2a711a8 c000900c f804 
0002 
[   61.327307] NIP [c032eb7c] trace_print_print+0xf4/0x114
[   61.327312] LR [c032eb78] trace_print_print+0xf0/0x114
[   61.327317] Call Trace:
[   61.327319] [c000505cf9d0] [c032eb78] 
trace_print_print+0xf0/0x114 (unreliable)
[   61.327325] [c000505cfa50] [c031832c] print_trace_fmt+0x13c/0x26c
[   61.327331] [c000505cfb00] [c0328488] s_show+0x5c/0x2cc
[   61.327335] [c000505cfb90] [c0608e88] seq_read_iter+0x500/0x69c
[   61.327342] [c000505cfc70] [c0609128] seq_read+0x104/0x15c
[   61.327348] [c000505cfd10] [c05bf79c] vfs_read+0xdc/0x390
[   61.327353] [c000505cfdc0] [c05c076c] ksys_read+0x84/0x144
[   61.327358] [c000505cfe10] [c0033328] 
system_call_exception+0x138/0x330
[   61.327365] [c000505cfe50] [c000d05c] 
system_call_vectored_common+0x15c/0x2ec
[   61.327372] --- interrupt: 3000 at 0x7fff97933c74
[   61.327376] NIP:  7fff97933c74 LR: 7fff97933c74 CTR: 
[   61.327381] REGS: c000505cfe80 TRAP: 3000   Tainted: GE  
 (6.8.0-rc5-00329-gab0a97cffa0b-dirty)
[   61.327386] MSR:  8000f033   CR: 
44008408  XER: 
[   61.327395] IRQMASK: 0 
[   61.327395] GPR00: 0003 7fffe9723220 000106787e00 
0003 
[   61.327395] GPR04: 00012f0e8150 0001 fff5 
0012f0e5 
[   61.327395] GPR08: 0001   
 
[   61.327395] GPR12:  7fff97f4c940 00012f0c5a40 
7fffe97236d8 
[   61.327395] GPR16: 00012f0e3320 00012f0d3a28 00012f0e30f0 
0001 
[   61.327395] GPR20: 8001 0001 05d6 
7fffe9723447 
[   61.327395] GPR24: 7fffe9723448 7fffe9723448 7fffe97233a0 
 
[   61.327395] GPR28: 0001066e466c  7fffe9723398 
00012f0e8030 
[   61.327437] NIP [7fff97933c74] 0x7fff97933c74
[   61.327440] LR [7fff97933c74] 0x7fff97933c74
[   61.327443] --- interrupt: 3000
[   61.327445] Code: 2c09 4082ffd4 7c0802a6 3c62ffe4 3921 3d420160 
7fc407b4 38635608 992a493c f8010090 4be33281 6000 <0fe0> e8010090 

Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Steven Rostedt
On Tue, 27 Feb 2024 21:38:57 +0530
Sachin Sant  wrote:

> This warning was not triggered.

Interesting.

> 
> I have attached .config

This is what I was looking for:

> # CONFIG_PPC_4K_PAGES is not set
> CONFIG_PPC_64K_PAGES=y
> CONFIG_PAGE_SIZE_64KB=y
> CONFIG_PPC_PAGE_SHIFT=16

So the pages are 64K in size. I wonder if this is causing an issue.

Can you apply this, and see if it triggers and if it does, print the line
that has the max size?

-- Steve

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..43e270bf8d78 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1589,6 +1589,9 @@ static enum print_line_t trace_print_print(struct 
trace_iterator *iter,
struct trace_seq *s = >seq;
int max = iter->ent_size - offsetof(struct print_entry, buf);
 
+   if (WARN_ONCE(max < 0 || max > 2000, "MAX OUT OF RANGE %d", max))
+   return TRACE_TYPE_UNHANDLED;
+
trace_assign_type(field, iter->ent);
 
seq_print_ip_sym(s, field->ip, flags);


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 16:44, Christophe Leroy wrote:
> Le 27/02/2024 à 16:40, Arnd Bergmann a écrit :
>> On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote:
>
>
> For 256K pages, powerpc has the following help. I think you should have 
> it too:
>
> The kernel will only be able to run applications that have been
> compiled with '-zmax-page-size' set to 256K (the default is 64K) using
> binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
> definition from 0x1 to 0x4 in older versions.

I don't think we need to mention pre-2.18 binutils any more, but the
rest seems useful, changed the text now to

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KiB pages have little practical value due to their extreme
  memory usage.  The kernel will only be able to run applications
  that have been compiled with '-zmax-page-size' set to 256KiB
  (the default is 64KiB or 4KiB on most architectures).

  Arnd


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Christophe Leroy


Le 27/02/2024 à 16:40, Arnd Bergmann a écrit :
> On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote:
>> On 2024-02-26 10:14 AM, Arnd Bergmann wrote:
>>>   
>>> +config HAVE_PAGE_SIZE_4KB
>>> +   bool
>>> +
>>> +config HAVE_PAGE_SIZE_8KB
>>> +   bool
>>> +
>>> +config HAVE_PAGE_SIZE_16KB
>>> +   bool
>>> +
>>> +config HAVE_PAGE_SIZE_32KB
>>> +   bool
>>> +
>>> +config HAVE_PAGE_SIZE_64KB
>>> +   bool
>>> +
>>> +config HAVE_PAGE_SIZE_256KB
>>> +   bool
>>> +
>>> +choice
>>> +   prompt "MMU page size"
>>
>> Should this have some generic help text (at least a warning about
>> compatibility)?
> 
> Good point. I've added some of this now, based on the mips
> text with some generalizations for other architectures:
> 
> config PAGE_SIZE_4KB
>  bool "4KiB pages"
>  depends on HAVE_PAGE_SIZE_4KB
>  help
>This option select the standard 4KiB Linux page size and the only
>available option on many architectures. Using 4KiB page size will
>minimize memory consumption and is therefore recommended for low
>memory systems.
>Some software that is written for x86 systems makes incorrect
>assumptions about the page size and only runs on 4KiB pages.
> 
> config PAGE_SIZE_8KB
>  bool "8KiB pages"
>  depends on HAVE_PAGE_SIZE_8KB
>  help
>This option is the only supported page size on a few older
>processors, and can be slightly faster than 4KiB pages.
> 
> config PAGE_SIZE_16KB
>  bool "16KiB pages"
>  depends on HAVE_PAGE_SIZE_16KB
>  help
>This option is usually a good compromise between memory
>consumption and performance for typical desktop and server
>workloads, often saving a level of page table lookups compared
>to 4KB pages as well as reducing TLB pressure and overhead of
>per-page operations in the kernel at the expense of a larger
>page cache.
> 
> config PAGE_SIZE_32KB
>  bool "32KiB pages"
>  depends on HAVE_PAGE_SIZE_32KB
>Using 32KiB page size will result in slightly higher performance
>kernel at the price of higher memory consumption compared to
>16KiB pages.  This option is available only on cnMIPS cores.
>Note that you will need a suitable Linux distribution to
>support this.
> 
> config PAGE_SIZE_64KB
>  bool "64KiB pages"
>  depends on HAVE_PAGE_SIZE_64KB
>Using 64KiB page size will result in slightly higher performance
>kernel at the price of much higher memory consumption compared to
>4KiB or 16KiB pages.
>This is not suitable for general-purpose workloads but the
>better performance may be worth the cost for certain types of
>supercomputing or database applications that work mostly with
>large in-memory data rather than small files.
> 
> config PAGE_SIZE_256KB
>  bool "256KiB pages"
>  depends on HAVE_PAGE_SIZE_256KB
>  help
>256KB pages have little practical value due to their extreme
>memory usage.


For 256K pages, powerpc has the following help. I think you should have 
it too:

  The kernel will only be able to run applications that have been
  compiled with '-zmax-page-size' set to 256K (the default is 64K) using
  binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
  definition from 0x1 to 0x4 in older versions.


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 09:45, Geert Uytterhoeven wrote:
>
>> +config PAGE_SIZE_4KB
>> +   bool "4KB pages"
>
> Now you got rid of the 4000-byte ("4kB") pages and friends, please
> do not replace these by Kelvin-bytes, and use the official binary
> prefixes => "4 KiB".
>

Done, thanks.

Arnd


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Mon, Feb 26, 2024, at 20:02, Christophe Leroy wrote:
> Le 26/02/2024 à 17:14, Arnd Bergmann a écrit :
>> From: Arnd Bergmann 
>
> That's a nice re-factor.
>
> The only drawback I see is that we are loosing several interesting 
> arch-specific comments/help text. Don't know if there could be an easy 
> way to keep them.

This is what I have now, trying to write it as generic as
possible while still giving useful advice:

config PAGE_SIZE_4KB
bool "4KiB pages"
depends on HAVE_PAGE_SIZE_4KB
help
  This option select the standard 4KiB Linux page size and the only
  available option on many architectures. Using 4KiB page size will
  minimize memory consumption and is therefore recommended for low
  memory systems.
  Some software that is written for x86 systems makes incorrect
  assumptions about the page size and only runs on 4KiB pages.

config PAGE_SIZE_8KB
bool "8KiB pages"
depends on HAVE_PAGE_SIZE_8KB
help
  This option is the only supported page size on a few older
  processors, and can be slightly faster than 4KiB pages.

config PAGE_SIZE_16KB
bool "16KiB pages"
depends on HAVE_PAGE_SIZE_16KB
help
  This option is usually a good compromise between memory
  consumption and performance for typical desktop and server
  workloads, often saving a level of page table lookups compared
  to 4KB pages as well as reducing TLB pressure and overhead of
  per-page operations in the kernel at the expense of a larger
  page cache.

config PAGE_SIZE_32KB
bool "32KiB pages"
depends on HAVE_PAGE_SIZE_32KB
  Using 32KiB page size will result in slightly higher performance
  kernel at the price of higher memory consumption compared to
  16KiB pages.  This option is available only on cnMIPS cores.
  Note that you will need a suitable Linux distribution to
  support this.

config PAGE_SIZE_64KB
bool "64KiB pages"
depends on HAVE_PAGE_SIZE_64KB
  Using 64KiB page size will result in slightly higher performance
  kernel at the price of much higher memory consumption compared to
  4KiB or 16KiB pages.
  This is not suitable for general-purpose workloads but the
  better performance may be worth the cost for certain types of
  supercomputing or database applications that work mostly with
  large in-memory data rather than small files.

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KB pages have little practical value due to their extreme
  memory usage.

Let me know if you think some of this should be adapted further.

>>   
>> +#define PAGE_SHIFT CONFIG_PAGE_SHIFT
>>   #define PAGE_SIZE  (1UL << PAGE_SHIFT)
>>   #define PAGE_MASK  (~((1 << PAGE_SHIFT) - 1))
>>   
>
> Could we move PAGE_SIZE and PAGE_MASK in a generic/core header instead 
> of having it duplicated for each arch ?

Yes, but I'm leaving this for a follow-up series, since I had
to stop somewhere and there is always room for cleanup up headers
further ;-)

  Arnd


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Arnd Bergmann
On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote:
> On 2024-02-26 10:14 AM, Arnd Bergmann wrote:
>>  
>> +config HAVE_PAGE_SIZE_4KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_8KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_16KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_32KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_64KB
>> +bool
>> +
>> +config HAVE_PAGE_SIZE_256KB
>> +bool
>> +
>> +choice
>> +prompt "MMU page size"
>
> Should this have some generic help text (at least a warning about 
> compatibility)?

Good point. I've added some of this now, based on the mips
text with some generalizations for other architectures:

config PAGE_SIZE_4KB
bool "4KiB pages" 
depends on HAVE_PAGE_SIZE_4KB
help 
  This option select the standard 4KiB Linux page size and the only
  available option on many architectures. Using 4KiB page size will
  minimize memory consumption and is therefore recommended for low
  memory systems.
  Some software that is written for x86 systems makes incorrect
  assumptions about the page size and only runs on 4KiB pages.

config PAGE_SIZE_8KB
bool "8KiB pages"
depends on HAVE_PAGE_SIZE_8KB
help
  This option is the only supported page size on a few older
  processors, and can be slightly faster than 4KiB pages.

config PAGE_SIZE_16KB
bool "16KiB pages"
depends on HAVE_PAGE_SIZE_16KB
help 
  This option is usually a good compromise between memory
  consumption and performance for typical desktop and server
  workloads, often saving a level of page table lookups compared
  to 4KB pages as well as reducing TLB pressure and overhead of
  per-page operations in the kernel at the expense of a larger
  page cache. 

config PAGE_SIZE_32KB
bool "32KiB pages"
depends on HAVE_PAGE_SIZE_32KB
  Using 32KiB page size will result in slightly higher performance
  kernel at the price of higher memory consumption compared to
  16KiB pages.  This option is available only on cnMIPS cores.
  Note that you will need a suitable Linux distribution to
  support this.

config PAGE_SIZE_64KB
bool "64KiB pages"
depends on HAVE_PAGE_SIZE_64KB
  Using 64KiB page size will result in slightly higher performance
  kernel at the price of much higher memory consumption compared to
  4KiB or 16KiB pages.
  This is not suitable for general-purpose workloads but the
  better performance may be worth the cost for certain types of
  supercomputing or database applications that work mostly with
  large in-memory data rather than small files.

config PAGE_SIZE_256KB
bool "256KiB pages"
depends on HAVE_PAGE_SIZE_256KB
help
  256KB pages have little practical value due to their extreme
  memory usage.

>> diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
>> index a880ee067d2e..aac46ee1a000 100644
>> --- a/arch/hexagon/Kconfig
>> +++ b/arch/hexagon/Kconfig
>> @@ -8,6 +8,11 @@ config HEXAGON
>>  select ARCH_HAS_SYNC_DMA_FOR_DEVICE
>>  select ARCH_NO_PREEMPT
>>  select DMA_GLOBAL_POOL
>> +select FRAME_POINTER
>
> Looks like a paste error.
>

Fixed, thanks! I think that happened during a rebase.

>>  #ifdef CONFIG_PAGE_SIZE_1MB
>> -#define PAGE_SHIFT 20
>>  #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB
>>  #endif
>
> The corresponding Kconfig option does not exist (and did not exist before this
> patch).

Yes, I noticed that as well. It's clearly harmless.

 Arnd


Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-27 Thread Thomas Gleixner
On Tue, Feb 27 2024 at 19:20, Bitao Hu wrote:
> On 2024/2/27 17:26, Thomas Gleixner wrote:
>> 
>> and then let kstat_irqs() and show_interrupts() use it. See?
>
> I have a concern. kstat_irqs() uses for_each_possible_cpu() for
> summation. However, show_interrupts() uses for_each_online_cpu(),
> which means it only outputs interrupt statistics for online cpus.
> If we use for_each_possible_cpu() in show_interrupts() to calculate
> 'any_count', there could be a problem with the following scenario:
> If an interrupt has a count of zero on online cpus but a non-zero
> count on possible cpus, then 'any_count' would not be zero, and the
> statistics for that interrupt would be output, which is not the
> desired behavior for show_interrupts(). Therefore, I think it's not
> good to have kstat_irqs() and show_interrupts() both use the same
> logic. What do you think?

Good point. But you simply can have

unsigned int kstat_irq_desc(struct irq_desc *desc, const struct cpumask *mask)

and hand in the appropriate cpumask, which still shares the code, no?

Thanks,

tglx


[Bug 207129] PowerMac G4 DP (5.6.2 debug kernel + inline KASAN) freezes shortly after booting with "do_IRQ: stack overflow: 1760"

2024-02-27 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=207129

--- Comment #10 from Christophe Leroy (christophe.le...@csgroup.eu) ---
I built a kernel with your .config, the problem is a size problem.

PPC32 kernels are not designed to be that big.

Extract from generated System.map:
  c2394000 D _etext
  c280 T _sinittext
  c2bf5000 B _end

You need to keep the size of the kernel below 32Mbytes, or a deep work is
required to enable the kernel to perform far jumps before the kernel is
relocated.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info

2024-02-27 Thread Edgecombe, Rick P
On Tue, 2024-02-27 at 07:02 +, Christophe Leroy wrote:
> > It could be possible to initialize the new field for each arch to
> > 0, but
> > instead simply inialize the field with a C99 struct inializing
> > syntax.
> 
> Why doing a full init of the struct when all fields are re-written a
> few 
> lines after ?
> 
> If I take the exemple of powerpc function slice_find_area_bottomup():
> 
> struct vm_unmapped_area_info info;
> 
> info.flags = 0;
> info.length = len;
> info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
> info.align_offset = 0;
> 
> For me it looks better to just add:
> 
> info.new_field = 0; /* or whatever value it needs to have */

Hi,

Thanks for taking a look. Yes, I guess that should have some
justification. I was thinking of two reasons:
1. No future additions of optional parameters would need to make tree
wide changes like this.
2. The change is easier to review and get correct because the necessary
context is within a single line. For example, in that function some of
members are set within a while loop. The place you pointed seems to be
the correct one, but a diff that had the new field set after:
   info.high_limit = addr;
...would look correct too, but not be.

What is the concern with C99 initialization? FWIW, the full series also
removes an indirect branch, and probably is a net win for performance
in this path.



Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Christophe Leroy


Le 27/02/2024 à 15:00, Uwe Kleine-König a écrit :
> On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote:
>>
>>
>> Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit :
>>> Hello Christophe,
>>>
>>> On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote:
 Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
> recently the spi-ppc4xx.c driver suffered from build errors and warnings
> that were undetected for longer than I expected. I think it would be
> very beneficial if this driver was enabled in (at least) a powerpc
> allmodconfig build.
>
> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
> only defined for 4xx (as these select PPC_DCR_NATIVE).
>
> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
> too. I tried and failed. The best I came up without extensive doc
> reading is:
>
> diff --git a/arch/powerpc/include/asm/dcr-native.h 
> b/arch/powerpc/include/asm/dcr-native.h
> index a92059964579..159ab7abfe46 100644
> --- a/arch/powerpc/include/asm/dcr-native.h
> +++ b/arch/powerpc/include/asm/dcr-native.h
> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
> base_data, int reg,
>   unsigned int val;
> 
>   spin_lock_irqsave(_ind_lock, flags);
> - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
> - mtdcrx(base_addr, reg);
> - val = (mfdcrx(base_data) & ~clr) | set;
> - mtdcrx(base_data, val);
> - } else {
> - __mtdcr(base_addr, reg);
> - val = (__mfdcr(base_data) & ~clr) | set;
> - __mtdcr(base_data, val);
> - }
> +
> + mtdcr(base_addr, reg);
> + val = (mfdcr(base_data) & ~clr) | set;
> + mtdcr(base_data, val);
> +
>   spin_unlock_irqrestore(_ind_lock, flags);
> }
> 
> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> index bc7021da2fe9..9a0a5e8c70c8 100644
> --- a/drivers/spi/Kconfig
> +++ b/drivers/spi/Kconfig
> @@ -810,7 +810,8 @@ config SPI_PL022
> 
> config SPI_PPC4xx
>   tristate "PPC4xx SPI Controller"
> - depends on PPC32 && 4xx
> + depends on 4xx || COMPILE_TEST
> + depends on PPC32 || PPC64
>   select SPI_BITBANG
>   help
> This selects a driver for the PPC4xx SPI Controller.
>
> While this is a step in the right direction (I think) it's not enough to
> make the driver build (but maybe make it easier to define
> dcri_clrset()?)
>
> Could someone with more powerpc knowledge jump in and help (for the
> benefit of better compile coverage of the spi driver and so less
> breakage)? (If you do so based on my changes above, you don't need to
> credit me for my effort, claim it as your's. I'm happy enough if the
> situation improves.)

 What about this ?

 diff --git a/arch/powerpc/include/asm/dcr-mmio.h
 b/arch/powerpc/include/asm/dcr-mmio.h
 index fc6d93ef4a13..38b515afbffc 100644
 --- a/arch/powerpc/include/asm/dcr-mmio.h
 +++ b/arch/powerpc/include/asm/dcr-mmio.h
 @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host,
out_be32(host.token + ((host.base + dcr_n) * host.stride), 
 value);
 }

 +static inline void __dcri_clrset(int base_addr, int base_data, int reg,
 +   unsigned clr, unsigned set)
 +{
 +}
 +
>>>
>>> The downside of that one is that if we find a matching device where
>>> dcr-mmio is used, the driver claims to work but silently fails. Is this
>>> good enough?
>>
>> I don't know the details of DCR, but it looks like this spi-ppc4xx
>> driver is really specific to 4xx, which is PPC32.
>>
>> Do you really need/want it to be built with allmodconfig ?
>>
>> Maybe it would be easier to have it work with ppc32_allmodconfig ?
>>
>> Or even easier with ppc44x_defconfig ?
> 
> The reason I'd like to see it in allmodconfig is that testing
> allmodconfig on several archs is the check I do for my patch series.

I think for powerpc you should really check ppc32_allmodconfig in 
addition to allmodconfig

> Also I assume I'm not the only one relying on allmodconfig for this
> purpose. So in my eyes every driver that is built there is a net win.

When I look into drivers/net/ethernet/ibm/emac/core.c it is not much 
different, they #ifdef out the call to dcri_clrset() when 
CONFIG_PPC_DCR_NATIVE is not defined.

A possible way is to do:

+static inline void __dcri_clrset(int base_addr, int base_data, int reg,
+unsigned clr, unsigned set)
+{
+   BUILD_BUG_ON(!IS_ENABLED(CONFIG_COMPILE_TEST);
+}


Christophe


Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 12:12, Geert Uytterhoeven wrote:
> On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann  wrote:
>> On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote:
>> I was a bit unsure about how to best do this since there
>> is not really a need for a fixed page size on nommu kernels,
>> whereas the three MMU configs clearly tie the page size to
>> the MMU rather than the platform.
>>
>> There should be no reason for coldfire to have a different
>> page size from dragonball if neither of them actually uses
>> hardware pages, so one of them could be changed later.
>
> Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept
> of pages is used all over the place in Linux.
>
> I'm mostly worried about some Coldfire code relying on the actual value
> of PAGE_SIZE in some other context. e.g. for configuring non-cacheable
> regions.

Right, any change here would have to be carefully tested. I would
expect that a 4K page size would reduce memory consumption even on
NOMMU systems that should have the same tradeoffs for representing
files in the page cache and in mem_map[].

> And does this impact running nommu binaries on a system with MMU?
> I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they
> still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3),
> or are there some subtleties to take into account?

As far as I understand, binaries have to be built and linked for
the largest page size they can run on, so running them on a kernel
with smaller page size usually works.

One notable exception is sys_mmap2(), which on most architectures
takes units of 4KiB but on m68k is actually written to take
PAGE_SIZE units. As Al pointed out in f8b7256096a2 ("Unify
sys_mmap*"), it has always been wrong on sun3, presumably
because users of that predate modern glibc. Running coldfire
nommu binaries on coldfire mmu kernels would run into the same
bug if either of them changes PAGE_SIZE. If you can run
coldfire nommu binaries on classic m68k, that is already
broken in the same way.

  Arnd


Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Steven Rostedt
On Tue, 27 Feb 2024 15:06:18 +0530
Sachin Sant  wrote:

> I used this setup to again run bisect between 6.7.0 and 6.8-rc1.
> Bisect points to following patch 
> 
> commit 8ec90be7f15fac42992ea821be929d3b06cd0fd9
> tracing: Allow for max buffer data size trace_marker writes

Thanks, that was what I was looking for. Hmm, can you send me your config.
One thing that powerpc architecture has different is that the page size can
be much larger, and the calculations are based off of that.

Can you try this patch and see if it triggers?

Thanks,

-- Steve

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..c3dba537f342 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1589,6 +1589,9 @@ static enum print_line_t trace_print_print(struct 
trace_iterator *iter,
struct trace_seq *s = >seq;
int max = iter->ent_size - offsetof(struct print_entry, buf);
 
+   if (WARN_ON_ONCE(max < 0))
+   return TRACE_TYPE_UNHANDLED;
+
trace_assign_type(field, iter->ent);
 
seq_print_ip_sym(s, field->ip, flags);


Re: [PATCH 2/4] arch: simplify architecture specific page size configuration

2024-02-27 Thread Helge Deller

On 2/26/24 17:14, Arnd Bergmann wrote:

From: Arnd Bergmann 

arc, arm64, parisc and powerpc all have their own Kconfig symbols
in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these
so the common symbols are the ones that are actually used, while
leaving the arhcitecture specific ones as the user visible
place for configuring it, to avoid breaking user configs.

Signed-off-by: Arnd Bergmann 
---
  arch/arc/Kconfig  |  3 +++
  arch/arc/include/uapi/asm/page.h  |  6 ++
  arch/arm64/Kconfig| 29 +
  arch/arm64/include/asm/page-def.h |  2 +-
  arch/parisc/Kconfig   |  3 +++
  arch/parisc/include/asm/page.h| 10 +-


Acked-by: Helge Deller  # parisc

Thanks for the cleanups!

Helge



Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Uwe Kleine-König
On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote:
> 
> 
> Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit :
> > Hello Christophe,
> > 
> > On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote:
> >> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
> >>> recently the spi-ppc4xx.c driver suffered from build errors and warnings
> >>> that were undetected for longer than I expected. I think it would be
> >>> very beneficial if this driver was enabled in (at least) a powerpc
> >>> allmodconfig build.
> >>>
> >>> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
> >>> only defined for 4xx (as these select PPC_DCR_NATIVE).
> >>>
> >>> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
> >>> too. I tried and failed. The best I came up without extensive doc
> >>> reading is:
> >>>
> >>> diff --git a/arch/powerpc/include/asm/dcr-native.h 
> >>> b/arch/powerpc/include/asm/dcr-native.h
> >>> index a92059964579..159ab7abfe46 100644
> >>> --- a/arch/powerpc/include/asm/dcr-native.h
> >>> +++ b/arch/powerpc/include/asm/dcr-native.h
> >>> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
> >>> base_data, int reg,
> >>>   unsigned int val;
> >>>
> >>>   spin_lock_irqsave(_ind_lock, flags);
> >>> - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
> >>> - mtdcrx(base_addr, reg);
> >>> - val = (mfdcrx(base_data) & ~clr) | set;
> >>> - mtdcrx(base_data, val);
> >>> - } else {
> >>> - __mtdcr(base_addr, reg);
> >>> - val = (__mfdcr(base_data) & ~clr) | set;
> >>> - __mtdcr(base_data, val);
> >>> - }
> >>> +
> >>> + mtdcr(base_addr, reg);
> >>> + val = (mfdcr(base_data) & ~clr) | set;
> >>> + mtdcr(base_data, val);
> >>> +
> >>>   spin_unlock_irqrestore(_ind_lock, flags);
> >>>}
> >>>
> >>> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> >>> index bc7021da2fe9..9a0a5e8c70c8 100644
> >>> --- a/drivers/spi/Kconfig
> >>> +++ b/drivers/spi/Kconfig
> >>> @@ -810,7 +810,8 @@ config SPI_PL022
> >>>
> >>>config SPI_PPC4xx
> >>>   tristate "PPC4xx SPI Controller"
> >>> - depends on PPC32 && 4xx
> >>> + depends on 4xx || COMPILE_TEST
> >>> + depends on PPC32 || PPC64
> >>>   select SPI_BITBANG
> >>>   help
> >>> This selects a driver for the PPC4xx SPI Controller.
> >>>
> >>> While this is a step in the right direction (I think) it's not enough to
> >>> make the driver build (but maybe make it easier to define
> >>> dcri_clrset()?)
> >>>
> >>> Could someone with more powerpc knowledge jump in and help (for the
> >>> benefit of better compile coverage of the spi driver and so less
> >>> breakage)? (If you do so based on my changes above, you don't need to
> >>> credit me for my effort, claim it as your's. I'm happy enough if the
> >>> situation improves.)
> >>
> >> What about this ?
> >>
> >> diff --git a/arch/powerpc/include/asm/dcr-mmio.h
> >> b/arch/powerpc/include/asm/dcr-mmio.h
> >> index fc6d93ef4a13..38b515afbffc 100644
> >> --- a/arch/powerpc/include/asm/dcr-mmio.h
> >> +++ b/arch/powerpc/include/asm/dcr-mmio.h
> >> @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host,
> >>out_be32(host.token + ((host.base + dcr_n) * host.stride), 
> >> value);
> >>}
> >>
> >> +static inline void __dcri_clrset(int base_addr, int base_data, int reg,
> >> +   unsigned clr, unsigned set)
> >> +{
> >> +}
> >> +
> > 
> > The downside of that one is that if we find a matching device where
> > dcr-mmio is used, the driver claims to work but silently fails. Is this
> > good enough?
> 
> I don't know the details of DCR, but it looks like this spi-ppc4xx 
> driver is really specific to 4xx, which is PPC32.
> 
> Do you really need/want it to be built with allmodconfig ?
> 
> Maybe it would be easier to have it work with ppc32_allmodconfig ?
> 
> Or even easier with ppc44x_defconfig ?

The reason I'd like to see it in allmodconfig is that testing
allmodconfig on several archs is the check I do for my patch series.
Also I assume I'm not the only one relying on allmodconfig for this
purpose. So in my eyes every driver that is built there is a net win.

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | https://www.pengutronix.de/ |


signature.asc
Description: PGP signature


Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Heiko Carstens
On Mon, Feb 26, 2024 at 05:14:13PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> Most architectures only support a single hardcoded page size. In order
> to ensure that each one of these sets the corresponding Kconfig symbols,
> change over the PAGE_SHIFT definition to the common one and allow
> only the hardware page size to be selected.
> 
> Signed-off-by: Arnd Bergmann 
> ---
...
>  arch/s390/Kconfig  | 1 +
>  arch/s390/include/asm/page.h   | 2 +-
...
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index fe565f3a3a91..b61c74c10050 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -199,6 +199,7 @@ config S390
>   select HAVE_MOD_ARCH_SPECIFIC
>   select HAVE_NMI
>   select HAVE_NOP_MCOUNT
> + select HAVE_PAGE_SIZE_4KB
>   select HAVE_PCI
>   select HAVE_PERF_EVENTS
>   select HAVE_PERF_REGS
> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
> index 73b9c3bf377f..ded9548d11d9 100644
> --- a/arch/s390/include/asm/page.h
> +++ b/arch/s390/include/asm/page.h
> @@ -11,7 +11,7 @@
>  #include 
>  #include 
>  
> -#define _PAGE_SHIFT  12
> +#define _PAGE_SHIFT  CONFIG_PAGE_SHIFT

Acked-by: Heiko Carstens 


Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Christophe Leroy


Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit :
> Hello Christophe,
> 
> On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote:
>> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
>>> recently the spi-ppc4xx.c driver suffered from build errors and warnings
>>> that were undetected for longer than I expected. I think it would be
>>> very beneficial if this driver was enabled in (at least) a powerpc
>>> allmodconfig build.
>>>
>>> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
>>> only defined for 4xx (as these select PPC_DCR_NATIVE).
>>>
>>> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
>>> too. I tried and failed. The best I came up without extensive doc
>>> reading is:
>>>
>>> diff --git a/arch/powerpc/include/asm/dcr-native.h 
>>> b/arch/powerpc/include/asm/dcr-native.h
>>> index a92059964579..159ab7abfe46 100644
>>> --- a/arch/powerpc/include/asm/dcr-native.h
>>> +++ b/arch/powerpc/include/asm/dcr-native.h
>>> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
>>> base_data, int reg,
>>> unsigned int val;
>>>
>>> spin_lock_irqsave(_ind_lock, flags);
>>> -   if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
>>> -   mtdcrx(base_addr, reg);
>>> -   val = (mfdcrx(base_data) & ~clr) | set;
>>> -   mtdcrx(base_data, val);
>>> -   } else {
>>> -   __mtdcr(base_addr, reg);
>>> -   val = (__mfdcr(base_data) & ~clr) | set;
>>> -   __mtdcr(base_data, val);
>>> -   }
>>> +
>>> +   mtdcr(base_addr, reg);
>>> +   val = (mfdcr(base_data) & ~clr) | set;
>>> +   mtdcr(base_data, val);
>>> +
>>> spin_unlock_irqrestore(_ind_lock, flags);
>>>}
>>>
>>> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
>>> index bc7021da2fe9..9a0a5e8c70c8 100644
>>> --- a/drivers/spi/Kconfig
>>> +++ b/drivers/spi/Kconfig
>>> @@ -810,7 +810,8 @@ config SPI_PL022
>>>
>>>config SPI_PPC4xx
>>> tristate "PPC4xx SPI Controller"
>>> -   depends on PPC32 && 4xx
>>> +   depends on 4xx || COMPILE_TEST
>>> +   depends on PPC32 || PPC64
>>> select SPI_BITBANG
>>> help
>>>   This selects a driver for the PPC4xx SPI Controller.
>>>
>>> While this is a step in the right direction (I think) it's not enough to
>>> make the driver build (but maybe make it easier to define
>>> dcri_clrset()?)
>>>
>>> Could someone with more powerpc knowledge jump in and help (for the
>>> benefit of better compile coverage of the spi driver and so less
>>> breakage)? (If you do so based on my changes above, you don't need to
>>> credit me for my effort, claim it as your's. I'm happy enough if the
>>> situation improves.)
>>
>> What about this ?
>>
>> diff --git a/arch/powerpc/include/asm/dcr-mmio.h
>> b/arch/powerpc/include/asm/dcr-mmio.h
>> index fc6d93ef4a13..38b515afbffc 100644
>> --- a/arch/powerpc/include/asm/dcr-mmio.h
>> +++ b/arch/powerpc/include/asm/dcr-mmio.h
>> @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host,
>>  out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
>>}
>>
>> +static inline void __dcri_clrset(int base_addr, int base_data, int reg,
>> + unsigned clr, unsigned set)
>> +{
>> +}
>> +
> 
> The downside of that one is that if we find a matching device where
> dcr-mmio is used, the driver claims to work but silently fails. Is this
> good enough?

I don't know the details of DCR, but it looks like this spi-ppc4xx 
driver is really specific to 4xx, which is PPC32.

Do you really need/want it to be built with allmodconfig ?

Maybe it would be easier to have it work with ppc32_allmodconfig ?

Or even easier with ppc44x_defconfig ?

Christophe


> 
>>#endif /* __KERNEL__ */
>>#endif /* _ASM_POWERPC_DCR_MMIO_H */
>>
>> diff --git a/arch/powerpc/include/asm/dcr-native.h
>> b/arch/powerpc/include/asm/dcr-native.h
>> index a92059964579..2f6221bf5406 100644
>> --- a/arch/powerpc/include/asm/dcr-native.h
>> +++ b/arch/powerpc/include/asm/dcr-native.h
>> @@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int
>> base_data, int reg,
>>   DCRN_ ## base ## _CONFIG_DATA, \
>>   reg, data)
>>
>> -#define dcri_clrset(base, reg, clr, set)__dcri_clrset(DCRN_ ## base ##
>> _CONFIG_ADDR,\
>> -  DCRN_ ## base ## 
>> _CONFIG_DATA,\
>> -  reg, clr, set)
>> -
>>#endif /* __ASSEMBLY__ */
>>#endif /* __KERNEL__ */
>>#endif /* _ASM_POWERPC_DCR_NATIVE_H */
>> diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h
>> index 64030e3a1f30..15c123ae38a1 100644
>> --- a/arch/powerpc/include/asm/dcr.h
>> +++ b/arch/powerpc/include/asm/dcr.h
>> @@ -18,6 +18,9 @@
>>#include 
>>#endif
>>
>> +#define dcri_clrset(base, reg, clr, set)__dcri_clrset(DCRN_ ## base ##
>> 

Re: [PATCH 4/4] vdso: avoid including asm/page.h

2024-02-27 Thread Catalin Marinas
On Mon, Feb 26, 2024 at 05:14:14PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> The recent change to the vdso_data_store broke building compat VDSO
> on at least arm64 because it includes headers outside of the include/vdso/
> namespace:
> 
> In file included from arch/arm64/include/asm/lse.h:5,
>  from arch/arm64/include/asm/cmpxchg.h:14,
>  from arch/arm64/include/asm/atomic.h:16,
>  from include/linux/atomic.h:7,
>  from include/asm-generic/bitops/atomic.h:5,
>  from arch/arm64/include/asm/bitops.h:25,
>  from include/linux/bitops.h:68,
>  from arch/arm64/include/asm/memory.h:209,
>  from arch/arm64/include/asm/page.h:46,
>  from include/vdso/datapage.h:22,
>  from lib/vdso/gettimeofday.c:5,
>  from :
> arch/arm64/include/asm/atomic_ll_sc.h:298:9: error: unknown type name 'u128'
>   298 | u128 full;
> 
> Use an open-coded page size calculation based on the new CONFIG_PAGE_SHIFT
> Kconfig symbol instead.
> 
> Reported-by: Linux Kernel Functional Testing 
> Fixes: a0d2fcd62ac2 ("vdso/ARM: Make union vdso_data_store available for all 
> architectures")
> Link: 
> https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/
> Signed-off-by: Arnd Bergmann 

Acked-by: Catalin Marinas 


Re: [PATCH 2/4] arch: simplify architecture specific page size configuration

2024-02-27 Thread Catalin Marinas
On Mon, Feb 26, 2024 at 05:14:12PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> arc, arm64, parisc and powerpc all have their own Kconfig symbols
> in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these
> so the common symbols are the ones that are actually used, while
> leaving the arhcitecture specific ones as the user visible
> place for configuring it, to avoid breaking user configs.
> 
> Signed-off-by: Arnd Bergmann 

For arm64:

Acked-by: Catalin Marinas 


Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests

2024-02-27 Thread Sachin Sant
>> --- interrupt: c00
>> Code: f821ff91 2f89 409e0034 7c0802a6 3c62fff0 3921 3d420177 
>> 3863e310 992ad6db f8010080 4b209899 6000 <0fe0> e8010080 7c0803a6 
>> 2f9f 
>> ---[ end trace  ]—
>> 
>> This warning is seen while running test that was added by
>> following commit:
>> 
>> commit 3bf7009251f0f41cdd0188ab7b3879df81810703
>> tracing/selftests: Add test to test the trace_marker
> 
> This adds the user space selftest that triggered this warning, but it is
> not the cause of it. Could you run this test against kernel builds before
> this commit. Does this test cause this to trigger on older versions of the
> kernel?
> 

Running the mentioned test against an older kernel does not trigger this
warning.

# uname -r
6.7.0
#
#  ./ftracetest test.d/00basic/trace_marker.tc
=== Ftrace unit tests ===
[1] Basic tests on writing to trace_marker [PASS]
[2] (instance)  Basic tests on writing to trace_marker [PASS]


# of passed:  2
# of failed:  0
# of unresolved:  0
# of untested:  0
# of unsupported:  0
# of xfailed:  0
# of undefined(test bug):  0
#

I used this setup to again run bisect between 6.7.0 and 6.8-rc1.
Bisect points to following patch 

commit 8ec90be7f15fac42992ea821be929d3b06cd0fd9
tracing: Allow for max buffer data size trace_marker writes


— Sachin




Re: [PATCH 4/4] vdso: avoid including asm/page.h

2024-02-27 Thread Michael Ellerman
Christophe Leroy  writes:
> Le 26/02/2024 à 17:14, Arnd Bergmann a écrit :
>> From: Arnd Bergmann 
>> 
>> The recent change to the vdso_data_store broke building compat VDSO
>> on at least arm64 because it includes headers outside of the include/vdso/
>> namespace:
>
> I understand that powerpc64 also has an issue, see 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20231221120410.2226678-1-...@ellerman.id.au/

Yeah, and that patch would silently conflict with this series, which is
not ideal.

I could delay merging my patch above until after this series goes in,
mine only fixes a fairly obscure build warning.

cheers


Re: [RFC PATCH] selftest/powerpc: Add rule file to address sub-folder test fail

2024-02-27 Thread Sachin Sant



> On 25-Feb-2024, at 10:09 PM, Madhavan Srinivasan  wrote:
> 
> When running `make -C powerpc/pmu run_tests` from top level selftests
> directory, currently this error is being reported
> 
> make: Entering directory 
> '/home/maddy/linux/tools/testing/selftests/powerpc/pmu'
> Makefile:40: warning: overriding recipe for target 'emit_tests'
> ../../lib.mk:111: warning: ignoring old recipe for target 'emit_tests'
> gcc -m64count_instructions.c ../harness.c event.c lib.c ../utils.c loop.S 
>  -o /home/maddy/selftest_output//count_instructions
> In file included from count_instructions.c:13:
> event.h:12:10: fatal error: utils.h: No such file or directory
>   12 | #include "utils.h"
>  |  ^
> compilation terminated.
> 
> This is due to missing of include path in CFLAGS. That is, CFLAGS and
> GIT_VERSION macros are defined in the powerpc/ folder Makefile which
> in this case not involved.
> 
> To address the failure incase of executing specific sub-folder test directly,
> a new rule file has been addded by the patch called "include.mk" under
> selftest/powerpc/ folder and is linked to all Makefile of powerpc/pmu
> sub-folders.
> 
> Signed-off-by: Madhavan Srinivasan 
> ---

Thanks for the fix Maddy. The patch fixes the reported problem
for me.

— Sachin



Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Manivannan Sadhasivam
On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote:
> On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote:
> > Now that the API is available, let's make use of it. It also handles the
> > reinitialization of DWC non-sticky registers in addition to sending the
> > notification to EPF drivers.
> > 
> > Signed-off-by: Manivannan Sadhasivam 
> > ---
> >  drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c 
> > b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > index 2fb8c15e7a91..4e45bc4bca45 100644
> > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
> > @@ -640,7 +640,7 @@ static irqreturn_t qcom_pcie_ep_global_irq_thread(int 
> > irq, void *data)
> > if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) {
> > dev_dbg(dev, "Received Linkdown event\n");
> > pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN;
> > -   pci_epc_linkdown(pci->ep.epc);
> > +   dw_pcie_ep_linkdown(>ep);
> 
> Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ?
> why need direct call dw_pcie_ep_linkdown() here?
> 

I've already justified this in the commit message. Here is the excerpt:

"It also handles the reinitialization of DWC non-sticky registers in addition
to sending the notification to EPF drivers."

- Mani

-- 
மணிவண்ணன் சதாசிவம்


Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event

2024-02-27 Thread Manivannan Sadhasivam
On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote:
> On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote:
> > The PCIe link can go to LINK_DOWN state in one of the following scenarios:
> > 
> > 1. Fundamental (PERST#)/hot/warm reset
> > 2. Link transition from L2/L3 to L0
> 
> From L0 to L2/l3
> 

I don't understand what you mean here. Link down won't happen while moving from
L0 to L2/L3, it is the opposite.

> > 
> > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose the
> > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize
> > them to function properly once the link comes back again.
> > 
> > This is not a problem for drivers supporting PERST# IRQ, since they can
> > reinitialize the registers in the PERST# IRQ callback. But for the drivers
> > not supporting PERST#, there is no way they can reinitialize the registers
> > other than relying on LINK_DOWN IRQ received when the link goes down. So
> > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the
> > non-sticky registers and also notifies the EPF drivers about link going
> > down.
> > 
> > This API can also be used by the drivers supporting PERST# to handle the
> > scenario (2) mentioned above.
> > 
> > Signed-off-by: Manivannan Sadhasivam 
> > ---
> >  drivers/pci/controller/dwc/pcie-designware-ep.c | 111 
> > ++--
> >  drivers/pci/controller/dwc/pcie-designware.h|   5 ++
> >  2 files changed, 72 insertions(+), 44 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index 278bdc9b2269..fed4c2936c78 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -14,14 +14,6 @@
> >  #include 
> >  #include 
> >  
> > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> > -{
> > -   struct pci_epc *epc = ep->epc;
> > -
> > -   pci_epc_linkup(epc);
> > -}
> > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup);
> > -
> 
> No sure why git remove this block and add these back.
> 

Because, we are adding dw_pcie_ep_linkdown() API way below and it makes sense to
move this API also to keep it ordered. Maybe I should've described it in commit
message.

- Mani

> 
> >  void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep)
> >  {
> > struct pci_epc *epc = ep->epc;
> > @@ -603,19 +595,56 @@ static unsigned int 
> > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap)
> > return 0;
> >  }
> >  
> > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci)
> > +{
> > +   unsigned int offset, ptm_cap_base;
> > +   unsigned int nbars;
> > +   u32 reg, i;
> > +
> > +   offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
> > +   ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM);
> > +
> > +   dw_pcie_dbi_ro_wr_en(pci);
> > +
> > +   if (offset) {
> > +   reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL);
> > +   nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >>
> > +   PCI_REBAR_CTRL_NBAR_SHIFT;
> > +
> > +   for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL)
> > +   dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0);
> > +   }
> > +
> > +   /*
> > +* PTM responder capability can be disabled only after disabling
> > +* PTM root capability.
> > +*/
> > +   if (ptm_cap_base) {
> > +   dw_pcie_dbi_ro_wr_en(pci);
> > +   reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
> > +   reg &= ~PCI_PTM_CAP_ROOT;
> > +   dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
> > +
> > +   reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
> > +   reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK);
> > +   dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
> > +   dw_pcie_dbi_ro_wr_dis(pci);
> > +   }
> > +
> > +   dw_pcie_setup(pci);
> > +   dw_pcie_dbi_ro_wr_dis(pci);
> > +}
> > +
> >  int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep)
> >  {
> > struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > struct dw_pcie_ep_func *ep_func;
> > struct device *dev = pci->dev;
> > struct pci_epc *epc = ep->epc;
> > -   unsigned int offset, ptm_cap_base;
> > -   unsigned int nbars;
> > u8 hdr_type;
> > u8 func_no;
> > -   int i, ret;
> > void *addr;
> > -   u32 reg;
> > +   int ret;
> >  
> > hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) &
> >PCI_HEADER_TYPE_MASK;
> > @@ -678,38 +707,7 @@ int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep)
> > if (ep->ops->init)
> > ep->ops->init(ep);
> >  
> > -   offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
> > -   ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM);
> > -
> > -   dw_pcie_dbi_ro_wr_en(pci);
> > -
> > -   if (offset) {
> > -   reg = dw_pcie_readl_dbi(pci, 

Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers

2024-02-27 Thread Manivannan Sadhasivam
On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote:
> On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote:
> > Currently, dw_pcie_ep_init_registers() API is directly called by the glue
> > drivers requiring active refclk from host. But for the other drivers, it is
> > getting called implicitly by dw_pcie_ep_init(). This is due to the fact
> > that this API initializes DWC EP specific registers and that requires an
> > active refclk (either from host or generated locally by endpoint itsef).
> > 
> > But, this causes a discrepancy among the glue drivers. So to avoid this
> > confusion, let's call this API directly from all glue drivers irrespective
> > of refclk dependency. Only difference here is that the drivers requiring
> > refclk from host will call this API only after the refclk is received and
> > other drivers without refclk dependency will call this API right after
> > dw_pcie_ep_init().
> > 
> > This change will also allow us to remove the "core_init_notifier" flag in
> > the later commits.
> > 
> > Signed-off-by: Manivannan Sadhasivam 
> > ---
> >  drivers/pci/controller/dwc/pci-dra7xx.c   |  7 +++
> >  drivers/pci/controller/dwc/pci-imx6.c |  8 
> >  drivers/pci/controller/dwc/pci-keystone.c |  9 +
> >  drivers/pci/controller/dwc/pci-layerscape-ep.c|  7 +++
> >  drivers/pci/controller/dwc/pcie-designware-ep.c   | 22 
> > --
> >  drivers/pci/controller/dwc/pcie-designware-plat.c |  9 +
> >  drivers/pci/controller/dwc/pcie-rcar-gen4.c   | 12 +++-
> >  drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 -
> >  8 files changed, 63 insertions(+), 24 deletions(-)

[...]

> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index ed1f2afd830a..278bdc9b2269 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > struct device *dev = pci->dev;
> > struct platform_device *pdev = to_platform_device(dev);
> > struct device_node *np = dev->of_node;
> > -   const struct pci_epc_features *epc_features;
> >  
> > INIT_LIST_HEAD(>func_list);
> >  
> > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > goto err_exit_epc_mem;
> > }
> >  
> > -   if (ep->ops->get_features) {
> > -   epc_features = ep->ops->get_features(ep);
> > -   if (epc_features->core_init_notifier)
> > -   return 0;
> > -   }
> 
> why remove this check?
> 

There is no point in keeping this check since we are removing the call to
dw_pcie_ep_init_registers() below. But I should've described this change in the
commit message.

- Mani

-- 
மணிவண்ணன் சதாசிவம்


Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-27 Thread Bitao Hu

Hi,

On 2024/2/27 17:26, Thomas Gleixner wrote:

On Mon, Feb 26 2024 at 10:09, Bitao Hu wrote:

We could use the irq_desc::tot_count member to avoid the summation
loop for interrupts which are not marked as 'PER_CPU' interrupts in
'show_interrupts'. This could reduce the time overhead of reading
/proc/interrupts.


"Could" is not really a technical term. Either we do or we do not. Also
please provide context for your change and avoid the 'We'.

OK.



--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { }
  extern struct irq_desc irq_desc[NR_IRQS];
  #endif

+extern bool irq_is_nmi(struct irq_desc *desc);
+


If at all this wants to be in kernel/irq/internal.h. There is zero
reason to expose this globally.


-static bool irq_is_nmi(struct irq_desc *desc)
+bool irq_is_nmi(struct irq_desc *desc)
  {
return desc->istate & IRQS_NMI;
  }


If at all this really wants to be a static inline in internals.h, but
instead of blindly copying code this can be done smarter:

unsigned int kstat_irq_desc(struct irq_desc *desc)
{
unsigned int sum = 0;
int cpu;

if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
return data_race(desc->tot_count);

for_each_possible_cpu(cpu)
sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
return sum;
}

and then let kstat_irqs() and show_interrupts() use it. See?


I have a concern. kstat_irqs() uses for_each_possible_cpu() for
summation. However, show_interrupts() uses for_each_online_cpu(),
which means it only outputs interrupt statistics for online cpus.
If we use for_each_possible_cpu() in show_interrupts() to calculate
'any_count', there could be a problem with the following scenario:
If an interrupt has a count of zero on online cpus but a non-zero
count on possible cpus, then 'any_count' would not be zero, and the
statistics for that interrupt would be output, which is not the
desired behavior for show_interrupts(). Therefore, I think it's not
good to have kstat_irqs() and show_interrupts() both use the same
logic. What do you think?



With that a proper changelog would be:

show_interrupts() unconditionally accumulates the per CPU interrupt
statistics to determine whether an interrupt was ever raised.

This can be avoided for all interrupts which are not strictly per CPU
and not of type NMI because those interrupts provide already an
accumulated counter. The required logic is already implemented in
kstat_irqs().

Split the inner access logic out of kstat_irqs() and use it for
kstat_irqs() and show_interrupts() to avoid the accumulation loop
when possible.



Best Regards,
Bitao Hu


Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Geert Uytterhoeven
Hi Arnd,

CC Greg

On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann  wrote:
> On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote:
> >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> >> index 9dcf245c9cbf..c777a129768a 100644
> >> --- a/arch/m68k/Kconfig.cpu
> >> +++ b/arch/m68k/Kconfig.cpu
> >> @@ -30,6 +30,7 @@ config COLDFIRE
> >> select GENERIC_CSUM
> >> select GPIOLIB
> >> select HAVE_LEGACY_CLK
> >> +   select HAVE_PAGE_SIZE_8KB if !MMU
> >
> >  if you would drop the !MMU-dependency here.
> >
> >>
> >>  endchoice
> >>
> >> @@ -45,6 +46,7 @@ config M68000
> >> select GENERIC_CSUM
> >> select CPU_NO_EFFICIENT_FFS
> >> select HAVE_ARCH_HASH
> >> +   select HAVE_PAGE_SIZE_4KB
> >
> > Perhaps replace this by
> >
> > config M68KCLASSIC
> > bool "Classic M68K CPU family support"
> > select HAVE_ARCH_PFN_VALID
> >   + select HAVE_PAGE_SIZE_4KB if !MMU
> >
> > so it covers all 680x0 CPUs without MMU?
>
> I was a bit unsure about how to best do this since there
> is not really a need for a fixed page size on nommu kernels,
> whereas the three MMU configs clearly tie the page size to
> the MMU rather than the platform.
>
> There should be no reason for coldfire to have a different
> page size from dragonball if neither of them actually uses
> hardware pages, so one of them could be changed later.

Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept
of pages is used all over the place in Linux.

I'm mostly worried about some Coldfire code relying on the actual value
of PAGE_SIZE in some other context. e.g. for configuring non-cacheable
regions.

And does this impact running nommu binaries on a system with MMU?
I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they
still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3),
or are there some subtleties to take into account?

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Arnd Bergmann
On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote:
> Hi Arnd,
>> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
>> index 9dcf245c9cbf..c777a129768a 100644
>> --- a/arch/m68k/Kconfig.cpu
>> +++ b/arch/m68k/Kconfig.cpu
>> @@ -30,6 +30,7 @@ config COLDFIRE
>> select GENERIC_CSUM
>> select GPIOLIB
>> select HAVE_LEGACY_CLK
>> +   select HAVE_PAGE_SIZE_8KB if !MMU
>
>  if you would drop the !MMU-dependency here.
>
>>
>>  endchoice
>>
>> @@ -45,6 +46,7 @@ config M68000
>> select GENERIC_CSUM
>> select CPU_NO_EFFICIENT_FFS
>> select HAVE_ARCH_HASH
>> +   select HAVE_PAGE_SIZE_4KB
>
> Perhaps replace this by
>
> config M68KCLASSIC
> bool "Classic M68K CPU family support"
> select HAVE_ARCH_PFN_VALID
>   + select HAVE_PAGE_SIZE_4KB if !MMU
>
> so it covers all 680x0 CPUs without MMU?

I was a bit unsure about how to best do this since there
is not really a need for a fixed page size on nommu kernels,
whereas the three MMU configs clearly tie the page size to
the MMU rather than the platform.

There should be no reason for coldfire to have a different
page size from dragonball if neither of them actually uses
hardware pages, so one of them could be changed later.

Let me know if that makes sense to you, or you still
prefer me to change it like you suggested.

  Arnd


Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Uwe Kleine-König
Hello Christophe,

On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote:
> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
> > recently the spi-ppc4xx.c driver suffered from build errors and warnings
> > that were undetected for longer than I expected. I think it would be
> > very beneficial if this driver was enabled in (at least) a powerpc
> > allmodconfig build.
> > 
> > The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
> > only defined for 4xx (as these select PPC_DCR_NATIVE).
> > 
> > I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
> > too. I tried and failed. The best I came up without extensive doc
> > reading is:
> > 
> > diff --git a/arch/powerpc/include/asm/dcr-native.h 
> > b/arch/powerpc/include/asm/dcr-native.h
> > index a92059964579..159ab7abfe46 100644
> > --- a/arch/powerpc/include/asm/dcr-native.h
> > +++ b/arch/powerpc/include/asm/dcr-native.h
> > @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
> > base_data, int reg,
> > unsigned int val;
> >   
> > spin_lock_irqsave(_ind_lock, flags);
> > -   if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
> > -   mtdcrx(base_addr, reg);
> > -   val = (mfdcrx(base_data) & ~clr) | set;
> > -   mtdcrx(base_data, val);
> > -   } else {
> > -   __mtdcr(base_addr, reg);
> > -   val = (__mfdcr(base_data) & ~clr) | set;
> > -   __mtdcr(base_data, val);
> > -   }
> > +
> > +   mtdcr(base_addr, reg);
> > +   val = (mfdcr(base_data) & ~clr) | set;
> > +   mtdcr(base_data, val);
> > +
> > spin_unlock_irqrestore(_ind_lock, flags);
> >   }
> >   
> > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> > index bc7021da2fe9..9a0a5e8c70c8 100644
> > --- a/drivers/spi/Kconfig
> > +++ b/drivers/spi/Kconfig
> > @@ -810,7 +810,8 @@ config SPI_PL022
> >   
> >   config SPI_PPC4xx
> > tristate "PPC4xx SPI Controller"
> > -   depends on PPC32 && 4xx
> > +   depends on 4xx || COMPILE_TEST
> > +   depends on PPC32 || PPC64
> > select SPI_BITBANG
> > help
> >   This selects a driver for the PPC4xx SPI Controller.
> > 
> > While this is a step in the right direction (I think) it's not enough to
> > make the driver build (but maybe make it easier to define
> > dcri_clrset()?)
> > 
> > Could someone with more powerpc knowledge jump in and help (for the
> > benefit of better compile coverage of the spi driver and so less
> > breakage)? (If you do so based on my changes above, you don't need to
> > credit me for my effort, claim it as your's. I'm happy enough if the
> > situation improves.)
> 
> What about this ?
> 
> diff --git a/arch/powerpc/include/asm/dcr-mmio.h 
> b/arch/powerpc/include/asm/dcr-mmio.h
> index fc6d93ef4a13..38b515afbffc 100644
> --- a/arch/powerpc/include/asm/dcr-mmio.h
> +++ b/arch/powerpc/include/asm/dcr-mmio.h
> @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host,
>   out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
>   }
> 
> +static inline void __dcri_clrset(int base_addr, int base_data, int reg,
> +  unsigned clr, unsigned set)
> +{
> +}
> +

The downside of that one is that if we find a matching device where
dcr-mmio is used, the driver claims to work but silently fails. Is this
good enough?

>   #endif /* __KERNEL__ */
>   #endif /* _ASM_POWERPC_DCR_MMIO_H */
> 
> diff --git a/arch/powerpc/include/asm/dcr-native.h 
> b/arch/powerpc/include/asm/dcr-native.h
> index a92059964579..2f6221bf5406 100644
> --- a/arch/powerpc/include/asm/dcr-native.h
> +++ b/arch/powerpc/include/asm/dcr-native.h
> @@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int 
> base_data, int reg,
>DCRN_ ## base ## _CONFIG_DATA, \
>reg, data)
> 
> -#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## 
> _CONFIG_ADDR, \
> -   DCRN_ ## base ## 
> _CONFIG_DATA,\
> -   reg, clr, set)
> -
>   #endif /* __ASSEMBLY__ */
>   #endif /* __KERNEL__ */
>   #endif /* _ASM_POWERPC_DCR_NATIVE_H */
> diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h
> index 64030e3a1f30..15c123ae38a1 100644
> --- a/arch/powerpc/include/asm/dcr.h
> +++ b/arch/powerpc/include/asm/dcr.h
> @@ -18,6 +18,9 @@
>   #include 
>   #endif
> 
> +#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## 
> _CONFIG_ADDR, \
> +   DCRN_ ## base ## 
> _CONFIG_DATA,\
> +   reg, clr, set)
> 
>   /* Indirection layer for providing both NATIVE and MMIO support. */
> 
> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> index ddae0fde798e..7b003c5dd613 100644
> --- a/drivers/spi/Kconfig
> +++ b/drivers/spi/Kconfig

Re: [PATCH linux-next v2 1/3] kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP

2024-02-27 Thread Baoquan He
On 02/26/24 at 04:00pm, Hari Bathini wrote:
> struct crash_mem defined under include/linux/crash_core.h represents
> a list of memory ranges. While it is used to represent memory ranges
> for kdump kernel, it can also be used for other kind of memory ranges.
> In fact, KEXEC_FILE_LOAD syscall in powerpc uses this structure to
> represent reserved memory ranges and exclude memory ranges needed to
> find the right memory regions to load kexec kernel. So, make the

We may be able to refactor code to make struct crash_mem only used for
crash handling, or get another data structure for loading kexec kernel.
Anyway, for the time being, this is the simplest way we can take. Thanks
for the work on ppc to make crash split out from the kexec code.

Acked-by: Baoquan He 

> definition of crash_mem structure available for !CONFIG_CRASH_DUMP
> case too.
> 
> Signed-off-by: Hari Bathini 
> ---
> 
> * No changes in v2.
> 
>  include/linux/crash_core.h | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index 23270b16e1db..d33352c2e386 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -8,6 +8,12 @@
>  
>  struct kimage;
>  
> +struct crash_mem {
> + unsigned int max_nr_ranges;
> + unsigned int nr_ranges;
> + struct range ranges[] __counted_by(max_nr_ranges);
> +};
> +
>  #ifdef CONFIG_CRASH_DUMP
>  
>  int crash_shrink_memory(unsigned long new_size);
> @@ -51,12 +57,6 @@ static inline unsigned int crash_get_elfcorehdr_size(void) 
> { return 0; }
>  /* Alignment required for elf header segment */
>  #define ELF_CORE_HEADER_ALIGN   4096
>  
> -struct crash_mem {
> - unsigned int max_nr_ranges;
> - unsigned int nr_ranges;
> - struct range ranges[] __counted_by(max_nr_ranges);
> -};
> -
>  extern int crash_exclude_mem_range(struct crash_mem *mem,
>  unsigned long long mstart,
>  unsigned long long mend);
> -- 
> 2.43.2
> 



Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Christophe Leroy


Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit :
> Hello,
> 
> recently the spi-ppc4xx.c driver suffered from build errors and warnings
> that were undetected for longer than I expected. I think it would be
> very beneficial if this driver was enabled in (at least) a powerpc
> allmodconfig build.
> 
> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
> only defined for 4xx (as these select PPC_DCR_NATIVE).
> 
> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
> too. I tried and failed. The best I came up without extensive doc
> reading is:
> 
> diff --git a/arch/powerpc/include/asm/dcr-native.h 
> b/arch/powerpc/include/asm/dcr-native.h
> index a92059964579..159ab7abfe46 100644
> --- a/arch/powerpc/include/asm/dcr-native.h
> +++ b/arch/powerpc/include/asm/dcr-native.h
> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
> base_data, int reg,
>   unsigned int val;
>   
>   spin_lock_irqsave(_ind_lock, flags);
> - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
> - mtdcrx(base_addr, reg);
> - val = (mfdcrx(base_data) & ~clr) | set;
> - mtdcrx(base_data, val);
> - } else {
> - __mtdcr(base_addr, reg);
> - val = (__mfdcr(base_data) & ~clr) | set;
> - __mtdcr(base_data, val);
> - }
> +
> + mtdcr(base_addr, reg);
> + val = (mfdcr(base_data) & ~clr) | set;
> + mtdcr(base_data, val);
> +
>   spin_unlock_irqrestore(_ind_lock, flags);
>   }
>   
> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> index bc7021da2fe9..9a0a5e8c70c8 100644
> --- a/drivers/spi/Kconfig
> +++ b/drivers/spi/Kconfig
> @@ -810,7 +810,8 @@ config SPI_PL022
>   
>   config SPI_PPC4xx
>   tristate "PPC4xx SPI Controller"
> - depends on PPC32 && 4xx
> + depends on 4xx || COMPILE_TEST
> + depends on PPC32 || PPC64
>   select SPI_BITBANG
>   help
> This selects a driver for the PPC4xx SPI Controller.
> 
> While this is a step in the right direction (I think) it's not enough to
> make the driver build (but maybe make it easier to define
> dcri_clrset()?)
> 
> Could someone with more powerpc knowledge jump in and help (for the
> benefit of better compile coverage of the spi driver and so less
> breakage)? (If you do so based on my changes above, you don't need to
> credit me for my effort, claim it as your's. I'm happy enough if the
> situation improves.)

What about this ?

diff --git a/arch/powerpc/include/asm/dcr-mmio.h 
b/arch/powerpc/include/asm/dcr-mmio.h
index fc6d93ef4a13..38b515afbffc 100644
--- a/arch/powerpc/include/asm/dcr-mmio.h
+++ b/arch/powerpc/include/asm/dcr-mmio.h
@@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host,
out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
  }

+static inline void __dcri_clrset(int base_addr, int base_data, int reg,
+unsigned clr, unsigned set)
+{
+}
+
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_DCR_MMIO_H */

diff --git a/arch/powerpc/include/asm/dcr-native.h 
b/arch/powerpc/include/asm/dcr-native.h
index a92059964579..2f6221bf5406 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int 
base_data, int reg,
 DCRN_ ## base ## _CONFIG_DATA, \
 reg, data)

-#define dcri_clrset(base, reg, clr, set)   __dcri_clrset(DCRN_ ## base ## 
_CONFIG_ADDR,   \
- DCRN_ ## base ## 
_CONFIG_DATA,\
- reg, clr, set)
-
  #endif /* __ASSEMBLY__ */
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_DCR_NATIVE_H */
diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h
index 64030e3a1f30..15c123ae38a1 100644
--- a/arch/powerpc/include/asm/dcr.h
+++ b/arch/powerpc/include/asm/dcr.h
@@ -18,6 +18,9 @@
  #include 
  #endif

+#define dcri_clrset(base, reg, clr, set)   __dcri_clrset(DCRN_ ## base ## 
_CONFIG_ADDR,   \
+ DCRN_ ## base ## 
_CONFIG_DATA,\
+ reg, clr, set)

  /* Indirection layer for providing both NATIVE and MMIO support. */

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index ddae0fde798e..7b003c5dd613 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -810,7 +810,7 @@ config SPI_PL022

  config SPI_PPC4xx
tristate "PPC4xx SPI Controller"
-   depends on PPC32 && 4xx
+   depends on PPC && (4xx || COMPILE_TEST)
select SPI_BITBANG
help
  This selects a driver for the PPC4xx SPI Controller.


Re: [PATCH] powerpc/mm: Code cleanup for __hash_page_thp

2024-02-27 Thread Kunwu Chan

Thanks for the reply.
On 2024/2/27 14:07, Michael Ellerman wrote:

Kunwu Chan  writes:

Thanks for the reply.

On 2024/2/26 18:49, Michael Ellerman wrote:

Kunwu Chan  writes:

This part was commented from commit 6d492ecc6489
("powerpc/THP: Add code to handle HPTE faults for hugepages")
in about 11 years before.

If there are no plans to enable this part code in the future,
we can remove this dead code.


I agree the code can go. But I'd like it to be replaced with a comment
explaining what the dead code was trying to say.



Thanks, i'll update a new patch with the following comment:
  /*
  * No CPU has hugepages but lacks no execute, so we
  * don't need to worry about cpu no CPU_FTR_COHERENT_ICACHE feature case
  */


Maybe wait until we can get some input from Aneesh. I'm not sure the
code/comment are really up to date.

I won't do anything until I get a reply.
I'll wait for the latest msg.



cheers

--
Thanks,
  Kunwu



Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-27 Thread Thomas Gleixner
On Mon, Feb 26 2024 at 10:09, Bitao Hu wrote:
> We could use the irq_desc::tot_count member to avoid the summation
> loop for interrupts which are not marked as 'PER_CPU' interrupts in
> 'show_interrupts'. This could reduce the time overhead of reading
> /proc/interrupts.

"Could" is not really a technical term. Either we do or we do not. Also
please provide context for your change and avoid the 'We'.

> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { }
>  extern struct irq_desc irq_desc[NR_IRQS];
>  #endif
>
> +extern bool irq_is_nmi(struct irq_desc *desc);
> +

If at all this wants to be in kernel/irq/internal.h. There is zero
reason to expose this globally.

> -static bool irq_is_nmi(struct irq_desc *desc)
> +bool irq_is_nmi(struct irq_desc *desc)
>  {
>   return desc->istate & IRQS_NMI;
>  }

If at all this really wants to be a static inline in internals.h, but
instead of blindly copying code this can be done smarter:

unsigned int kstat_irq_desc(struct irq_desc *desc)
{
unsigned int sum = 0;
int cpu;

if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
return data_race(desc->tot_count);

for_each_possible_cpu(cpu)
sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
return sum;
}

and then let kstat_irqs() and show_interrupts() use it. See?

With that a proper changelog would be:

   show_interrupts() unconditionally accumulates the per CPU interrupt
   statistics to determine whether an interrupt was ever raised.

   This can be avoided for all interrupts which are not strictly per CPU
   and not of type NMI because those interrupts provide already an
   accumulated counter. The required logic is already implemented in
   kstat_irqs().

   Split the inner access logic out of kstat_irqs() and use it for
   kstat_irqs() and show_interrupts() to avoid the accumulation loop
   when possible.

Thanks,

tglx


Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Uwe Kleine-König
Hello,

On Tue, Feb 27, 2024 at 08:54:03AM +, Tudor Ambarus wrote:
> On 2/27/24 08:46, Uwe Kleine-König wrote:
> > recently the spi-ppc4xx.c driver suffered from build errors and warnings
> > that were undetected for longer than I expected. I think it would be
> 
> long enough so that we remove the driver altogether?

I know at least one user who noticed the driver being broken because he
needs it and not because a build bot stumbled.

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | https://www.pengutronix.de/ |


signature.asc
Description: PGP signature


Re: [PATCHv10 4/4] watchdog/softlockup: report the most frequent interrupts

2024-02-27 Thread Liu Song



在 2024/2/26 10:09, Bitao Hu 写道:

When the watchdog determines that the current soft lockup is due
to an interrupt storm based on CPU utilization, reporting the
most frequent interrupts could be good enough for further
troubleshooting.

Below is an example of interrupt storm. The call tree does not
provide useful information, but we can analyze which interrupt
caused the soft lockup by comparing the counts of interrupts.

[  638.870231] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [swapper/9:0]
[  638.870825] CPU#9 Utilization every 4s during lockup:
[  638.871194]  #1:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.871652]  #2:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.872107]  #3:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.872563]  #4:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.873018]  #5:   0% system,  0% softirq,   100% hardirq, 0% 
idle
[  638.873494] CPU#9 Detect HardIRQ Time exceeds 50%. Most frequent HardIRQs:
[  638.873994]  #1: 330945  irq#7
[  638.874236]  #2: 31  irq#82
[  638.874493]  #3: 10  irq#10
[  638.874744]  #4: 2   irq#89
[  638.874992]  #5: 1   irq#102
...
[  638.875313] Call trace:
[  638.875315]  __do_softirq+0xa8/0x364

Signed-off-by: Bitao Hu 
---
  kernel/watchdog.c | 115 --
  1 file changed, 111 insertions(+), 4 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69e72d7e461d..c9d49ae8d045 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -12,22 +12,25 @@
  
  #define pr_fmt(fmt) "watchdog: " fmt
  
-#include 

  #include 
-#include 
  #include 
+#include 
+#include 
  #include 
+#include 
  #include 
+#include 
  #include 
+#include 
+#include 
  #include 
  #include 
+
  #include 
  #include 
  #include 
-#include 
  
  #include 

-#include 
  
  static DEFINE_MUTEX(watchdog_mutex);
  
@@ -417,13 +420,104 @@ static void print_cpustat(void)

}
  }
  
+#define HARDIRQ_PERCENT_THRESH  50

+#define NUM_HARDIRQ_REPORT  5
+struct irq_counts {
+   int irq;
+   u32 counts;
+};
+
+static DEFINE_PER_CPU(bool, snapshot_taken);
+
+/* Tabulate the most frequent interrupts. */
+static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 
counts, int rank)
+{
+   int i;
+   struct irq_counts new_count = {irq, counts};
+
+   for (i = 0; i < rank; i++) {
+   if (counts > irq_counts[i].counts)
+   swap(new_count, irq_counts[i]);
+   }
+}
+
+/*
+ * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period,
+ * then the cause of softlockup might be interrupt storm. In this case, it
+ * would be useful to start interrupt counting.
+ */
+static bool need_counting_irqs(void)
+{
+   u8 util;
+   int tail = __this_cpu_read(cpustat_tail);
+
+   tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT;
+   util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
+   return util > HARDIRQ_PERCENT_THRESH;
+}
+
+static void start_counting_irqs(void)
+{
+   if (!__this_cpu_read(snapshot_taken)) {
+   kstat_snapshot_irqs();
+   __this_cpu_write(snapshot_taken, true);
+   }
+}
+
+static void stop_counting_irqs(void)
+{
+   __this_cpu_write(snapshot_taken, false);
+}
+
+static void print_irq_counts(void)
+{
+   unsigned int i, count;
+   struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = {
+   {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}
+   };
+
+   if (__this_cpu_read(snapshot_taken)) {
+   for_each_active_irq(i) {
+   count = kstat_get_irq_since_snapshot(i);
+   tabulate_irq_count(irq_counts_sorted, i, count, 
NUM_HARDIRQ_REPORT);
+   }
+
+   /*
+* We do not want the "watchdog: " prefix on every line,
+* hence we use "printk" instead of "pr_crit".
+*/
+   printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most 
frequent HardIRQs:\n",
+  smp_processor_id(), HARDIRQ_PERCENT_THRESH);
+
+   for (i = 0; i < NUM_HARDIRQ_REPORT; i++) {
+   if (irq_counts_sorted[i].irq == -1)
+   break;
+
+   printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n",
+  i + 1, irq_counts_sorted[i].counts,
+  irq_counts_sorted[i].irq);
+   }
+
+   /*
+* If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in 
the last
+* sample_period, then we suspect the interrupt storm might be 
subsiding.
+*/
+   if (!need_counting_irqs())
+   stop_counting_irqs();
+   }
+}
+
  static void report_cpu_status(void)
  {

Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures

2024-02-27 Thread Geert Uytterhoeven
Hi Arnd,

On Mon, Feb 26, 2024 at 5:15 PM Arnd Bergmann  wrote:
> From: Arnd Bergmann 
>
> Most architectures only support a single hardcoded page size. In order
> to ensure that each one of these sets the corresponding Kconfig symbols,
> change over the PAGE_SHIFT definition to the common one and allow
> only the hardware page size to be selected.
>
> Signed-off-by: Arnd Bergmann 

Thanks for your patch!

> --- a/arch/m68k/Kconfig
> +++ b/arch/m68k/Kconfig
> @@ -84,12 +84,15 @@ config MMU
>
>  config MMU_MOTOROLA
> bool
> +   select HAVE_PAGE_SIZE_4KB
>
>  config MMU_COLDFIRE
> +   select HAVE_PAGE_SIZE_8KB

I think you can do without this...

> bool
>
>  config MMU_SUN3
> bool
> +   select HAVE_PAGE_SIZE_8KB
> depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE
>
>  config ARCH_SUPPORTS_KEXEC
> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
> index 9dcf245c9cbf..c777a129768a 100644
> --- a/arch/m68k/Kconfig.cpu
> +++ b/arch/m68k/Kconfig.cpu
> @@ -30,6 +30,7 @@ config COLDFIRE
> select GENERIC_CSUM
> select GPIOLIB
> select HAVE_LEGACY_CLK
> +   select HAVE_PAGE_SIZE_8KB if !MMU

 if you would drop the !MMU-dependency here.

>
>  endchoice
>
> @@ -45,6 +46,7 @@ config M68000
> select GENERIC_CSUM
> select CPU_NO_EFFICIENT_FFS
> select HAVE_ARCH_HASH
> +   select HAVE_PAGE_SIZE_4KB

Perhaps replace this by

config M68KCLASSIC
bool "Classic M68K CPU family support"
select HAVE_ARCH_PFN_VALID
  + select HAVE_PAGE_SIZE_4KB if !MMU

so it covers all 680x0 CPUs without MMU?

> select LEGACY_TIMER_TICK
> help
>   The Freescale (was Motorola) 68000 CPU is the first generation of

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Tudor Ambarus



On 2/27/24 08:46, Uwe Kleine-König wrote:
> recently the spi-ppc4xx.c driver suffered from build errors and warnings
> that were undetected for longer than I expected. I think it would be

long enough so that we remove the driver altogether?


Re: [kvm-unit-tests PATCH 03/32] powerpc: Fix stack backtrace termination

2024-02-27 Thread Thomas Huth

On 26/02/2024 11.11, Nicholas Piggin wrote:

The backtrace handler terminates when it sees a NULL caller address,
but the powerpc stack setup does not keep such a NULL caller frame
at the start of the stack.

This happens to work on pseries because the memory at 0 is mapped and
it contains 0 at the location of the return address pointer if it
were a stack frame. But this is fragile, and does not work with powernv
where address 0 contains firmware instructions.

Use the existing dummy frame on stack as the NULL caller, and create a
new frame on stack for the entry code.

Signed-off-by: Nicholas Piggin 
---
  powerpc/cstart64.S | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)


Thanks for tackling this! ... however, not doing powerpc work since years 
anymore, I have some ignorant questions below...



diff --git a/powerpc/cstart64.S b/powerpc/cstart64.S
index e18ae9a22..14ab0c6c8 100644
--- a/powerpc/cstart64.S
+++ b/powerpc/cstart64.S
@@ -46,8 +46,16 @@ start:
add r1, r1, r31
add r2, r2, r31
  
+	/* Zero backpointers in initial stack frame so backtrace() stops */

+   li  r0,0
+   std r0,0(r1)


0(r1) is the back chain pointer ...


+   std r0,16(r1)


... but what is 16(r1) ? I suppose that should be the "LR save word" ? But 
isn't that at 8(r1) instead?? (not sure whether I'm looking at the right ELF 
abi spec right now...)


Anyway, a comment in the source would be helpful here.


+
+   /* Create entry frame */
+   stdur1,-INT_FRAME_SIZE(r1)


Since we already create an initial frame via stackptr from powerpc/flat.lds, 
do we really need to create this additional one here? Or does the one from 
flat.lds have to be completely empty, i.e. also no DTB pointer in it?



/* save DTB pointer */
-   std r3, 56(r1)
+   SAVE_GPR(3,r1)


Isn't SAVE_GPR rather meant for the interrupt frame, not for the normal C 
calling convention frames?


Sorry for asking dumb questions ... I still have a hard time understanding 
the changes here... :-/



/*
 * Call relocate. relocate is C code, but careful to not use
@@ -101,7 +109,7 @@ start:
stw r4, 0(r3)
  
  	/* complete setup */

-1: ld  r3, 56(r1)
+1: REST_GPR(3, r1)
bl  setup
  
  	/* run the test */


 Thomas



Increasing build coverage for drivers/spi/spi-ppc4xx.c

2024-02-27 Thread Uwe Kleine-König
Hello,

recently the spi-ppc4xx.c driver suffered from build errors and warnings
that were undetected for longer than I expected. I think it would be
very beneficial if this driver was enabled in (at least) a powerpc
allmodconfig build.

The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is
only defined for 4xx (as these select PPC_DCR_NATIVE).

I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case,
too. I tried and failed. The best I came up without extensive doc
reading is:

diff --git a/arch/powerpc/include/asm/dcr-native.h 
b/arch/powerpc/include/asm/dcr-native.h
index a92059964579..159ab7abfe46 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int 
base_data, int reg,
unsigned int val;
 
spin_lock_irqsave(_ind_lock, flags);
-   if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
-   mtdcrx(base_addr, reg);
-   val = (mfdcrx(base_data) & ~clr) | set;
-   mtdcrx(base_data, val);
-   } else {
-   __mtdcr(base_addr, reg);
-   val = (__mfdcr(base_data) & ~clr) | set;
-   __mtdcr(base_data, val);
-   }
+
+   mtdcr(base_addr, reg);
+   val = (mfdcr(base_data) & ~clr) | set;
+   mtdcr(base_data, val);
+
spin_unlock_irqrestore(_ind_lock, flags);
 }
 
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index bc7021da2fe9..9a0a5e8c70c8 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -810,7 +810,8 @@ config SPI_PL022
 
 config SPI_PPC4xx
tristate "PPC4xx SPI Controller"
-   depends on PPC32 && 4xx
+   depends on 4xx || COMPILE_TEST
+   depends on PPC32 || PPC64
select SPI_BITBANG
help
  This selects a driver for the PPC4xx SPI Controller.

While this is a step in the right direction (I think) it's not enough to
make the driver build (but maybe make it easier to define
dcri_clrset()?)

Could someone with more powerpc knowledge jump in and help (for the
benefit of better compile coverage of the spi driver and so less
breakage)? (If you do so based on my changes above, you don't need to
credit me for my effort, claim it as your's. I'm happy enough if the
situation improves.)

Thanks
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | https://www.pengutronix.de/ |


signature.asc
Description: PGP signature


Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions

2024-02-27 Thread Geert Uytterhoeven
Hi Arnd,

On Mon, Feb 26, 2024 at 5:14 PM Arnd Bergmann  wrote:
> From: Arnd Bergmann 
>
> These four architectures define the same Kconfig symbols for configuring
> the page size. Move the logic into a common place where it can be shared
> with all other architectures.
>
> Signed-off-by: Arnd Bergmann 

Thanks for your patch!

> --- a/arch/Kconfig
> +++ b/arch/Kconfig

> +config PAGE_SIZE_4KB
> +   bool "4KB pages"

Now you got rid of the 4000-byte ("4kB") pages and friends, please
do not replace these by Kelvin-bytes, and use the official binary
prefixes => "4 KiB".

> +   depends on HAVE_PAGE_SIZE_4KB

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v13 09/16] media: uapi: Define audio sample format fourcc type

2024-02-27 Thread Hans Verkuil
On 27/02/2024 04:44, Shengjiu Wang wrote:
> On Mon, Feb 26, 2024 at 9:55 PM Nicolas Dufresne  wrote:
>>
>> Le lundi 26 février 2024 à 16:28 +0800, Shengjiu Wang a écrit :
>>> The audio sample format definition is from alsa,
>>> the header file is include/uapi/sound/asound.h, but
>>> don't include this header file directly, because in
>>> user space, there is another copy in alsa-lib.
>>> There will be conflict in userspace for include
>>> videodev2.h & asound.h and asoundlib.h
>>>
>>> Here still use the fourcc format.
>>
>> I'd like to join Mauro's voice that duplicating the audio formats is a bad 
>> idea.
>> We have the same issues with video formats when you look at V4L2 vs DRM. 
>> You're
>> rationale is that videodev2.h will be ambiguous if it includes asound.h, but
>> looking at this change, there is no reason why you need to include asound.h 
>> in
>> videodev2.h at all. The format type can be abstracted out with a uint32 in 
>> the
>> API, and then it would be up to the users to include and use the appropriate
>> formats IDs.
>>
> 
> Resend for the plain text issue
> 
> Thanks.
> 
> There is another reason mentioned by Hans:
> 
> "
> The problem is that within V4L2 we use fourcc consistently to describe a
> format, including in VIDIOC_ENUM_FMT. And the expectation is that the fourcc
> can be printed to a human readable string (there is even a printk format for
> that these days).
> 
> But the pcm values are all small integers (and can even be 0!), and
> printing the fourcc will give garbage. It doesn't work well at all
> with the V4L2 API. But by having a straightforward conversion between the
> pcm identifier and a fourcc it was really easy to deal with this.
> 
> There might even be applications today that call VIDIOC_ENUM_FMT to see
> what is supported and fail if it is not a proper fourcc is returned.
> 
> It will certainly report nonsense in v4l_print_fmtdesc() (v4l2-ioctl.c).
> 
> One of the early versions of this patch series did precisely what you request,
> but it just doesn't work well within the V4L2 uAPI.
> "

Right, and remember that the fourcc codes are really meant to be shown as
characters. v4l2-ctl and friends all use this, and we all talk about them
as such (NV12, YUYV, etc.). If I would just stick in the standard PCM IDs,
then they would all show up as ''.



>>> +/*
>>> + * Audio-data formats
>>> + * All these audio formats use a fourcc starting with 'AU'
>>> + * followed by the SNDRV_PCM_FORMAT_ value from asound.h.
>>> + */
>>> +#define V4L2_AUDIO_FMT_S8v4l2_fourcc('A', 'U', '0', 
>>> '0')
>>> +#define V4L2_AUDIO_FMT_S16_LEv4l2_fourcc('A', 'U', 
>>> '0', '2')
>>> +#define V4L2_AUDIO_FMT_U16_LEv4l2_fourcc('A', 'U', 
>>> '0', '4')
>>> +#define V4L2_AUDIO_FMT_S24_LEv4l2_fourcc('A', 'U', 
>>> '0', '6')
>>> +#define V4L2_AUDIO_FMT_U24_LEv4l2_fourcc('A', 'U', 
>>> '0', '8')
>>> +#define V4L2_AUDIO_FMT_S32_LEv4l2_fourcc('A', 'U', 
>>> '1', '0')
>>> +#define V4L2_AUDIO_FMT_U32_LEv4l2_fourcc('A', 'U', 
>>> '1', '2')
>>> +#define V4L2_AUDIO_FMT_FLOAT_LE  v4l2_fourcc('A', 'U', 
>>> '1', '4')
>>> +#define V4L2_AUDIO_FMT_IEC958_SUBFRAME_LEv4l2_fourcc('A', 'U', '1', 
>>> '8')
>>> +#define V4L2_AUDIO_FMT_S24_3LE   v4l2_fourcc('A', 'U', 
>>> '3', '2')
>>> +#define V4L2_AUDIO_FMT_U24_3LE   v4l2_fourcc('A', 'U', 
>>> '3', '4')
>>> +#define V4L2_AUDIO_FMT_S20_3LE   v4l2_fourcc('A', 'U', 
>>> '3', '6')
>>> +#define V4L2_AUDIO_FMT_U20_3LE   v4l2_fourcc('A', 'U', 
>>> '3', '8')
>>> +
>>> +#define v4l2_fourcc_to_audfmt(fourcc)\
>>> + (__force snd_pcm_format_t)(fourcc) >> 16) & 0xff) - '0') * 10  \
>>> ++ fourcc) >> 24) & 0xff) - '0'))
>>> +

We did add a #define to go from a fourcc to an audio format.

Would it help to add a corresponding v4l2_audfmt_to_fourcc define as well?

#define v4l2_audfmt_to_fourcc(audfmt) \
v4l2_fourcc('A', 'U', '0' + (audfmt) / 10, '0' + (audfmt) % 10)

I would have no problem with that.

The mapping between the audio format and the fourcc is very simple, it is just
converted to something that can be safely shown as ASCII characters.

Regards,

Hans

>>>  /* priv field value to indicates that subsequent fields are valid. */
>>>  #define V4L2_PIX_FMT_PRIV_MAGIC  0xfeedcafe
>>>
>>