[PATCHv11 4/4] watchdog/softlockup: report the most frequent interrupts
When the watchdog determines that the current soft lockup is due to an interrupt storm based on CPU utilization, reporting the most frequent interrupts could be good enough for further troubleshooting. Below is an example of interrupt storm. The call tree does not provide useful information, but we can analyze which interrupt caused the soft lockup by comparing the counts of interrupts. [ 638.870231] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [swapper/9:0] [ 638.870825] CPU#9 Utilization every 4s during lockup: [ 638.871194] #1: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.871652] #2: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.872107] #3: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.872563] #4: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.873018] #5: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.873494] CPU#9 Detect HardIRQ Time exceeds 50%. Most frequent HardIRQs: [ 638.873994] #1: 330945 irq#7 [ 638.874236] #2: 31 irq#82 [ 638.874493] #3: 10 irq#10 [ 638.874744] #4: 2 irq#89 [ 638.874992] #5: 1 irq#102 ... [ 638.875313] Call trace: [ 638.875315] __do_softirq+0xa8/0x364 Signed-off-by: Bitao Hu Reviewed-by: Liu Song --- kernel/watchdog.c | 115 -- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 69e72d7e461d..c9d49ae8d045 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -12,22 +12,25 @@ #define pr_fmt(fmt) "watchdog: " fmt -#include #include -#include #include +#include +#include #include +#include #include +#include #include +#include +#include #include #include + #include #include #include -#include #include -#include static DEFINE_MUTEX(watchdog_mutex); @@ -417,13 +420,104 @@ static void print_cpustat(void) } } +#define HARDIRQ_PERCENT_THRESH 50 +#define NUM_HARDIRQ_REPORT 5 +struct irq_counts { + int irq; + u32 counts; +}; + +static DEFINE_PER_CPU(bool, snapshot_taken); + +/* Tabulate the most frequent interrupts. */ +static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank) +{ + int i; + struct irq_counts new_count = {irq, counts}; + + for (i = 0; i < rank; i++) { + if (counts > irq_counts[i].counts) + swap(new_count, irq_counts[i]); + } +} + +/* + * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period, + * then the cause of softlockup might be interrupt storm. In this case, it + * would be useful to start interrupt counting. + */ +static bool need_counting_irqs(void) +{ + u8 util; + int tail = __this_cpu_read(cpustat_tail); + + tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT; + util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]); + return util > HARDIRQ_PERCENT_THRESH; +} + +static void start_counting_irqs(void) +{ + if (!__this_cpu_read(snapshot_taken)) { + kstat_snapshot_irqs(); + __this_cpu_write(snapshot_taken, true); + } +} + +static void stop_counting_irqs(void) +{ + __this_cpu_write(snapshot_taken, false); +} + +static void print_irq_counts(void) +{ + unsigned int i, count; + struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = { + {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0} + }; + + if (__this_cpu_read(snapshot_taken)) { + for_each_active_irq(i) { + count = kstat_get_irq_since_snapshot(i); + tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT); + } + + /* +* We do not want the "watchdog: " prefix on every line, +* hence we use "printk" instead of "pr_crit". +*/ + printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n", + smp_processor_id(), HARDIRQ_PERCENT_THRESH); + + for (i = 0; i < NUM_HARDIRQ_REPORT; i++) { + if (irq_counts_sorted[i].irq == -1) + break; + + printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n", + i + 1, irq_counts_sorted[i].counts, + irq_counts_sorted[i].irq); + } + + /* +* If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last +* sample_period, then we suspect the interrupt storm might be subsiding. +*/ + if (!need_counting_irqs()) + stop_counting_irqs(); + } +} + static void report_cpu_status(void) { print_cpustat(); +
[PATCHv11 3/4] genirq: Avoid summation loops for /proc/interrupts
show_interrupts() unconditionally accumulates the per CPU interrupt statistics to determine whether an interrupt was ever raised. This can be avoided for all interrupts which are not strictly per CPU and not of type NMI because those interrupts provide already an accumulated counter. The required logic is already implemented in kstat_irqs(). Split the inner access logic out of kstat_irqs() and use it for kstat_irqs() and show_interrupts() to avoid the accumulation loop when possible. Originally-by: Thomas Gleixner Signed-off-by: Bitao Hu Reviewed-by: Liu Song --- kernel/irq/internals.h | 2 ++ kernel/irq/irqdesc.c | 16 +++- kernel/irq/proc.c | 6 ++ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 1d92532c2aae..6c43ef3e7308 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -98,6 +98,8 @@ extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); extern void unmask_threaded_irq(struct irq_desc *desc); +extern unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask); + #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } #else diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9cd17080b2d8..65a7f2dcd17b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -960,24 +960,30 @@ static bool irq_is_nmi(struct irq_desc *desc) return desc->istate & IRQS_NMI; } -static unsigned int kstat_irqs(unsigned int irq) +unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask) { - struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; int cpu; - if (!desc || !desc->kstat_irqs) - return 0; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); - for_each_possible_cpu(cpu) + for_each_cpu(cpu, cpumask) sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu)); return sum; } +static unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->kstat_irqs) + return 0; + return kstat_irqs_desc(desc, cpu_possible_mask); +} + void kstat_snapshot_irqs(void) { struct irq_desc *desc; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6954e0a02047..5c320c3f10a7 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -488,10 +488,8 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc || irq_settings_is_hidden(desc)) goto outsparse; - if (desc->kstat_irqs) { - for_each_online_cpu(j) - any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, j)); - } + if (desc->kstat_irqs) + any_count = kstat_irqs_desc(desc, cpu_online_mask); if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) goto outsparse; -- 2.37.1 (Apple Git-137.1)
[PATCHv11 2/4] genirq: Provide a snapshot mechanism for interrupt statistics
The soft lockup detector lacks a mechanism to identify interrupt storms as root cause of a lockup. To enable this the detector needs a mechanism to snapshot the interrupt count statistics on a CPU when the detector observes a potential lockup scenario and compare that against the interrupt count when it warns about the lockup later on. The number of interrupts in that period give a hint whether the lockup might be caused by an interrupt storm. Instead of having extra storage in the lockup detector and accessing the internals of the interrupt descriptor directly, convert the per CPU irq_desc::kstat_irq member to a data structure which contains the counter plus a snapshot member and provide interfaces to take a snapshot of all interrupts on the current CPU and to retrieve the delta of a specific interrupt later on. Originally-by: Thomas Gleixner Signed-off-by: Bitao Hu Reviewed-by: Liu Song --- arch/mips/dec/setup.c| 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +- include/linux/irqdesc.h | 14 ++-- include/linux/kernel_stat.h | 3 +++ kernel/irq/internals.h | 2 +- kernel/irq/irqdesc.c | 34 ++-- kernel/irq/proc.c| 5 ++-- scripts/gdb/linux/interrupts.py | 6 ++--- 9 files changed, 51 insertions(+), 19 deletions(-) diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index 6c3704f51d0d..87f0a1436bf9 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -756,7 +756,7 @@ void __init arch_init_irq(void) NULL)) pr_err("Failed to register fpu interrupt\n"); desc_fpu = irq_to_desc(irq_fpu); - fpu_kstat_irq = this_cpu_ptr(desc_fpu->kstat_irqs); + fpu_kstat_irq = this_cpu_ptr(_fpu->kstat_irqs->cnt); } if (dec_interrupt[DEC_IRQ_CASCADE] >= 0) { if (request_irq(dec_interrupt[DEC_IRQ_CASCADE], no_action, diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 444154271f23..800eb64e91ad 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -344,7 +344,7 @@ static int smp_boot_one_cpu(int cpuid, struct task_struct *idle) struct irq_desc *desc = irq_to_desc(i); if (desc && desc->kstat_irqs) - *per_cpu_ptr(desc->kstat_irqs, cpuid) = 0; + *per_cpu_ptr(desc->kstat_irqs, cpuid) = (struct irqstat) { }; } #endif diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index e42984878503..f2636414d82a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -837,7 +837,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr) */ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) { - this_cpu_inc_rm(desc->kstat_irqs); + this_cpu_inc_rm(>kstat_irqs->cnt); __this_cpu_inc(kstat.irqs_sum); } diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index d9451d456a73..95d8def84471 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -17,6 +17,16 @@ struct irq_desc; struct irq_domain; struct pt_regs; +/** + * struct irqstat - interrupt statistics + * @cnt: real-time interrupt count + * @ref: snapshot of interrupt count + */ +struct irqstat { + unsigned intcnt; + unsigned intref; +}; + /** * struct irq_desc - interrupt descriptor * @irq_common_data: per irq and chip data passed down to chip functions @@ -55,7 +65,7 @@ struct pt_regs; struct irq_desc { struct irq_common_data irq_common_data; struct irq_data irq_data; - unsigned int __percpu *kstat_irqs; + struct irqstat __percpu *kstat_irqs; irq_flow_handler_t handle_irq; struct irqaction*action;/* IRQ action list */ unsigned intstatus_use_accessors; @@ -119,7 +129,7 @@ extern struct irq_desc irq_desc[NR_IRQS]; static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, unsigned int cpu) { - return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; + return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 9935f7ecbfb9..98b3043ea5e6 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,6 +79,9 @@ static inline unsigned int kstat_cpu_softirqs_sum(int cpu) return sum; } +extern void kstat_snapshot_irqs(void); +extern unsigned int kstat_get_irq_since_snapshot(unsigned int irq); + /* * Number of interrupts per specific IRQ source, since bootup */ diff --git
[PATCHv11 1/4] watchdog/softlockup: low-overhead detection of interrupt storm
The following softlockup is caused by interrupt storm, but it cannot be identified from the call tree. Because the call tree is just a snapshot and doesn't fully capture the behavior of the CPU during the soft lockup. watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] ... Call trace: __do_softirq+0xa0/0x37c __irq_exit_rcu+0x108/0x140 irq_exit+0x14/0x20 __handle_domain_irq+0x84/0xe0 gic_handle_irq+0x80/0x108 el0_irq_naked+0x50/0x58 Therefore,I think it is necessary to report CPU utilization during the softlockup_thresh period (report once every sample_period, for a total of 5 reportings), like this: watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] CPU#28 Utilization every 4s during lockup: #1: 0% system, 0% softirq, 100% hardirq, 0% idle #2: 0% system, 0% softirq, 100% hardirq, 0% idle #3: 0% system, 0% softirq, 100% hardirq, 0% idle #4: 0% system, 0% softirq, 100% hardirq, 0% idle #5: 0% system, 0% softirq, 100% hardirq, 0% idle ... This would be helpful in determining whether an interrupt storm has occurred or in identifying the cause of the softlockup. The criteria for determination are as follows: a. If the hardirq utilization is high, then interrupt storm should be considered and the root cause cannot be determined from the call tree. b. If the softirq utilization is high, then we could analyze the call tree but it may cannot reflect the root cause. c. If the system utilization is high, then we could analyze the root cause from the call tree. The mechanism requires a considerable amount of global storage space when configured for the maximum number of CPUs. Therefore, adding a SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob that defaults to "yes" if the max number of CPUs is <= 128. Signed-off-by: Bitao Hu Reviewed-by: Douglas Anderson Reviewed-by: Liu Song --- kernel/watchdog.c | 98 ++- lib/Kconfig.debug | 13 +++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 81a8862295d6..69e72d7e461d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,6 +37,8 @@ static DEFINE_MUTEX(watchdog_mutex); # define WATCHDOG_HARDLOCKUP_DEFAULT 0 #endif +#define NUM_SAMPLE_PERIODS 5 + unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; @@ -333,6 +337,95 @@ __setup("watchdog_thresh=", watchdog_thresh_setup); static void __lockup_detector_cleanup(void); +#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM +enum stats_per_group { + STATS_SYSTEM, + STATS_SOFTIRQ, + STATS_HARDIRQ, + STATS_IDLE, + NUM_STATS_PER_GROUP, +}; + +static const enum cpu_usage_stat tracked_stats[NUM_STATS_PER_GROUP] = { + CPUTIME_SYSTEM, + CPUTIME_SOFTIRQ, + CPUTIME_IRQ, + CPUTIME_IDLE, +}; + +static DEFINE_PER_CPU(u16, cpustat_old[NUM_STATS_PER_GROUP]); +static DEFINE_PER_CPU(u8, cpustat_util[NUM_SAMPLE_PERIODS][NUM_STATS_PER_GROUP]); +static DEFINE_PER_CPU(u8, cpustat_tail); + +/* + * We don't need nanosecond resolution. A granularity of 16ms is + * sufficient for our precision, allowing us to use u16 to store + * cpustats, which will roll over roughly every ~1000 seconds. + * 2^24 ~= 16 * 10^6 + */ +static u16 get_16bit_precision(u64 data_ns) +{ + return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */ +} + +static void update_cpustat(void) +{ + int i; + u8 util; + u16 old_stat, new_stat; + struct kernel_cpustat kcpustat; + u64 *cpustat = kcpustat.cpustat; + u8 tail = __this_cpu_read(cpustat_tail); + u16 sample_period_16 = get_16bit_precision(sample_period); + + kcpustat_cpu_fetch(, smp_processor_id()); + + for (i = 0; i < NUM_STATS_PER_GROUP; i++) { + old_stat = __this_cpu_read(cpustat_old[i]); + new_stat = get_16bit_precision(cpustat[tracked_stats[i]]); + util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16); + __this_cpu_write(cpustat_util[tail][i], util); + __this_cpu_write(cpustat_old[i], new_stat); + } + + __this_cpu_write(cpustat_tail, (tail + 1) % NUM_SAMPLE_PERIODS); +} + +static void print_cpustat(void) +{ + int i, group; + u8 tail = __this_cpu_read(cpustat_tail); + u64 sample_period_second = sample_period; + + do_div(sample_period_second, NSEC_PER_SEC); + + /* +* We do not want the "watchdog: " prefix on every line, +* hence we use "printk" instead of "pr_crit". +*/ + printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n", + smp_processor_id(), sample_period_second); + + for (i = 0; i <
[PATCHv11 0/4] *** Detect interrupt storm in softlockup ***
Hi, guys. I have implemented a low-overhead method for detecting interrupt storm in softlockup. Please review it, all comments are welcome. Changes from v10 to v11: - Only patch #2 and patch #3 have been changed. - Add comments to explain each field of 'struct irqstat' in patch #2. - Split the inner summation logic out of kstat_irqs() and encapsulate it into kstat_irqs_desc() in patch #3. - Adopt Thomas's change log for patch #3. - Add the 'Reviewed-by' tag of Liu Song. Changes from v9 to v10: - The two patches related to 'watchdog/softlockup' remain unchanged. - The majority of the work related to 'genirq' is contributed by Thomas, indicated by adding 'Originally-by' tag. And I'd like to express my gratitude for Thomas's contributions and guidance here. - Adopt Thomas's change log for the snapshot mechanism for interrupt statistics. - Split unrelated change in patch #2 into a separate patch #3. Changes from v8 to v9: - Patch #1 remains unchanged. - From Thomas Gleixner, split patch #2 into two patches. Interrupt infrastructure first and then the actual usage site in the watchdog code. Changes from v7 to v8: - From Thomas Gleixner, implement statistics within the interrupt core code and provide sensible interfaces for the watchdog code. - Patch #1 remains unchanged. Patch #2 has significant changes based on Thomas's suggestions, which is why I have removed Liu Song and Douglas's Reviewed-by from patch #2. Please review it again, and all comments are welcome. Changes from v6 to v7: - Remove "READ_ONCE" in "start_counting_irqs" - Replace the hard-coded 5 with "NUM_SAMPLE_PERIODS" macro in "set_sample_period". - Add empty lines to help with reading the code. - Remove the branch that processes IRQs where "counts_diff = 0". - Add the Reviewed-by of Liu Song and Douglas. Changes from v5 to v6: - Use "./scripts/checkpatch.pl --strict" to get a few extra style nits and fix them. - Squash patch #3 into patch #1, and wrapp the help text to 80 columns. - Sort existing headers alphabetically in watchdog.c - Drop "softlockup_hardirq_cpus", just read "hardirq_counts" and see if it's non-NULL. - Store "nr_irqs" in a local variable. - Simplify the calculation of "cpu_diff". Changes from v4 to v5: - Rearranging variable placement to make code look neater. Changes from v3 to v4: - Renaming some variable and function names to make the code logic more readable. - Change the code location to avoid predeclaring. - Just swap rather than a double loop in tabulate_irq_count. - Since nr_irqs has the potential to grow at runtime, bounds-check logic has been implemented. - Add SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob. Changes from v2 to v3: - From Liu Song, using enum instead of macro for cpu_stats, shortening the name 'idx_to_stat' to 'stats', adding 'get_16bit_precesion' instead of using right shift operations, and using 'struct irq_counts'. - From kernel robot test, using '__this_cpu_read' and '__this_cpu_write' instead of accessing to an per-cpu array directly, in order to avoid this warning. 'sparse: incorrect type in initializer (different modifiers)' Changes from v1 to v2: - From Douglas, optimize the memory of cpustats. With the maximum number of CPUs, that's now this. 2 * 8192 * 4 + 1 * 8192 * 5 * 4 + 1 * 8192 = 237,568 bytes. - From Liu Song, refactor the code format and add necessary comments. - From Douglas, use interrupt counts instead of interrupt time to determine the cause of softlockup. - Remove the cmdline parameter added in PATCHv1. Bitao Hu (4): watchdog/softlockup: low-overhead detection of interrupt storm genirq: Provide a snapshot mechanism for interrupt statistics genirq: Avoid summation loops for /proc/interrupts watchdog/softlockup: report the most frequent interrupts arch/mips/dec/setup.c| 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +- include/linux/irqdesc.h | 14 +- include/linux/kernel_stat.h | 3 + kernel/irq/internals.h | 4 +- kernel/irq/irqdesc.c | 50 +-- kernel/irq/proc.c| 9 +- kernel/watchdog.c| 213 ++- lib/Kconfig.debug| 13 ++ scripts/gdb/linux/interrupts.py | 6 +- 11 files changed, 286 insertions(+), 32 deletions(-) -- 2.37.1 (Apple Git-137.1)
Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts
On 2024/2/27 23:39, Thomas Gleixner wrote: On Tue, Feb 27 2024 at 19:20, Bitao Hu wrote: On 2024/2/27 17:26, Thomas Gleixner wrote: and then let kstat_irqs() and show_interrupts() use it. See? I have a concern. kstat_irqs() uses for_each_possible_cpu() for summation. However, show_interrupts() uses for_each_online_cpu(), which means it only outputs interrupt statistics for online cpus. If we use for_each_possible_cpu() in show_interrupts() to calculate 'any_count', there could be a problem with the following scenario: If an interrupt has a count of zero on online cpus but a non-zero count on possible cpus, then 'any_count' would not be zero, and the statistics for that interrupt would be output, which is not the desired behavior for show_interrupts(). Therefore, I think it's not good to have kstat_irqs() and show_interrupts() both use the same logic. What do you think? Good point. But you simply can have unsigned int kstat_irq_desc(struct irq_desc *desc, const struct cpumask *mask) and hand in the appropriate cpumask, which still shares the code, no? Alright, that is a good approach.
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Christophe Leroy writes: > Le 27/02/2024 à 15:00, Uwe Kleine-König a écrit : >> On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote: >>> Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit : On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote: > Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : >> recently the spi-ppc4xx.c driver suffered from build errors and warnings >> that were undetected for longer than I expected. I think it would be >> very beneficial if this driver was enabled in (at least) a powerpc >> allmodconfig build. >> >> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is >> only defined for 4xx (as these select PPC_DCR_NATIVE). ... >> >> The reason I'd like to see it in allmodconfig is that testing >> allmodconfig on several archs is the check I do for my patch series. > > I think for powerpc you should really check ppc32_allmodconfig in > addition to allmodconfig Yeah. arch/powerpc is really ~7 separate sub architectures. Building allmodconfig and ppc32_allmodconfig at least covers the bulk of the code. But it doesn't include 4xx, or several other platforms. I think the best/easiest option is just to enable this driver in the 44x defconfig. At least that way the auto builders should catch any problems. eg. diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 8b595f67068c..95a1e7efb79f 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -67,6 +67,8 @@ CONFIG_I2C=m CONFIG_I2C_CHARDEV=m CONFIG_I2C_GPIO=m CONFIG_I2C_IBM_IIC=m +CONFIG_SPI=y +CONFIG_SPI_PPC4xx=y # CONFIG_HWMON is not set CONFIG_FB=m CONFIG_USB=m cheers
Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info
On Tue, 2024-02-27 at 18:16 +, Christophe Leroy wrote: > > > Why doing a full init of the struct when all fields are re- > > > written a few > > > lines after ? > > > > It's a nice change for robustness and makes future changes easier. > > It's > > not actually wasteful since the compiler will throw away all > > redundant > > stores. > > Well, I tend to dislike default init at declaration because it often > hides missed real init. When a field is not initialized GCC should > emit > a Warning, at least when built with W=2 which sets > -Wmissing-field-initializers ? Sorry, I'm not following where you are going with this. There aren't any struct vm_unmapped_area_info users that use initializers today, so that warning won't apply in this case. Meanwhile, designated style struct initialization (which would zero new members) is very common, as well as not get anything checked by that warning. Anything with this many members is probably going to use the designated style. If we are optimizing to avoid bugs, the way this struct is used today is not great. It is essentially being used as an argument passer. Normally when a function signature changes, but a caller is missed, of course the compiler will notice loudly. But not here. So I think probably zero initializing it is safer than being setup to pass garbage. I'm trying to figure out what to do here. If I changed it so that just powerpc set the new field manually, then the convention across the kernel would be for everything to be default zero, and future other new parameters could have a greater chance of turning into garbage on powerpc. Since it could be easy to miss that powerpc was special. Would you prefer it? Or maybe I could try a new vm_unmapped_area() that takes the extra argument separately? The old callers could call the old function and not need any arch updates. It all seems strange though, because automatic zero initializing struct members is so common in the kernel. But it also wouldn't add the cleanup Kees was pointing out. Hmm. Any preference? Or maybe am I missing your point and talking nonsense?
Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Tue, Feb 27, 2024 at 12:26:05PM -0500, Frank Li wrote: > On Tue, Feb 27, 2024 at 06:00:24PM +0530, Manivannan Sadhasivam wrote: > > On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote: > > > On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote: > > > > The PCIe link can go to LINK_DOWN state in one of the following > > > > scenarios: > > > > > > > > 1. Fundamental (PERST#)/hot/warm reset > > > > 2. Link transition from L2/L3 to L0 > > > > > > From L0 to L2/l3 > > > > > > > I don't understand what you mean here. Link down won't happen while moving > > from > > L0 to L2/L3, it is the opposite. > > Strange, why there are not linkdown from L0 to L2/l3. But have linkdown > from L2/l3 to L0? when linkup happen? Any document show these? > Refer PCIe Spec 5.0, Figure 5-1 Link Power Management State Flow Diagram. - Mani > Frank > > > > > > > > > > > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose > > > > the > > > > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize > > > > them to function properly once the link comes back again. > > > > > > > > This is not a problem for drivers supporting PERST# IRQ, since they can > > > > reinitialize the registers in the PERST# IRQ callback. But for the > > > > drivers > > > > not supporting PERST#, there is no way they can reinitialize the > > > > registers > > > > other than relying on LINK_DOWN IRQ received when the link goes down. So > > > > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the > > > > non-sticky registers and also notifies the EPF drivers about link going > > > > down. > > > > > > > > This API can also be used by the drivers supporting PERST# to handle the > > > > scenario (2) mentioned above. > > > > > > > > Signed-off-by: Manivannan Sadhasivam > > > > --- > > > > drivers/pci/controller/dwc/pcie-designware-ep.c | 111 > > > > ++-- > > > > drivers/pci/controller/dwc/pcie-designware.h| 5 ++ > > > > 2 files changed, 72 insertions(+), 44 deletions(-) > > > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > index 278bdc9b2269..fed4c2936c78 100644 > > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > @@ -14,14 +14,6 @@ > > > > #include > > > > #include > > > > > > > > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) > > > > -{ > > > > - struct pci_epc *epc = ep->epc; > > > > - > > > > - pci_epc_linkup(epc); > > > > -} > > > > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup); > > > > - > > > > > > No sure why git remove this block and add these back. > > > > > > > Because, we are adding dw_pcie_ep_linkdown() API way below and it makes > > sense to > > move this API also to keep it ordered. Maybe I should've described it in > > commit > > message. > > > > - Mani > > > > > > > > > void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep) > > > > { > > > > struct pci_epc *epc = ep->epc; > > > > @@ -603,19 +595,56 @@ static unsigned int > > > > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap) > > > > return 0; > > > > } > > > > > > > > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci) > > > > +{ > > > > + unsigned int offset, ptm_cap_base; > > > > + unsigned int nbars; > > > > + u32 reg, i; > > > > + > > > > + offset = dw_pcie_ep_find_ext_capability(pci, > > > > PCI_EXT_CAP_ID_REBAR); > > > > + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, > > > > PCI_EXT_CAP_ID_PTM); > > > > + > > > > + dw_pcie_dbi_ro_wr_en(pci); > > > > + > > > > + if (offset) { > > > > + reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL); > > > > + nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> > > > > + PCI_REBAR_CTRL_NBAR_SHIFT; > > > > + > > > > + for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) > > > > + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, > > > > 0x0); > > > > + } > > > > + > > > > + /* > > > > +* PTM responder capability can be disabled only after disabling > > > > +* PTM root capability. > > > > +*/ > > > > + if (ptm_cap_base) { > > > > + dw_pcie_dbi_ro_wr_en(pci); > > > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + > > > > PCI_PTM_CAP); > > > > + reg &= ~PCI_PTM_CAP_ROOT; > > > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, > > > > reg); > > > > + > > > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + > > > > PCI_PTM_CAP); > > > > + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK); > > > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, > > > > reg); > > > > + dw_pcie_dbi_ro_wr_dis(pci); > > > > +
Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Tue, Feb 27, 2024 at 12:34:15PM -0500, Frank Li wrote: > On Tue, Feb 27, 2024 at 06:02:30PM +0530, Manivannan Sadhasivam wrote: > > On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote: > > > On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote: > > > > Now that the API is available, let's make use of it. It also handles the > > > > reinitialization of DWC non-sticky registers in addition to sending the > > > > notification to EPF drivers. > > > > > > > > Signed-off-by: Manivannan Sadhasivam > > > > --- > > > > drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +- > > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > > b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > > index 2fb8c15e7a91..4e45bc4bca45 100644 > > > > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > > @@ -640,7 +640,7 @@ static irqreturn_t > > > > qcom_pcie_ep_global_irq_thread(int irq, void *data) > > > > if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) { > > > > dev_dbg(dev, "Received Linkdown event\n"); > > > > pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN; > > > > - pci_epc_linkdown(pci->ep.epc); > > > > + dw_pcie_ep_linkdown(>ep); > > > > > > Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ? > > > why need direct call dw_pcie_ep_linkdown() here? > > > > > > > I've already justified this in the commit message. Here is the excerpt: > > > > "It also handles the reinitialization of DWC non-sticky registers in > > addition > > to sending the notification to EPF drivers." > > API function name is too similar. It is hard to know difference from API > naming. It'd better to know what function do from function name. > In reality we cannot name a function based on everything it does. The naming is mostly based on what is the primary motive of the API and here it is handling Link down event. Maybe dw_pcie_ep_handle_linkdown() would be an apt one, but that's out of scope of this series (since changing that would also require changes to other similar APIs). - Mani > Frank > > > > - Mani > > > > -- > > மணிவண்ணன் சதாசிவம் -- மணிவண்ணன் சதாசிவம்
Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers
On Tue, Feb 27, 2024 at 12:28:41PM -0500, Frank Li wrote: > On Tue, Feb 27, 2024 at 05:51:41PM +0530, Manivannan Sadhasivam wrote: > > On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote: > > > On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote: > > > > Currently, dw_pcie_ep_init_registers() API is directly called by the > > > > glue > > > > drivers requiring active refclk from host. But for the other drivers, > > > > it is > > > > getting called implicitly by dw_pcie_ep_init(). This is due to the fact > > > > that this API initializes DWC EP specific registers and that requires an > > > > active refclk (either from host or generated locally by endpoint itsef). > > > > > > > > But, this causes a discrepancy among the glue drivers. So to avoid this > > > > confusion, let's call this API directly from all glue drivers > > > > irrespective > > > > of refclk dependency. Only difference here is that the drivers requiring > > > > refclk from host will call this API only after the refclk is received > > > > and > > > > other drivers without refclk dependency will call this API right after > > > > dw_pcie_ep_init(). > > > > > > > > This change will also allow us to remove the "core_init_notifier" flag > > > > in > > > > the later commits. > > > > > > > > Signed-off-by: Manivannan Sadhasivam > > > > --- > > > > drivers/pci/controller/dwc/pci-dra7xx.c | 7 +++ > > > > drivers/pci/controller/dwc/pci-imx6.c | 8 > > > > drivers/pci/controller/dwc/pci-keystone.c | 9 + > > > > drivers/pci/controller/dwc/pci-layerscape-ep.c| 7 +++ > > > > drivers/pci/controller/dwc/pcie-designware-ep.c | 22 > > > > -- > > > > drivers/pci/controller/dwc/pcie-designware-plat.c | 9 + > > > > drivers/pci/controller/dwc/pcie-rcar-gen4.c | 12 +++- > > > > drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 - > > > > 8 files changed, 63 insertions(+), 24 deletions(-) > > > > [...] > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > index ed1f2afd830a..278bdc9b2269 100644 > > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > > > struct device *dev = pci->dev; > > > > struct platform_device *pdev = to_platform_device(dev); > > > > struct device_node *np = dev->of_node; > > > > - const struct pci_epc_features *epc_features; > > > > > > > > INIT_LIST_HEAD(>func_list); > > > > > > > > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > > > goto err_exit_epc_mem; > > > > } > > > > > > > > - if (ep->ops->get_features) { > > > > - epc_features = ep->ops->get_features(ep); > > > > - if (epc_features->core_init_notifier) > > > > - return 0; > > > > - } > > > > > > why remove this check? > > > > > > > There is no point in keeping this check since we are removing the call to > > dw_pcie_ep_init_registers() below. But I should've described this change in > > the > > commit message. > > Sperated patch will be helpful. This clean up does not related with other > change. > Well this is not a generic cleanup that could be moved to a separate patch. Due to the changes in this patch, the use of the flag becomes redundant. So it has to removed here itself. - Mani -- மணிவண்ணன் சதாசிவம்
Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info
Le 27/02/2024 à 19:07, Kees Cook a écrit : > On Tue, Feb 27, 2024 at 07:02:59AM +, Christophe Leroy wrote: >> >> >> Le 26/02/2024 à 20:09, Rick Edgecombe a écrit : >>> Future changes will need to add a field to struct vm_unmapped_area_info. >>> This would cause trouble for any archs that don't initialize the >>> struct. Currently every user sets each field, so if new fields are >>> added, the core code parsing the struct will see garbage in the new >>> field. >>> >>> It could be possible to initialize the new field for each arch to 0, but >>> instead simply inialize the field with a C99 struct inializing syntax. >> >> Why doing a full init of the struct when all fields are re-written a few >> lines after ? > > It's a nice change for robustness and makes future changes easier. It's > not actually wasteful since the compiler will throw away all redundant > stores. Well, I tend to dislike default init at declaration because it often hides missed real init. When a field is not initialized GCC should emit a Warning, at least when built with W=2 which sets -Wmissing-field-initializers ? > >> If I take the exemple of powerpc function slice_find_area_bottomup(): >> >> struct vm_unmapped_area_info info; >> >> info.flags = 0; >> info.length = len; >> info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); >> info.align_offset = 0; > > But one cleanup that is possible from explicitly zero-initializing the > whole structure would be dropping all the individual "= 0" assignments. > :) > Sure if we decide to go that direction all those 0 assignments void.
Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info
On Tue, Feb 27, 2024 at 07:02:59AM +, Christophe Leroy wrote: > > > Le 26/02/2024 à 20:09, Rick Edgecombe a écrit : > > Future changes will need to add a field to struct vm_unmapped_area_info. > > This would cause trouble for any archs that don't initialize the > > struct. Currently every user sets each field, so if new fields are > > added, the core code parsing the struct will see garbage in the new > > field. > > > > It could be possible to initialize the new field for each arch to 0, but > > instead simply inialize the field with a C99 struct inializing syntax. > > Why doing a full init of the struct when all fields are re-written a few > lines after ? It's a nice change for robustness and makes future changes easier. It's not actually wasteful since the compiler will throw away all redundant stores. > If I take the exemple of powerpc function slice_find_area_bottomup(): > > struct vm_unmapped_area_info info; > > info.flags = 0; > info.length = len; > info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); > info.align_offset = 0; But one cleanup that is possible from explicitly zero-initializing the whole structure would be dropping all the individual "= 0" assignments. :) -- Kees Cook
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
> On 27-Feb-2024, at 10:38 PM, Steven Rostedt wrote: > > On Tue, 27 Feb 2024 11:56:14 -0500 > Steven Rostedt wrote: > >> On Tue, 27 Feb 2024 22:08:18 +0530 >> Sachin Sant wrote: >> Can you apply this, and see if it triggers and if it does, print the line that has the max size? >>> >>> With this I see following trace >>> >>> [ 61.327138] [ cut here ] >>> [ 61.327159] MAX OUT OF RANGE 63492 >> >> Well I guess there you have it ;-) >> >> vsprintf() doesn't like a precision of 63492! >> >> I'll look to see what the best way to deal with this is. > > Does this fix it? > Thank You. Yup this fixes the reported problem. # ./ftracetest test.d/00basic/trace_marker.tc === Ftrace unit tests === [1] Basic tests on writing to trace_marker [PASS] [2] (instance) Basic tests on writing to trace_marker [PASS] # of passed: 2 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 # Remaining test also completed without any issues. Based on the test results Tested-by: Sachin Sant Thanks — Sachin
Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Tue, Feb 27, 2024 at 06:02:30PM +0530, Manivannan Sadhasivam wrote: > On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote: > > On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote: > > > Now that the API is available, let's make use of it. It also handles the > > > reinitialization of DWC non-sticky registers in addition to sending the > > > notification to EPF drivers. > > > > > > Signed-off-by: Manivannan Sadhasivam > > > --- > > > drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +- > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > index 2fb8c15e7a91..4e45bc4bca45 100644 > > > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > > @@ -640,7 +640,7 @@ static irqreturn_t qcom_pcie_ep_global_irq_thread(int > > > irq, void *data) > > > if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) { > > > dev_dbg(dev, "Received Linkdown event\n"); > > > pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN; > > > - pci_epc_linkdown(pci->ep.epc); > > > + dw_pcie_ep_linkdown(>ep); > > > > Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ? > > why need direct call dw_pcie_ep_linkdown() here? > > > > I've already justified this in the commit message. Here is the excerpt: > > "It also handles the reinitialization of DWC non-sticky registers in addition > to sending the notification to EPF drivers." API function name is too similar. It is hard to know difference from API naming. It'd better to know what function do from function name. Frank > > - Mani > > -- > மணிவண்ணன் சதாசிவம்
Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers
On Tue, Feb 27, 2024 at 05:51:41PM +0530, Manivannan Sadhasivam wrote: > On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote: > > On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote: > > > Currently, dw_pcie_ep_init_registers() API is directly called by the glue > > > drivers requiring active refclk from host. But for the other drivers, it > > > is > > > getting called implicitly by dw_pcie_ep_init(). This is due to the fact > > > that this API initializes DWC EP specific registers and that requires an > > > active refclk (either from host or generated locally by endpoint itsef). > > > > > > But, this causes a discrepancy among the glue drivers. So to avoid this > > > confusion, let's call this API directly from all glue drivers irrespective > > > of refclk dependency. Only difference here is that the drivers requiring > > > refclk from host will call this API only after the refclk is received and > > > other drivers without refclk dependency will call this API right after > > > dw_pcie_ep_init(). > > > > > > This change will also allow us to remove the "core_init_notifier" flag in > > > the later commits. > > > > > > Signed-off-by: Manivannan Sadhasivam > > > --- > > > drivers/pci/controller/dwc/pci-dra7xx.c | 7 +++ > > > drivers/pci/controller/dwc/pci-imx6.c | 8 > > > drivers/pci/controller/dwc/pci-keystone.c | 9 + > > > drivers/pci/controller/dwc/pci-layerscape-ep.c| 7 +++ > > > drivers/pci/controller/dwc/pcie-designware-ep.c | 22 > > > -- > > > drivers/pci/controller/dwc/pcie-designware-plat.c | 9 + > > > drivers/pci/controller/dwc/pcie-rcar-gen4.c | 12 +++- > > > drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 - > > > 8 files changed, 63 insertions(+), 24 deletions(-) > > [...] > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > index ed1f2afd830a..278bdc9b2269 100644 > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > > struct device *dev = pci->dev; > > > struct platform_device *pdev = to_platform_device(dev); > > > struct device_node *np = dev->of_node; > > > - const struct pci_epc_features *epc_features; > > > > > > INIT_LIST_HEAD(>func_list); > > > > > > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > > goto err_exit_epc_mem; > > > } > > > > > > - if (ep->ops->get_features) { > > > - epc_features = ep->ops->get_features(ep); > > > - if (epc_features->core_init_notifier) > > > - return 0; > > > - } > > > > why remove this check? > > > > There is no point in keeping this check since we are removing the call to > dw_pcie_ep_init_registers() below. But I should've described this change in > the > commit message. Sperated patch will be helpful. This clean up does not related with other change. Frank > > - Mani > > -- > மணிவண்ணன் சதாசிவம்
Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Tue, Feb 27, 2024 at 06:00:24PM +0530, Manivannan Sadhasivam wrote: > On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote: > > On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote: > > > The PCIe link can go to LINK_DOWN state in one of the following scenarios: > > > > > > 1. Fundamental (PERST#)/hot/warm reset > > > 2. Link transition from L2/L3 to L0 > > > > From L0 to L2/l3 > > > > I don't understand what you mean here. Link down won't happen while moving > from > L0 to L2/L3, it is the opposite. Strange, why there are not linkdown from L0 to L2/l3. But have linkdown from L2/l3 to L0? when linkup happen? Any document show these? Frank > > > > > > > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose > > > the > > > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize > > > them to function properly once the link comes back again. > > > > > > This is not a problem for drivers supporting PERST# IRQ, since they can > > > reinitialize the registers in the PERST# IRQ callback. But for the drivers > > > not supporting PERST#, there is no way they can reinitialize the registers > > > other than relying on LINK_DOWN IRQ received when the link goes down. So > > > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the > > > non-sticky registers and also notifies the EPF drivers about link going > > > down. > > > > > > This API can also be used by the drivers supporting PERST# to handle the > > > scenario (2) mentioned above. > > > > > > Signed-off-by: Manivannan Sadhasivam > > > --- > > > drivers/pci/controller/dwc/pcie-designware-ep.c | 111 > > > ++-- > > > drivers/pci/controller/dwc/pcie-designware.h| 5 ++ > > > 2 files changed, 72 insertions(+), 44 deletions(-) > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > index 278bdc9b2269..fed4c2936c78 100644 > > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > > @@ -14,14 +14,6 @@ > > > #include > > > #include > > > > > > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) > > > -{ > > > - struct pci_epc *epc = ep->epc; > > > - > > > - pci_epc_linkup(epc); > > > -} > > > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup); > > > - > > > > No sure why git remove this block and add these back. > > > > Because, we are adding dw_pcie_ep_linkdown() API way below and it makes sense > to > move this API also to keep it ordered. Maybe I should've described it in > commit > message. > > - Mani > > > > > > void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep) > > > { > > > struct pci_epc *epc = ep->epc; > > > @@ -603,19 +595,56 @@ static unsigned int > > > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap) > > > return 0; > > > } > > > > > > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci) > > > +{ > > > + unsigned int offset, ptm_cap_base; > > > + unsigned int nbars; > > > + u32 reg, i; > > > + > > > + offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR); > > > + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM); > > > + > > > + dw_pcie_dbi_ro_wr_en(pci); > > > + > > > + if (offset) { > > > + reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL); > > > + nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> > > > + PCI_REBAR_CTRL_NBAR_SHIFT; > > > + > > > + for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) > > > + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); > > > + } > > > + > > > + /* > > > + * PTM responder capability can be disabled only after disabling > > > + * PTM root capability. > > > + */ > > > + if (ptm_cap_base) { > > > + dw_pcie_dbi_ro_wr_en(pci); > > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); > > > + reg &= ~PCI_PTM_CAP_ROOT; > > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); > > > + > > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); > > > + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK); > > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); > > > + dw_pcie_dbi_ro_wr_dis(pci); > > > + } > > > + > > > + dw_pcie_setup(pci); > > > + dw_pcie_dbi_ro_wr_dis(pci); > > > +} > > > + > > > int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep) > > > { > > > struct dw_pcie *pci = to_dw_pcie_from_ep(ep); > > > struct dw_pcie_ep_func *ep_func; > > > struct device *dev = pci->dev; > > > struct pci_epc *epc = ep->epc; > > > - unsigned int offset, ptm_cap_base; > > > - unsigned int nbars; > > > u8 hdr_type; > > > u8 func_no; > > > - int i, ret; > > > void *addr; > > > - u32 reg; > > > + int ret; > > > > > > hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) & > > > PCI_HEADER_TYPE_MASK; > > > @@ -678,38 +707,7
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
On Tue, 27 Feb 2024 11:56:14 -0500 Steven Rostedt wrote: > On Tue, 27 Feb 2024 22:08:18 +0530 > Sachin Sant wrote: > > > > Can you apply this, and see if it triggers and if it does, print the line > > > that has the max size? > > > > > > > With this I see following trace > > > > [ 61.327138] [ cut here ] > > [ 61.327159] MAX OUT OF RANGE 63492 > > Well I guess there you have it ;-) > > vsprintf() doesn't like a precision of 63492! > > I'll look to see what the best way to deal with this is. Does this fix it? -- Steve diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff0b0a999171..e0840b94f1a2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6882,7 +6882,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, /* Used in tracing_mark_raw_write() as well */ #define FAULTED_STR "" #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ - +#ifndef SHORT_MAX +#define SHORT_MAX ((1<<15) - 1) +#endif if (tracing_disabled) return -EINVAL; @@ -6900,6 +6902,16 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; + /* +* trace_print_print() uses vsprintf() to determine the size via +* the precision format "%*.s" which can not be greater than +* a signed short. +*/ + if (size > SHORT_MAX) { + cnt -= size - SHORT_MAX; + goto again; + } + if (size > TRACE_SEQ_BUFFER_SIZE) { cnt -= size - TRACE_SEQ_BUFFER_SIZE; goto again;
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
On Tue, 27 Feb 2024 22:08:18 +0530 Sachin Sant wrote: > > Can you apply this, and see if it triggers and if it does, print the line > > that has the max size? > > > > With this I see following trace > > [ 61.327138] [ cut here ] > [ 61.327159] MAX OUT OF RANGE 63492 Well I guess there you have it ;-) vsprintf() doesn't like a precision of 63492! I'll look to see what the best way to deal with this is. -- Steve
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
> On 27-Feb-2024, at 9:54 PM, Steven Rostedt wrote: > > On Tue, 27 Feb 2024 21:38:57 +0530 > Sachin Sant wrote: > >> This warning was not triggered. > > Interesting. > >> >> I have attached .config > > This is what I was looking for: > >> # CONFIG_PPC_4K_PAGES is not set >> CONFIG_PPC_64K_PAGES=y >> CONFIG_PAGE_SIZE_64KB=y >> CONFIG_PPC_PAGE_SHIFT=16 > > So the pages are 64K in size. I wonder if this is causing an issue. > > Can you apply this, and see if it triggers and if it does, print the line > that has the max size? > With this I see following trace [ 61.327138] [ cut here ] [ 61.327159] MAX OUT OF RANGE 63492 [ 61.327167] WARNING: CPU: 1 PID: 5180 at kernel/trace/trace_output.c:1592 trace_print_print+0xf4/0x114 [ 61.327178] Modules linked in: rpadlpar_io(E) rpaphp(E) xsk_diag(E) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) bonding(E) tls(E) rfkill(E) ip_set(E) nf_tables(E) nfnetlink(E) sunrpc(E) binfmt_misc(E) pseries_rng(E) aes_gcm_p10_crypto(E) drm(E) drm_panel_orientation_quirks(E) xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) t10_pi(E) crc64_rocksoft_generic(E) cdrom(E) crc64_rocksoft(E) crc64(E) sg(E) ibmvscsi(E) scsi_transport_srp(E) ibmveth(E) vmx_crypto(E) fuse(E) [ 61.327232] CPU: 1 PID: 5180 Comm: awk Kdump: loaded Tainted: GE 6.8.0-rc5-00329-gab0a97cffa0b-dirty #37 [ 61.327238] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf06 of:IBM,FW1060.00 (NH1060_018) hv:phyp pSeries [ 61.327243] NIP: c032eb7c LR: c032eb78 CTR: [ 61.327247] REGS: c000505cf730 TRAP: 0700 Tainted: GE (6.8.0-rc5-00329-gab0a97cffa0b-dirty) [ 61.327253] MSR: 82029033 CR: 48008202 XER: 0005 [ 61.327263] CFAR: c0161e7c IRQMASK: 0 [ 61.327263] GPR00: c032eb78 c000505cf9d0 c1502700 0016 [ 61.327263] GPR04: c000505cf7e0 c000505cf7d8 0003bd8c [ 61.327263] GPR08: 0027 c003bfa58110 0001 0001 [ 61.327263] GPR12: c003b300 [ 61.327263] GPR16: c0009008 [ 61.327263] GPR20: 064c 00400cc0 c0005e7377c0 c2a6ed98 [ 61.327263] GPR24: c000900c c000900a0190 c9d50008 03b54601 [ 61.327263] GPR28: c2a711a8 c000900c f804 0002 [ 61.327307] NIP [c032eb7c] trace_print_print+0xf4/0x114 [ 61.327312] LR [c032eb78] trace_print_print+0xf0/0x114 [ 61.327317] Call Trace: [ 61.327319] [c000505cf9d0] [c032eb78] trace_print_print+0xf0/0x114 (unreliable) [ 61.327325] [c000505cfa50] [c031832c] print_trace_fmt+0x13c/0x26c [ 61.327331] [c000505cfb00] [c0328488] s_show+0x5c/0x2cc [ 61.327335] [c000505cfb90] [c0608e88] seq_read_iter+0x500/0x69c [ 61.327342] [c000505cfc70] [c0609128] seq_read+0x104/0x15c [ 61.327348] [c000505cfd10] [c05bf79c] vfs_read+0xdc/0x390 [ 61.327353] [c000505cfdc0] [c05c076c] ksys_read+0x84/0x144 [ 61.327358] [c000505cfe10] [c0033328] system_call_exception+0x138/0x330 [ 61.327365] [c000505cfe50] [c000d05c] system_call_vectored_common+0x15c/0x2ec [ 61.327372] --- interrupt: 3000 at 0x7fff97933c74 [ 61.327376] NIP: 7fff97933c74 LR: 7fff97933c74 CTR: [ 61.327381] REGS: c000505cfe80 TRAP: 3000 Tainted: GE (6.8.0-rc5-00329-gab0a97cffa0b-dirty) [ 61.327386] MSR: 8000f033 CR: 44008408 XER: [ 61.327395] IRQMASK: 0 [ 61.327395] GPR00: 0003 7fffe9723220 000106787e00 0003 [ 61.327395] GPR04: 00012f0e8150 0001 fff5 0012f0e5 [ 61.327395] GPR08: 0001 [ 61.327395] GPR12: 7fff97f4c940 00012f0c5a40 7fffe97236d8 [ 61.327395] GPR16: 00012f0e3320 00012f0d3a28 00012f0e30f0 0001 [ 61.327395] GPR20: 8001 0001 05d6 7fffe9723447 [ 61.327395] GPR24: 7fffe9723448 7fffe9723448 7fffe97233a0 [ 61.327395] GPR28: 0001066e466c 7fffe9723398 00012f0e8030 [ 61.327437] NIP [7fff97933c74] 0x7fff97933c74 [ 61.327440] LR [7fff97933c74] 0x7fff97933c74 [ 61.327443] --- interrupt: 3000 [ 61.327445] Code: 2c09 4082ffd4 7c0802a6 3c62ffe4 3921 3d420160 7fc407b4 38635608 992a493c f8010090 4be33281 6000 <0fe0> e8010090
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
On Tue, 27 Feb 2024 21:38:57 +0530 Sachin Sant wrote: > This warning was not triggered. Interesting. > > I have attached .config This is what I was looking for: > # CONFIG_PPC_4K_PAGES is not set > CONFIG_PPC_64K_PAGES=y > CONFIG_PAGE_SIZE_64KB=y > CONFIG_PPC_PAGE_SHIFT=16 So the pages are 64K in size. I wonder if this is causing an issue. Can you apply this, and see if it triggers and if it does, print the line that has the max size? -- Steve diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3e7fa44dc2b2..43e270bf8d78 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1589,6 +1589,9 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, struct trace_seq *s = >seq; int max = iter->ent_size - offsetof(struct print_entry, buf); + if (WARN_ONCE(max < 0 || max > 2000, "MAX OUT OF RANGE %d", max)) + return TRACE_TYPE_UNHANDLED; + trace_assign_type(field, iter->ent); seq_print_ip_sym(s, field->ip, flags);
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Tue, Feb 27, 2024, at 16:44, Christophe Leroy wrote: > Le 27/02/2024 à 16:40, Arnd Bergmann a écrit : >> On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote: > > > For 256K pages, powerpc has the following help. I think you should have > it too: > > The kernel will only be able to run applications that have been > compiled with '-zmax-page-size' set to 256K (the default is 64K) using > binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE > definition from 0x1 to 0x4 in older versions. I don't think we need to mention pre-2.18 binutils any more, but the rest seems useful, changed the text now to config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KiB pages have little practical value due to their extreme memory usage. The kernel will only be able to run applications that have been compiled with '-zmax-page-size' set to 256KiB (the default is 64KiB or 4KiB on most architectures). Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
Le 27/02/2024 à 16:40, Arnd Bergmann a écrit : > On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote: >> On 2024-02-26 10:14 AM, Arnd Bergmann wrote: >>> >>> +config HAVE_PAGE_SIZE_4KB >>> + bool >>> + >>> +config HAVE_PAGE_SIZE_8KB >>> + bool >>> + >>> +config HAVE_PAGE_SIZE_16KB >>> + bool >>> + >>> +config HAVE_PAGE_SIZE_32KB >>> + bool >>> + >>> +config HAVE_PAGE_SIZE_64KB >>> + bool >>> + >>> +config HAVE_PAGE_SIZE_256KB >>> + bool >>> + >>> +choice >>> + prompt "MMU page size" >> >> Should this have some generic help text (at least a warning about >> compatibility)? > > Good point. I've added some of this now, based on the mips > text with some generalizations for other architectures: > > config PAGE_SIZE_4KB > bool "4KiB pages" > depends on HAVE_PAGE_SIZE_4KB > help >This option select the standard 4KiB Linux page size and the only >available option on many architectures. Using 4KiB page size will >minimize memory consumption and is therefore recommended for low >memory systems. >Some software that is written for x86 systems makes incorrect >assumptions about the page size and only runs on 4KiB pages. > > config PAGE_SIZE_8KB > bool "8KiB pages" > depends on HAVE_PAGE_SIZE_8KB > help >This option is the only supported page size on a few older >processors, and can be slightly faster than 4KiB pages. > > config PAGE_SIZE_16KB > bool "16KiB pages" > depends on HAVE_PAGE_SIZE_16KB > help >This option is usually a good compromise between memory >consumption and performance for typical desktop and server >workloads, often saving a level of page table lookups compared >to 4KB pages as well as reducing TLB pressure and overhead of >per-page operations in the kernel at the expense of a larger >page cache. > > config PAGE_SIZE_32KB > bool "32KiB pages" > depends on HAVE_PAGE_SIZE_32KB >Using 32KiB page size will result in slightly higher performance >kernel at the price of higher memory consumption compared to >16KiB pages. This option is available only on cnMIPS cores. >Note that you will need a suitable Linux distribution to >support this. > > config PAGE_SIZE_64KB > bool "64KiB pages" > depends on HAVE_PAGE_SIZE_64KB >Using 64KiB page size will result in slightly higher performance >kernel at the price of much higher memory consumption compared to >4KiB or 16KiB pages. >This is not suitable for general-purpose workloads but the >better performance may be worth the cost for certain types of >supercomputing or database applications that work mostly with >large in-memory data rather than small files. > > config PAGE_SIZE_256KB > bool "256KiB pages" > depends on HAVE_PAGE_SIZE_256KB > help >256KB pages have little practical value due to their extreme >memory usage. For 256K pages, powerpc has the following help. I think you should have it too: The kernel will only be able to run applications that have been compiled with '-zmax-page-size' set to 256K (the default is 64K) using binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE definition from 0x1 to 0x4 in older versions.
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Tue, Feb 27, 2024, at 09:45, Geert Uytterhoeven wrote: > >> +config PAGE_SIZE_4KB >> + bool "4KB pages" > > Now you got rid of the 4000-byte ("4kB") pages and friends, please > do not replace these by Kelvin-bytes, and use the official binary > prefixes => "4 KiB". > Done, thanks. Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Mon, Feb 26, 2024, at 20:02, Christophe Leroy wrote: > Le 26/02/2024 à 17:14, Arnd Bergmann a écrit : >> From: Arnd Bergmann > > That's a nice re-factor. > > The only drawback I see is that we are loosing several interesting > arch-specific comments/help text. Don't know if there could be an easy > way to keep them. This is what I have now, trying to write it as generic as possible while still giving useful advice: config PAGE_SIZE_4KB bool "4KiB pages" depends on HAVE_PAGE_SIZE_4KB help This option select the standard 4KiB Linux page size and the only available option on many architectures. Using 4KiB page size will minimize memory consumption and is therefore recommended for low memory systems. Some software that is written for x86 systems makes incorrect assumptions about the page size and only runs on 4KiB pages. config PAGE_SIZE_8KB bool "8KiB pages" depends on HAVE_PAGE_SIZE_8KB help This option is the only supported page size on a few older processors, and can be slightly faster than 4KiB pages. config PAGE_SIZE_16KB bool "16KiB pages" depends on HAVE_PAGE_SIZE_16KB help This option is usually a good compromise between memory consumption and performance for typical desktop and server workloads, often saving a level of page table lookups compared to 4KB pages as well as reducing TLB pressure and overhead of per-page operations in the kernel at the expense of a larger page cache. config PAGE_SIZE_32KB bool "32KiB pages" depends on HAVE_PAGE_SIZE_32KB Using 32KiB page size will result in slightly higher performance kernel at the price of higher memory consumption compared to 16KiB pages. This option is available only on cnMIPS cores. Note that you will need a suitable Linux distribution to support this. config PAGE_SIZE_64KB bool "64KiB pages" depends on HAVE_PAGE_SIZE_64KB Using 64KiB page size will result in slightly higher performance kernel at the price of much higher memory consumption compared to 4KiB or 16KiB pages. This is not suitable for general-purpose workloads but the better performance may be worth the cost for certain types of supercomputing or database applications that work mostly with large in-memory data rather than small files. config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KB pages have little practical value due to their extreme memory usage. Let me know if you think some of this should be adapted further. >> >> +#define PAGE_SHIFT CONFIG_PAGE_SHIFT >> #define PAGE_SIZE (1UL << PAGE_SHIFT) >> #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) >> > > Could we move PAGE_SIZE and PAGE_MASK in a generic/core header instead > of having it duplicated for each arch ? Yes, but I'm leaving this for a follow-up series, since I had to stop somewhere and there is always room for cleanup up headers further ;-) Arnd
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
On Mon, Feb 26, 2024, at 17:55, Samuel Holland wrote: > On 2024-02-26 10:14 AM, Arnd Bergmann wrote: >> >> +config HAVE_PAGE_SIZE_4KB >> +bool >> + >> +config HAVE_PAGE_SIZE_8KB >> +bool >> + >> +config HAVE_PAGE_SIZE_16KB >> +bool >> + >> +config HAVE_PAGE_SIZE_32KB >> +bool >> + >> +config HAVE_PAGE_SIZE_64KB >> +bool >> + >> +config HAVE_PAGE_SIZE_256KB >> +bool >> + >> +choice >> +prompt "MMU page size" > > Should this have some generic help text (at least a warning about > compatibility)? Good point. I've added some of this now, based on the mips text with some generalizations for other architectures: config PAGE_SIZE_4KB bool "4KiB pages" depends on HAVE_PAGE_SIZE_4KB help This option select the standard 4KiB Linux page size and the only available option on many architectures. Using 4KiB page size will minimize memory consumption and is therefore recommended for low memory systems. Some software that is written for x86 systems makes incorrect assumptions about the page size and only runs on 4KiB pages. config PAGE_SIZE_8KB bool "8KiB pages" depends on HAVE_PAGE_SIZE_8KB help This option is the only supported page size on a few older processors, and can be slightly faster than 4KiB pages. config PAGE_SIZE_16KB bool "16KiB pages" depends on HAVE_PAGE_SIZE_16KB help This option is usually a good compromise between memory consumption and performance for typical desktop and server workloads, often saving a level of page table lookups compared to 4KB pages as well as reducing TLB pressure and overhead of per-page operations in the kernel at the expense of a larger page cache. config PAGE_SIZE_32KB bool "32KiB pages" depends on HAVE_PAGE_SIZE_32KB Using 32KiB page size will result in slightly higher performance kernel at the price of higher memory consumption compared to 16KiB pages. This option is available only on cnMIPS cores. Note that you will need a suitable Linux distribution to support this. config PAGE_SIZE_64KB bool "64KiB pages" depends on HAVE_PAGE_SIZE_64KB Using 64KiB page size will result in slightly higher performance kernel at the price of much higher memory consumption compared to 4KiB or 16KiB pages. This is not suitable for general-purpose workloads but the better performance may be worth the cost for certain types of supercomputing or database applications that work mostly with large in-memory data rather than small files. config PAGE_SIZE_256KB bool "256KiB pages" depends on HAVE_PAGE_SIZE_256KB help 256KB pages have little practical value due to their extreme memory usage. >> diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig >> index a880ee067d2e..aac46ee1a000 100644 >> --- a/arch/hexagon/Kconfig >> +++ b/arch/hexagon/Kconfig >> @@ -8,6 +8,11 @@ config HEXAGON >> select ARCH_HAS_SYNC_DMA_FOR_DEVICE >> select ARCH_NO_PREEMPT >> select DMA_GLOBAL_POOL >> +select FRAME_POINTER > > Looks like a paste error. > Fixed, thanks! I think that happened during a rebase. >> #ifdef CONFIG_PAGE_SIZE_1MB >> -#define PAGE_SHIFT 20 >> #define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB >> #endif > > The corresponding Kconfig option does not exist (and did not exist before this > patch). Yes, I noticed that as well. It's clearly harmless. Arnd
Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts
On Tue, Feb 27 2024 at 19:20, Bitao Hu wrote: > On 2024/2/27 17:26, Thomas Gleixner wrote: >> >> and then let kstat_irqs() and show_interrupts() use it. See? > > I have a concern. kstat_irqs() uses for_each_possible_cpu() for > summation. However, show_interrupts() uses for_each_online_cpu(), > which means it only outputs interrupt statistics for online cpus. > If we use for_each_possible_cpu() in show_interrupts() to calculate > 'any_count', there could be a problem with the following scenario: > If an interrupt has a count of zero on online cpus but a non-zero > count on possible cpus, then 'any_count' would not be zero, and the > statistics for that interrupt would be output, which is not the > desired behavior for show_interrupts(). Therefore, I think it's not > good to have kstat_irqs() and show_interrupts() both use the same > logic. What do you think? Good point. But you simply can have unsigned int kstat_irq_desc(struct irq_desc *desc, const struct cpumask *mask) and hand in the appropriate cpumask, which still shares the code, no? Thanks, tglx
[Bug 207129] PowerMac G4 DP (5.6.2 debug kernel + inline KASAN) freezes shortly after booting with "do_IRQ: stack overflow: 1760"
https://bugzilla.kernel.org/show_bug.cgi?id=207129 --- Comment #10 from Christophe Leroy (christophe.le...@csgroup.eu) --- I built a kernel with your .config, the problem is a size problem. PPC32 kernels are not designed to be that big. Extract from generated System.map: c2394000 D _etext c280 T _sinittext c2bf5000 B _end You need to keep the size of the kernel below 32Mbytes, or a deep work is required to enable the kernel to perform far jumps before the kernel is relocated. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH v2 5/9] mm: Initialize struct vm_unmapped_area_info
On Tue, 2024-02-27 at 07:02 +, Christophe Leroy wrote: > > It could be possible to initialize the new field for each arch to > > 0, but > > instead simply inialize the field with a C99 struct inializing > > syntax. > > Why doing a full init of the struct when all fields are re-written a > few > lines after ? > > If I take the exemple of powerpc function slice_find_area_bottomup(): > > struct vm_unmapped_area_info info; > > info.flags = 0; > info.length = len; > info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); > info.align_offset = 0; > > For me it looks better to just add: > > info.new_field = 0; /* or whatever value it needs to have */ Hi, Thanks for taking a look. Yes, I guess that should have some justification. I was thinking of two reasons: 1. No future additions of optional parameters would need to make tree wide changes like this. 2. The change is easier to review and get correct because the necessary context is within a single line. For example, in that function some of members are set within a while loop. The place you pointed seems to be the correct one, but a diff that had the new field set after: info.high_limit = addr; ...would look correct too, but not be. What is the concern with C99 initialization? FWIW, the full series also removes an indirect branch, and probably is a net win for performance in this path.
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Le 27/02/2024 à 15:00, Uwe Kleine-König a écrit : > On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote: >> >> >> Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit : >>> Hello Christophe, >>> >>> On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote: Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : > recently the spi-ppc4xx.c driver suffered from build errors and warnings > that were undetected for longer than I expected. I think it would be > very beneficial if this driver was enabled in (at least) a powerpc > allmodconfig build. > > The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is > only defined for 4xx (as these select PPC_DCR_NATIVE). > > I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, > too. I tried and failed. The best I came up without extensive doc > reading is: > > diff --git a/arch/powerpc/include/asm/dcr-native.h > b/arch/powerpc/include/asm/dcr-native.h > index a92059964579..159ab7abfe46 100644 > --- a/arch/powerpc/include/asm/dcr-native.h > +++ b/arch/powerpc/include/asm/dcr-native.h > @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int > base_data, int reg, > unsigned int val; > > spin_lock_irqsave(_ind_lock, flags); > - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { > - mtdcrx(base_addr, reg); > - val = (mfdcrx(base_data) & ~clr) | set; > - mtdcrx(base_data, val); > - } else { > - __mtdcr(base_addr, reg); > - val = (__mfdcr(base_data) & ~clr) | set; > - __mtdcr(base_data, val); > - } > + > + mtdcr(base_addr, reg); > + val = (mfdcr(base_data) & ~clr) | set; > + mtdcr(base_data, val); > + > spin_unlock_irqrestore(_ind_lock, flags); > } > > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig > index bc7021da2fe9..9a0a5e8c70c8 100644 > --- a/drivers/spi/Kconfig > +++ b/drivers/spi/Kconfig > @@ -810,7 +810,8 @@ config SPI_PL022 > > config SPI_PPC4xx > tristate "PPC4xx SPI Controller" > - depends on PPC32 && 4xx > + depends on 4xx || COMPILE_TEST > + depends on PPC32 || PPC64 > select SPI_BITBANG > help > This selects a driver for the PPC4xx SPI Controller. > > While this is a step in the right direction (I think) it's not enough to > make the driver build (but maybe make it easier to define > dcri_clrset()?) > > Could someone with more powerpc knowledge jump in and help (for the > benefit of better compile coverage of the spi driver and so less > breakage)? (If you do so based on my changes above, you don't need to > credit me for my effort, claim it as your's. I'm happy enough if the > situation improves.) What about this ? diff --git a/arch/powerpc/include/asm/dcr-mmio.h b/arch/powerpc/include/asm/dcr-mmio.h index fc6d93ef4a13..38b515afbffc 100644 --- a/arch/powerpc/include/asm/dcr-mmio.h +++ b/arch/powerpc/include/asm/dcr-mmio.h @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, out_be32(host.token + ((host.base + dcr_n) * host.stride), value); } +static inline void __dcri_clrset(int base_addr, int base_data, int reg, + unsigned clr, unsigned set) +{ +} + >>> >>> The downside of that one is that if we find a matching device where >>> dcr-mmio is used, the driver claims to work but silently fails. Is this >>> good enough? >> >> I don't know the details of DCR, but it looks like this spi-ppc4xx >> driver is really specific to 4xx, which is PPC32. >> >> Do you really need/want it to be built with allmodconfig ? >> >> Maybe it would be easier to have it work with ppc32_allmodconfig ? >> >> Or even easier with ppc44x_defconfig ? > > The reason I'd like to see it in allmodconfig is that testing > allmodconfig on several archs is the check I do for my patch series. I think for powerpc you should really check ppc32_allmodconfig in addition to allmodconfig > Also I assume I'm not the only one relying on allmodconfig for this > purpose. So in my eyes every driver that is built there is a net win. When I look into drivers/net/ethernet/ibm/emac/core.c it is not much different, they #ifdef out the call to dcri_clrset() when CONFIG_PPC_DCR_NATIVE is not defined. A possible way is to do: +static inline void __dcri_clrset(int base_addr, int base_data, int reg, +unsigned clr, unsigned set) +{ + BUILD_BUG_ON(!IS_ENABLED(CONFIG_COMPILE_TEST); +} Christophe
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
On Tue, Feb 27, 2024, at 12:12, Geert Uytterhoeven wrote: > On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann wrote: >> On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote: >> I was a bit unsure about how to best do this since there >> is not really a need for a fixed page size on nommu kernels, >> whereas the three MMU configs clearly tie the page size to >> the MMU rather than the platform. >> >> There should be no reason for coldfire to have a different >> page size from dragonball if neither of them actually uses >> hardware pages, so one of them could be changed later. > > Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept > of pages is used all over the place in Linux. > > I'm mostly worried about some Coldfire code relying on the actual value > of PAGE_SIZE in some other context. e.g. for configuring non-cacheable > regions. Right, any change here would have to be carefully tested. I would expect that a 4K page size would reduce memory consumption even on NOMMU systems that should have the same tradeoffs for representing files in the page cache and in mem_map[]. > And does this impact running nommu binaries on a system with MMU? > I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they > still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3), > or are there some subtleties to take into account? As far as I understand, binaries have to be built and linked for the largest page size they can run on, so running them on a kernel with smaller page size usually works. One notable exception is sys_mmap2(), which on most architectures takes units of 4KiB but on m68k is actually written to take PAGE_SIZE units. As Al pointed out in f8b7256096a2 ("Unify sys_mmap*"), it has always been wrong on sun3, presumably because users of that predate modern glibc. Running coldfire nommu binaries on coldfire mmu kernels would run into the same bug if either of them changes PAGE_SIZE. If you can run coldfire nommu binaries on classic m68k, that is already broken in the same way. Arnd
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
On Tue, 27 Feb 2024 15:06:18 +0530 Sachin Sant wrote: > I used this setup to again run bisect between 6.7.0 and 6.8-rc1. > Bisect points to following patch > > commit 8ec90be7f15fac42992ea821be929d3b06cd0fd9 > tracing: Allow for max buffer data size trace_marker writes Thanks, that was what I was looking for. Hmm, can you send me your config. One thing that powerpc architecture has different is that the page size can be much larger, and the calculations are based off of that. Can you try this patch and see if it triggers? Thanks, -- Steve diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3e7fa44dc2b2..c3dba537f342 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1589,6 +1589,9 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, struct trace_seq *s = >seq; int max = iter->ent_size - offsetof(struct print_entry, buf); + if (WARN_ON_ONCE(max < 0)) + return TRACE_TYPE_UNHANDLED; + trace_assign_type(field, iter->ent); seq_print_ip_sym(s, field->ip, flags);
Re: [PATCH 2/4] arch: simplify architecture specific page size configuration
On 2/26/24 17:14, Arnd Bergmann wrote: From: Arnd Bergmann arc, arm64, parisc and powerpc all have their own Kconfig symbols in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these so the common symbols are the ones that are actually used, while leaving the arhcitecture specific ones as the user visible place for configuring it, to avoid breaking user configs. Signed-off-by: Arnd Bergmann --- arch/arc/Kconfig | 3 +++ arch/arc/include/uapi/asm/page.h | 6 ++ arch/arm64/Kconfig| 29 + arch/arm64/include/asm/page-def.h | 2 +- arch/parisc/Kconfig | 3 +++ arch/parisc/include/asm/page.h| 10 +- Acked-by: Helge Deller # parisc Thanks for the cleanups! Helge
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
On Tue, Feb 27, 2024 at 01:52:07PM +, Christophe Leroy wrote: > > > Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit : > > Hello Christophe, > > > > On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote: > >> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : > >>> recently the spi-ppc4xx.c driver suffered from build errors and warnings > >>> that were undetected for longer than I expected. I think it would be > >>> very beneficial if this driver was enabled in (at least) a powerpc > >>> allmodconfig build. > >>> > >>> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is > >>> only defined for 4xx (as these select PPC_DCR_NATIVE). > >>> > >>> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, > >>> too. I tried and failed. The best I came up without extensive doc > >>> reading is: > >>> > >>> diff --git a/arch/powerpc/include/asm/dcr-native.h > >>> b/arch/powerpc/include/asm/dcr-native.h > >>> index a92059964579..159ab7abfe46 100644 > >>> --- a/arch/powerpc/include/asm/dcr-native.h > >>> +++ b/arch/powerpc/include/asm/dcr-native.h > >>> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int > >>> base_data, int reg, > >>> unsigned int val; > >>> > >>> spin_lock_irqsave(_ind_lock, flags); > >>> - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { > >>> - mtdcrx(base_addr, reg); > >>> - val = (mfdcrx(base_data) & ~clr) | set; > >>> - mtdcrx(base_data, val); > >>> - } else { > >>> - __mtdcr(base_addr, reg); > >>> - val = (__mfdcr(base_data) & ~clr) | set; > >>> - __mtdcr(base_data, val); > >>> - } > >>> + > >>> + mtdcr(base_addr, reg); > >>> + val = (mfdcr(base_data) & ~clr) | set; > >>> + mtdcr(base_data, val); > >>> + > >>> spin_unlock_irqrestore(_ind_lock, flags); > >>>} > >>> > >>> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig > >>> index bc7021da2fe9..9a0a5e8c70c8 100644 > >>> --- a/drivers/spi/Kconfig > >>> +++ b/drivers/spi/Kconfig > >>> @@ -810,7 +810,8 @@ config SPI_PL022 > >>> > >>>config SPI_PPC4xx > >>> tristate "PPC4xx SPI Controller" > >>> - depends on PPC32 && 4xx > >>> + depends on 4xx || COMPILE_TEST > >>> + depends on PPC32 || PPC64 > >>> select SPI_BITBANG > >>> help > >>> This selects a driver for the PPC4xx SPI Controller. > >>> > >>> While this is a step in the right direction (I think) it's not enough to > >>> make the driver build (but maybe make it easier to define > >>> dcri_clrset()?) > >>> > >>> Could someone with more powerpc knowledge jump in and help (for the > >>> benefit of better compile coverage of the spi driver and so less > >>> breakage)? (If you do so based on my changes above, you don't need to > >>> credit me for my effort, claim it as your's. I'm happy enough if the > >>> situation improves.) > >> > >> What about this ? > >> > >> diff --git a/arch/powerpc/include/asm/dcr-mmio.h > >> b/arch/powerpc/include/asm/dcr-mmio.h > >> index fc6d93ef4a13..38b515afbffc 100644 > >> --- a/arch/powerpc/include/asm/dcr-mmio.h > >> +++ b/arch/powerpc/include/asm/dcr-mmio.h > >> @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, > >>out_be32(host.token + ((host.base + dcr_n) * host.stride), > >> value); > >>} > >> > >> +static inline void __dcri_clrset(int base_addr, int base_data, int reg, > >> + unsigned clr, unsigned set) > >> +{ > >> +} > >> + > > > > The downside of that one is that if we find a matching device where > > dcr-mmio is used, the driver claims to work but silently fails. Is this > > good enough? > > I don't know the details of DCR, but it looks like this spi-ppc4xx > driver is really specific to 4xx, which is PPC32. > > Do you really need/want it to be built with allmodconfig ? > > Maybe it would be easier to have it work with ppc32_allmodconfig ? > > Or even easier with ppc44x_defconfig ? The reason I'd like to see it in allmodconfig is that testing allmodconfig on several archs is the check I do for my patch series. Also I assume I'm not the only one relying on allmodconfig for this purpose. So in my eyes every driver that is built there is a net win. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
On Mon, Feb 26, 2024 at 05:14:13PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann > > Most architectures only support a single hardcoded page size. In order > to ensure that each one of these sets the corresponding Kconfig symbols, > change over the PAGE_SHIFT definition to the common one and allow > only the hardware page size to be selected. > > Signed-off-by: Arnd Bergmann > --- ... > arch/s390/Kconfig | 1 + > arch/s390/include/asm/page.h | 2 +- ... > diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig > index fe565f3a3a91..b61c74c10050 100644 > --- a/arch/s390/Kconfig > +++ b/arch/s390/Kconfig > @@ -199,6 +199,7 @@ config S390 > select HAVE_MOD_ARCH_SPECIFIC > select HAVE_NMI > select HAVE_NOP_MCOUNT > + select HAVE_PAGE_SIZE_4KB > select HAVE_PCI > select HAVE_PERF_EVENTS > select HAVE_PERF_REGS > diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h > index 73b9c3bf377f..ded9548d11d9 100644 > --- a/arch/s390/include/asm/page.h > +++ b/arch/s390/include/asm/page.h > @@ -11,7 +11,7 @@ > #include > #include > > -#define _PAGE_SHIFT 12 > +#define _PAGE_SHIFT CONFIG_PAGE_SHIFT Acked-by: Heiko Carstens
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Le 27/02/2024 à 11:58, Uwe Kleine-König a écrit : > Hello Christophe, > > On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote: >> Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : >>> recently the spi-ppc4xx.c driver suffered from build errors and warnings >>> that were undetected for longer than I expected. I think it would be >>> very beneficial if this driver was enabled in (at least) a powerpc >>> allmodconfig build. >>> >>> The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is >>> only defined for 4xx (as these select PPC_DCR_NATIVE). >>> >>> I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, >>> too. I tried and failed. The best I came up without extensive doc >>> reading is: >>> >>> diff --git a/arch/powerpc/include/asm/dcr-native.h >>> b/arch/powerpc/include/asm/dcr-native.h >>> index a92059964579..159ab7abfe46 100644 >>> --- a/arch/powerpc/include/asm/dcr-native.h >>> +++ b/arch/powerpc/include/asm/dcr-native.h >>> @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int >>> base_data, int reg, >>> unsigned int val; >>> >>> spin_lock_irqsave(_ind_lock, flags); >>> - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { >>> - mtdcrx(base_addr, reg); >>> - val = (mfdcrx(base_data) & ~clr) | set; >>> - mtdcrx(base_data, val); >>> - } else { >>> - __mtdcr(base_addr, reg); >>> - val = (__mfdcr(base_data) & ~clr) | set; >>> - __mtdcr(base_data, val); >>> - } >>> + >>> + mtdcr(base_addr, reg); >>> + val = (mfdcr(base_data) & ~clr) | set; >>> + mtdcr(base_data, val); >>> + >>> spin_unlock_irqrestore(_ind_lock, flags); >>>} >>> >>> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig >>> index bc7021da2fe9..9a0a5e8c70c8 100644 >>> --- a/drivers/spi/Kconfig >>> +++ b/drivers/spi/Kconfig >>> @@ -810,7 +810,8 @@ config SPI_PL022 >>> >>>config SPI_PPC4xx >>> tristate "PPC4xx SPI Controller" >>> - depends on PPC32 && 4xx >>> + depends on 4xx || COMPILE_TEST >>> + depends on PPC32 || PPC64 >>> select SPI_BITBANG >>> help >>> This selects a driver for the PPC4xx SPI Controller. >>> >>> While this is a step in the right direction (I think) it's not enough to >>> make the driver build (but maybe make it easier to define >>> dcri_clrset()?) >>> >>> Could someone with more powerpc knowledge jump in and help (for the >>> benefit of better compile coverage of the spi driver and so less >>> breakage)? (If you do so based on my changes above, you don't need to >>> credit me for my effort, claim it as your's. I'm happy enough if the >>> situation improves.) >> >> What about this ? >> >> diff --git a/arch/powerpc/include/asm/dcr-mmio.h >> b/arch/powerpc/include/asm/dcr-mmio.h >> index fc6d93ef4a13..38b515afbffc 100644 >> --- a/arch/powerpc/include/asm/dcr-mmio.h >> +++ b/arch/powerpc/include/asm/dcr-mmio.h >> @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, >> out_be32(host.token + ((host.base + dcr_n) * host.stride), value); >>} >> >> +static inline void __dcri_clrset(int base_addr, int base_data, int reg, >> + unsigned clr, unsigned set) >> +{ >> +} >> + > > The downside of that one is that if we find a matching device where > dcr-mmio is used, the driver claims to work but silently fails. Is this > good enough? I don't know the details of DCR, but it looks like this spi-ppc4xx driver is really specific to 4xx, which is PPC32. Do you really need/want it to be built with allmodconfig ? Maybe it would be easier to have it work with ppc32_allmodconfig ? Or even easier with ppc44x_defconfig ? Christophe > >>#endif /* __KERNEL__ */ >>#endif /* _ASM_POWERPC_DCR_MMIO_H */ >> >> diff --git a/arch/powerpc/include/asm/dcr-native.h >> b/arch/powerpc/include/asm/dcr-native.h >> index a92059964579..2f6221bf5406 100644 >> --- a/arch/powerpc/include/asm/dcr-native.h >> +++ b/arch/powerpc/include/asm/dcr-native.h >> @@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int >> base_data, int reg, >> DCRN_ ## base ## _CONFIG_DATA, \ >> reg, data) >> >> -#define dcri_clrset(base, reg, clr, set)__dcri_clrset(DCRN_ ## base ## >> _CONFIG_ADDR,\ >> - DCRN_ ## base ## >> _CONFIG_DATA,\ >> - reg, clr, set) >> - >>#endif /* __ASSEMBLY__ */ >>#endif /* __KERNEL__ */ >>#endif /* _ASM_POWERPC_DCR_NATIVE_H */ >> diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h >> index 64030e3a1f30..15c123ae38a1 100644 >> --- a/arch/powerpc/include/asm/dcr.h >> +++ b/arch/powerpc/include/asm/dcr.h >> @@ -18,6 +18,9 @@ >>#include >>#endif >> >> +#define dcri_clrset(base, reg, clr, set)__dcri_clrset(DCRN_ ## base ## >>
Re: [PATCH 4/4] vdso: avoid including asm/page.h
On Mon, Feb 26, 2024 at 05:14:14PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann > > The recent change to the vdso_data_store broke building compat VDSO > on at least arm64 because it includes headers outside of the include/vdso/ > namespace: > > In file included from arch/arm64/include/asm/lse.h:5, > from arch/arm64/include/asm/cmpxchg.h:14, > from arch/arm64/include/asm/atomic.h:16, > from include/linux/atomic.h:7, > from include/asm-generic/bitops/atomic.h:5, > from arch/arm64/include/asm/bitops.h:25, > from include/linux/bitops.h:68, > from arch/arm64/include/asm/memory.h:209, > from arch/arm64/include/asm/page.h:46, > from include/vdso/datapage.h:22, > from lib/vdso/gettimeofday.c:5, > from : > arch/arm64/include/asm/atomic_ll_sc.h:298:9: error: unknown type name 'u128' > 298 | u128 full; > > Use an open-coded page size calculation based on the new CONFIG_PAGE_SHIFT > Kconfig symbol instead. > > Reported-by: Linux Kernel Functional Testing > Fixes: a0d2fcd62ac2 ("vdso/ARM: Make union vdso_data_store available for all > architectures") > Link: > https://lore.kernel.org/lkml/ca+g9fytrxxm_ko9fnpz3xarxhv7ud_yqp-teupqrnrhu+_0...@mail.gmail.com/ > Signed-off-by: Arnd Bergmann Acked-by: Catalin Marinas
Re: [PATCH 2/4] arch: simplify architecture specific page size configuration
On Mon, Feb 26, 2024 at 05:14:12PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann > > arc, arm64, parisc and powerpc all have their own Kconfig symbols > in place of the common CONFIG_PAGE_SIZE_4KB symbols. Change these > so the common symbols are the ones that are actually used, while > leaving the arhcitecture specific ones as the user visible > place for configuring it, to avoid breaking user configs. > > Signed-off-by: Arnd Bergmann For arm64: Acked-by: Catalin Marinas
Re: Kernel WARNING at lib/vsprintf.c:2721 while running ftrace kernel selftests
>> --- interrupt: c00 >> Code: f821ff91 2f89 409e0034 7c0802a6 3c62fff0 3921 3d420177 >> 3863e310 992ad6db f8010080 4b209899 6000 <0fe0> e8010080 7c0803a6 >> 2f9f >> ---[ end trace ]— >> >> This warning is seen while running test that was added by >> following commit: >> >> commit 3bf7009251f0f41cdd0188ab7b3879df81810703 >> tracing/selftests: Add test to test the trace_marker > > This adds the user space selftest that triggered this warning, but it is > not the cause of it. Could you run this test against kernel builds before > this commit. Does this test cause this to trigger on older versions of the > kernel? > Running the mentioned test against an older kernel does not trigger this warning. # uname -r 6.7.0 # # ./ftracetest test.d/00basic/trace_marker.tc === Ftrace unit tests === [1] Basic tests on writing to trace_marker [PASS] [2] (instance) Basic tests on writing to trace_marker [PASS] # of passed: 2 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 # I used this setup to again run bisect between 6.7.0 and 6.8-rc1. Bisect points to following patch commit 8ec90be7f15fac42992ea821be929d3b06cd0fd9 tracing: Allow for max buffer data size trace_marker writes — Sachin
Re: [PATCH 4/4] vdso: avoid including asm/page.h
Christophe Leroy writes: > Le 26/02/2024 à 17:14, Arnd Bergmann a écrit : >> From: Arnd Bergmann >> >> The recent change to the vdso_data_store broke building compat VDSO >> on at least arm64 because it includes headers outside of the include/vdso/ >> namespace: > > I understand that powerpc64 also has an issue, see > https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20231221120410.2226678-1-...@ellerman.id.au/ Yeah, and that patch would silently conflict with this series, which is not ideal. I could delay merging my patch above until after this series goes in, mine only fixes a fairly obscure build warning. cheers
Re: [RFC PATCH] selftest/powerpc: Add rule file to address sub-folder test fail
> On 25-Feb-2024, at 10:09 PM, Madhavan Srinivasan wrote: > > When running `make -C powerpc/pmu run_tests` from top level selftests > directory, currently this error is being reported > > make: Entering directory > '/home/maddy/linux/tools/testing/selftests/powerpc/pmu' > Makefile:40: warning: overriding recipe for target 'emit_tests' > ../../lib.mk:111: warning: ignoring old recipe for target 'emit_tests' > gcc -m64count_instructions.c ../harness.c event.c lib.c ../utils.c loop.S > -o /home/maddy/selftest_output//count_instructions > In file included from count_instructions.c:13: > event.h:12:10: fatal error: utils.h: No such file or directory > 12 | #include "utils.h" > | ^ > compilation terminated. > > This is due to missing of include path in CFLAGS. That is, CFLAGS and > GIT_VERSION macros are defined in the powerpc/ folder Makefile which > in this case not involved. > > To address the failure incase of executing specific sub-folder test directly, > a new rule file has been addded by the patch called "include.mk" under > selftest/powerpc/ folder and is linked to all Makefile of powerpc/pmu > sub-folders. > > Signed-off-by: Madhavan Srinivasan > --- Thanks for the fix Maddy. The patch fixes the reported problem for me. — Sachin
Re: [PATCH v8 09/10] PCI: qcom-ep: Use the generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Mon, Feb 26, 2024 at 12:20:41PM -0500, Frank Li wrote: > On Sat, Feb 24, 2024 at 12:24:15PM +0530, Manivannan Sadhasivam wrote: > > Now that the API is available, let's make use of it. It also handles the > > reinitialization of DWC non-sticky registers in addition to sending the > > notification to EPF drivers. > > > > Signed-off-by: Manivannan Sadhasivam > > --- > > drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > index 2fb8c15e7a91..4e45bc4bca45 100644 > > --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c > > +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c > > @@ -640,7 +640,7 @@ static irqreturn_t qcom_pcie_ep_global_irq_thread(int > > irq, void *data) > > if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) { > > dev_dbg(dev, "Received Linkdown event\n"); > > pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN; > > - pci_epc_linkdown(pci->ep.epc); > > + dw_pcie_ep_linkdown(>ep); > > Suppose pci_epc_linkdown() will call dw_pcie_ep_linkdown() ? > why need direct call dw_pcie_ep_linkdown() here? > I've already justified this in the commit message. Here is the excerpt: "It also handles the reinitialization of DWC non-sticky registers in addition to sending the notification to EPF drivers." - Mani -- மணிவண்ணன் சதாசிவம்
Re: [PATCH v8 08/10] PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle LINK_DOWN event
On Mon, Feb 26, 2024 at 12:18:18PM -0500, Frank Li wrote: > On Sat, Feb 24, 2024 at 12:24:14PM +0530, Manivannan Sadhasivam wrote: > > The PCIe link can go to LINK_DOWN state in one of the following scenarios: > > > > 1. Fundamental (PERST#)/hot/warm reset > > 2. Link transition from L2/L3 to L0 > > From L0 to L2/l3 > I don't understand what you mean here. Link down won't happen while moving from L0 to L2/L3, it is the opposite. > > > > In those cases, LINK_DOWN causes some non-sticky DWC registers to loose the > > state (like REBAR, PTM_CAP etc...). So the drivers need to reinitialize > > them to function properly once the link comes back again. > > > > This is not a problem for drivers supporting PERST# IRQ, since they can > > reinitialize the registers in the PERST# IRQ callback. But for the drivers > > not supporting PERST#, there is no way they can reinitialize the registers > > other than relying on LINK_DOWN IRQ received when the link goes down. So > > let's add a DWC generic API dw_pcie_ep_linkdown() that reinitializes the > > non-sticky registers and also notifies the EPF drivers about link going > > down. > > > > This API can also be used by the drivers supporting PERST# to handle the > > scenario (2) mentioned above. > > > > Signed-off-by: Manivannan Sadhasivam > > --- > > drivers/pci/controller/dwc/pcie-designware-ep.c | 111 > > ++-- > > drivers/pci/controller/dwc/pcie-designware.h| 5 ++ > > 2 files changed, 72 insertions(+), 44 deletions(-) > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > index 278bdc9b2269..fed4c2936c78 100644 > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > @@ -14,14 +14,6 @@ > > #include > > #include > > > > -void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) > > -{ > > - struct pci_epc *epc = ep->epc; > > - > > - pci_epc_linkup(epc); > > -} > > -EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup); > > - > > No sure why git remove this block and add these back. > Because, we are adding dw_pcie_ep_linkdown() API way below and it makes sense to move this API also to keep it ordered. Maybe I should've described it in commit message. - Mani > > > void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep) > > { > > struct pci_epc *epc = ep->epc; > > @@ -603,19 +595,56 @@ static unsigned int > > dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap) > > return 0; > > } > > > > +static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci) > > +{ > > + unsigned int offset, ptm_cap_base; > > + unsigned int nbars; > > + u32 reg, i; > > + > > + offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR); > > + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM); > > + > > + dw_pcie_dbi_ro_wr_en(pci); > > + > > + if (offset) { > > + reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL); > > + nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> > > + PCI_REBAR_CTRL_NBAR_SHIFT; > > + > > + for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) > > + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); > > + } > > + > > + /* > > +* PTM responder capability can be disabled only after disabling > > +* PTM root capability. > > +*/ > > + if (ptm_cap_base) { > > + dw_pcie_dbi_ro_wr_en(pci); > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); > > + reg &= ~PCI_PTM_CAP_ROOT; > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); > > + > > + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); > > + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK); > > + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); > > + dw_pcie_dbi_ro_wr_dis(pci); > > + } > > + > > + dw_pcie_setup(pci); > > + dw_pcie_dbi_ro_wr_dis(pci); > > +} > > + > > int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep) > > { > > struct dw_pcie *pci = to_dw_pcie_from_ep(ep); > > struct dw_pcie_ep_func *ep_func; > > struct device *dev = pci->dev; > > struct pci_epc *epc = ep->epc; > > - unsigned int offset, ptm_cap_base; > > - unsigned int nbars; > > u8 hdr_type; > > u8 func_no; > > - int i, ret; > > void *addr; > > - u32 reg; > > + int ret; > > > > hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) & > >PCI_HEADER_TYPE_MASK; > > @@ -678,38 +707,7 @@ int dw_pcie_ep_init_registers(struct dw_pcie_ep *ep) > > if (ep->ops->init) > > ep->ops->init(ep); > > > > - offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR); > > - ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM); > > - > > - dw_pcie_dbi_ro_wr_en(pci); > > - > > - if (offset) { > > - reg = dw_pcie_readl_dbi(pci,
Re: [PATCH v8 06/10] PCI: dwc: ep: Call dw_pcie_ep_init_registers() API directly from all glue drivers
On Mon, Feb 26, 2024 at 12:04:33PM -0500, Frank Li wrote: > On Sat, Feb 24, 2024 at 12:24:12PM +0530, Manivannan Sadhasivam wrote: > > Currently, dw_pcie_ep_init_registers() API is directly called by the glue > > drivers requiring active refclk from host. But for the other drivers, it is > > getting called implicitly by dw_pcie_ep_init(). This is due to the fact > > that this API initializes DWC EP specific registers and that requires an > > active refclk (either from host or generated locally by endpoint itsef). > > > > But, this causes a discrepancy among the glue drivers. So to avoid this > > confusion, let's call this API directly from all glue drivers irrespective > > of refclk dependency. Only difference here is that the drivers requiring > > refclk from host will call this API only after the refclk is received and > > other drivers without refclk dependency will call this API right after > > dw_pcie_ep_init(). > > > > This change will also allow us to remove the "core_init_notifier" flag in > > the later commits. > > > > Signed-off-by: Manivannan Sadhasivam > > --- > > drivers/pci/controller/dwc/pci-dra7xx.c | 7 +++ > > drivers/pci/controller/dwc/pci-imx6.c | 8 > > drivers/pci/controller/dwc/pci-keystone.c | 9 + > > drivers/pci/controller/dwc/pci-layerscape-ep.c| 7 +++ > > drivers/pci/controller/dwc/pcie-designware-ep.c | 22 > > -- > > drivers/pci/controller/dwc/pcie-designware-plat.c | 9 + > > drivers/pci/controller/dwc/pcie-rcar-gen4.c | 12 +++- > > drivers/pci/controller/dwc/pcie-uniphier-ep.c | 13 - > > 8 files changed, 63 insertions(+), 24 deletions(-) [...] > > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c > > b/drivers/pci/controller/dwc/pcie-designware-ep.c > > index ed1f2afd830a..278bdc9b2269 100644 > > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c > > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c > > @@ -729,7 +729,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > struct device *dev = pci->dev; > > struct platform_device *pdev = to_platform_device(dev); > > struct device_node *np = dev->of_node; > > - const struct pci_epc_features *epc_features; > > > > INIT_LIST_HEAD(>func_list); > > > > @@ -775,29 +774,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) > > goto err_exit_epc_mem; > > } > > > > - if (ep->ops->get_features) { > > - epc_features = ep->ops->get_features(ep); > > - if (epc_features->core_init_notifier) > > - return 0; > > - } > > why remove this check? > There is no point in keeping this check since we are removing the call to dw_pcie_ep_init_registers() below. But I should've described this change in the commit message. - Mani -- மணிவண்ணன் சதாசிவம்
Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts
Hi, On 2024/2/27 17:26, Thomas Gleixner wrote: On Mon, Feb 26 2024 at 10:09, Bitao Hu wrote: We could use the irq_desc::tot_count member to avoid the summation loop for interrupts which are not marked as 'PER_CPU' interrupts in 'show_interrupts'. This could reduce the time overhead of reading /proc/interrupts. "Could" is not really a technical term. Either we do or we do not. Also please provide context for your change and avoid the 'We'. OK. --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif +extern bool irq_is_nmi(struct irq_desc *desc); + If at all this wants to be in kernel/irq/internal.h. There is zero reason to expose this globally. -static bool irq_is_nmi(struct irq_desc *desc) +bool irq_is_nmi(struct irq_desc *desc) { return desc->istate & IRQS_NMI; } If at all this really wants to be a static inline in internals.h, but instead of blindly copying code this can be done smarter: unsigned int kstat_irq_desc(struct irq_desc *desc) { unsigned int sum = 0; int cpu; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); for_each_possible_cpu(cpu) sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); return sum; } and then let kstat_irqs() and show_interrupts() use it. See? I have a concern. kstat_irqs() uses for_each_possible_cpu() for summation. However, show_interrupts() uses for_each_online_cpu(), which means it only outputs interrupt statistics for online cpus. If we use for_each_possible_cpu() in show_interrupts() to calculate 'any_count', there could be a problem with the following scenario: If an interrupt has a count of zero on online cpus but a non-zero count on possible cpus, then 'any_count' would not be zero, and the statistics for that interrupt would be output, which is not the desired behavior for show_interrupts(). Therefore, I think it's not good to have kstat_irqs() and show_interrupts() both use the same logic. What do you think? With that a proper changelog would be: show_interrupts() unconditionally accumulates the per CPU interrupt statistics to determine whether an interrupt was ever raised. This can be avoided for all interrupts which are not strictly per CPU and not of type NMI because those interrupts provide already an accumulated counter. The required logic is already implemented in kstat_irqs(). Split the inner access logic out of kstat_irqs() and use it for kstat_irqs() and show_interrupts() to avoid the accumulation loop when possible. Best Regards, Bitao Hu
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
Hi Arnd, CC Greg On Tue, Feb 27, 2024 at 11:59 AM Arnd Bergmann wrote: > On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote: > >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu > >> index 9dcf245c9cbf..c777a129768a 100644 > >> --- a/arch/m68k/Kconfig.cpu > >> +++ b/arch/m68k/Kconfig.cpu > >> @@ -30,6 +30,7 @@ config COLDFIRE > >> select GENERIC_CSUM > >> select GPIOLIB > >> select HAVE_LEGACY_CLK > >> + select HAVE_PAGE_SIZE_8KB if !MMU > > > > if you would drop the !MMU-dependency here. > > > >> > >> endchoice > >> > >> @@ -45,6 +46,7 @@ config M68000 > >> select GENERIC_CSUM > >> select CPU_NO_EFFICIENT_FFS > >> select HAVE_ARCH_HASH > >> + select HAVE_PAGE_SIZE_4KB > > > > Perhaps replace this by > > > > config M68KCLASSIC > > bool "Classic M68K CPU family support" > > select HAVE_ARCH_PFN_VALID > > + select HAVE_PAGE_SIZE_4KB if !MMU > > > > so it covers all 680x0 CPUs without MMU? > > I was a bit unsure about how to best do this since there > is not really a need for a fixed page size on nommu kernels, > whereas the three MMU configs clearly tie the page size to > the MMU rather than the platform. > > There should be no reason for coldfire to have a different > page size from dragonball if neither of them actually uses > hardware pages, so one of them could be changed later. Indeed, in theory, PAGE_SIZE doesn't matter for nommu, but the concept of pages is used all over the place in Linux. I'm mostly worried about some Coldfire code relying on the actual value of PAGE_SIZE in some other context. e.g. for configuring non-cacheable regions. And does this impact running nommu binaries on a system with MMU? I.e. if nommu binaries were built with a 4 KiB PAGE_SIZE, do they still run on MMU systems with an 8 KiB PAGE_SIZE (coldfire and sun3), or are there some subtleties to take into account? Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
On Tue, Feb 27, 2024, at 09:54, Geert Uytterhoeven wrote: > Hi Arnd, >> diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu >> index 9dcf245c9cbf..c777a129768a 100644 >> --- a/arch/m68k/Kconfig.cpu >> +++ b/arch/m68k/Kconfig.cpu >> @@ -30,6 +30,7 @@ config COLDFIRE >> select GENERIC_CSUM >> select GPIOLIB >> select HAVE_LEGACY_CLK >> + select HAVE_PAGE_SIZE_8KB if !MMU > > if you would drop the !MMU-dependency here. > >> >> endchoice >> >> @@ -45,6 +46,7 @@ config M68000 >> select GENERIC_CSUM >> select CPU_NO_EFFICIENT_FFS >> select HAVE_ARCH_HASH >> + select HAVE_PAGE_SIZE_4KB > > Perhaps replace this by > > config M68KCLASSIC > bool "Classic M68K CPU family support" > select HAVE_ARCH_PFN_VALID > + select HAVE_PAGE_SIZE_4KB if !MMU > > so it covers all 680x0 CPUs without MMU? I was a bit unsure about how to best do this since there is not really a need for a fixed page size on nommu kernels, whereas the three MMU configs clearly tie the page size to the MMU rather than the platform. There should be no reason for coldfire to have a different page size from dragonball if neither of them actually uses hardware pages, so one of them could be changed later. Let me know if that makes sense to you, or you still prefer me to change it like you suggested. Arnd
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Hello Christophe, On Tue, Feb 27, 2024 at 10:25:15AM +, Christophe Leroy wrote: > Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : > > recently the spi-ppc4xx.c driver suffered from build errors and warnings > > that were undetected for longer than I expected. I think it would be > > very beneficial if this driver was enabled in (at least) a powerpc > > allmodconfig build. > > > > The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is > > only defined for 4xx (as these select PPC_DCR_NATIVE). > > > > I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, > > too. I tried and failed. The best I came up without extensive doc > > reading is: > > > > diff --git a/arch/powerpc/include/asm/dcr-native.h > > b/arch/powerpc/include/asm/dcr-native.h > > index a92059964579..159ab7abfe46 100644 > > --- a/arch/powerpc/include/asm/dcr-native.h > > +++ b/arch/powerpc/include/asm/dcr-native.h > > @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int > > base_data, int reg, > > unsigned int val; > > > > spin_lock_irqsave(_ind_lock, flags); > > - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { > > - mtdcrx(base_addr, reg); > > - val = (mfdcrx(base_data) & ~clr) | set; > > - mtdcrx(base_data, val); > > - } else { > > - __mtdcr(base_addr, reg); > > - val = (__mfdcr(base_data) & ~clr) | set; > > - __mtdcr(base_data, val); > > - } > > + > > + mtdcr(base_addr, reg); > > + val = (mfdcr(base_data) & ~clr) | set; > > + mtdcr(base_data, val); > > + > > spin_unlock_irqrestore(_ind_lock, flags); > > } > > > > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig > > index bc7021da2fe9..9a0a5e8c70c8 100644 > > --- a/drivers/spi/Kconfig > > +++ b/drivers/spi/Kconfig > > @@ -810,7 +810,8 @@ config SPI_PL022 > > > > config SPI_PPC4xx > > tristate "PPC4xx SPI Controller" > > - depends on PPC32 && 4xx > > + depends on 4xx || COMPILE_TEST > > + depends on PPC32 || PPC64 > > select SPI_BITBANG > > help > > This selects a driver for the PPC4xx SPI Controller. > > > > While this is a step in the right direction (I think) it's not enough to > > make the driver build (but maybe make it easier to define > > dcri_clrset()?) > > > > Could someone with more powerpc knowledge jump in and help (for the > > benefit of better compile coverage of the spi driver and so less > > breakage)? (If you do so based on my changes above, you don't need to > > credit me for my effort, claim it as your's. I'm happy enough if the > > situation improves.) > > What about this ? > > diff --git a/arch/powerpc/include/asm/dcr-mmio.h > b/arch/powerpc/include/asm/dcr-mmio.h > index fc6d93ef4a13..38b515afbffc 100644 > --- a/arch/powerpc/include/asm/dcr-mmio.h > +++ b/arch/powerpc/include/asm/dcr-mmio.h > @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, > out_be32(host.token + ((host.base + dcr_n) * host.stride), value); > } > > +static inline void __dcri_clrset(int base_addr, int base_data, int reg, > + unsigned clr, unsigned set) > +{ > +} > + The downside of that one is that if we find a matching device where dcr-mmio is used, the driver claims to work but silently fails. Is this good enough? > #endif /* __KERNEL__ */ > #endif /* _ASM_POWERPC_DCR_MMIO_H */ > > diff --git a/arch/powerpc/include/asm/dcr-native.h > b/arch/powerpc/include/asm/dcr-native.h > index a92059964579..2f6221bf5406 100644 > --- a/arch/powerpc/include/asm/dcr-native.h > +++ b/arch/powerpc/include/asm/dcr-native.h > @@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int > base_data, int reg, >DCRN_ ## base ## _CONFIG_DATA, \ >reg, data) > > -#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## > _CONFIG_ADDR, \ > - DCRN_ ## base ## > _CONFIG_DATA,\ > - reg, clr, set) > - > #endif /* __ASSEMBLY__ */ > #endif /* __KERNEL__ */ > #endif /* _ASM_POWERPC_DCR_NATIVE_H */ > diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h > index 64030e3a1f30..15c123ae38a1 100644 > --- a/arch/powerpc/include/asm/dcr.h > +++ b/arch/powerpc/include/asm/dcr.h > @@ -18,6 +18,9 @@ > #include > #endif > > +#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## > _CONFIG_ADDR, \ > + DCRN_ ## base ## > _CONFIG_DATA,\ > + reg, clr, set) > > /* Indirection layer for providing both NATIVE and MMIO support. */ > > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig > index ddae0fde798e..7b003c5dd613 100644 > --- a/drivers/spi/Kconfig > +++ b/drivers/spi/Kconfig
Re: [PATCH linux-next v2 1/3] kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP
On 02/26/24 at 04:00pm, Hari Bathini wrote: > struct crash_mem defined under include/linux/crash_core.h represents > a list of memory ranges. While it is used to represent memory ranges > for kdump kernel, it can also be used for other kind of memory ranges. > In fact, KEXEC_FILE_LOAD syscall in powerpc uses this structure to > represent reserved memory ranges and exclude memory ranges needed to > find the right memory regions to load kexec kernel. So, make the We may be able to refactor code to make struct crash_mem only used for crash handling, or get another data structure for loading kexec kernel. Anyway, for the time being, this is the simplest way we can take. Thanks for the work on ppc to make crash split out from the kexec code. Acked-by: Baoquan He > definition of crash_mem structure available for !CONFIG_CRASH_DUMP > case too. > > Signed-off-by: Hari Bathini > --- > > * No changes in v2. > > include/linux/crash_core.h | 12 ++-- > 1 file changed, 6 insertions(+), 6 deletions(-) > > diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h > index 23270b16e1db..d33352c2e386 100644 > --- a/include/linux/crash_core.h > +++ b/include/linux/crash_core.h > @@ -8,6 +8,12 @@ > > struct kimage; > > +struct crash_mem { > + unsigned int max_nr_ranges; > + unsigned int nr_ranges; > + struct range ranges[] __counted_by(max_nr_ranges); > +}; > + > #ifdef CONFIG_CRASH_DUMP > > int crash_shrink_memory(unsigned long new_size); > @@ -51,12 +57,6 @@ static inline unsigned int crash_get_elfcorehdr_size(void) > { return 0; } > /* Alignment required for elf header segment */ > #define ELF_CORE_HEADER_ALIGN 4096 > > -struct crash_mem { > - unsigned int max_nr_ranges; > - unsigned int nr_ranges; > - struct range ranges[] __counted_by(max_nr_ranges); > -}; > - > extern int crash_exclude_mem_range(struct crash_mem *mem, > unsigned long long mstart, > unsigned long long mend); > -- > 2.43.2 >
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Le 27/02/2024 à 09:46, Uwe Kleine-König a écrit : > Hello, > > recently the spi-ppc4xx.c driver suffered from build errors and warnings > that were undetected for longer than I expected. I think it would be > very beneficial if this driver was enabled in (at least) a powerpc > allmodconfig build. > > The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is > only defined for 4xx (as these select PPC_DCR_NATIVE). > > I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, > too. I tried and failed. The best I came up without extensive doc > reading is: > > diff --git a/arch/powerpc/include/asm/dcr-native.h > b/arch/powerpc/include/asm/dcr-native.h > index a92059964579..159ab7abfe46 100644 > --- a/arch/powerpc/include/asm/dcr-native.h > +++ b/arch/powerpc/include/asm/dcr-native.h > @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int > base_data, int reg, > unsigned int val; > > spin_lock_irqsave(_ind_lock, flags); > - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { > - mtdcrx(base_addr, reg); > - val = (mfdcrx(base_data) & ~clr) | set; > - mtdcrx(base_data, val); > - } else { > - __mtdcr(base_addr, reg); > - val = (__mfdcr(base_data) & ~clr) | set; > - __mtdcr(base_data, val); > - } > + > + mtdcr(base_addr, reg); > + val = (mfdcr(base_data) & ~clr) | set; > + mtdcr(base_data, val); > + > spin_unlock_irqrestore(_ind_lock, flags); > } > > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig > index bc7021da2fe9..9a0a5e8c70c8 100644 > --- a/drivers/spi/Kconfig > +++ b/drivers/spi/Kconfig > @@ -810,7 +810,8 @@ config SPI_PL022 > > config SPI_PPC4xx > tristate "PPC4xx SPI Controller" > - depends on PPC32 && 4xx > + depends on 4xx || COMPILE_TEST > + depends on PPC32 || PPC64 > select SPI_BITBANG > help > This selects a driver for the PPC4xx SPI Controller. > > While this is a step in the right direction (I think) it's not enough to > make the driver build (but maybe make it easier to define > dcri_clrset()?) > > Could someone with more powerpc knowledge jump in and help (for the > benefit of better compile coverage of the spi driver and so less > breakage)? (If you do so based on my changes above, you don't need to > credit me for my effort, claim it as your's. I'm happy enough if the > situation improves.) What about this ? diff --git a/arch/powerpc/include/asm/dcr-mmio.h b/arch/powerpc/include/asm/dcr-mmio.h index fc6d93ef4a13..38b515afbffc 100644 --- a/arch/powerpc/include/asm/dcr-mmio.h +++ b/arch/powerpc/include/asm/dcr-mmio.h @@ -38,6 +38,11 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, out_be32(host.token + ((host.base + dcr_n) * host.stride), value); } +static inline void __dcri_clrset(int base_addr, int base_data, int reg, +unsigned clr, unsigned set) +{ +} + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_DCR_MMIO_H */ diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index a92059964579..2f6221bf5406 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -135,10 +135,6 @@ static inline void __dcri_clrset(int base_addr, int base_data, int reg, DCRN_ ## base ## _CONFIG_DATA, \ reg, data) -#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## _CONFIG_ADDR, \ - DCRN_ ## base ## _CONFIG_DATA,\ - reg, clr, set) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_DCR_NATIVE_H */ diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h index 64030e3a1f30..15c123ae38a1 100644 --- a/arch/powerpc/include/asm/dcr.h +++ b/arch/powerpc/include/asm/dcr.h @@ -18,6 +18,9 @@ #include #endif +#define dcri_clrset(base, reg, clr, set) __dcri_clrset(DCRN_ ## base ## _CONFIG_ADDR, \ + DCRN_ ## base ## _CONFIG_DATA,\ + reg, clr, set) /* Indirection layer for providing both NATIVE and MMIO support. */ diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index ddae0fde798e..7b003c5dd613 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -810,7 +810,7 @@ config SPI_PL022 config SPI_PPC4xx tristate "PPC4xx SPI Controller" - depends on PPC32 && 4xx + depends on PPC && (4xx || COMPILE_TEST) select SPI_BITBANG help This selects a driver for the PPC4xx SPI Controller.
Re: [PATCH] powerpc/mm: Code cleanup for __hash_page_thp
Thanks for the reply. On 2024/2/27 14:07, Michael Ellerman wrote: Kunwu Chan writes: Thanks for the reply. On 2024/2/26 18:49, Michael Ellerman wrote: Kunwu Chan writes: This part was commented from commit 6d492ecc6489 ("powerpc/THP: Add code to handle HPTE faults for hugepages") in about 11 years before. If there are no plans to enable this part code in the future, we can remove this dead code. I agree the code can go. But I'd like it to be replaced with a comment explaining what the dead code was trying to say. Thanks, i'll update a new patch with the following comment: /* * No CPU has hugepages but lacks no execute, so we * don't need to worry about cpu no CPU_FTR_COHERENT_ICACHE feature case */ Maybe wait until we can get some input from Aneesh. I'm not sure the code/comment are really up to date. I won't do anything until I get a reply. I'll wait for the latest msg. cheers -- Thanks, Kunwu
Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts
On Mon, Feb 26 2024 at 10:09, Bitao Hu wrote: > We could use the irq_desc::tot_count member to avoid the summation > loop for interrupts which are not marked as 'PER_CPU' interrupts in > 'show_interrupts'. This could reduce the time overhead of reading > /proc/interrupts. "Could" is not really a technical term. Either we do or we do not. Also please provide context for your change and avoid the 'We'. > --- a/include/linux/irqdesc.h > +++ b/include/linux/irqdesc.h > @@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { } > extern struct irq_desc irq_desc[NR_IRQS]; > #endif > > +extern bool irq_is_nmi(struct irq_desc *desc); > + If at all this wants to be in kernel/irq/internal.h. There is zero reason to expose this globally. > -static bool irq_is_nmi(struct irq_desc *desc) > +bool irq_is_nmi(struct irq_desc *desc) > { > return desc->istate & IRQS_NMI; > } If at all this really wants to be a static inline in internals.h, but instead of blindly copying code this can be done smarter: unsigned int kstat_irq_desc(struct irq_desc *desc) { unsigned int sum = 0; int cpu; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); for_each_possible_cpu(cpu) sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); return sum; } and then let kstat_irqs() and show_interrupts() use it. See? With that a proper changelog would be: show_interrupts() unconditionally accumulates the per CPU interrupt statistics to determine whether an interrupt was ever raised. This can be avoided for all interrupts which are not strictly per CPU and not of type NMI because those interrupts provide already an accumulated counter. The required logic is already implemented in kstat_irqs(). Split the inner access logic out of kstat_irqs() and use it for kstat_irqs() and show_interrupts() to avoid the accumulation loop when possible. Thanks, tglx
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
Hello, On Tue, Feb 27, 2024 at 08:54:03AM +, Tudor Ambarus wrote: > On 2/27/24 08:46, Uwe Kleine-König wrote: > > recently the spi-ppc4xx.c driver suffered from build errors and warnings > > that were undetected for longer than I expected. I think it would be > > long enough so that we remove the driver altogether? I know at least one user who noticed the driver being broken because he needs it and not because a build bot stumbled. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCHv10 4/4] watchdog/softlockup: report the most frequent interrupts
在 2024/2/26 10:09, Bitao Hu 写道: When the watchdog determines that the current soft lockup is due to an interrupt storm based on CPU utilization, reporting the most frequent interrupts could be good enough for further troubleshooting. Below is an example of interrupt storm. The call tree does not provide useful information, but we can analyze which interrupt caused the soft lockup by comparing the counts of interrupts. [ 638.870231] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [swapper/9:0] [ 638.870825] CPU#9 Utilization every 4s during lockup: [ 638.871194] #1: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.871652] #2: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.872107] #3: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.872563] #4: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.873018] #5: 0% system, 0% softirq, 100% hardirq, 0% idle [ 638.873494] CPU#9 Detect HardIRQ Time exceeds 50%. Most frequent HardIRQs: [ 638.873994] #1: 330945 irq#7 [ 638.874236] #2: 31 irq#82 [ 638.874493] #3: 10 irq#10 [ 638.874744] #4: 2 irq#89 [ 638.874992] #5: 1 irq#102 ... [ 638.875313] Call trace: [ 638.875315] __do_softirq+0xa8/0x364 Signed-off-by: Bitao Hu --- kernel/watchdog.c | 115 -- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 69e72d7e461d..c9d49ae8d045 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -12,22 +12,25 @@ #define pr_fmt(fmt) "watchdog: " fmt -#include #include -#include #include +#include +#include #include +#include #include +#include #include +#include +#include #include #include + #include #include #include -#include #include -#include static DEFINE_MUTEX(watchdog_mutex); @@ -417,13 +420,104 @@ static void print_cpustat(void) } } +#define HARDIRQ_PERCENT_THRESH 50 +#define NUM_HARDIRQ_REPORT 5 +struct irq_counts { + int irq; + u32 counts; +}; + +static DEFINE_PER_CPU(bool, snapshot_taken); + +/* Tabulate the most frequent interrupts. */ +static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank) +{ + int i; + struct irq_counts new_count = {irq, counts}; + + for (i = 0; i < rank; i++) { + if (counts > irq_counts[i].counts) + swap(new_count, irq_counts[i]); + } +} + +/* + * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period, + * then the cause of softlockup might be interrupt storm. In this case, it + * would be useful to start interrupt counting. + */ +static bool need_counting_irqs(void) +{ + u8 util; + int tail = __this_cpu_read(cpustat_tail); + + tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT; + util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]); + return util > HARDIRQ_PERCENT_THRESH; +} + +static void start_counting_irqs(void) +{ + if (!__this_cpu_read(snapshot_taken)) { + kstat_snapshot_irqs(); + __this_cpu_write(snapshot_taken, true); + } +} + +static void stop_counting_irqs(void) +{ + __this_cpu_write(snapshot_taken, false); +} + +static void print_irq_counts(void) +{ + unsigned int i, count; + struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = { + {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0} + }; + + if (__this_cpu_read(snapshot_taken)) { + for_each_active_irq(i) { + count = kstat_get_irq_since_snapshot(i); + tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT); + } + + /* +* We do not want the "watchdog: " prefix on every line, +* hence we use "printk" instead of "pr_crit". +*/ + printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n", + smp_processor_id(), HARDIRQ_PERCENT_THRESH); + + for (i = 0; i < NUM_HARDIRQ_REPORT; i++) { + if (irq_counts_sorted[i].irq == -1) + break; + + printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n", + i + 1, irq_counts_sorted[i].counts, + irq_counts_sorted[i].irq); + } + + /* +* If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last +* sample_period, then we suspect the interrupt storm might be subsiding. +*/ + if (!need_counting_irqs()) + stop_counting_irqs(); + } +} + static void report_cpu_status(void) {
Re: [PATCH 3/4] arch: define CONFIG_PAGE_SIZE_*KB on all architectures
Hi Arnd, On Mon, Feb 26, 2024 at 5:15 PM Arnd Bergmann wrote: > From: Arnd Bergmann > > Most architectures only support a single hardcoded page size. In order > to ensure that each one of these sets the corresponding Kconfig symbols, > change over the PAGE_SHIFT definition to the common one and allow > only the hardware page size to be selected. > > Signed-off-by: Arnd Bergmann Thanks for your patch! > --- a/arch/m68k/Kconfig > +++ b/arch/m68k/Kconfig > @@ -84,12 +84,15 @@ config MMU > > config MMU_MOTOROLA > bool > + select HAVE_PAGE_SIZE_4KB > > config MMU_COLDFIRE > + select HAVE_PAGE_SIZE_8KB I think you can do without this... > bool > > config MMU_SUN3 > bool > + select HAVE_PAGE_SIZE_8KB > depends on MMU && !MMU_MOTOROLA && !MMU_COLDFIRE > > config ARCH_SUPPORTS_KEXEC > diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu > index 9dcf245c9cbf..c777a129768a 100644 > --- a/arch/m68k/Kconfig.cpu > +++ b/arch/m68k/Kconfig.cpu > @@ -30,6 +30,7 @@ config COLDFIRE > select GENERIC_CSUM > select GPIOLIB > select HAVE_LEGACY_CLK > + select HAVE_PAGE_SIZE_8KB if !MMU if you would drop the !MMU-dependency here. > > endchoice > > @@ -45,6 +46,7 @@ config M68000 > select GENERIC_CSUM > select CPU_NO_EFFICIENT_FFS > select HAVE_ARCH_HASH > + select HAVE_PAGE_SIZE_4KB Perhaps replace this by config M68KCLASSIC bool "Classic M68K CPU family support" select HAVE_ARCH_PFN_VALID + select HAVE_PAGE_SIZE_4KB if !MMU so it covers all 680x0 CPUs without MMU? > select LEGACY_TIMER_TICK > help > The Freescale (was Motorola) 68000 CPU is the first generation of Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: Increasing build coverage for drivers/spi/spi-ppc4xx.c
On 2/27/24 08:46, Uwe Kleine-König wrote: > recently the spi-ppc4xx.c driver suffered from build errors and warnings > that were undetected for longer than I expected. I think it would be long enough so that we remove the driver altogether?
Re: [kvm-unit-tests PATCH 03/32] powerpc: Fix stack backtrace termination
On 26/02/2024 11.11, Nicholas Piggin wrote: The backtrace handler terminates when it sees a NULL caller address, but the powerpc stack setup does not keep such a NULL caller frame at the start of the stack. This happens to work on pseries because the memory at 0 is mapped and it contains 0 at the location of the return address pointer if it were a stack frame. But this is fragile, and does not work with powernv where address 0 contains firmware instructions. Use the existing dummy frame on stack as the NULL caller, and create a new frame on stack for the entry code. Signed-off-by: Nicholas Piggin --- powerpc/cstart64.S | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) Thanks for tackling this! ... however, not doing powerpc work since years anymore, I have some ignorant questions below... diff --git a/powerpc/cstart64.S b/powerpc/cstart64.S index e18ae9a22..14ab0c6c8 100644 --- a/powerpc/cstart64.S +++ b/powerpc/cstart64.S @@ -46,8 +46,16 @@ start: add r1, r1, r31 add r2, r2, r31 + /* Zero backpointers in initial stack frame so backtrace() stops */ + li r0,0 + std r0,0(r1) 0(r1) is the back chain pointer ... + std r0,16(r1) ... but what is 16(r1) ? I suppose that should be the "LR save word" ? But isn't that at 8(r1) instead?? (not sure whether I'm looking at the right ELF abi spec right now...) Anyway, a comment in the source would be helpful here. + + /* Create entry frame */ + stdur1,-INT_FRAME_SIZE(r1) Since we already create an initial frame via stackptr from powerpc/flat.lds, do we really need to create this additional one here? Or does the one from flat.lds have to be completely empty, i.e. also no DTB pointer in it? /* save DTB pointer */ - std r3, 56(r1) + SAVE_GPR(3,r1) Isn't SAVE_GPR rather meant for the interrupt frame, not for the normal C calling convention frames? Sorry for asking dumb questions ... I still have a hard time understanding the changes here... :-/ /* * Call relocate. relocate is C code, but careful to not use @@ -101,7 +109,7 @@ start: stw r4, 0(r3) /* complete setup */ -1: ld r3, 56(r1) +1: REST_GPR(3, r1) bl setup /* run the test */ Thomas
Increasing build coverage for drivers/spi/spi-ppc4xx.c
Hello, recently the spi-ppc4xx.c driver suffered from build errors and warnings that were undetected for longer than I expected. I think it would be very beneficial if this driver was enabled in (at least) a powerpc allmodconfig build. The challenge to do so is that spi-ppc4xx.c uses dcri_clrset() which is only defined for 4xx (as these select PPC_DCR_NATIVE). I wonder if dcri_clrset() could be defined for the PPC_DCR_MMIO case, too. I tried and failed. The best I came up without extensive doc reading is: diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index a92059964579..159ab7abfe46 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -115,15 +115,11 @@ static inline void __dcri_clrset(int base_addr, int base_data, int reg, unsigned int val; spin_lock_irqsave(_ind_lock, flags); - if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) { - mtdcrx(base_addr, reg); - val = (mfdcrx(base_data) & ~clr) | set; - mtdcrx(base_data, val); - } else { - __mtdcr(base_addr, reg); - val = (__mfdcr(base_data) & ~clr) | set; - __mtdcr(base_data, val); - } + + mtdcr(base_addr, reg); + val = (mfdcr(base_data) & ~clr) | set; + mtdcr(base_data, val); + spin_unlock_irqrestore(_ind_lock, flags); } diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index bc7021da2fe9..9a0a5e8c70c8 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -810,7 +810,8 @@ config SPI_PL022 config SPI_PPC4xx tristate "PPC4xx SPI Controller" - depends on PPC32 && 4xx + depends on 4xx || COMPILE_TEST + depends on PPC32 || PPC64 select SPI_BITBANG help This selects a driver for the PPC4xx SPI Controller. While this is a step in the right direction (I think) it's not enough to make the driver build (but maybe make it easier to define dcri_clrset()?) Could someone with more powerpc knowledge jump in and help (for the benefit of better compile coverage of the spi driver and so less breakage)? (If you do so based on my changes above, you don't need to credit me for my effort, claim it as your's. I'm happy enough if the situation improves.) Thanks Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCH 1/4] arch: consolidate existing CONFIG_PAGE_SIZE_*KB definitions
Hi Arnd, On Mon, Feb 26, 2024 at 5:14 PM Arnd Bergmann wrote: > From: Arnd Bergmann > > These four architectures define the same Kconfig symbols for configuring > the page size. Move the logic into a common place where it can be shared > with all other architectures. > > Signed-off-by: Arnd Bergmann Thanks for your patch! > --- a/arch/Kconfig > +++ b/arch/Kconfig > +config PAGE_SIZE_4KB > + bool "4KB pages" Now you got rid of the 4000-byte ("4kB") pages and friends, please do not replace these by Kelvin-bytes, and use the official binary prefixes => "4 KiB". > + depends on HAVE_PAGE_SIZE_4KB Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH v13 09/16] media: uapi: Define audio sample format fourcc type
On 27/02/2024 04:44, Shengjiu Wang wrote: > On Mon, Feb 26, 2024 at 9:55 PM Nicolas Dufresne wrote: >> >> Le lundi 26 février 2024 à 16:28 +0800, Shengjiu Wang a écrit : >>> The audio sample format definition is from alsa, >>> the header file is include/uapi/sound/asound.h, but >>> don't include this header file directly, because in >>> user space, there is another copy in alsa-lib. >>> There will be conflict in userspace for include >>> videodev2.h & asound.h and asoundlib.h >>> >>> Here still use the fourcc format. >> >> I'd like to join Mauro's voice that duplicating the audio formats is a bad >> idea. >> We have the same issues with video formats when you look at V4L2 vs DRM. >> You're >> rationale is that videodev2.h will be ambiguous if it includes asound.h, but >> looking at this change, there is no reason why you need to include asound.h >> in >> videodev2.h at all. The format type can be abstracted out with a uint32 in >> the >> API, and then it would be up to the users to include and use the appropriate >> formats IDs. >> > > Resend for the plain text issue > > Thanks. > > There is another reason mentioned by Hans: > > " > The problem is that within V4L2 we use fourcc consistently to describe a > format, including in VIDIOC_ENUM_FMT. And the expectation is that the fourcc > can be printed to a human readable string (there is even a printk format for > that these days). > > But the pcm values are all small integers (and can even be 0!), and > printing the fourcc will give garbage. It doesn't work well at all > with the V4L2 API. But by having a straightforward conversion between the > pcm identifier and a fourcc it was really easy to deal with this. > > There might even be applications today that call VIDIOC_ENUM_FMT to see > what is supported and fail if it is not a proper fourcc is returned. > > It will certainly report nonsense in v4l_print_fmtdesc() (v4l2-ioctl.c). > > One of the early versions of this patch series did precisely what you request, > but it just doesn't work well within the V4L2 uAPI. > " Right, and remember that the fourcc codes are really meant to be shown as characters. v4l2-ctl and friends all use this, and we all talk about them as such (NV12, YUYV, etc.). If I would just stick in the standard PCM IDs, then they would all show up as ''. >>> +/* >>> + * Audio-data formats >>> + * All these audio formats use a fourcc starting with 'AU' >>> + * followed by the SNDRV_PCM_FORMAT_ value from asound.h. >>> + */ >>> +#define V4L2_AUDIO_FMT_S8v4l2_fourcc('A', 'U', '0', >>> '0') >>> +#define V4L2_AUDIO_FMT_S16_LEv4l2_fourcc('A', 'U', >>> '0', '2') >>> +#define V4L2_AUDIO_FMT_U16_LEv4l2_fourcc('A', 'U', >>> '0', '4') >>> +#define V4L2_AUDIO_FMT_S24_LEv4l2_fourcc('A', 'U', >>> '0', '6') >>> +#define V4L2_AUDIO_FMT_U24_LEv4l2_fourcc('A', 'U', >>> '0', '8') >>> +#define V4L2_AUDIO_FMT_S32_LEv4l2_fourcc('A', 'U', >>> '1', '0') >>> +#define V4L2_AUDIO_FMT_U32_LEv4l2_fourcc('A', 'U', >>> '1', '2') >>> +#define V4L2_AUDIO_FMT_FLOAT_LE v4l2_fourcc('A', 'U', >>> '1', '4') >>> +#define V4L2_AUDIO_FMT_IEC958_SUBFRAME_LEv4l2_fourcc('A', 'U', '1', >>> '8') >>> +#define V4L2_AUDIO_FMT_S24_3LE v4l2_fourcc('A', 'U', >>> '3', '2') >>> +#define V4L2_AUDIO_FMT_U24_3LE v4l2_fourcc('A', 'U', >>> '3', '4') >>> +#define V4L2_AUDIO_FMT_S20_3LE v4l2_fourcc('A', 'U', >>> '3', '6') >>> +#define V4L2_AUDIO_FMT_U20_3LE v4l2_fourcc('A', 'U', >>> '3', '8') >>> + >>> +#define v4l2_fourcc_to_audfmt(fourcc)\ >>> + (__force snd_pcm_format_t)(fourcc) >> 16) & 0xff) - '0') * 10 \ >>> ++ fourcc) >> 24) & 0xff) - '0')) >>> + We did add a #define to go from a fourcc to an audio format. Would it help to add a corresponding v4l2_audfmt_to_fourcc define as well? #define v4l2_audfmt_to_fourcc(audfmt) \ v4l2_fourcc('A', 'U', '0' + (audfmt) / 10, '0' + (audfmt) % 10) I would have no problem with that. The mapping between the audio format and the fourcc is very simple, it is just converted to something that can be safely shown as ASCII characters. Regards, Hans >>> /* priv field value to indicates that subsequent fields are valid. */ >>> #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe >>> >>