Re: [PATCH v2 5/6] powerpc/smp: Add read_mostly attribute

2023-10-18 Thread Michael Ellerman
Srikar Dronamraju  writes:
> There are some variables that are only updated at boot time.
> So add read_mostly attribute to such variables

If they're only updated at boot time then __ro_after_init would be the
better annotation.

cheers

> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 29da9262cb17..b1eb11a66902 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -77,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
>  #endif
>  
>  struct task_struct *secondary_current;
> -bool has_big_cores;
> -bool coregroup_enabled;
> -bool thread_group_shares_l2;
> -bool thread_group_shares_l3;
> +bool has_big_cores __read_mostly;
> +bool coregroup_enabled __read_mostly;
> +bool thread_group_shares_l2 __read_mostly;
> +bool thread_group_shares_l3 __read_mostly;
>  
>  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
>  DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
> @@ -987,7 +987,7 @@ static int __init init_thread_group_cache_map(int cpu, 
> int cache_property)
>   return 0;
>  }
>  
> -static bool shared_caches;
> +static bool shared_caches __read_mostly;
>  DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
>  
>  #ifdef CONFIG_SCHED_SMT
> -- 
> 2.31.1


Re: [PATCH v2 4/6] powerpc/smp: Disable MC domain for shared processor

2023-10-18 Thread Michael Ellerman
Srikar Dronamraju  writes:
> Like L2-cache info, coregroup information which is used to determine MC
> sched domains is only present on dedicated LPARs. i.e PowerVM doesn't
> export coregroup information for shared processor LPARs. Hence disable
> creating MC domains on shared LPAR Systems.
>
> Signed-off-by: Srikar Dronamraju 
> ---
>  arch/powerpc/kernel/smp.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 498c2d51fc20..29da9262cb17 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1046,6 +1046,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu)
>  
>  static bool has_coregroup_support(void)
>  {
> + /* Coregroup identification not available on shared systems */
> + if (is_shared_processor())
> + return 0;

That will catch guests running under KVM too right? Do we want that?

>   return coregroup_enabled;

What does coregroup_enabled mean now?

I'd rather this was actually checking the presence of something, rather
than just hard coding that shared processor means no coregroup support.

cheers


Re: [PATCH v2 3/6] powerpc/smp: Move shared_processor static key to smp.h

2023-10-18 Thread Michael Ellerman
Srikar Dronamraju  writes:
> The ability to detect if the system is running in a shared processor
> mode is helpful in few more generic cases not just in
> paravirtualization.
> For example: At boot time, different scheduler/ topology flags may be
> set based on the processor mode. Hence move it to a more generic file.

I'd rather you just included paravirt.h in the few files where you need it.

cheers

> diff --git a/arch/powerpc/include/asm/paravirt.h 
> b/arch/powerpc/include/asm/paravirt.h
> index 0372b0093f72..cf83e837a571 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -15,13 +15,6 @@
>  #include 
>  #include 
>  
> -DECLARE_STATIC_KEY_FALSE(shared_processor);
> -
> -static inline bool is_shared_processor(void)
> -{
> - return static_branch_unlikely(&shared_processor);
> -}
> -
>  #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
>  extern struct static_key paravirt_steal_enabled;
>  extern struct static_key paravirt_steal_rq_enabled;
> @@ -77,11 +70,6 @@ static inline bool is_vcpu_idle(int vcpu)
>   return lppaca_of(vcpu).idle;
>  }
>  #else
> -static inline bool is_shared_processor(void)
> -{
> - return false;
> -}
> -
>  static inline u32 yield_count_of(int cpu)
>  {
>   return 0;
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index 576d0e15..08631b2a4528 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -34,6 +34,20 @@ extern bool coregroup_enabled;
>  extern int cpu_to_chip_id(int cpu);
>  extern int *chip_id_lookup_table;
>  
> +#ifdef CONFIG_PPC_SPLPAR
> +DECLARE_STATIC_KEY_FALSE(shared_processor);
> +
> +static inline bool is_shared_processor(void)
> +{
> + return static_branch_unlikely(&shared_processor);
> +}
> +#else
> +static inline bool is_shared_processor(void)
> +{
> + return false;
> +}
> +#endif
> +
>  DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
>  DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
>  DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
> -- 
> 2.31.1


Re: [PATCH v2 2/6] powerpc/smp: Enable Asym packing for cores on shared processor

2023-10-18 Thread Michael Ellerman
Srikar Dronamraju  writes:
> If there are shared processor LPARs, underlying Hypervisor can have more
> virtual cores to handle than actual physical cores.
>
> Starting with Power 9, a core has 2 nearly independent thread groups.

You need to be clearer here that you're talking about "big cores", not
SMT4 cores as seen on bare metal systems.

> On a shared processors LPARs, it helps to pack threads to lesser number
> of cores so that the overall system performance and utilization
> improves. PowerVM schedules at a core level. Hence packing to fewer
> cores helps.
>
> For example: Lets says there are two 8-core Shared LPARs that are
> actually sharing a 8 Core shared physical pool, each running 8 threads
> each. Then Consolidating 8 threads to 4 cores on each LPAR would help
> them to perform better. This is because each of the LPAR will get
> 100% time to run applications and there will no switching required by
> the Hypervisor.
>
> To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.

.. when the system is running in shared processor mode and has big cores.

cheers

> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 37c41297c9ce..498c2d51fc20 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
>   */
>  static int powerpc_shared_cache_flags(void)
>  {
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
> +
>   return SD_SHARE_PKG_RESOURCES;
>  }
>  
> +static int powerpc_shared_proc_flags(void)
> +{
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_ASYM_PACKING;
> +
> + return 0;
> +}
> +
>  /*
>   * We can't just pass cpu_l2_cache_mask() directly because
>   * returns a non-const pointer and the compiler barfs on that.
> @@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level 
> powerpc_topology[] = {
>   { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
>  #endif
>   { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> - { cpu_mc_mask, SD_INIT_NAME(MC) },
> - { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> + { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> + { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
>   { NULL, },
>  };
>  
> @@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
>   if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
>   pr_info_once("Enabling Asymmetric SMT scheduling\n");
>   static_branch_enable(&powerpc_asym_packing);
> + } else if (is_shared_processor() && has_big_cores) {
> + static_branch_enable(&powerpc_asym_packing);
>   }
>  
>  #ifdef CONFIG_SCHED_SMT
> -- 
> 2.31.1


Re: [PATCH v2 1/6] powerpc/smp: Cache CPU has Asymmetric SMP

2023-10-18 Thread Michael Ellerman
Srikar Dronamraju  writes:
> Currently cpu feature flag is checked whenever powerpc_smt_flags gets
> called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
> on the processor and all processors will either have this set or will
> have it unset.

The cpu_has_feature() test is implemented with a static key.

So AFAICS this is just replacing one static key with another?

I see that you use the new static key in subsequent patches. But
couldn't those just use the existing cpu feature test?

Anyway I'd be interested to see how the generated code differs
before/after this.

cheers

> Hence only check for the feature flag once and cache it to be used
> subsequently. This commit will help avoid a branch in powerpc_smt_flags
>
> Signed-off-by: Srikar Dronamraju 
> ---
> Changelog:
> v1->v2: Using static keys instead of a variable.
> Using pr_info_once instead of printk
>
>  arch/powerpc/kernel/smp.c | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 5826f5108a12..37c41297c9ce 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -988,18 +988,16 @@ static int __init init_thread_group_cache_map(int cpu, 
> int cache_property)
>  }
>  
>  static bool shared_caches;
> +DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
>  
>  #ifdef CONFIG_SCHED_SMT
>  /* cpumask of CPUs with asymmetric SMT dependency */
>  static int powerpc_smt_flags(void)
>  {
> - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | 
> SD_ASYM_PACKING;
>  
> - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
> - flags |= SD_ASYM_PACKING;
> - }
> - return flags;
> + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
>  }
>  #endif
>  
> @@ -1686,6 +1684,11 @@ static void __init fixup_topology(void)
>  {
>   int i;
>  
> + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> + pr_info_once("Enabling Asymmetric SMT scheduling\n");
> + static_branch_enable(&powerpc_asym_packing);
> + }
> +
>  #ifdef CONFIG_SCHED_SMT
>   if (has_big_cores) {
>   pr_info("Big cores detected but using small core scheduling\n");
> -- 
> 2.31.1


Re: [PATCH] powerpc/perf: Optimize find_alternatives_list() using binary search

2023-10-18 Thread Kuan-Wei Chiu
On Thu, Oct 19, 2023 at 12:41:45PM +1100, Michael Ellerman wrote:
> Kuan-Wei Chiu  writes:
> > This patch improves the performance of event alternative lookup by
> > replacing the previous linear search with a more efficient binary
> > search. This change reduces the time complexity for the search process
> > from O(n) to O(log(n)). A pre-sorted table of event values and their
> > corresponding indices has been introduced to expedite the search
> > process.
> 
> Thanks for the patch.
> 
> How did you test this? I assume you don't have a Power6 machine lying
> around? :)
> 
> cheers
> 

I indeed do not have a Power6 machine for testing. Therefore, I designed
a simple unit test [1] to verify the functionality of the patch. In this
test, I ran a loop from 0 to UINT_MAX, using these values as inputs to
compare the return values of the original function with the new function
I implemented, which utilizes binary search. If you have any suggestions
for a more suitable testing method, please let me know. I would greatly
appreciate your feedback.

Thanks,
Kuan-Wei Chiu

[1]:
/* return 0 on success and return non-zero on failure */
int test()
{
u64 event = 0;
for (u64 event = 0; event <= UINT_MAX; event++) {
/* result of the current function in the linux kernel */
int result_old = find_alternatives_list(event);
/* result of the new function using binary search */
int result_new = find_alternatives_list_new(event);

if (result_old != result_new)
return 1;
}
return 0;
}


> > diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
> > index 5729b6e059de..b6030ea130eb 100644
> > --- a/arch/powerpc/perf/power6-pmu.c
> > +++ b/arch/powerpc/perf/power6-pmu.c
> > @@ -335,25 +335,34 @@ static const unsigned int 
> > event_alternatives[][MAX_ALT] = {
> > { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
> >  };
> >  
> > -/*
> > - * This could be made more efficient with a binary search on
> > - * a presorted list, if necessary
> > - */
> >  static int find_alternatives_list(u64 event)
> >  {
> > -   int i, j;
> > -   unsigned int alt;
> > -
> > -   for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
> > -   if (event < event_alternatives[i][0])
> > -   return -1;
> > -   for (j = 0; j < MAX_ALT; ++j) {
> > -   alt = event_alternatives[i][j];
> > -   if (!alt || event < alt)
> > -   break;
> > -   if (event == alt)
> > -   return i;
> > -   }
> > +   const unsigned int presort_event_table[] = {
> > +   0x0130e8, 0x080080, 0x080088, 0x1a, 0x1b, 0x1d, 
> > 0x1e,
> > +   0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 
> > 0x1000f8,
> > +   0x1000fc, 0x28, 0x2e, 0x200010, 0x200012, 0x200054, 
> > 0x2000f0,
> > +   0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 
> > 0x2000fe,
> > +   0x2d0030, 0x3a, 0x3c, 0x300010, 0x300012, 0x30001a, 
> > 0x300056,
> > +   0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 
> > 0x46,
> > +   0x47, 0x4a, 0x4e, 0x400010, 0x400018, 0x400056, 
> > 0x4000f0,
> > +   0x4000f8, 0x65};
> > +   const unsigned int event_index_table[] = {
> > +   0,  1,  2,  3,  4,  1, 5,  6,  7,  8,  9,  10, 11, 12, 13, 12, 
> > 14,
> > +   7,  15, 2,  9,  16, 3, 4,  0,  17, 10, 18, 19, 20, 1,  17, 15, 
> > 19,
> > +   18, 2,  16, 21, 8,  0, 22, 13, 14, 11, 21, 5,  20, 22, 1,  6,  
> > 3};
> > +   int lo = 0;
> > +   int hi = ARRAY_SIZE(presort_event_table) - 1;
> > +
> > +   while (lo <= hi) {
> > +   int mid = lo + (hi - lo) / 2;
> > +   unsigned int alt = presort_event_table[mid];
> > +
> > +   if (alt < event)
> > +   lo = mid + 1;
> > +   else if (alt > event)
> > +   hi = mid - 1;
> > +   else
> > +   return event_index_table[mid];
> > }
> > return -1;
> >  }
> > -- 
> > 2.25.1


Re: [PATCH] powerpc/perf: Optimize find_alternatives_list() using binary search

2023-10-18 Thread Michael Ellerman
Kuan-Wei Chiu  writes:
> This patch improves the performance of event alternative lookup by
> replacing the previous linear search with a more efficient binary
> search. This change reduces the time complexity for the search process
> from O(n) to O(log(n)). A pre-sorted table of event values and their
> corresponding indices has been introduced to expedite the search
> process.

Thanks for the patch.

How did you test this? I assume you don't have a Power6 machine lying
around? :)

cheers

> diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
> index 5729b6e059de..b6030ea130eb 100644
> --- a/arch/powerpc/perf/power6-pmu.c
> +++ b/arch/powerpc/perf/power6-pmu.c
> @@ -335,25 +335,34 @@ static const unsigned int event_alternatives[][MAX_ALT] 
> = {
>   { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
>  };
>  
> -/*
> - * This could be made more efficient with a binary search on
> - * a presorted list, if necessary
> - */
>  static int find_alternatives_list(u64 event)
>  {
> - int i, j;
> - unsigned int alt;
> -
> - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
> - if (event < event_alternatives[i][0])
> - return -1;
> - for (j = 0; j < MAX_ALT; ++j) {
> - alt = event_alternatives[i][j];
> - if (!alt || event < alt)
> - break;
> - if (event == alt)
> - return i;
> - }
> + const unsigned int presort_event_table[] = {
> + 0x0130e8, 0x080080, 0x080088, 0x1a, 0x1b, 0x1d, 
> 0x1e,
> + 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 
> 0x1000f8,
> + 0x1000fc, 0x28, 0x2e, 0x200010, 0x200012, 0x200054, 
> 0x2000f0,
> + 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 
> 0x2000fe,
> + 0x2d0030, 0x3a, 0x3c, 0x300010, 0x300012, 0x30001a, 
> 0x300056,
> + 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 
> 0x46,
> + 0x47, 0x4a, 0x4e, 0x400010, 0x400018, 0x400056, 
> 0x4000f0,
> + 0x4000f8, 0x65};
> + const unsigned int event_index_table[] = {
> + 0,  1,  2,  3,  4,  1, 5,  6,  7,  8,  9,  10, 11, 12, 13, 12, 
> 14,
> + 7,  15, 2,  9,  16, 3, 4,  0,  17, 10, 18, 19, 20, 1,  17, 15, 
> 19,
> + 18, 2,  16, 21, 8,  0, 22, 13, 14, 11, 21, 5,  20, 22, 1,  6,  
> 3};
> + int lo = 0;
> + int hi = ARRAY_SIZE(presort_event_table) - 1;
> +
> + while (lo <= hi) {
> + int mid = lo + (hi - lo) / 2;
> + unsigned int alt = presort_event_table[mid];
> +
> + if (alt < event)
> + lo = mid + 1;
> + else if (alt > event)
> + hi = mid - 1;
> + else
> + return event_index_table[mid];
>   }
>   return -1;
>  }
> -- 
> 2.25.1


[PATCH v2] mtd: powernv_flash: check return value of devm_kasprintf()

2023-10-18 Thread Yi Yang
devm_kasprintf() returns a pointer to dynamically allocated memory
which can be NULL upon failure. Ensure the allocation was successful by
checking the pointer validity.

Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of device_node.name")
Signed-off-by: Yi Yang 
---
 drivers/mtd/devices/powernv_flash.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
index 66044f4f5bad..956a79c739e5 100644
--- a/drivers/mtd/devices/powernv_flash.c
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -207,6 +207,10 @@ static int powernv_flash_set_driver_info(struct device 
*dev,
 * get them
 */
mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+   if (!mtd->name) {
+   dev_err(dev, "failed to allocate mtd->name\n");
+   return -ENOMEM;
+   }
mtd->type = MTD_NORFLASH;
mtd->flags = MTD_WRITEABLE;
mtd->size = size;
-- 
2.25.1



Re: [PATCH] mtd: powernv_flash: check return value of devm_kasprintf()

2023-10-18 Thread yiyang (D)

On 2023/10/19 9:16, Michael Ellerman wrote:

Yi Yang  writes:

The devm_kasprintf() returns a pointer to dynamically allocated memory.
that will return NULL when allocate failed.
Fix it by check return value of devm_kasprintf().

Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of device_node.name")
Signed-off-by: Yi Yang 
---
  drivers/mtd/devices/powernv_flash.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
index 66044f4f5bad..b305e555ddbf 100644
--- a/drivers/mtd/devices/powernv_flash.c
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -207,6 +207,10 @@ static int powernv_flash_set_driver_info(struct device 
*dev,
 * get them
 */
mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+   if (mtd->name) {


That seems like the wrong check?

cheers


You're right, I'm fixing this in v2 patch.

Thanks,
Yi Yang


+   dev_err(dev, "failed to allocate mtd->name\n");
+   return -ENOMEM;
+   }
mtd->type = MTD_NORFLASH;
mtd->flags = MTD_WRITEABLE;
mtd->size = size;
--
2.25.1

.





Re: [PATCH] mtd: powernv_flash: check return value of devm_kasprintf()

2023-10-18 Thread Michael Ellerman
Yi Yang  writes:
> The devm_kasprintf() returns a pointer to dynamically allocated memory.
> that will return NULL when allocate failed.
> Fix it by check return value of devm_kasprintf().
>
> Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of 
> device_node.name")
> Signed-off-by: Yi Yang 
> ---
>  drivers/mtd/devices/powernv_flash.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/mtd/devices/powernv_flash.c 
> b/drivers/mtd/devices/powernv_flash.c
> index 66044f4f5bad..b305e555ddbf 100644
> --- a/drivers/mtd/devices/powernv_flash.c
> +++ b/drivers/mtd/devices/powernv_flash.c
> @@ -207,6 +207,10 @@ static int powernv_flash_set_driver_info(struct device 
> *dev,
>* get them
>*/
>   mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
> + if (mtd->name) {

That seems like the wrong check?

cheers

> + dev_err(dev, "failed to allocate mtd->name\n");
> + return -ENOMEM;
> + }
>   mtd->type = MTD_NORFLASH;
>   mtd->flags = MTD_WRITEABLE;
>   mtd->size = size;
> -- 
> 2.25.1


Re: [PATCH v2] powerpc/paravirt: Improve vcpu_is_preempted

2023-10-18 Thread Michael Ellerman
Hi Srikar,

Srikar Dronamraju  writes:
> PowerVM Hypervisor dispatches on a whole core basis. In a shared LPAR, a
> CPU from a core that is CEDED or preempted may have a larger latency. In
> such a scenario, its preferable to choose a different CPU to run.
>
> If one of the CPUs in the core is active, i.e neither CEDED nor
> preempted, then consider this CPU as not preempted.
>
> Also if any of the CPUs in the core has yielded but OS has not requested
> CEDE or CONFER, then consider this CPU to be preempted.

I think the change is OK, but the change log and comments are slightly
confusing IMHO.

In several places you use "this CPU", but that usually means "the CPU
the code is currently executing on".

I think it would be clearer if you used eg. "target CPU" or something to
make it clear that you're not talking about the currently executing CPU.

cheers

> Correct detection of preempted CPUs is important for detecting idle
> CPUs/cores in task scheduler.
>
> Changelog:
> v1 -> v2: Handle lppaca_of(cpu) in !PPC_SPLPAR case.
> v1: 
> https://lore.kernel.org/r/20231009051740.17683-1-srikar%40linux.vnet.ibm.com
> 1. Fixed some compilation issues reported by kernelbot
> a. https://lore.kernel.org/oe-kbuild-all/202310102341.k0sgoqql-...@intel.com/
> b.  https://lore.kernel.org/oe-kbuild-all/202310091636.lelmjkyv-...@intel.com/
> 2. Resolved comments from Shrikanth

That change log should appear below the break "---".

> Tested-by: Aboorva Devarajan 
> Reviewed-by: Shrikanth Hegde 
> Signed-off-by: Srikar Dronamraju 
> ---
>  arch/powerpc/include/asm/paravirt.h | 42 ++---
>  1 file changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/paravirt.h 
> b/arch/powerpc/include/asm/paravirt.h
> index e08513d73119..0372b0093f72 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -71,6 +71,11 @@ static inline void yield_to_any(void)
>  {
>   plpar_hcall_norets_notrace(H_CONFER, -1, 0);
>  }
> +
> +static inline bool is_vcpu_idle(int vcpu)
> +{
> + return lppaca_of(vcpu).idle;
> +}
>  #else
>  static inline bool is_shared_processor(void)
>  {
> @@ -100,6 +105,10 @@ static inline void prod_cpu(int cpu)
>   ___bad_prod_cpu(); /* This would be a bug */
>  }
>  
> +static inline bool is_vcpu_idle(int vcpu)
> +{
> + return false;
> +}
>  #endif
>  
>  #define vcpu_is_preempted vcpu_is_preempted
> @@ -121,9 +130,19 @@ static inline bool vcpu_is_preempted(int cpu)
>   if (!is_shared_processor())
>   return false;
>  
> + if (!(yield_count_of(cpu) & 1))
> + return false;

Would be nice for that to have a short comment too.

> +
> + /*
> +  * If CPU has yielded to Hypervisor but OS has not requested idle
> +  * then this CPU is definitely preempted.

eg. If the target CPU has yielded to the Hypervisor, but the OS has not
requested idle then the target CPU has definitely been preempted.

> +  */
> + if (!is_vcpu_idle(cpu))
> + return true;
> +
>  #ifdef CONFIG_PPC_SPLPAR
>   if (!is_kvm_guest()) {
> - int first_cpu;
> + int first_cpu, i;
>  
>   /*
>* The result of vcpu_is_preempted() is used in a
> @@ -149,11 +168,28 @@ static inline bool vcpu_is_preempted(int cpu)
>*/
>   if (cpu_first_thread_sibling(cpu) == first_cpu)
>   return false;
> +
> + /*
> +  * If any of the threads of this core is not preempted or
> +  * ceded, then consider this CPU to be non-preempted
> +  */

eg. If any of the threads of the target CPU's core are not preempted or
ceded, then consider that the target CPU is also not preempted.

> + first_cpu = cpu_first_thread_sibling(cpu);
> + for (i = first_cpu; i < first_cpu + threads_per_core; i++) {
> + if (i == cpu)
> + continue;
> + if (!(yield_count_of(i) & 1))
> + return false;
> + if (!is_vcpu_idle(i))
> + return true;
> + }
>   }
>  #endif
>  
> - if (yield_count_of(cpu) & 1)
> - return true;
> + /*
> +  * None of the threads in this core are running but none of
> +  * them were preempted too. Hence assume the thread to be
> +  * non-preempted.
> +  */
>   return false;
>  }
>  
>
> base-commit: eddc90ea2af5933249ea1a78119f2c8ef8d07156
> -- 
> 2.31.1


Re: [PATCH v2] powerpc/pseries/vas: Migration suspend waits for no in-progress open windows

2023-10-18 Thread Nathan Lynch
Haren Myneni  writes:
> The hypervisor returns migration failure if all VAS windows are not
> closed. During pre-migration stage, vas_migration_handler() sets
> migration_in_progress flag and closes all windows from the list.
> The allocate VAS window routine checks the migration flag, setup
> the window and then add it to the list. So there is possibility of
> the migration handler missing the window that is still in the
> process of setup.
>
> t1: Allocate and open VAS t2: Migration event
> window
>
> lock vas_pseries_mutex
> If migration_in_progress set
>   unlock vas_pseries_mutex
>   return
> open window HCALL
> unlock vas_pseries_mutex
> Modify window HCALL   lock vas_pseries_mutex
> setup window  migration_in_progress=true
>   Closes all windows from
>   the list
>   unlock vas_pseries_mutex
> lock vas_pseries_mutexreturn
> if nr_closed_windows == 0
>   // No DLPAR CPU or migration
>   add to the list
>   unlock vas_pseries_mutex
>   return
> unlock vas_pseries_mutex
> Close VAS window
> // due to DLPAR CPU or migration
> return -EBUSY
>
> This patch resolves the issue with the following steps:
> - Define migration_in_progress as atomic so that the migration
>   handler sets this flag without holding mutex.

This part of the commit message is no longer accurate...

> - Introduce nr_open_wins_progress counter in VAS capabilities
>   struct
> - This counter tracks the number of open windows are still in
>   progress
> - The allocate setup window thread closes windows if the migration
>   is set and decrements nr_open_window_progress counter
> - The migration handler waits for no in-progress open windows.
>
> Fixes: 37e6764895ef ("powerpc/pseries/vas: Add VAS migration handler")
> Signed-off-by: Haren Myneni 
>
> ---
> Changes from v1:
> - Do not define the migration_in_progress flag as atomic as
>   suggested by Nathan
> ---
>  arch/powerpc/platforms/pseries/vas.c | 45 +++-
>  arch/powerpc/platforms/pseries/vas.h |  2 ++
>  2 files changed, 40 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/vas.c 
> b/arch/powerpc/platforms/pseries/vas.c
> index 15d958e38eca..b86f0db08e98 100644
> --- a/arch/powerpc/platforms/pseries/vas.c
> +++ b/arch/powerpc/platforms/pseries/vas.c
> @@ -32,6 +32,7 @@ static struct hv_vas_cop_feat_caps hv_cop_caps;
>  static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
>  static DEFINE_MUTEX(vas_pseries_mutex);
>  static bool migration_in_progress;
> +static DECLARE_WAIT_QUEUE_HEAD(open_win_progress_wq);
>  
>  static long hcall_return_busy_check(long rc)
>  {
> @@ -384,11 +385,15 @@ static struct vas_window *vas_allocate_window(int 
> vas_id, u64 flags,
>* same fault IRQ is not freed by the OS before.
>*/
>   mutex_lock(&vas_pseries_mutex);
> - if (migration_in_progress)
> + if (migration_in_progress) {
>   rc = -EBUSY;
> - else
> + } else {
>   rc = allocate_setup_window(txwin, (u64 *)&domain[0],
>  cop_feat_caps->win_type);
> + if (!rc)
> + atomic_inc(&caps->nr_open_wins_progress);
> + }
> +
>   mutex_unlock(&vas_pseries_mutex);
>   if (rc)
>   goto out;
> @@ -403,8 +408,17 @@ static struct vas_window *vas_allocate_window(int 
> vas_id, u64 flags,
>   goto out_free;
>  
>   txwin->win_type = cop_feat_caps->win_type;
> - mutex_lock(&vas_pseries_mutex);
> +
>   /*
> +  * The migration SUSPEND thread sets migration_in_progress and
> +  * closes all open windows from the list. But the window is
> +  * added to the list after open and modify HCALLs. So possible
> +  * that migration_in_progress is set before modify HCALL which
> +  * may cause some windows are still open when the hypervisor
> +  * initiates the migration.
> +  * So checks the migration_in_progress flag again and close all
> +  * open windows.
> +  *
>* Possible to lose the acquired credit with DLPAR core
>* removal after the window is opened. So if there are any
>* closed windows (means with lost credits), do not give new
> @@ -412,9 +426,11 @@ static struct vas_window *vas_allocate_window(int 
> vas_id, u64 flags,
>* after the existing windows are reopened when credits are
>* available.
>*/
> - if (!caps->nr_close_wins) {
> + mutex_lock(&vas_pseries_mutex);
> + if (!caps->nr_close_wins && !migration_in_progress) {
>   list_add(&txwin->win_list, &caps->list);
>   caps->nr_open_windows++;
> + atomic_dec(&caps->nr_open_wins_progress);

Should there not be a test and wakeup here?

if (atomic_dec_return(&caps->nr_open_wins_progress) == 0)
wake_up(&open_win_progress_wq);

>   mutex_unlock(&vas_pseries_mutex);
>   vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
>

Re: [PING][PATCH] uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux vector, entries

2023-10-18 Thread Szabolcs Nagy
The 10/17/2023 18:14, Peter Bergner wrote:
> CCing linux-kernel for more exposure.
> 
> PING.  I'm waiting on a reply from anyone on the kernel side of things
> to see whether they have an issue with reserving values for AT_HWCAP3
> and AT_HWCAP4.  
> 
> I'll note reviews from the GLIBC camp did not have an issue with the below 
> patch.

fwiw, aarch64 is quickly filling up AT_HWCAP2 so this will be
useful for arm64 too eventually, but we are not in a hurry.

> > +#define AT_HWCAP3 29   /* extension of AT_HWCAP */
> > +#define AT_HWCAP4 30   /* extension of AT_HWCAP */


RE: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block device module parameter test [qla2xxx / FC]

2023-10-18 Thread Nilesh Javali
Hi Tasmiya,

> -Original Message-
> From: Tasmiya Nalatwad 
> Sent: Wednesday, October 18, 2023 6:51 PM
> To: linux-s...@vger.kernel.org; linux-ker...@vger.kernel.org; linuxppc-
> d...@lists.ozlabs.org; linux-bl...@vger.kernel.org; linux-n...@vger.kernel.org
> Cc: Quinn Tran ; Nilesh Javali ;
> himanshu.madh...@oracle.com; martin.peter...@oracle.com; GR-QLogic-
> Storage-Upstream ;
> j...@linux.ibm.com; abdha...@linux.vnet.ibm.com; mputt...@linux.vnet.com;
> sach...@linux.vnet.com
> Subject: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block
> device module parameter test [qla2xxx / FC]
> 
> External Email
> 
> --
> Greetings,
> 
> OOPs Kernel crash while performing Block device module parameter test
> [qla2xxx / FC] on linux-next 6.6.0-rc5-next-20231010
> 
> --- Traces ---
> 
> [30876.431678] Kernel attempted to read user page (30) - exploit
> attempt? (uid: 0)
> [30876.431687] BUG: Kernel NULL pointer dereference on read at 0x0030
> [30876.431692] Faulting instruction address: 0xc008018e3180
> [30876.431697] Oops: Kernel access of bad area, sig: 11 [#1]
> [30876.431700] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA
> pSeries
> [30876.431705] Modules linked in: qla2xxx(+) nvme_fc nvme_fabrics
> nvme_core dm_round_robin dm_queue_length exfat vfat fat btrfs
> blake2b_generic zstd_compress loop raid10 raid456 async_raid6_recov
> async_memcpy async_pq async_xor async_tx xor raid6_pq raid1 linear xfs
> libcrc32c raid0 nvram rpadlpar_io rpaphp xsk_diag bonding tls rfkill
> vmx_crypto pseries_rng binfmt_misc ext4 mbcache jbd2 dm_service_time
> sd_mod sg ibmvfc ibmveth t10_pi crc64_rocksoft crc64 scsi_transport_fc
> dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded:
> nvme_core]
> [30876.431767] CPU: 0 PID: 1289400 Comm: kworker/0:2 Kdump: loaded Not
> tainted 6.6.0-rc5-next-20231010-auto #1
> [30876.431773] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200
> 0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries
> [30876.431779] Workqueue: events work_for_cpu_fn
> [30876.431788] NIP:  c008018e3180 LR: c008018e3128 CTR:
> c0513f80
> [30876.431792] REGS: c00062a8b930 TRAP: 0300   Not tainted
> (6.6.0-rc5-next-20231010-auto)
> [30876.431797] MSR:  8280b033 
> CR: 28000482  XER: 2004000f
> [30876.431811] CFAR: c008018e3138 DAR: 0030 DSISR:
> 4000 IRQMASK: 0
> [30876.431811] GPR00: c008018e3128 c00062a8bbd0
> c00800eb8300
> 
> [30876.431811] GPR04:  
> 
> 0017bbac
> [30876.431811] GPR08:  0030
> 
> c008019a6d68
> [30876.431811] GPR12:  c2ff
> c019cb98
> c00082a97980
> [30876.431811] GPR16:  
> 
> c3071ab0
> [30876.431811] GPR20: c3491c0d c00063bb9a00
> c00063bb30c0
> c001d8b52928
> [30876.431811] GPR24: c00800eb63a8 ffed c001d8b52000
> 0102
> [30876.431811] GPR28: c00800ebaf00 c001d8b52890
> 
> c001d8b58000
> [30876.431856] NIP [c008018e3180] qla2x00_mem_free+0x298/0x6b0
> [qla2xxx]
> [30876.431876] LR [c008018e3128] qla2x00_mem_free+0x240/0x6b0
> [qla2xxx]
> [30876.431895] Call Trace:
> [30876.431897] [c00062a8bbd0] [c008018e2f1c]
> qla2x00_mem_free+0x34/0x6b0 [qla2xxx] (unreliable)
> [30876.431917] [c00062a8bc20] [c008018eed30]
> qla2x00_probe_one+0x16d8/0x2640 [qla2xxx]
> [30876.431937] [c00062a8bd90] [c08c589c]
> local_pci_probe+0x6c/0x110
> [30876.431943] [c00062a8be10] [c0189ba8]
> work_for_cpu_fn+0x38/0x60
> [30876.431948] [c00062a8be40] [c018d0d0]
> process_scheduled_works+0x230/0x4f0
> [30876.431952] [c00062a8bf10] [c018fe14]
> worker_thread+0x1e4/0x500
> [30876.431955] [c00062a8bf90] [c019ccc8]
> kthread+0x138/0x140
> [30876.431960] [c00062a8bfe0] [c000df98]
> start_kernel_thread+0x14/0x18
> [30876.431965] Code: 4082000c a09f0198 78841b68 e8df0278 38e0
> 480c3b8d e8410018 3920 e91f0178 f93f0280 f93f0278 39280030
>  7fa95040 419e00b8 ebc80030
> [30876.431977] ---[ end trace  ]---
> [30876.480385] pstore: backend (nvram) writing error (-1)
> 
> 
> Git bisect points to below commit. Reverting this commit fixes the problem.
> commit efeda3bf912f269bcae16816683f432f58d68075
>      scsi: qla2xxx: Move resource to allow code reuse
> 
> --
> Regards,
> Tasmiya Nalatwad
> IBM Linux Technology Center

We have recently posted a fix for the commit that you have pointed here,
https://marc.info/?l=linux-scsi&m=169750508721982&w=2

Thanks,
Nilesh


[PATCH] mtd: powernv_flash: check return value of devm_kasprintf()

2023-10-18 Thread Yi Yang
The devm_kasprintf() returns a pointer to dynamically allocated memory.
that will return NULL when allocate failed.
Fix it by check return value of devm_kasprintf().

Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of device_node.name")
Signed-off-by: Yi Yang 
---
 drivers/mtd/devices/powernv_flash.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
index 66044f4f5bad..b305e555ddbf 100644
--- a/drivers/mtd/devices/powernv_flash.c
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -207,6 +207,10 @@ static int powernv_flash_set_driver_info(struct device 
*dev,
 * get them
 */
mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+   if (!mtd->name) {
+   dev_err(dev, "failed to allocate mtd->name\n");
+   return -ENOMEM;
+   }
mtd->type = MTD_NORFLASH;
mtd->flags = MTD_WRITEABLE;
mtd->size = size;
-- 
2.25.1



Re: [linux-next:master] BUILD REGRESSION 2dac75696c6da3c848daa118a729827541c89d33

2023-10-18 Thread Steven Rostedt
On Thu, 19 Oct 2023 04:07:35 +0800
kernel test robot  wrote:

> Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml:
> Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml:
> fs/tracefs/event_inode.c:782:11-21: ERROR: ei is NULL but dereferenced.

This was already reported and I'm currently testing a patch to fix it.

-- Steve


Re: [PATCH] [RFC] wireless: move obsolete drivers to staging

2023-10-18 Thread Jeff Johnson

On 10/13/2023 11:02 AM, Johannes Berg wrote:

On Fri, 2023-10-13 at 17:44 +0200, Arnd Bergmann wrote:

On Thu, Oct 12, 2023, at 18:36, Geoff Levand wrote:

On 10/12/23 17:41, Johannes Berg wrote:

But seriously - is it worth to try to keep a wireless driver for it if
we don't even know anyone using a PS3 at all?


There is still a considerable user base for the PS3, so we
must keep the ps3-gelic-wireless driver.


Do you know if anyone has tried changing this driver over to the
cfg80211 interface from the wireless extensions?


I looked at that yesterday, and sadly I _think_ it's not even possible,
there are some corner cases in it like "no WPA2" that don't seem to be
fully covered in cfg80211/nl80211, at least not with the APIs today and
with current versions of wpa_supplicant.

It might still be doable because things like
WPA_DRIVER_CAPA_KEY_MGMT_WPA2_PSK don't really seem to be used much in
wpa_supplicant, but we'd have to carefully test that I guess.

Also, it depends on the PS3 firmware version whether or not that's
supported.

Then again, arguably wifi without WPA2 is pretty much useless these
days?


This is a good point. It doesn't matter if the clients work if there are 
no Access Points to connect to. And if you do have an old one you can 
connect to, it will be an insecure connection. Wardriving, anyone?


/jeff



[linux-next:master] BUILD REGRESSION 2dac75696c6da3c848daa118a729827541c89d33

2023-10-18 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 2dac75696c6da3c848daa118a729827541c89d33  Add linux-next specific 
files for 20231018

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202309200103.grxwdktx-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310121802.cdagvdf2-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310170132.irophgla-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310171905.azfrkoid-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310181800.bh66q0t1-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310181854.pkthd7fd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310182303.v3ttgnqz-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310190002.utcopmyf-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310190116.5jjceozj-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310190201.4wyyj6j5-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

arch/powerpc/kvm/powerpc.c:1061:9: error: implicit declaration of function 
'kvmppc_get_vsx_vr'; did you mean 'kvmppc_get_sr'? 
[-Werror=implicit-function-declaration]
arch/powerpc/kvm/powerpc.c:1063:9: error: implicit declaration of function 
'kvmppc_set_vsx_vr'; did you mean 'kvmppc_set_sr'? 
[-Werror=implicit-function-declaration]
arch/powerpc/kvm/powerpc.c:1729:52: error: implicit declaration of function 
'kvmppc_get_vscr'; did you mean 'kvmppc_get_sr'? 
[-Werror=implicit-function-declaration]
arch/powerpc/kvm/powerpc.c:1732:52: error: implicit declaration of function 
'kvmppc_get_vrsave'; did you mean 'kvmppc_get_sr'? 
[-Werror=implicit-function-declaration]
arch/powerpc/kvm/powerpc.c:1780:25: error: implicit declaration of function 
'kvmppc_set_vscr'; did you mean 'kvmppc_set_sr'? 
[-Werror=implicit-function-declaration]
arch/powerpc/kvm/powerpc.c:1787:25: error: implicit declaration of function 
'kvmppc_set_vrsave'; did you mean 'kvmppc_set_sr'? 
[-Werror=implicit-function-declaration]
arch/s390/include/asm/ctlreg.h:129:9: warning: array subscript 0 is outside 
array bounds of 'struct ctlreg[0]' [-Warray-bounds=]
arch/s390/include/asm/ctlreg.h:80:9: warning: array subscript 0 is outside 
array bounds of 'struct ctlreg[0]' [-Warray-bounds=]
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu13/smu_v13_0_6_ppt.c:286:52: warning: 
'%s' directive output may be truncated writing up to 29 bytes into a region of 
size 23 [-Wformat-truncation=]
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu14/smu_v14_0.c:72:52: warning: '%s' 
directive output may be truncated writing up to 29 bytes into a region of size 
23 [-Wformat-truncation=]
fs/bcachefs/journal_seq_blacklist.c:110:18: warning: array subscript 'i' is 
outside the bounds of an interior zero-length array 'struct 
journal_seq_blacklist_entry[0]' [-Wzero-length-bounds]
fs/bcachefs/journal_seq_blacklist.c:148:26: warning: array subscript <unknown> 
is outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.c:148:26: warning: array subscript idx is 
outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.c:159:26: warning: array subscript <unknown> 
is outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.c:159:26: warning: array subscript idx is 
outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.c:176:27: warning: array subscript i is 
outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.c:176:64: warning: array subscript '(unsigned 
int) _33 + 4294967295' is outside the bounds of an interior zero-length array 
'struct journal_seq_blacklist_entry[0]' [-Wzero-length-bounds]
fs/bcachefs/journal_seq_blacklist.c:189:27: warning: array subscript i is 
outside array bounds of 'struct journal_seq_blacklist_table_entry[0]' 
[-Warray-bounds=]
fs/bcachefs/journal_seq_blacklist.h:9:56: warning: array subscript 0 is outside 
the bounds of an interior zero-length array 'struct 
journal_seq_blacklist_entry[0]' [-Wzero-length-bounds]
fs/bcachefs/snapshot.c:118:66: warning: array subscript <unknown> is outside 
array bounds of 'struct snapshot_t[0]' [-Warray-bounds=]
fs/bcachefs/snapshot.c:134:70: warning: array subscript <unknown> is outside 
array bounds of 'struct snapshot_t[0]' [-Warray-bounds=]
fs/bcachefs/snapshot.c:168:16: warning: array subscript idx is outside array 
bounds of 'struct snapshot_t[0]' [-Warray-bounds=]
fs/bcachefs/snapshot.c:181:16: warning: array subscript idx is outside array 
bounds of 'struct snapshot_t[0]' [-Warray-bounds=]
fs/bcachefs/snapshot.h:36:21: warning: array subscript <unknown> is outside 
array bounds of 'struct snapshot_t[0]' [-Warray-bounds=]
include/asm-generic/rwonce.h:44:26: warning: array subscript 0 is outside array 
bounds of '__u8[0]' {aka

[PATCH v12 18/20] PCI/AER: Unmask RCEC internal errors to enable RCH downstream port error handling

2023-10-18 Thread Robert Richter
AER corrected and uncorrectable internal errors (CIE/UIE) are masked
in their corresponding mask registers per default once in power-up
state. [1][2] Enable internal errors for RCECs to receive CXL
downstream port errors of Restricted CXL Hosts (RCHs).

[1] CXL 3.0 Spec, 12.2.1.1 - RCH Downstream Port Detected Errors
[2] PCIe Base Spec r6.0, 7.8.4.3 Uncorrectable Error Mask Register,
7.8.4.6 Correctable Error Mask Register

Co-developed-by: Terry Bowman 
Signed-off-by: Terry Bowman 
Signed-off-by: Robert Richter 
Acked-by: Bjorn Helgaas 
Reviewed-by: Jonathan Cameron 
Reviewed-by: Dave Jiang 
---
 drivers/pci/pcie/aer.c | 57 ++
 1 file changed, 57 insertions(+)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index f1e8494f5bb6..41076cb2956e 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -936,6 +936,30 @@ static bool find_source_device(struct pci_dev *parent,
 
 #ifdef CONFIG_PCIEAER_CXL
 
+/**
+ * pci_aer_unmask_internal_errors - unmask internal errors
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Unmasks internal errors in the Uncorrectable and Correctable Error
+ * Mask registers.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+{
+   int aer = dev->aer_cap;
+   u32 mask;
+
+   pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
+   mask &= ~PCI_ERR_UNC_INTN;
+   pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
+
+   pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
+   mask &= ~PCI_ERR_COR_INTERNAL;
+   pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
+}
+
 static bool is_cxl_mem_dev(struct pci_dev *dev)
 {
/*
@@ -1012,7 +1036,39 @@ static void cxl_rch_handle_error(struct pci_dev *dev, 
struct aer_err_info *info)
pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
 }
 
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+   bool *handles_cxl = data;
+
+   if (!*handles_cxl)
+   *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+
+   /* Non-zero terminates iteration */
+   return *handles_cxl;
+}
+
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+   bool handles_cxl = false;
+
+   if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
+   pcie_aer_is_native(rcec))
+   pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
+
+   return handles_cxl;
+}
+
+static void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+   if (!handles_cxl_errors(rcec))
+   return;
+
+   pci_aer_unmask_internal_errors(rcec);
+   pci_info(rcec, "CXL: Internal errors unmasked");
+}
+
 #else
+static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
 static inline void cxl_rch_handle_error(struct pci_dev *dev,
struct aer_err_info *info) { }
 #endif
@@ -1412,6 +1468,7 @@ static int aer_probe(struct pcie_device *dev)
return status;
}
 
+   cxl_rch_enable_rcec(port);
aer_enable_rootport(rpc);
pci_info(port, "enabled with IRQ %d\n", dev->irq);
return 0;
-- 
2.30.2



[PATCH v12 17/20] PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler

2023-10-18 Thread Robert Richter
In Restricted CXL Device (RCD) mode a CXL device is exposed as an
RCiEP, but CXL downstream and upstream ports are not enumerated and
not visible in the PCIe hierarchy. [1] Protocol and link errors from
these non-enumerated ports are signaled as internal AER errors, either
Uncorrectable Internal Error (UIE) or Corrected Internal Errors (CIE)
via an RCEC.

Restricted CXL host (RCH) downstream port-detected errors have the
Requester ID of the RCEC set in the RCEC's AER Error Source ID
register. A CXL handler must then inspect the error status in various
CXL registers residing in the dport's component register space (CXL
RAS capability) or the dport's RCRB (PCIe AER extended
capability). [2]

Errors showing up in the RCEC's error handler must be handled and
connected to the CXL subsystem. Implement this by forwarding the error
to all CXL devices below the RCEC. Since the entire CXL device is
controlled only using PCIe Configuration Space of device 0, function
0, only pass it there [3]. The error handling is limited to currently
supported devices with the Memory Device class code set (CXL Type 3
Device, PCI_CLASS_MEMORY_CXL, 502h), handle downstream port errors in
the device's cxl_pci driver. Support for other CXL Device Types
(e.g. a CXL.cache Device) can be added later.

To handle downstream port errors in addition to errors directed to the
CXL endpoint device, a handler must also inspect the CXL RAS and PCIe
AER capabilities of the CXL downstream port the device is connected
to.

Since CXL downstream port errors are signaled using internal errors,
the handler requires those errors to be unmasked. This is the subject of a
follow-on patch.

The reason for choosing this implementation is that the AER service
driver claims the RCEC device, but does not allow it to register a
custom specific handler to support CXL. Connecting the RCEC hard-wired
with a CXL handler does not work, as the CXL subsystem might not be
present all the time. The alternative of adding an implementation to the
portdrv to allow the registration of a custom RCEC error handler isn't
worth doing, as CXL would be its only user. Instead, just check for
a CXL RCEC and pass it down to the connected CXL device's error
handler. With this approach the code can entirely be implemented in
the PCIe AER driver and is independent of the CXL subsystem. The CXL
driver only provides the handler.

[1] CXL 3.0 spec: 9.11.8 CXL Devices Attached to an RCH
[2] CXL 3.0 spec, 12.2.1.1 RCH Downstream Port-detected Errors
[3] CXL 3.0 spec, 8.1.3 PCIe DVSEC for CXL Devices

Co-developed-by: Terry Bowman 
Signed-off-by: Terry Bowman 
Signed-off-by: Robert Richter 
Cc: "Oliver O'Halloran" 
Cc: Bjorn Helgaas 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-...@vger.kernel.org
Acked-by: Bjorn Helgaas 
Reviewed-by: Jonathan Cameron 
Reviewed-by: Dave Jiang 
---
 drivers/pci/pcie/Kconfig |  9 
 drivers/pci/pcie/aer.c   | 93 +++-
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 228652a59f27..8999fcebde6a 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -49,6 +49,15 @@ config PCIEAER_INJECT
  gotten from:
 
https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
 
+config PCIEAER_CXL
+   bool "PCI Express CXL RAS support"
+   default y
+   depends on PCIEAER && CXL_PCI
+   help
+ Enables CXL error handling.
+
+ If unsure, say Y.
+
 #
 # PCI Express ECRC
 #
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 6593fe3fc555..f1e8494f5bb6 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -934,14 +934,97 @@ static bool find_source_device(struct pci_dev *parent,
return true;
 }
 
+#ifdef CONFIG_PCIEAER_CXL
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+   /*
+* The capability, status, and control fields in Device 0,
+* Function 0 DVSEC control the CXL functionality of the
+* entire device (CXL 3.0, 8.1.3).
+*/
+   if (dev->devfn != PCI_DEVFN(0, 0))
+   return false;
+
+   /*
+* CXL Memory Devices must have the 502h class code set (CXL
+* 3.0, 8.1.12.1).
+*/
+   if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+   return false;
+
+   return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+   struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+   return (pcie_ports_native || host->native_aer);
+}
+
+static bool is_internal_error(struct aer_err_info *info)
+{
+   if (info->severity == AER_CORRECTABLE)
+   return info->status & PCI_ERR_COR_INTERNAL;
+
+   return info->status & PCI_ERR_UNC_INTN;
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+   struct aer_err_info *info = (struct aer_err_info *)data;
+   const struct pci_error_handlers 

[PATCH v12 12/20] PCI/AER: Refactor cper_print_aer() for use by CXL driver module

2023-10-18 Thread Robert Richter
From: Terry Bowman 

The CXL driver plans to use cper_print_aer() for logging restricted CXL
host (RCH) AER errors. cper_print_aer() is not currently exported and
therefore not usable by the CXL drivers built as loadable modules. Export
the cper_print_aer() function. Use the EXPORT_SYMBOL_NS_GPL() variant
to restrict the export to CXL drivers.

The CONFIG_ACPI_APEI_PCIEAER kernel config is currently used to enable
cper_print_aer(). cper_print_aer() logs the AER registers and is
useful in PCIE AER logging outside of APEI. Remove the
CONFIG_ACPI_APEI_PCIEAER dependency to enable cper_print_aer().

The cper_print_aer() function name implies CPER specific use but is useful
in non-CPER cases as well. Rename cper_print_aer() to pci_print_aer().

Also, update cxl_core to import CXL namespace imports.

Co-developed-by: Robert Richter 
Signed-off-by: Terry Bowman 
Signed-off-by: Robert Richter 
Cc: Mahesh J Salgaonkar 
Cc: "Oliver O'Halloran" 
Cc: Bjorn Helgaas 
Cc: linux-...@vger.kernel.org
Reviewed-by: Jonathan Cameron 
Acked-by: Bjorn Helgaas 
Reviewed-by: Dave Jiang 
---
 drivers/cxl/core/port.c | 1 +
 drivers/pci/pcie/aer.c  | 9 +
 include/linux/aer.h | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 41a8aa56cffd..802e85321a63 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2101,3 +2101,4 @@ static void cxl_core_exit(void)
 subsys_initcall(cxl_core_init);
 module_exit(cxl_core_exit);
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 9c8fd69ae5ad..6593fe3fc555 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -759,9 +759,10 @@ int cper_severity_to_aer(int cper_severity)
}
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
+#endif
 
-void cper_print_aer(struct pci_dev *dev, int aer_severity,
-   struct aer_capability_regs *aer)
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
+  struct aer_capability_regs *aer)
 {
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
@@ -800,7 +801,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
trace_aer_event(dev_name(&dev->dev), (status & ~mask),
aer_severity, tlp_header_valid, &aer->header_log);
 }
-#endif
+EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL);
 
 /**
  * add_error_device - list device to be handled
@@ -996,7 +997,7 @@ static void aer_recover_work_func(struct work_struct *work)
   PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
-   cper_print_aer(pdev, entry.severity, entry.regs);
+   pci_print_aer(pdev, entry.severity, entry.regs);
if (entry.severity == AER_NONFATAL)
pcie_do_recovery(pdev, pci_channel_io_normal,
 aer_root_reset);
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 29cc10220952..f6ea2f57d808 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -51,7 +51,7 @@ static inline int pci_aer_clear_nonfatal_status(struct 
pci_dev *dev)
 static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
 #endif
 
-void cper_print_aer(struct pci_dev *dev, int aer_severity,
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer);
 int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
-- 
2.30.2



Re: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block device module parameter test [qla2xxx / FC]

2023-10-18 Thread Tasmiya Nalatwad

Thanks Nilesh. The patch fixes the issue.

On 10/18/23 19:59, Nilesh Javali wrote:

Hi Tasmiya,


-Original Message-
From: Tasmiya Nalatwad 
Sent: Wednesday, October 18, 2023 6:51 PM
To: linux-s...@vger.kernel.org; linux-ker...@vger.kernel.org; linuxppc-
d...@lists.ozlabs.org; linux-bl...@vger.kernel.org; linux-n...@vger.kernel.org
Cc: Quinn Tran ; Nilesh Javali ;
himanshu.madh...@oracle.com; martin.peter...@oracle.com; GR-QLogic-
Storage-Upstream ;
j...@linux.ibm.com; abdha...@linux.vnet.ibm.com; mputt...@linux.vnet.com;
sach...@linux.vnet.com
Subject: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block
device module parameter test [qla2xxx / FC]

External Email

--
Greetings,

OOPs Kernel crash while performing Block device module parameter test
[qla2xxx / FC] on linux-next 6.6.0-rc5-next-20231010

--- Traces ---

[30876.431678] Kernel attempted to read user page (30) - exploit
attempt? (uid: 0)
[30876.431687] BUG: Kernel NULL pointer dereference on read at 0x0030
[30876.431692] Faulting instruction address: 0xc008018e3180
[30876.431697] Oops: Kernel access of bad area, sig: 11 [#1]
[30876.431700] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA
pSeries
[30876.431705] Modules linked in: qla2xxx(+) nvme_fc nvme_fabrics
nvme_core dm_round_robin dm_queue_length exfat vfat fat btrfs
blake2b_generic zstd_compress loop raid10 raid456 async_raid6_recov
async_memcpy async_pq async_xor async_tx xor raid6_pq raid1 linear xfs
libcrc32c raid0 nvram rpadlpar_io rpaphp xsk_diag bonding tls rfkill
vmx_crypto pseries_rng binfmt_misc ext4 mbcache jbd2 dm_service_time
sd_mod sg ibmvfc ibmveth t10_pi crc64_rocksoft crc64 scsi_transport_fc
dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded:
nvme_core]
[30876.431767] CPU: 0 PID: 1289400 Comm: kworker/0:2 Kdump: loaded Not
tainted 6.6.0-rc5-next-20231010-auto #1
[30876.431773] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries
[30876.431779] Workqueue: events work_for_cpu_fn
[30876.431788] NIP:  c008018e3180 LR: c008018e3128 CTR:
c0513f80
[30876.431792] REGS: c00062a8b930 TRAP: 0300   Not tainted
(6.6.0-rc5-next-20231010-auto)
[30876.431797] MSR:  8280b033 
CR: 28000482  XER: 2004000f
[30876.431811] CFAR: c008018e3138 DAR: 0030 DSISR:
4000 IRQMASK: 0
[30876.431811] GPR00: c008018e3128 c00062a8bbd0
c00800eb8300

[30876.431811] GPR04:  

0017bbac
[30876.431811] GPR08:  0030

c008019a6d68
[30876.431811] GPR12:  c2ff
c019cb98
c00082a97980
[30876.431811] GPR16:  

c3071ab0
[30876.431811] GPR20: c3491c0d c00063bb9a00
c00063bb30c0
c001d8b52928
[30876.431811] GPR24: c00800eb63a8 ffed c001d8b52000
0102
[30876.431811] GPR28: c00800ebaf00 c001d8b52890

c001d8b58000
[30876.431856] NIP [c008018e3180] qla2x00_mem_free+0x298/0x6b0
[qla2xxx]
[30876.431876] LR [c008018e3128] qla2x00_mem_free+0x240/0x6b0
[qla2xxx]
[30876.431895] Call Trace:
[30876.431897] [c00062a8bbd0] [c008018e2f1c]
qla2x00_mem_free+0x34/0x6b0 [qla2xxx] (unreliable)
[30876.431917] [c00062a8bc20] [c008018eed30]
qla2x00_probe_one+0x16d8/0x2640 [qla2xxx]
[30876.431937] [c00062a8bd90] [c08c589c]
local_pci_probe+0x6c/0x110
[30876.431943] [c00062a8be10] [c0189ba8]
work_for_cpu_fn+0x38/0x60
[30876.431948] [c00062a8be40] [c018d0d0]
process_scheduled_works+0x230/0x4f0
[30876.431952] [c00062a8bf10] [c018fe14]
worker_thread+0x1e4/0x500
[30876.431955] [c00062a8bf90] [c019ccc8]
kthread+0x138/0x140
[30876.431960] [c00062a8bfe0] [c000df98]
start_kernel_thread+0x14/0x18
[30876.431965] Code: 4082000c a09f0198 78841b68 e8df0278 38e0
480c3b8d e8410018 3920 e91f0178 f93f0280 f93f0278 39280030
 7fa95040 419e00b8 ebc80030
[30876.431977] ---[ end trace  ]---
[30876.480385] pstore: backend (nvram) writing error (-1)


Git bisect points to below commit. Reverting this commit fixes the problem.
commit efeda3bf912f269bcae16816683f432f58d68075
      scsi: qla2xxx: Move resource to allow code reuse

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center

We have recently posted a fix for the commit that you have pointed here,
https://marc.info/?l=linux-scsi&m=169750508721982&w=2

Thanks,
Nilesh


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[PATCH v2 1/6] powerpc/smp: Cache CPU has Asymmetric SMP

2023-10-18 Thread Srikar Dronamraju
Currently cpu feature flag is checked whenever powerpc_smt_flags gets
called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
on the processor and all processors will either have this set or will
have it unset.

Hence only check for the feature flag once and cache it to be used
subsequently. This commit will help avoid a branch in powerpc_smt_flags

Signed-off-by: Srikar Dronamraju 
---
Changelog:
v1->v2: Using static keys instead of a variable.
Using pr_info_once instead of printk

 arch/powerpc/kernel/smp.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5826f5108a12..37c41297c9ce 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -988,18 +988,16 @@ static int __init init_thread_group_cache_map(int cpu, 
int cache_property)
 }
 
 static bool shared_caches;
+DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
 
 #ifdef CONFIG_SCHED_SMT
 /* cpumask of CPUs with asymmetric SMT dependency */
 static int powerpc_smt_flags(void)
 {
-   int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+   if (static_branch_unlikely(&powerpc_asym_packing))
+   return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | 
SD_ASYM_PACKING;
 
-   if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-   printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-   flags |= SD_ASYM_PACKING;
-   }
-   return flags;
+   return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
@@ -1686,6 +1684,11 @@ static void __init fixup_topology(void)
 {
int i;
 
+   if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+   pr_info_once("Enabling Asymmetric SMT scheduling\n");
+   static_branch_enable(&powerpc_asym_packing);
+   }
+
 #ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
-- 
2.31.1



[PATCH v2 2/6] powerpc/smp: Enable Asym packing for cores on shared processor

2023-10-18 Thread Srikar Dronamraju
If there are shared processor LPARs, underlying Hypervisor can have more
virtual cores to handle than actual physical cores.

Starting with Power 9, a core has 2 nearly independent thread groups.
On shared processor LPARs, it helps to pack threads onto a smaller number
of cores so that the overall system performance and utilization
improves. PowerVM schedules at a core level. Hence packing to fewer
cores helps.

For example: Let's say there are two 8-core shared LPARs that are
actually sharing an 8-core shared physical pool, each running 8 threads.
Then consolidating the 8 threads onto 4 cores on each LPAR would help
them to perform better. This is because each LPAR will get
100% time to run applications and there will be no switching required by
the Hypervisor.

To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.

Signed-off-by: Srikar Dronamraju 
---
Changelog:
v1->v2: Using static key instead of a variable.

 arch/powerpc/kernel/smp.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 37c41297c9ce..498c2d51fc20 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
  */
 static int powerpc_shared_cache_flags(void)
 {
+   if (static_branch_unlikely(&powerpc_asym_packing))
+   return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+
return SD_SHARE_PKG_RESOURCES;
 }
 
+static int powerpc_shared_proc_flags(void)
+{
+   if (static_branch_unlikely(&powerpc_asym_packing))
+   return SD_ASYM_PACKING;
+
+   return 0;
+}
+
 /*
  * We can't just pass cpu_l2_cache_mask() directly because
  * returns a non-const pointer and the compiler barfs on that.
@@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level 
powerpc_topology[] = {
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
 #endif
{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
-   { cpu_mc_mask, SD_INIT_NAME(MC) },
-   { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+   { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
+   { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
{ NULL, },
 };
 
@@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
pr_info_once("Enabling Asymmetric SMT scheduling\n");
static_branch_enable(&powerpc_asym_packing);
+   } else if (is_shared_processor() && has_big_cores) {
+   static_branch_enable(&powerpc_asym_packing);
}
 
 #ifdef CONFIG_SCHED_SMT
-- 
2.31.1



[PATCH v2 4/6] powerpc/smp: Disable MC domain for shared processor

2023-10-18 Thread Srikar Dronamraju
Like L2-cache info, coregroup information which is used to determine MC
sched domains is only present on dedicated LPARs. i.e PowerVM doesn't
export coregroup information for shared processor LPARs. Hence disable
creating MC domains on shared LPAR Systems.

Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/smp.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 498c2d51fc20..29da9262cb17 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1046,6 +1046,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu)
 
 static bool has_coregroup_support(void)
 {
+   /* Coregroup identification not available on shared systems */
+   if (is_shared_processor())
+   return 0;
+
return coregroup_enabled;
 }
 
-- 
2.31.1



[PATCH v2 5/6] powerpc/smp: Add read_mostly attribute

2023-10-18 Thread Srikar Dronamraju
There are some variables that are only updated at boot time.
So add read_mostly attribute to such variables

Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/smp.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 29da9262cb17..b1eb11a66902 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -77,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 #endif
 
 struct task_struct *secondary_current;
-bool has_big_cores;
-bool coregroup_enabled;
-bool thread_group_shares_l2;
-bool thread_group_shares_l3;
+bool has_big_cores __read_mostly;
+bool coregroup_enabled __read_mostly;
+bool thread_group_shares_l2 __read_mostly;
+bool thread_group_shares_l3 __read_mostly;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -987,7 +987,7 @@ static int __init init_thread_group_cache_map(int cpu, int 
cache_property)
return 0;
 }
 
-static bool shared_caches;
+static bool shared_caches __read_mostly;
 DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
 
 #ifdef CONFIG_SCHED_SMT
-- 
2.31.1



[PATCH v2 6/6] powerpc/smp: Avoid asym packing within thread_group of a core

2023-10-18 Thread Srikar Dronamraju
PowerVM Hypervisor will schedule at a core granularity. However each
core can have more than one thread_group. For better utilization in
case of a shared processor, it's preferable for the scheduler to pack to
the lowest core. However there is no benefit of moving a thread between
two thread groups of the same core.

Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/smp.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index b1eb11a66902..a710fb32a2a9 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1695,6 +1695,8 @@ void start_secondary(void *unused)
BUG();
 }
 
+DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
 static void __init fixup_topology(void)
 {
int i;
@@ -1704,6 +1706,7 @@ static void __init fixup_topology(void)
static_branch_enable(&powerpc_asym_packing);
} else if (is_shared_processor() && has_big_cores) {
static_branch_enable(&powerpc_asym_packing);
+   static_branch_enable(&splpar_asym_pack);
}
 
 #ifdef CONFIG_SCHED_SMT
@@ -1758,6 +1761,19 @@ void __init smp_cpus_done(unsigned int max_cpus)
set_sched_topology(powerpc_topology);
 }
 
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+   if (static_branch_unlikely(&splpar_asym_pack))
+   return -cpu / threads_per_core;
+
+   return -cpu;
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
-- 
2.31.1



[PATCH v2 3/6] powerpc/smp: Move shared_processor static key to smp.h

2023-10-18 Thread Srikar Dronamraju
The ability to detect if the system is running in a shared processor
mode is helpful in a few more generic cases, not just in
paravirtualization.
For example: At boot time, different scheduler/ topology flags may be
set based on the processor mode. Hence move it to a more generic file.

Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/include/asm/paravirt.h | 12 
 arch/powerpc/include/asm/smp.h  | 14 ++
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/paravirt.h 
b/arch/powerpc/include/asm/paravirt.h
index 0372b0093f72..cf83e837a571 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -15,13 +15,6 @@
 #include 
 #include 
 
-DECLARE_STATIC_KEY_FALSE(shared_processor);
-
-static inline bool is_shared_processor(void)
-{
-   return static_branch_unlikely(&shared_processor);
-}
-
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
@@ -77,11 +70,6 @@ static inline bool is_vcpu_idle(int vcpu)
return lppaca_of(vcpu).idle;
 }
 #else
-static inline bool is_shared_processor(void)
-{
-   return false;
-}
-
 static inline u32 yield_count_of(int cpu)
 {
return 0;
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 576d0e15..08631b2a4528 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -34,6 +34,20 @@ extern bool coregroup_enabled;
 extern int cpu_to_chip_id(int cpu);
 extern int *chip_id_lookup_table;
 
+#ifdef CONFIG_PPC_SPLPAR
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
+static inline bool is_shared_processor(void)
+{
+   return static_branch_unlikely(&shared_processor);
+}
+#else
+static inline bool is_shared_processor(void)
+{
+   return false;
+}
+#endif
+
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
-- 
2.31.1



[PATCH v2 0/6] powerpc/smp: Shared processor sched optimizations

2023-10-18 Thread Srikar Dronamraju
PowerVM systems configured in shared processors mode have some unique
challenges. Some device-tree properties will be missing on a shared
processor. Hence some sched domains may not make sense for shared processor
systems.

Most shared processor systems are over-provisioned. Underlying PowerVM
Hypervisor would schedule at a Big Core granularity. The most recent power
processors support two almost independent cores. In a lightly loaded
condition, it helps the overall system performance if we pack onto a smaller
number of Big Cores.

System Configuration
type=Shared mode=Uncapped smt=8 lcpu=96 mem=1066409344 kB cpus=96 ent=64.00
So *64 Entitled cores/ 96 Virtual processor* Scenario

lscpu
Architecture:   ppc64le
Byte Order: Little Endian
CPU(s): 768
On-line CPU(s) list:0-767
Model name: POWER10 (architected), altivec supported
Model:  2.0 (pvr 0080 0200)
Thread(s) per core: 8
Core(s) per socket: 16
Socket(s):  6
Hypervisor vendor:  pHyp
Virtualization type:para
L1d cache:  6 MiB (192 instances)
L1i cache:  9 MiB (192 instances)
NUMA node(s):   6
NUMA node0 CPU(s):  
0-7,32-39,80-87,128-135,176-183,224-231,272-279,320-327,368-375,416-423,464-471,512-519,560-567,608-615,656-663,704-711,752-759
NUMA node1 CPU(s):  
8-15,40-47,88-95,136-143,184-191,232-239,280-287,328-335,376-383,424-431,472-479,520-527,568-575,616-623,664-671,712-719,760-767
NUMA node4 CPU(s):  
64-71,112-119,160-167,208-215,256-263,304-311,352-359,400-407,448-455,496-503,544-551,592-599,640-647,688-695,736-743
NUMA node5 CPU(s):  
16-23,48-55,96-103,144-151,192-199,240-247,288-295,336-343,384-391,432-439,480-487,528-535,576-583,624-631,672-679,720-727
NUMA node6 CPU(s):  
72-79,120-127,168-175,216-223,264-271,312-319,360-367,408-415,456-463,504-511,552-559,600-607,648-655,696-703,744-751
NUMA node7 CPU(s):  
24-31,56-63,104-111,152-159,200-207,248-255,296-303,344-351,392-399,440-447,488-495,536-543,584-591,632-639,680-687,728-735

ebizzy -t 32 -S 200 (5 iterations) Records per second. (Higher is better)
Kernel     N  Min      Max      Median   Avg        Stddev     %Change
6.6.0-rc3  5  3840178  4059268  3978042  3973936.6  84264.456
+patch     5  3768393  3927901  3874994  3854046    71532.926  -3.01692

From lparstat (when the workload stabilized)
Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw       phint
6.6.0-rc3  4.16   0.00  0.00   95.84  26.06  40.72  4.16   69.88  276906989  578
+patch     4.16   0.00  0.00   95.83  17.70  27.66  4.17   78.26  70436663   119

ebizzy -t 128 -S 200 (5 iterations) Records per second. (Higher is better)
Kernel     N  Min      Max      Median   Avg        Stddev     %Change
6.6.0-rc3  5  5520692  5981856  5717709  5727053.2  176093.2
+patch     5  5305888  6259610  5854590  5843311    375917.03  2.02998

From lparstat (when the workload stabilized)
Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw       phint
6.6.0-rc3  16.66  0.00  0.00   83.33  45.49  71.08  16.67  50.50  288778533  581
+patch     16.65  0.00  0.00   83.35  30.15  47.11  16.65  65.76  85196150   133

ebizzy -t 512 -S 200 (5 iterations) Records per second. (Higher is better)
Kernel     N  Min       Max       Median    Avg       Stddev     %Change
6.6.0-rc3  5  19563921  20049955  19701510  19728733  198295.18
+patch     5  19455992  20176445  19718427  19832017  304094.05  0.523521

From lparstat (when the workload stabilized)
Kernel     %user  %sys  %wait  %idle  physc  %entc   lbusy  app   vcsw       phint
6.6.0-rc3  66.44  0.01  0.00   33.55  94.14  147.09  66.45  1.33  313345175  621
+patch     66.44  0.01  0.00   33.55  94.15  147.11  66.45  1.33  109193889  309

System Configuration
type=Shared mode=Uncapped smt=8 lcpu=40 mem=1067539392 kB cpus=96 ent=40.00
So *40 Entitled cores/ 40 Virtual processor* Scenario

lscpu
Architecture:   ppc64le
Byte Order: Little Endian
CPU(s): 320
On-line CPU(s) list:0-319
Model name: POWER10 (architected), altivec supported
Model:  2.0 (pvr 0080 0200)
Thread(s) per core: 8
Core(s) per socket: 10
Socket(s):  4
Hypervisor vendor:  pHyp
Virtualization type:para
L1d cache:  2.5 MiB (80 instances)
L1i cache:  3.8 MiB (80 instances)
NUMA node(s):   4
NUMA node0 CPU(s):  
0-7,32-39,64-71,96-103,128-135,160-167,192-199,224-231,256-263,288-295
NUMA node1 CPU(s): 

[PATCH v2] powerpc/paravirt: Improve vcpu_is_preempted

2023-10-18 Thread Srikar Dronamraju
PowerVM Hypervisor dispatches on a whole core basis. In a shared LPAR, a
CPU from a core that is CEDED or preempted may have a larger latency. In
such a scenario, it is preferable to choose a different CPU to run.

If one of the CPUs in the core is active, i.e neither CEDED nor
preempted, then consider this CPU as not preempted.

Also if any of the CPUs in the core has yielded but OS has not requested
CEDE or CONFER, then consider this CPU to be preempted.

Correct detection of preempted CPUs is important for detecting idle
CPUs/cores in task scheduler.

Changelog:
v1 -> v2: Handle lppaca_of(cpu) in !PPC_SPLPAR case.
v1: https://lore.kernel.org/r/20231009051740.17683-1-srikar%40linux.vnet.ibm.com
1. Fixed some compilation issues reported by kernelbot
a. https://lore.kernel.org/oe-kbuild-all/202310102341.k0sgoqql-...@intel.com/
b.  https://lore.kernel.org/oe-kbuild-all/202310091636.lelmjkyv-...@intel.com/
2. Resolved comments from Shrikanth

Tested-by: Aboorva Devarajan 
Reviewed-by: Shrikanth Hegde 
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/include/asm/paravirt.h | 42 ++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/paravirt.h 
b/arch/powerpc/include/asm/paravirt.h
index e08513d73119..0372b0093f72 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -71,6 +71,11 @@ static inline void yield_to_any(void)
 {
plpar_hcall_norets_notrace(H_CONFER, -1, 0);
 }
+
+static inline bool is_vcpu_idle(int vcpu)
+{
+   return lppaca_of(vcpu).idle;
+}
 #else
 static inline bool is_shared_processor(void)
 {
@@ -100,6 +105,10 @@ static inline void prod_cpu(int cpu)
___bad_prod_cpu(); /* This would be a bug */
 }
 
+static inline bool is_vcpu_idle(int vcpu)
+{
+   return false;
+}
 #endif
 
 #define vcpu_is_preempted vcpu_is_preempted
@@ -121,9 +130,19 @@ static inline bool vcpu_is_preempted(int cpu)
if (!is_shared_processor())
return false;
 
+   if (!(yield_count_of(cpu) & 1))
+   return false;
+
+   /*
+* If CPU has yielded to Hypervisor but OS has not requested idle
+* then this CPU is definitely preempted.
+*/
+   if (!is_vcpu_idle(cpu))
+   return true;
+
 #ifdef CONFIG_PPC_SPLPAR
if (!is_kvm_guest()) {
-   int first_cpu;
+   int first_cpu, i;
 
/*
 * The result of vcpu_is_preempted() is used in a
@@ -149,11 +168,28 @@ static inline bool vcpu_is_preempted(int cpu)
 */
if (cpu_first_thread_sibling(cpu) == first_cpu)
return false;
+
+   /*
+* If any of the threads of this core is not preempted or
+* ceded, then consider this CPU to be non-preempted
+*/
+   first_cpu = cpu_first_thread_sibling(cpu);
+   for (i = first_cpu; i < first_cpu + threads_per_core; i++) {
+   if (i == cpu)
+   continue;
+   if (!(yield_count_of(i) & 1))
+   return false;
+   if (!is_vcpu_idle(i))
+   return true;
+   }
}
 #endif
 
-   if (yield_count_of(cpu) & 1)
-   return true;
+   /*
+* None of the threads in this core are running but none of
+* them were preempted too. Hence assume the thread to be
+* non-preempted.
+*/
return false;
 }
 

base-commit: eddc90ea2af5933249ea1a78119f2c8ef8d07156
-- 
2.31.1



[PATCH] powerpc/perf: Fix disabling BHRB and instruction sampling

2023-10-18 Thread Nicholas Piggin
When the PMU is disabled, MMCRA is not updated to disable BHRB and
instruction sampling. This can lead to those features remaining enabled,
which can slow down a real or emulated CPU.

Fixes: 1cade527f6e9 ("powerpc/perf: BHRB control to disable BHRB logic when not 
used")
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/perf/core-book3s.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8c1f7def596e..10b946e9c6e7 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1371,8 +1371,7 @@ static void power_pmu_disable(struct pmu *pmu)
/*
 * Disable instruction sampling if it was enabled
 */
-   if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE)
-   val &= ~MMCRA_SAMPLE_ENABLE;
+   val &= ~MMCRA_SAMPLE_ENABLE;
 
/* Disable BHRB via mmcra (BHRBRD) for p10 */
if (ppmu->flags & PPMU_ARCH_31)
@@ -1383,7 +1382,7 @@ static void power_pmu_disable(struct pmu *pmu)
 * instruction sampling or BHRB.
 */
if (val != mmcra) {
-   mtspr(SPRN_MMCRA, mmcra);
+   mtspr(SPRN_MMCRA, val);
mb();
isync();
}
-- 
2.42.0



[bpf-next PATCH v2 1/4] kbuild: remove ARCH_POSTLINK from module builds

2023-10-18 Thread Masahiro Yamada
The '%.ko' rule in arch/*/Makefile.postlink does nothing but call the
'true' command.

Remove the meaningless code.

Signed-off-by: Masahiro Yamada 
Reviewed-by: Nicolas Schier 
---

(no changes since v1)

 arch/mips/Makefile.postlink| 3 ---
 arch/powerpc/Makefile.postlink | 3 ---
 arch/riscv/Makefile.postlink   | 3 ---
 arch/x86/Makefile.postlink | 3 ---
 scripts/Makefile.modfinal  | 5 +
 5 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
index 34e3bd71f3b0..6cfdc149d3bc 100644
--- a/arch/mips/Makefile.postlink
+++ b/arch/mips/Makefile.postlink
@@ -31,9 +31,6 @@ ifeq ($(CONFIG_RELOCATABLE),y)
$(call if_changed,relocs)
 endif
 
-%.ko: FORCE
-   @true
-
 clean:
@true
 
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 1f860b3c9bec..ae5a4256b03d 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -35,9 +35,6 @@ ifdef CONFIG_RELOCATABLE
$(call if_changed,relocs_check)
 endif
 
-%.ko: FORCE
-   @true
-
 clean:
rm -f .tmp_symbols.txt
 
diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink
index a46fc578b30b..829b9abc91f6 100644
--- a/arch/riscv/Makefile.postlink
+++ b/arch/riscv/Makefile.postlink
@@ -36,9 +36,6 @@ ifdef CONFIG_RELOCATABLE
$(call if_changed,relocs_strip)
 endif
 
-%.ko: FORCE
-   @true
-
 clean:
@true
 
diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink
index 936093d29160..fef2e977cc7d 100644
--- a/arch/x86/Makefile.postlink
+++ b/arch/x86/Makefile.postlink
@@ -34,9 +34,6 @@ ifeq ($(CONFIG_X86_NEED_RELOCS),y)
$(call cmd,strip_relocs)
 endif
 
-%.ko: FORCE
-   @true
-
 clean:
@rm -f $(OUT_RELOCS)/vmlinux.relocs
 
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index b3a6aa8fbe8c..8568d256d6fb 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -28,14 +28,11 @@ quiet_cmd_cc_o_c = CC [M]  $@
 %.mod.o: %.mod.c FORCE
$(call if_changed_dep,cc_o_c)
 
-ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
-
 quiet_cmd_ld_ko_o = LD [M]  $@
   cmd_ld_ko_o +=   \
$(LD) -r $(KBUILD_LDFLAGS)  \
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)  \
-   -T scripts/module.lds -o $@ $(filter %.o, $^);  \
-   $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+   -T scripts/module.lds -o $@ $(filter %.o, $^)
 
 quiet_cmd_btf_ko = BTF [M] $@
   cmd_btf_ko = \
-- 
2.40.1



Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Hans Verkuil
On 18/10/2023 15:50, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 9:09 PM Hans Verkuil  wrote:
>>
>> On 18/10/2023 14:52, Shengjiu Wang wrote:
>>> On Wed, Oct 18, 2023 at 3:58 PM Hans Verkuil  wrote:

 On 18/10/2023 09:40, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:
>>
>> On 18/10/2023 09:23, Shengjiu Wang wrote:
>>> On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang 
>>>  wrote:

 On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  
 wrote:
>
> On 17/10/2023 15:11, Shengjiu Wang wrote:
>> On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  
>> wrote:
>>>
>>> Hi Shengjiu,
>>>
>>> On 13/10/2023 10:31, Shengjiu Wang wrote:
 Fixed point controls are used by the user to configure
 the audio sample rate to driver.

 Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
 new IDs for ASRC rate control.

 Signed-off-by: Shengjiu Wang 
 ---
  .../userspace-api/media/v4l/common.rst|  1 +
  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 
 +++
  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
  .../media/v4l/vidioc-queryctrl.rst|  7 
  .../media/videodev2.h.rst.exceptions  |  1 +
  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
  include/media/v4l2-ctrls.h|  2 ++
  include/uapi/linux/v4l2-controls.h| 13 +++
  include/uapi/linux/videodev2.h|  3 ++
  10 files changed, 76 insertions(+)
  create mode 100644 
 Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst

 diff --git a/Documentation/userspace-api/media/v4l/common.rst 
 b/Documentation/userspace-api/media/v4l/common.rst
 index ea0435182e44..35707edffb13 100644
 --- a/Documentation/userspace-api/media/v4l/common.rst
 +++ b/Documentation/userspace-api/media/v4l/common.rst
 @@ -52,6 +52,7 @@ applicable to all devices.
  ext-ctrls-fm-rx
  ext-ctrls-detect
  ext-ctrls-colorimetry
 +ext-ctrls-fixed-point
>>>
>>> Rename this to ext-ctrls-audio-m2m.
>>>
  fourcc
  format
  planar-apis
 diff --git 
 a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
 b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
 new file mode 100644
 index ..2ef6e250580c
 --- /dev/null
 +++ 
 b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
 @@ -0,0 +1,36 @@
 +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
 +
 +.. _fixed-point-controls:
 +
 +***
 +Fixed Point Control Reference
>>>
>>> This is for audio controls. "Fixed Point" is just the type, and it 
>>> doesn't make
>>> sense to group fixed point controls. But it does make sense to 
>>> group the audio
>>> controls.
>>>
>>> V4L2 controls can be grouped into classes. Basically it is a way to 
>>> put controls
>>> into categories, and for each category there is also a control that 
>>> gives a
>>> description of the class (see 2.15.15 in
>>> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
>>>
>>> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you 
>>> will see that
>>> they are grouped based on what class of control they are.
>>>
>>> So I think it would be a good idea to create a new control class 
>>> for M2M audio controls,
>>> instead of just adding them to the catch-all 'User Controls' class.
>>>
>>> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
>>> V4L2_CID_COLORIMETRY_CLASS to see how
>>> it is done.
>>>
>>> M2M_AUDIO would probably be a good name for the class.
>>>
 +***
 +
 +These controls are intended to support an asynchronous sample
 +rate converter.
>>>
>>> Add ' (ASRC).' at the end to indicate the common abbreviation for
>>> that.
>>>
 +
 +.. _v4l2-audio-asrc:
 +
 +``V4L2_CID_ASRC_SOURCE_RATE``
 +sets the resampler source rate.

[PATCH AUTOSEL 5.15 11/14] powerpc/85xx: Fix math emulation exception

2023-10-18 Thread Sasha Levin
From: Christophe Leroy 

[ Upstream commit 8e8a12ecbc86700b5e1a3596ce2b3c43dafad336 ]

Booting mpc85xx_defconfig kernel on QEMU leads to:

Bad trap at PC: fe9bab0, SR: 2d000, vector=800
awk[82]: unhandled trap (5) at 0 nip fe9bab0 lr fe9e01c code 5 in 
libc-2.27.so[fe5a000+17a000]
awk[82]: code: 3aa0 3a800010 4bffe03c 9421fff0 7ca62b78 38a0 93c10008 
83c10008
awk[82]: code: 38210010 4bffdec8 9421ffc0 7c0802a6  d8010008 4815190d 
93810030
Trace/breakpoint trap
WARNING: no useful console

This is because although CONFIG_MATH_EMULATION is selected,
Exception 800 calls unknown_exception().

Call emulation_assist_interrupt() instead.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
Link: 
https://msgid.link/066caa6d9480365da9b8ed83692d7101e10ac5f8.1695657339.git.christophe.le...@csgroup.eu
Signed-off-by: Sasha Levin 
---
 arch/powerpc/kernel/head_fsl_booke.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 0a9a0f301474d..40687e271c106 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -394,7 +394,7 @@ interrupt_base:
 #ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
 #else
-   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
unknown_exception)
+   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
emulation_assist_interrupt)
 #endif
 
/* System Call Interrupt */
-- 
2.40.1



[PATCH AUTOSEL 6.1 16/19] powerpc/85xx: Fix math emulation exception

2023-10-18 Thread Sasha Levin
From: Christophe Leroy 

[ Upstream commit 8e8a12ecbc86700b5e1a3596ce2b3c43dafad336 ]

Booting mpc85xx_defconfig kernel on QEMU leads to:

Bad trap at PC: fe9bab0, SR: 2d000, vector=800
awk[82]: unhandled trap (5) at 0 nip fe9bab0 lr fe9e01c code 5 in 
libc-2.27.so[fe5a000+17a000]
awk[82]: code: 3aa0 3a800010 4bffe03c 9421fff0 7ca62b78 38a0 93c10008 
83c10008
awk[82]: code: 38210010 4bffdec8 9421ffc0 7c0802a6  d8010008 4815190d 
93810030
Trace/breakpoint trap
WARNING: no useful console

This is because although CONFIG_MATH_EMULATION is selected,
Exception 800 calls unknown_exception().

Call emulation_assist_interrupt() instead.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
Link: 
https://msgid.link/066caa6d9480365da9b8ed83692d7101e10ac5f8.1695657339.git.christophe.le...@csgroup.eu
Signed-off-by: Sasha Levin 
---
 arch/powerpc/kernel/head_85xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 52c0ab416326a..0e16aea7853b8 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -394,7 +394,7 @@ interrupt_base:
 #ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
 #else
-   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
unknown_exception)
+   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
emulation_assist_interrupt)
 #endif
 
/* System Call Interrupt */
-- 
2.40.1



[PATCH AUTOSEL 6.5 27/31] powerpc/85xx: Fix math emulation exception

2023-10-18 Thread Sasha Levin
From: Christophe Leroy 

[ Upstream commit 8e8a12ecbc86700b5e1a3596ce2b3c43dafad336 ]

Booting mpc85xx_defconfig kernel on QEMU leads to:

Bad trap at PC: fe9bab0, SR: 2d000, vector=800
awk[82]: unhandled trap (5) at 0 nip fe9bab0 lr fe9e01c code 5 in 
libc-2.27.so[fe5a000+17a000]
awk[82]: code: 3aa0 3a800010 4bffe03c 9421fff0 7ca62b78 38a0 93c10008 
83c10008
awk[82]: code: 38210010 4bffdec8 9421ffc0 7c0802a6  d8010008 4815190d 
93810030
Trace/breakpoint trap
WARNING: no useful console

This is because although CONFIG_MATH_EMULATION is selected,
Exception 800 calls unknown_exception().

Call emulation_assist_interrupt() instead.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
Link: 
https://msgid.link/066caa6d9480365da9b8ed83692d7101e10ac5f8.1695657339.git.christophe.le...@csgroup.eu
Signed-off-by: Sasha Levin 
---
 arch/powerpc/kernel/head_85xx.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index fdbee1093e2ba..f9634111e82ed 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -396,7 +396,7 @@ interrupt_base:
 #ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
 #else
-   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
unknown_exception)
+   EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, 
emulation_assist_interrupt)
 #endif
 
/* System Call Interrupt */
-- 
2.40.1



[PATCH AUTOSEL 6.5 07/31] ASoC: fsl-asoc-card: use integer type for fll_id and pll_id

2023-10-18 Thread Sasha Levin
From: Shengjiu Wang 

[ Upstream commit 2b21207afd06714986a3d22442ed4860ba4f9ced ]

As the fll_id and pll_id can be zero (WM8960_SYSCLK_AUTO)
with the commit 2bbc2df46e67 ("ASoC: wm8960: Make automatic the
default clocking mode")

Then the machine driver will skip calling set_sysclk() and set_pll()
for the codec. When the sysclk rate differs from what wm8960 read
at probe, the output sound frequency is wrong.

So change the fll_id and pll_id initial value, still keep machine
driver's behavior same as before.

Signed-off-by: Shengjiu Wang 
Link: 
https://lore.kernel.org/r/1695202992-24864-1-git-send-email-shengjiu.w...@nxp.com
Signed-off-by: Mark Brown 
Signed-off-by: Sasha Levin 
---
 sound/soc/fsl/fsl-asoc-card.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index 76b5bfc288fde..bab7d34cf585b 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -52,8 +52,8 @@ struct codec_priv {
unsigned long mclk_freq;
unsigned long free_freq;
u32 mclk_id;
-   u32 fll_id;
-   u32 pll_id;
+   int fll_id;
+   int pll_id;
 };
 
 /**
@@ -206,7 +206,7 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream 
*substream,
}
 
/* Specific configuration for PLL */
-   if (codec_priv->pll_id && codec_priv->fll_id) {
+   if (codec_priv->pll_id >= 0 && codec_priv->fll_id >= 0) {
if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE)
pll_out = priv->sample_rate * 384;
else
@@ -248,7 +248,7 @@ static int fsl_asoc_card_hw_free(struct snd_pcm_substream 
*substream)
 
priv->streams &= ~BIT(substream->stream);
 
-   if (!priv->streams && codec_priv->pll_id && codec_priv->fll_id) {
+   if (!priv->streams && codec_priv->pll_id >= 0 && codec_priv->fll_id >= 
0) {
/* Force freq to be free_freq to avoid error message in codec */
ret = snd_soc_dai_set_sysclk(asoc_rtd_to_codec(rtd, 0),
 codec_priv->mclk_id,
@@ -621,6 +621,10 @@ static int fsl_asoc_card_probe(struct platform_device 
*pdev)
priv->card.dapm_routes = audio_map;
priv->card.num_dapm_routes = ARRAY_SIZE(audio_map);
priv->card.driver_name = DRIVER_NAME;
+
+   priv->codec_priv.fll_id = -1;
+   priv->codec_priv.pll_id = -1;
+
/* Diversify the card configurations */
if (of_device_is_compatible(np, "fsl,imx-audio-cs42888")) {
codec_dai_name = "cs42888";
-- 
2.40.1



Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Shengjiu Wang
On Wed, Oct 18, 2023 at 9:09 PM Hans Verkuil  wrote:
>
> On 18/10/2023 14:52, Shengjiu Wang wrote:
> > On Wed, Oct 18, 2023 at 3:58 PM Hans Verkuil  wrote:
> >>
> >> On 18/10/2023 09:40, Shengjiu Wang wrote:
> >>> On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:
> 
>  On 18/10/2023 09:23, Shengjiu Wang wrote:
> > On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang 
> >  wrote:
> >>
> >> On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  
> >> wrote:
> >>>
> >>> On 17/10/2023 15:11, Shengjiu Wang wrote:
>  On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  
>  wrote:
> >
> > Hi Shengjiu,
> >
> > On 13/10/2023 10:31, Shengjiu Wang wrote:
> >> Fixed point controls are used by the user to configure
> >> the audio sample rate to driver.
> >>
> >> Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
> >> new IDs for ASRC rate control.
> >>
> >> Signed-off-by: Shengjiu Wang 
> >> ---
> >>  .../userspace-api/media/v4l/common.rst|  1 +
> >>  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 
> >> +++
> >>  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
> >>  .../media/v4l/vidioc-queryctrl.rst|  7 
> >>  .../media/videodev2.h.rst.exceptions  |  1 +
> >>  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
> >>  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
> >>  include/media/v4l2-ctrls.h|  2 ++
> >>  include/uapi/linux/v4l2-controls.h| 13 +++
> >>  include/uapi/linux/videodev2.h|  3 ++
> >>  10 files changed, 76 insertions(+)
> >>  create mode 100644 
> >> Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >>
> >> diff --git a/Documentation/userspace-api/media/v4l/common.rst 
> >> b/Documentation/userspace-api/media/v4l/common.rst
> >> index ea0435182e44..35707edffb13 100644
> >> --- a/Documentation/userspace-api/media/v4l/common.rst
> >> +++ b/Documentation/userspace-api/media/v4l/common.rst
> >> @@ -52,6 +52,7 @@ applicable to all devices.
> >>  ext-ctrls-fm-rx
> >>  ext-ctrls-detect
> >>  ext-ctrls-colorimetry
> >> +ext-ctrls-fixed-point
> >
> > Rename this to ext-ctrls-audio-m2m.
> >
> >>  fourcc
> >>  format
> >>  planar-apis
> >> diff --git 
> >> a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
> >> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >> new file mode 100644
> >> index ..2ef6e250580c
> >> --- /dev/null
> >> +++ 
> >> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >> @@ -0,0 +1,36 @@
> >> +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
> >> +
> >> +.. _fixed-point-controls:
> >> +
> >> +***
> >> +Fixed Point Control Reference
> >
> > This is for audio controls. "Fixed Point" is just the type, and it 
> > doesn't make
> > sense to group fixed point controls. But it does make sense to 
> > group the audio
> > controls.
> >
> > V4L2 controls can be grouped into classes. Basically it is a way to 
> > put controls
> > into categories, and for each category there is also a control that 
> > gives a
> > description of the class (see 2.15.15 in
> > https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
> >
> > If you use e.g. 'v4l2-ctl -l' to list all the controls, then you 
> > will see that
> > they are grouped based on what class of control they are.
> >
> > So I think it would be a good idea to create a new control class 
> > for M2M audio controls,
> > instead of just adding them to the catch-all 'User Controls' class.
> >
> > Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> > V4L2_CID_COLORIMETRY_CLASS to see how
> > it is done.
> >
> > M2M_AUDIO would probably be a good name for the class.
> >
> >> +***
> >> +
> >> +These controls are intended to support an asynchronous sample
> >> +rate converter.
> >
> > Add ' (ASRC).' at the end to indicate the common abbreviation for
> > that.
> >
> >> +
> >> +.. _v4l2-audio-asrc:
> >> +
> >> +``V4L2_CID_ASRC_SOURCE_RATE``
> >> +sets the resampler source rate.
> >> +
> >> 

[Bisected] [efeda3bf912f] OOPS crash while performing Block device module parameter test [qla2xxx / FC]

2023-10-18 Thread Tasmiya Nalatwad

Greetings,

OOPs Kernel crash while performing Block device module parameter test 
[qla2xxx / FC] on linux-next 6.6.0-rc5-next-20231010


--- Traces ---

[30876.431678] Kernel attempted to read user page (30) - exploit 
attempt? (uid: 0)

[30876.431687] BUG: Kernel NULL pointer dereference on read at 0x0030
[30876.431692] Faulting instruction address: 0xc008018e3180
[30876.431697] Oops: Kernel access of bad area, sig: 11 [#1]
[30876.431700] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA pSeries
[30876.431705] Modules linked in: qla2xxx(+) nvme_fc nvme_fabrics 
nvme_core dm_round_robin dm_queue_length exfat vfat fat btrfs 
blake2b_generic zstd_compress loop raid10 raid456 async_raid6_recov 
async_memcpy async_pq async_xor async_tx xor raid6_pq raid1 linear xfs 
libcrc32c raid0 nvram rpadlpar_io rpaphp xsk_diag bonding tls rfkill 
vmx_crypto pseries_rng binfmt_misc ext4 mbcache jbd2 dm_service_time 
sd_mod sg ibmvfc ibmveth t10_pi crc64_rocksoft crc64 scsi_transport_fc 
dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: 
nvme_core]
[30876.431767] CPU: 0 PID: 1289400 Comm: kworker/0:2 Kdump: loaded Not 
tainted 6.6.0-rc5-next-20231010-auto #1
[30876.431773] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[30876.431779] Workqueue: events work_for_cpu_fn
[30876.431788] NIP:  c008018e3180 LR: c008018e3128 CTR: 
c0513f80
[30876.431792] REGS: c00062a8b930 TRAP: 0300   Not tainted 
(6.6.0-rc5-next-20231010-auto)
[30876.431797] MSR:  8280b033   
CR: 28000482  XER: 2004000f
[30876.431811] CFAR: c008018e3138 DAR: 0030 DSISR: 
4000 IRQMASK: 0
[30876.431811] GPR00: c008018e3128 c00062a8bbd0 c00800eb8300 

[30876.431811] GPR04:    
0017bbac
[30876.431811] GPR08:  0030  
c008019a6d68
[30876.431811] GPR12:  c2ff c019cb98 
c00082a97980
[30876.431811] GPR16:    
c3071ab0
[30876.431811] GPR20: c3491c0d c00063bb9a00 c00063bb30c0 
c001d8b52928
[30876.431811] GPR24: c00800eb63a8 ffed c001d8b52000 
0102
[30876.431811] GPR28: c00800ebaf00 c001d8b52890  
c001d8b58000

[30876.431856] NIP [c008018e3180] qla2x00_mem_free+0x298/0x6b0 [qla2xxx]
[30876.431876] LR [c008018e3128] qla2x00_mem_free+0x240/0x6b0 [qla2xxx]
[30876.431895] Call Trace:
[30876.431897] [c00062a8bbd0] [c008018e2f1c] 
qla2x00_mem_free+0x34/0x6b0 [qla2xxx] (unreliable)
[30876.431917] [c00062a8bc20] [c008018eed30] 
qla2x00_probe_one+0x16d8/0x2640 [qla2xxx]
[30876.431937] [c00062a8bd90] [c08c589c] 
local_pci_probe+0x6c/0x110
[30876.431943] [c00062a8be10] [c0189ba8] 
work_for_cpu_fn+0x38/0x60
[30876.431948] [c00062a8be40] [c018d0d0] 
process_scheduled_works+0x230/0x4f0
[30876.431952] [c00062a8bf10] [c018fe14] 
worker_thread+0x1e4/0x500

[30876.431955] [c00062a8bf90] [c019ccc8] kthread+0x138/0x140
[30876.431960] [c00062a8bfe0] [c000df98] 
start_kernel_thread+0x14/0x18
[30876.431965] Code: 4082000c a09f0198 78841b68 e8df0278 38e0 
480c3b8d e8410018 3920 e91f0178 f93f0280 f93f0278 39280030 
 7fa95040 419e00b8 ebc80030

[30876.431977] ---[ end trace  ]---
[30876.480385] pstore: backend (nvram) writing error (-1)


Git bisect points to below commit. Reverting this commit fixes the problem.
commit efeda3bf912f269bcae16816683f432f58d68075
    scsi: qla2xxx: Move resource to allow code reuse

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Hans Verkuil
On 18/10/2023 14:52, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 3:58 PM Hans Verkuil  wrote:
>>
>> On 18/10/2023 09:40, Shengjiu Wang wrote:
>>> On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:

 On 18/10/2023 09:23, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  
> wrote:
>>
>> On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
>>>
>>> On 17/10/2023 15:11, Shengjiu Wang wrote:
 On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  
 wrote:
>
> Hi Shengjiu,
>
> On 13/10/2023 10:31, Shengjiu Wang wrote:
>> Fixed point controls are used by the user to configure
>> the audio sample rate to driver.
>>
>> Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
>> new IDs for ASRC rate control.
>>
>> Signed-off-by: Shengjiu Wang 
>> ---
>>  .../userspace-api/media/v4l/common.rst|  1 +
>>  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 
>> +++
>>  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
>>  .../media/v4l/vidioc-queryctrl.rst|  7 
>>  .../media/videodev2.h.rst.exceptions  |  1 +
>>  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
>>  include/media/v4l2-ctrls.h|  2 ++
>>  include/uapi/linux/v4l2-controls.h| 13 +++
>>  include/uapi/linux/videodev2.h|  3 ++
>>  10 files changed, 76 insertions(+)
>>  create mode 100644 
>> Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>>
>> diff --git a/Documentation/userspace-api/media/v4l/common.rst 
>> b/Documentation/userspace-api/media/v4l/common.rst
>> index ea0435182e44..35707edffb13 100644
>> --- a/Documentation/userspace-api/media/v4l/common.rst
>> +++ b/Documentation/userspace-api/media/v4l/common.rst
>> @@ -52,6 +52,7 @@ applicable to all devices.
>>  ext-ctrls-fm-rx
>>  ext-ctrls-detect
>>  ext-ctrls-colorimetry
>> +ext-ctrls-fixed-point
>
> Rename this to ext-ctrls-audio-m2m.
>
>>  fourcc
>>  format
>>  planar-apis
>> diff --git 
>> a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
>> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>> new file mode 100644
>> index 000000000000..2ef6e250580c
>> --- /dev/null
>> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>> @@ -0,0 +1,36 @@
>> +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
>> +
>> +.. _fixed-point-controls:
>> +
>> +***
>> +Fixed Point Control Reference
>
> This is for audio controls. "Fixed Point" is just the type, and it 
> doesn't make
> sense to group fixed point controls. But it does make sense to group 
> the audio
> controls.
>
> V4L2 controls can be grouped into classes. Basically it is a way to 
> put controls
> into categories, and for each category there is also a control that 
> gives a
> description of the class (see 2.15.15 in
> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
>
> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will 
> see that
> they are grouped based on what class of control they are.
>
> So I think it would be a good idea to create a new control class for 
> M2M audio controls,
> instead of just adding them to the catch-all 'User Controls' class.
>
> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> V4L2_CID_COLORIMETRY_CLASS to see how
> it is done.
>
> M2M_AUDIO would probably be a good name for the class.
>
>> +***
>> +
>> +These controls are intended to support an asynchronous sample
>> +rate converter.
>
> Add ' (ASRC).' at the end to indicate the common abbreviation for
> that.
>
>> +
>> +.. _v4l2-audio-asrc:
>> +
>> +``V4L2_CID_ASRC_SOURCE_RATE``
>> +sets the resampler source rate.
>> +
>> +``V4L2_CID_ASRC_DEST_RATE``
>> +sets the resampler destination rate.
>
> Document the unit (Hz) for these two controls.
>
>> +
>> +.. c:type:: v4l2_ctrl_fixed_point
>> +
>> +.. cssclass:: longtable
>> +
>> +.. tabularcolumns:: 

Re: [RFC PATCH v6 10/11] media: imx-asrc: Add memory to memory driver

2023-10-18 Thread Hans Verkuil
On 18/10/2023 14:53, Shengjiu Wang wrote:
> On Mon, Oct 16, 2023 at 10:01 PM Hans Verkuil  wrote:
>>
>> On 13/10/2023 10:31, Shengjiu Wang wrote:
>>> Implement the ASRC memory to memory function using
>>> the v4l2 framework, user can use this function with
>>> v4l2 ioctl interface.
>>>
>>> User send the output and capture buffer to driver and
>>> driver store the converted data to the capture buffer.
>>>
>>> This feature can be shared by ASRC and EASRC drivers
>>>
>>> Signed-off-by: Shengjiu Wang 
>>> ---
>>>  drivers/media/platform/nxp/Kconfig|   12 +
>>>  drivers/media/platform/nxp/Makefile   |1 +
>>>  drivers/media/platform/nxp/imx-asrc.c | 1248 +
>>>  3 files changed, 1261 insertions(+)
>>>  create mode 100644 drivers/media/platform/nxp/imx-asrc.c
>>>
>>> diff --git a/drivers/media/platform/nxp/Kconfig 
>>> b/drivers/media/platform/nxp/Kconfig
>>> index 40e3436669e2..8234644ee341 100644
>>> --- a/drivers/media/platform/nxp/Kconfig
>>> +++ b/drivers/media/platform/nxp/Kconfig
>>> @@ -67,3 +67,15 @@ config VIDEO_MX2_EMMAPRP
>>>
>>>  source "drivers/media/platform/nxp/dw100/Kconfig"
>>>  source "drivers/media/platform/nxp/imx-jpeg/Kconfig"
>>> +
>>> +config VIDEO_IMX_ASRC
>>> + tristate "NXP i.MX ASRC M2M support"
>>> + depends on V4L_MEM2MEM_DRIVERS
>>> + depends on MEDIA_SUPPORT
>>> + select VIDEOBUF2_DMA_CONTIG
>>> + select V4L2_MEM2MEM_DEV
>>> + help
>>> + Say Y if you want to add ASRC M2M support for NXP CPUs.
>>> + It is a complement for ASRC M2P and ASRC P2M features.
>>> + This option is only useful for out-of-tree drivers since
>>> + in-tree drivers select it automatically.
>>> diff --git a/drivers/media/platform/nxp/Makefile 
>>> b/drivers/media/platform/nxp/Makefile
>>> index 4d90eb713652..1325675e34f5 100644
>>> --- a/drivers/media/platform/nxp/Makefile
>>> +++ b/drivers/media/platform/nxp/Makefile
>>> @@ -9,3 +9,4 @@ obj-$(CONFIG_VIDEO_IMX8MQ_MIPI_CSI2) += imx8mq-mipi-csi2.o
>>>  obj-$(CONFIG_VIDEO_IMX_MIPI_CSIS) += imx-mipi-csis.o
>>>  obj-$(CONFIG_VIDEO_IMX_PXP) += imx-pxp.o
>>>  obj-$(CONFIG_VIDEO_MX2_EMMAPRP) += mx2_emmaprp.o
>>> +obj-$(CONFIG_VIDEO_IMX_ASRC) += imx-asrc.o
>>> diff --git a/drivers/media/platform/nxp/imx-asrc.c 
>>> b/drivers/media/platform/nxp/imx-asrc.c
>>> new file mode 100644
>>> index 000000000000..373ca2b5ec90
>>> --- /dev/null
>>> +++ b/drivers/media/platform/nxp/imx-asrc.c
>>> @@ -0,0 +1,1248 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +//
>>> +// Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
>>> +// Copyright (C) 2019-2023 NXP
>>> +//
>>> +// Freescale ASRC Memory to Memory (M2M) driver
>>> +
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +
>>> +#define V4L_CAP OUT
>>> +#define V4L_OUT IN
>>> +
>>> +#define ASRC_xPUT_DMA_CALLBACK(dir) \
>>> + (((dir) == V4L_OUT) ? asrc_input_dma_callback \
>>> + : asrc_output_dma_callback)
>>> +
>>> +#define DIR_STR(dir) (dir) == V4L_OUT ? "out" : "cap"
>>> +
>>> +#define ASRC_M2M_BUFFER_SIZE (512 * 1024)
>>> +#define ASRC_M2M_PERIOD_SIZE (48 * 1024)
>>> +#define ASRC_M2M_SG_NUM (20)
>>
>> Where do all these values come from? How do they relate?
>> Some comments would be welcome.
>>
>> Esp. ASRC_M2M_SG_NUM is a bit odd.
>>
>>> +
>>> +struct asrc_fmt {
>>> + u32 fourcc;
>>> + snd_pcm_format_t format;
>>
>> Do you need this field? If not, then you can drop the whole
>> struct and just use u32 fourcc in the formats[] array.
>>
>>> +};
>>> +
>>> +struct asrc_pair_m2m {
>>> + struct fsl_asrc_pair *pair;
>>> + struct asrc_m2m *m2m;
>>> + struct v4l2_fh fh;
>>> + struct v4l2_ctrl_handler ctrl_handler;
>>> + int channels[2];
>>> + struct v4l2_ctrl_fixed_point src_rate;
>>> + struct v4l2_ctrl_fixed_point dst_rate;
>>> +
>>> +};
>>> +
>>> +struct asrc_m2m {
>>> + struct fsl_asrc_m2m_pdata pdata;
>>> + struct v4l2_device v4l2_dev;
>>> + struct v4l2_m2m_dev *m2m_dev;
>>> + struct video_device *dec_vdev;
>>> + struct mutex mlock; /* v4l2 ioctls serialization */
>>> + struct platform_device *pdev;
>>> +};
>>> +
>>> +static struct asrc_fmt formats[] = {
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_S8,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_S16_LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_U16_LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_S24_LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_S24_3LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_U24_LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_U24_3LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_S32_LE,
>>> + },
>>> + {
>>> + .fourcc = V4L2_AUDIO_FMT_U32_LE,
>>> + },
>>> + {
>>> + 

Re: [RFC PATCH v6 10/11] media: imx-asrc: Add memory to memory driver

2023-10-18 Thread Shengjiu Wang
On Mon, Oct 16, 2023 at 10:01 PM Hans Verkuil  wrote:
>
> On 13/10/2023 10:31, Shengjiu Wang wrote:
> > Implement the ASRC memory to memory function using
> > the v4l2 framework, user can use this function with
> > v4l2 ioctl interface.
> >
> > User send the output and capture buffer to driver and
> > driver store the converted data to the capture buffer.
> >
> > This feature can be shared by ASRC and EASRC drivers
> >
> > Signed-off-by: Shengjiu Wang 
> > ---
> >  drivers/media/platform/nxp/Kconfig|   12 +
> >  drivers/media/platform/nxp/Makefile   |1 +
> >  drivers/media/platform/nxp/imx-asrc.c | 1248 +
> >  3 files changed, 1261 insertions(+)
> >  create mode 100644 drivers/media/platform/nxp/imx-asrc.c
> >
> > diff --git a/drivers/media/platform/nxp/Kconfig 
> > b/drivers/media/platform/nxp/Kconfig
> > index 40e3436669e2..8234644ee341 100644
> > --- a/drivers/media/platform/nxp/Kconfig
> > +++ b/drivers/media/platform/nxp/Kconfig
> > @@ -67,3 +67,15 @@ config VIDEO_MX2_EMMAPRP
> >
> >  source "drivers/media/platform/nxp/dw100/Kconfig"
> >  source "drivers/media/platform/nxp/imx-jpeg/Kconfig"
> > +
> > +config VIDEO_IMX_ASRC
> > + tristate "NXP i.MX ASRC M2M support"
> > + depends on V4L_MEM2MEM_DRIVERS
> > + depends on MEDIA_SUPPORT
> > + select VIDEOBUF2_DMA_CONTIG
> > + select V4L2_MEM2MEM_DEV
> > + help
> > + Say Y if you want to add ASRC M2M support for NXP CPUs.
> > + It is a complement for ASRC M2P and ASRC P2M features.
> > + This option is only useful for out-of-tree drivers since
> > + in-tree drivers select it automatically.
> > diff --git a/drivers/media/platform/nxp/Makefile 
> > b/drivers/media/platform/nxp/Makefile
> > index 4d90eb713652..1325675e34f5 100644
> > --- a/drivers/media/platform/nxp/Makefile
> > +++ b/drivers/media/platform/nxp/Makefile
> > @@ -9,3 +9,4 @@ obj-$(CONFIG_VIDEO_IMX8MQ_MIPI_CSI2) += imx8mq-mipi-csi2.o
> >  obj-$(CONFIG_VIDEO_IMX_MIPI_CSIS) += imx-mipi-csis.o
> >  obj-$(CONFIG_VIDEO_IMX_PXP) += imx-pxp.o
> >  obj-$(CONFIG_VIDEO_MX2_EMMAPRP) += mx2_emmaprp.o
> > +obj-$(CONFIG_VIDEO_IMX_ASRC) += imx-asrc.o
> > diff --git a/drivers/media/platform/nxp/imx-asrc.c 
> > b/drivers/media/platform/nxp/imx-asrc.c
> > new file mode 100644
> > index 000000000000..373ca2b5ec90
> > --- /dev/null
> > +++ b/drivers/media/platform/nxp/imx-asrc.c
> > @@ -0,0 +1,1248 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +//
> > +// Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
> > +// Copyright (C) 2019-2023 NXP
> > +//
> > +// Freescale ASRC Memory to Memory (M2M) driver
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define V4L_CAP OUT
> > +#define V4L_OUT IN
> > +
> > +#define ASRC_xPUT_DMA_CALLBACK(dir) \
> > + (((dir) == V4L_OUT) ? asrc_input_dma_callback \
> > + : asrc_output_dma_callback)
> > +
> > +#define DIR_STR(dir) (dir) == V4L_OUT ? "out" : "cap"
> > +
> > +#define ASRC_M2M_BUFFER_SIZE (512 * 1024)
> > +#define ASRC_M2M_PERIOD_SIZE (48 * 1024)
> > +#define ASRC_M2M_SG_NUM (20)
>
> Where do all these values come from? How do they relate?
> Some comments would be welcome.
>
> Esp. ASRC_M2M_SG_NUM is a bit odd.
>
> > +
> > +struct asrc_fmt {
> > + u32 fourcc;
> > + snd_pcm_format_t format;
>
> Do you need this field? If not, then you can drop the whole
> struct and just use u32 fourcc in the formats[] array.
>
> > +};
> > +
> > +struct asrc_pair_m2m {
> > + struct fsl_asrc_pair *pair;
> > + struct asrc_m2m *m2m;
> > + struct v4l2_fh fh;
> > + struct v4l2_ctrl_handler ctrl_handler;
> > + int channels[2];
> > + struct v4l2_ctrl_fixed_point src_rate;
> > + struct v4l2_ctrl_fixed_point dst_rate;
> > +
> > +};
> > +
> > +struct asrc_m2m {
> > + struct fsl_asrc_m2m_pdata pdata;
> > + struct v4l2_device v4l2_dev;
> > + struct v4l2_m2m_dev *m2m_dev;
> > + struct video_device *dec_vdev;
> > + struct mutex mlock; /* v4l2 ioctls serialization */
> > + struct platform_device *pdev;
> > +};
> > +
> > +static struct asrc_fmt formats[] = {
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S8,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S16_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_U16_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S24_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S24_3LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_U24_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_U24_3LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S32_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_U32_LE,
> > + },
> > + {
> > + .fourcc = V4L2_AUDIO_FMT_S20_3LE,
> > + },
> > + 

Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Shengjiu Wang
On Wed, Oct 18, 2023 at 3:58 PM Hans Verkuil  wrote:
>
> On 18/10/2023 09:40, Shengjiu Wang wrote:
> > On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:
> >>
> >> On 18/10/2023 09:23, Shengjiu Wang wrote:
> >>> On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  
> >>> wrote:
> 
>  On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
> >
> > On 17/10/2023 15:11, Shengjiu Wang wrote:
> >> On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  
> >> wrote:
> >>>
> >>> Hi Shengjiu,
> >>>
> >>> On 13/10/2023 10:31, Shengjiu Wang wrote:
>  Fixed point controls are used by the user to configure
>  the audio sample rate to driver.
> 
>  Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
>  new IDs for ASRC rate control.
> 
>  Signed-off-by: Shengjiu Wang 
>  ---
>   .../userspace-api/media/v4l/common.rst|  1 +
>   .../media/v4l/ext-ctrls-fixed-point.rst   | 36 
>  +++
>   .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
>   .../media/v4l/vidioc-queryctrl.rst|  7 
>   .../media/videodev2.h.rst.exceptions  |  1 +
>   drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
>   drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
>   include/media/v4l2-ctrls.h|  2 ++
>   include/uapi/linux/v4l2-controls.h| 13 +++
>   include/uapi/linux/videodev2.h|  3 ++
>   10 files changed, 76 insertions(+)
>   create mode 100644 
>  Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> 
>  diff --git a/Documentation/userspace-api/media/v4l/common.rst 
>  b/Documentation/userspace-api/media/v4l/common.rst
>  index ea0435182e44..35707edffb13 100644
>  --- a/Documentation/userspace-api/media/v4l/common.rst
>  +++ b/Documentation/userspace-api/media/v4l/common.rst
>  @@ -52,6 +52,7 @@ applicable to all devices.
>   ext-ctrls-fm-rx
>   ext-ctrls-detect
>   ext-ctrls-colorimetry
>  +ext-ctrls-fixed-point
> >>>
> >>> Rename this to ext-ctrls-audio-m2m.
> >>>
>   fourcc
>   format
>   planar-apis
>  diff --git 
>  a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
>  b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>  new file mode 100644
>  index 000000000000..2ef6e250580c
>  --- /dev/null
>  +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>  @@ -0,0 +1,36 @@
>  +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
>  +
>  +.. _fixed-point-controls:
>  +
>  +***
>  +Fixed Point Control Reference
> >>>
> >>> This is for audio controls. "Fixed Point" is just the type, and it 
> >>> doesn't make
> >>> sense to group fixed point controls. But it does make sense to group 
> >>> the audio
> >>> controls.
> >>>
> >>> V4L2 controls can be grouped into classes. Basically it is a way to 
> >>> put controls
> >>> into categories, and for each category there is also a control that 
> >>> gives a
> >>> description of the class (see 2.15.15 in
> >>> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
> >>>
> >>> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will 
> >>> see that
> >>> they are grouped based on what class of control they are.
> >>>
> >>> So I think it would be a good idea to create a new control class for 
> >>> M2M audio controls,
> >>> instead of just adding them to the catch-all 'User Controls' class.
> >>>
> >>> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> >>> V4L2_CID_COLORIMETRY_CLASS to see how
> >>> it is done.
> >>>
> >>> M2M_AUDIO would probably be a good name for the class.
> >>>
>  +***
>  +
>  +These controls are intended to support an asynchronous sample
>  +rate converter.
> >>>
> >>> Add ' (ASRC).' at the end to indicate the common abbreviation for
> >>> that.
> >>>
>  +
>  +.. _v4l2-audio-asrc:
>  +
>  +``V4L2_CID_ASRC_SOURCE_RATE``
>  +sets the resampler source rate.
>  +
>  +``V4L2_CID_ASRC_DEST_RATE``
>  +sets the resampler destination rate.
> >>>
> >>> Document the unit (Hz) for these two controls.
> >>>
>  +
>  +.. c:type:: v4l2_ctrl_fixed_point
>  +
>  +.. cssclass:: longtable
>  +
>  +.. tabularcolumns:: |p{1.5cm}|p{5.8cm}|p{10.0cm}|
>  +
>  +.. 

[PATCH 6/7] PCI/DPC: Use defines with DPC reason fields

2023-10-18 Thread Ilpo Järvinen
Add new defines for DPC reason fields and use them instead of literals.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pcie/dpc.c| 27 +--
 include/uapi/linux/pci_regs.h |  6 ++
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 0048a11bd119..94111e438241 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -274,20 +274,27 @@ void dpc_process_error(struct pci_dev *pdev)
pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
 status, source);
 
-   reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;
-   ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;
+   reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN;
+   ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
pci_warn(pdev, "%s detected\n",
-(reason == 0) ? "unmasked uncorrectable error" :
-(reason == 1) ? "ERR_NONFATAL" :
-(reason == 2) ? "ERR_FATAL" :
-(ext_reason == 0) ? "RP PIO error" :
-(ext_reason == 1) ? "software trigger" :
-"reserved error");
+(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ?
+"unmasked uncorrectable error" :
+(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ?
+"ERR_NONFATAL" :
+(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
+"ERR_FATAL" :
+(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
+"RP PIO error" :
+(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
+"software trigger" :
+"reserved error");
 
/* show RP PIO error detail information */
-   if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0)
+   if (pdev->dpc_rp_extensions &&
+   reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT &&
+   ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO)
dpc_process_rp_pio_error(pdev);
-   else if (reason == 0 &&
+   else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
 dpc_get_aer_uncorrect_severity(pdev, &info) &&
 aer_get_device_error_info(pdev, &info)) {
aer_print_error(pdev, &info);
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 2d6df02a4b93..c4d67ceae20d 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1044,9 +1044,15 @@
 #define PCI_EXP_DPC_STATUS 0x08/* DPC Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER0x0001 /* Trigger Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN0x0006 /* Trigger Reason */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR  0x0000 /* DPC due to unmasked 
uncorrectable error */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE0x0002 /* DPC due to receiving 
ERR_NONFATAL */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE 0x0004 /* DPC due to receiving 
ERR_FATAL */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT 0x0006 /* Reason in Trig Reason 
Extension field */
 #define  PCI_EXP_DPC_STATUS_INTERRUPT  0x0008 /* Interrupt Status */
 #define  PCI_EXP_DPC_RP_BUSY   0x0010 /* Root Port Busy */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO 0x0000  /* DPC due to 
RP PIO error */
+#define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER 0x0020  /* DPC due to 
DPC SW Trigger bit */
 #define  PCI_EXP_DPC_RP_PIO_FEP0x1f00 /* Root Port PIO 
First Error Pointer */
 
 #define PCI_EXP_DPC_SOURCE_ID   0x0A   /* DPC Source Identifier */
-- 
2.30.2



[PATCH 5/7] PCI/DPC: Use defined fields with DPC_CTL register

2023-10-18 Thread Ilpo Järvinen
Instead of using a literal to clear bits, add PCI_EXP_DPC_CTL_EN_MASK
and use the usual pattern to modify a bitfield.

While at it, rearrange RMW code more logically together.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pcie/dpc.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index a5c259ada9ea..0048a11bd119 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -18,6 +18,9 @@
 #include "portdrv.h"
 #include "../pci.h"
 
+#define PCI_EXP_DPC_CTL_EN_MASK(PCI_EXP_DPC_CTL_EN_FATAL | \
+PCI_EXP_DPC_CTL_EN_NONFATAL)
+
 static const char * const rp_pio_error_string[] = {
"Configuration Request received UR Completion",  /* Bit Position 0  */
"Configuration Request received CA Completion",  /* Bit Position 1  */
@@ -369,12 +372,13 @@ static int dpc_probe(struct pcie_device *dev)
}
 
pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
-   pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
 
-   ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | 
PCI_EXP_DPC_CTL_INT_EN;
+   pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
+   ctl &= ~PCI_EXP_DPC_CTL_EN_MASK;
+   ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
-   pci_info(pdev, "enabled with IRQ %d\n", dev->irq);
 
+   pci_info(pdev, "enabled with IRQ %d\n", dev->irq);
pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c 
PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
-- 
2.30.2



[PATCH 4/7] PCI/DPC: Use FIELD_GET()

2023-10-18 Thread Ilpo Järvinen
From: Bjorn Helgaas 

Use FIELD_GET() to remove dependencies on the field position, i.e., the
shift value. No functional change intended.

Signed-off-by: Ilpo Järvinen 
Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/pcie/dpc.c| 5 +++--
 drivers/pci/quirks.c  | 2 +-
 include/uapi/linux/pci_regs.h | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 3ceed8e3de41..a5c259ada9ea 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -9,6 +9,7 @@
 #define dev_fmt(fmt) "DPC: " fmt
 
 #include 
+#include <linux/bitfield.h>
 #include 
 #include 
 #include 
@@ -202,7 +203,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 
/* Get First Error Pointer */
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status);
-   first_error = (dpc_status & 0x1f00) >> 8;
+   first_error = FIELD_GET(PCI_EXP_DPC_RP_PIO_FEP, dpc_status);
 
for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
if ((status & ~mask) & (1 << i))
@@ -338,7 +339,7 @@ void pci_dpc_init(struct pci_dev *pdev)
/* Quirks may set dpc_rp_log_size if device or firmware is buggy */
if (!pdev->dpc_rp_log_size) {
pdev->dpc_rp_log_size =
-   (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
+   FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap);
if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) {
pci_err(pdev, "RP PIO log size %u is invalid\n",
pdev->dpc_rp_log_size);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index eeec1d6f9023..a9fdc2e3f110 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -6154,7 +6154,7 @@ static void dpc_log_size(struct pci_dev *dev)
if (!(val & PCI_EXP_DPC_CAP_RP_EXT))
return;
 
-   if (!((val & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8)) {
+   if (FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, val) == 0) {
pci_info(dev, "Overriding RP PIO Log Size to 4\n");
dev->dpc_rp_log_size = 4;
}
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 495f0ae4ecd5..2d6df02a4b93 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1047,6 +1047,7 @@
 #define  PCI_EXP_DPC_STATUS_INTERRUPT  0x0008 /* Interrupt Status */
 #define  PCI_EXP_DPC_RP_BUSY   0x0010 /* Root Port Busy */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */
+#define  PCI_EXP_DPC_RP_PIO_FEP0x1f00 /* Root Port PIO 
First Error Pointer */
 
 #define PCI_EXP_DPC_SOURCE_ID   0x0A   /* DPC Source Identifier */
 
-- 
2.30.2



[PATCH 3/7] PCI: hotplug: Use FIELD_GET/PREP()

2023-10-18 Thread Ilpo Järvinen
Instead of handcrafted shifts to handle register fields, use
FIELD_GET/FIELD_PREP().

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/hotplug/pciehp_core.c | 3 ++-
 drivers/pci/hotplug/pciehp_hpc.c  | 5 +++--
 drivers/pci/hotplug/pnv_php.c | 3 ++-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_core.c 
b/drivers/pci/hotplug/pciehp_core.c
index 4042d87d539d..ddd55ad97a58 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -20,6 +20,7 @@
 #define pr_fmt(fmt) "pciehp: " fmt
 #define dev_fmt pr_fmt
 
+#include <linux/bitfield.h>
 #include 
 #include 
 #include 
@@ -103,7 +104,7 @@ static int set_attention_status(struct hotplug_slot 
*hotplug_slot, u8 status)
struct pci_dev *pdev = ctrl->pcie->port;
 
if (status)
-   status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
+   status = FIELD_PREP(PCI_EXP_SLTCTL_AIC, status);
else
status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
 
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index fd713abdfb9f..b1d0a1b3917d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -14,6 +14,7 @@
 
 #define dev_fmt(fmt) "pciehp: " fmt
 
+#include <linux/bitfield.h>
 #include 
 #include 
 #include 
@@ -484,7 +485,7 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot 
*hotplug_slot,
struct pci_dev *pdev = ctrl_dev(ctrl);
 
pci_config_pm_runtime_get(pdev);
-   pcie_write_cmd_nowait(ctrl, status << 6,
+   pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC, status),
  PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);
pci_config_pm_runtime_put(pdev);
return 0;
@@ -1028,7 +1029,7 @@ struct controller *pcie_init(struct pcie_device *dev)
PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC);
 
ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c 
HotPlug%c Surprise%c Interlock%c NoCompl%c IbPresDis%c LLActRep%c%s\n",
-   (slot_cap & PCI_EXP_SLTCAP_PSN) >> 19,
+   FIELD_GET(PCI_EXP_SLTCAP_PSN, slot_cap),
FLAG(slot_cap, PCI_EXP_SLTCAP_ABP),
FLAG(slot_cap, PCI_EXP_SLTCAP_PCP),
FLAG(slot_cap, PCI_EXP_SLTCAP_MRLSP),
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 881d420637bf..694349be9d0a 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -5,6 +5,7 @@
  * Copyright Gavin Shan, IBM Corporation 2016.
  */
 
+#include <linux/bitfield.h>
 #include 
 #include 
 #include 
@@ -731,7 +732,7 @@ static int pnv_php_enable_msix(struct pnv_php_slot 
*php_slot)
 
/* Check hotplug MSIx entry is in range */
pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
-   entry.entry = (pcie_flag & PCI_EXP_FLAGS_IRQ) >> 9;
+   entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag);
if (entry.entry >= nr_entries)
return -ERANGE;
 
-- 
2.30.2



Re: [PATCH v5 0/5] ppc, fbdev: Clean up fbdev mmap helper

2023-10-18 Thread Michael Ellerman
Thomas Zimmermann  writes:
> FYI, I intend to merge patches 1 and 2 of this patchset into 
> drm-misc-next. The updates for PowerPC can be merged through PPC trees 
> later. Let me know if this does not work for you.

Hi Thomas,

Sorry for the late reply, I was on leave.

Yeah that's fine.

cheers

> Am 22.09.23 um 10:04 schrieb Thomas Zimmermann:
>> Clean up and rename fb_pgprotect() to work without struct file. Then
>> refactor the implementation for PowerPC. This change has been discussed
>> at [1] in the context of refactoring fbdev's mmap code.
>> 
>> The first two patches update fbdev and replace fbdev's fb_pgprotect()
>> with pgprot_framebuffer() on all architectures. The new helper's stream-
>> lined interface enables more refactoring within fbdev's mmap
>> implementation.
>> 
>> Patches 3 to 5 adapt PowerPC's internal interfaces to provide
>> phys_mem_access_prot() that works without struct file. Neither the
>> architecture code or fbdev helpers need the parameter.
>> 
>> v5:
>>  * improve commit descriptions (Javier)
>>  * add missing tags (Geert)
>> v4:
>>  * fix commit message (Christophe)
>> v3:
>>  * rename fb_pgrotect() to pgprot_framebuffer() (Arnd)
>> v2:
>>  * reorder patches to simplify merging (Michael)
>> 
>> [1] 
>> https://lore.kernel.org/linuxppc-dev/5501ba80-bdb0-6344-16b0-0466a950f...@suse.com/
>> 
>> Thomas Zimmermann (5):
>>fbdev: Avoid file argument in fb_pgprotect()
>>fbdev: Replace fb_pgprotect() with pgprot_framebuffer()
>>arch/powerpc: Remove trailing whitespaces
>>arch/powerpc: Remove file parameter from phys_mem_access_prot code
>>arch/powerpc: Call internal __phys_mem_access_prot() in fbdev code
>> 
>>   arch/ia64/include/asm/fb.h| 15 +++
>>   arch/m68k/include/asm/fb.h| 19 ++-
>>   arch/mips/include/asm/fb.h| 11 +--
>>   arch/powerpc/include/asm/book3s/pgtable.h | 10 --
>>   arch/powerpc/include/asm/fb.h | 13 +
>>   arch/powerpc/include/asm/machdep.h| 13 ++---
>>   arch/powerpc/include/asm/nohash/pgtable.h | 10 --
>>   arch/powerpc/include/asm/pci.h|  4 +---
>>   arch/powerpc/kernel/pci-common.c  |  3 +--
>>   arch/powerpc/mm/mem.c |  8 
>>   arch/sparc/include/asm/fb.h   | 15 +--
>>   arch/x86/include/asm/fb.h | 10 ++
>>   arch/x86/video/fbdev.c| 15 ---
>>   drivers/video/fbdev/core/fb_chrdev.c  |  3 ++-
>>   include/asm-generic/fb.h  | 12 ++--
>>   15 files changed, 86 insertions(+), 75 deletions(-)
>> 
>> 
>> base-commit: f8d21cb17a99b75862196036bb4bb93ee9637b74
>
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Frankenstrasse 146, 90461 Nuernberg, Germany
> GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
> HRB 36809 (AG Nuernberg)


Re: [Bisected] PowerMac G5 fails booting kernel 6.6-rc3 (BUG: Unable to handle kernel data access at 0xfeffbb62ffec65fe)

2023-10-18 Thread Erhard Furtner
On Wed, 18 Oct 2023 16:45:04 +1100
Michael Ellerman  wrote:

> Thanks. Yeah text is generally better, it archives better and can be
> grepped etc. but in this case I was going a bit mad trying to make sense
> of the oops :)
> 
> In hindsight the bug is an obvious boot time ordering problem, can you
> confirm this fixes it. That should apply on top of Linus' current
> master.
> 
> cheers
> 
> diff --git a/arch/powerpc/kernel/setup-common.c 
> b/arch/powerpc/kernel/setup-common.c
> index 2f1026fba00d..71f16fb32ceb 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -948,6 +948,7 @@ void __init setup_arch(char **cmdline_p)
>  
>   /* Parse memory topology */
>   mem_topology_setup();
> + set_max_mapnr(max_pfn);
>  
>   /*
>* Release secondary cpus out of their spinloops at 0x60 now that
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 8b121df7b08f..07e8f4f1e07f 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -288,7 +288,6 @@ void __init mem_init(void)
>  #endif
>  
>   high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
> - set_max_mapnr(max_pfn);
>  
>   kasan_late_init();
>  

Yes, this fix actually does the trick. v6.6-rc6 is booting up fine now (dmesg 
attached) on the G5. The patch also applies on 6.5.7 with seemingly no side 
effects. Many thanks to all involved!

I'll check whether this also helps with an older memory-related bug I bisected 
recently, and post the bug if not. ;)

Regards,
Erhard


dmesg_66-rc6_g5
Description: Binary data


Re: [PATCH] arch: powerpc: net: bpf_jit_comp32.c: Fixed 'instead' typo

2023-10-18 Thread Michael Ellerman
Daniel Borkmann  writes:
> On 10/13/23 7:31 AM, Muhammad Muzammil wrote:
>> Fixed 'instead' typo
>> 
>> Signed-off-by: Muhammad Muzammil 
>
> Michael, I presume you'll pick it up?

Will do.

cheers


Re: [PATCH] powerpc/vas: Limit open window failure messages in log bufffer

2023-10-18 Thread Michael Ellerman
Haren Myneni  writes:
> The VAS open window call prints an error message and returns -EBUSY
> after the migration suspend event is initiated and until the resume
> event has completed on the destination system. This can cause the log
> buffer to fill with these error messages if user space issues
> continuous open window calls. The same applies to the DLPAR CPU
> remove event, when no credits are available until credits are
> freed or returned by another DLPAR CPU add event.

This should probably have a Fixes: tag so it gets backported.

> So change the code to use pr_err_ratelimited() instead of
> pr_err() to display the open window failure and not-available credits
> error messages.
>
> Signed-off-by: Haren Myneni 
> ---
>  arch/powerpc/platforms/book3s/vas-api.c | 4 ++--
>  arch/powerpc/platforms/pseries/vas.c| 4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/platforms/book3s/vas-api.c 
> b/arch/powerpc/platforms/book3s/vas-api.c
> index 77ea9335fd04..203cfc2fb8ff 100644
> --- a/arch/powerpc/platforms/book3s/vas-api.c
> +++ b/arch/powerpc/platforms/book3s/vas-api.c
> @@ -311,8 +311,8 @@ static int coproc_ioc_tx_win_open(struct file *fp, 
> unsigned long arg)
>   txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
>   cp_inst->coproc->cop_type);
>   if (IS_ERR(txwin)) {
> - pr_err("%s() VAS window open failed, %ld\n", __func__,
> - PTR_ERR(txwin));
> + pr_err_ratelimited("%s() VAS window open failed, %ld\n",
> + __func__, PTR_ERR(txwin));

Rather than using __func__ which is a bit over specific for a user
visible error, I'd prefer something like "vas: window open failed rc = %ld".

Probably vas-api.c should use pr_fmt so that all the messages have a
consistent prefix.

cheers

>   return PTR_ERR(txwin);
>   }
>  
> diff --git a/arch/powerpc/platforms/pseries/vas.c 
> b/arch/powerpc/platforms/pseries/vas.c
> index b86f0db08e98..7259e6676503 100644
> --- a/arch/powerpc/platforms/pseries/vas.c
> +++ b/arch/powerpc/platforms/pseries/vas.c
> @@ -341,7 +341,7 @@ static struct vas_window *vas_allocate_window(int vas_id, 
> u64 flags,
>  
>   if (atomic_inc_return(_feat_caps->nr_used_credits) >
>   atomic_read(_feat_caps->nr_total_credits)) {
> - pr_err("Credits are not available to allocate window\n");
> + pr_err_ratelimited("Credits are not available to allocate 
> window\n");
>   rc = -EINVAL;
>   goto out;
>   }
> @@ -439,7 +439,7 @@ static struct vas_window *vas_allocate_window(int vas_id, 
> u64 flags,
>  
>   put_vas_user_win_ref(>vas_win.task_ref);
>   rc = -EBUSY;
> - pr_err("No credit is available to allocate window\n");
> + pr_err_ratelimited("No credit is available to allocate window\n");
>  
>  out_free:
>   /*
> -- 
> 2.26.3


Re: [PATCH] powerpc/mm: Update set_ptes to call pte_filter for all the ptes

2023-10-18 Thread Aneesh Kumar K.V
Aneesh Kumar K V  writes:

> On 10/18/23 11:25 AM, Christophe Leroy wrote:
>> 
>> 
>> Le 18/10/2023 à 06:55, Aneesh Kumar K.V a écrit :
>>> With commit 9fee28baa601 ("powerpc: implement the new page table range
>>> API") we added set_ptes to powerpc architecture but the implementation
>>> missed calling the pte filter for all the ptes we are setting in the
>>> range. set_pte_filter can be used to filter pte values, and on some
>>> platforms which don't support coherent icache it clears the exec bit so
>>> that we can flush the icache on exec fault.
>>>
>>> The patch also removes the usage of arch_enter/leave_lazy_mmu() because
>>> set_pte is not supposed to be used when updating a pte entry. Powerpc
>>> architecture uses this rule to skip the expensive tlb invalidate which
>>> is not needed when you are setting up the pte for the first time. See
>>> commit 56eecdb912b5 ("mm: Use ptep/pmdp_set_numa() for updating
>>> _PAGE_NUMA bit") for more details
>>>
>>> Fixes: 9fee28baa601 ("powerpc: implement the new page table range API")
>>> Signed-off-by: Aneesh Kumar K.V 
>>> ---
>>>   arch/powerpc/mm/pgtable.c | 33 -
>>>   1 file changed, 20 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>>> index 3ba9fe411604..95ab20cca2da 100644
>>> --- a/arch/powerpc/mm/pgtable.c
>>> +++ b/arch/powerpc/mm/pgtable.c
>>> @@ -191,28 +191,35 @@ void set_ptes(struct mm_struct *mm, unsigned long 
>>> addr, pte_t *ptep,
>>> pte_t pte, unsigned int nr)
>>>   {
>>> /*
>>> -* Make sure hardware valid bit is not set. We don't do
>>> -* tlb flush for this update.
>>> +* We don't need to call arch_enter/leave_lazy_mmu_mode()
>>> +* because we expect set_ptes to be only be used on not present
>>> +* and not hw_valid ptes. Hence there is not translation cache flush
>>> +* involved that need to be batched.
>>>  */
>>> -   VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
>>> +   for (;;) {
>>>   
>>> -   /* Note: mm->context.id might not yet have been assigned as
>>> -* this context might not have been activated yet when this
>>> -* is called.
>>> -*/
>>> -   pte = set_pte_filter(pte);
>>> +   /*
>>> +* Make sure hardware valid bit is not set. We don't do
>>> +* tlb flush for this update.
>>> +*/
>>> +   VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
>>>   
>>> -   /* Perform the setting of the PTE */
>>> -   arch_enter_lazy_mmu_mode();
>>> -   for (;;) {
>>> +   /* Note: mm->context.id might not yet have been assigned as
>>> +* this context might not have been activated yet when this
>>> +* is called.
>>> +*/
>>> +   pte = set_pte_filter(pte);
>> 
>> Why do you need to call set_pte_filter() inside the loop ?
>> The only difference between previous pte and next pte is the RPN, other 
>> flags remain untouched so I can't see why you need to call 
>> set_pte_filter() again.
>> 
>
> I missed the fact that we use the filtered pte for all the ptes in the range. 
> One other detail
> that made me look at calling the filter in the loop was that we clear 
> struct page->flags.
> The only flag we care about right now is PG_dcache_clean, and that moved to 
> the folio. So we might be
> good here. Maybe we should add a comment in set_pte_filter saying it can 
> operate only on folio->flags?
>
>>> +
>>> +   /* Perform the setting of the PTE */
>>> __set_pte_at(mm, addr, ptep, pte, 0);
>>> if (--nr == 0)
>>> break;
>>> ptep++;
>>> -   pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT));
>>> addr += PAGE_SIZE;
>>> +   /* increment the pfn */
>>> +   pte = __pte(pte_val(pte) + PAGE_SIZE);
>> 
>> PAGE_SIZE doesn't work on all platforms, see for instance e500.
>> 
>> see comment at 
>> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/32/pgtable.h#L147
>> 
>> And then you see 
>> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/pte-e500.h#L63
>> 
>
> Didn't know that. I actually wanted to do pfn_pte(pte_pfn(pte) + 1), but 
> that needs pgprot_t. I
> can move it back to PTE_RPN_SHIFT and document the details above. 
>

something like this ?

>From 62825870d4b48ffb53e9837dfb4cf7c0422732ec Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" 
Date: Fri, 6 Oct 2023 22:47:00 +0530
Subject: [PATCH] powerpc/mm: Avoid calling arch_enter/leave_lazy_mmu() in
 set_ptes

With commit 9fee28baa601 ("powerpc: implement the new page table range
API") we added set_ptes to powerpc architecture. The implementation
included calling arch_enter/leave_lazy_mmu() calls.

The patch removes the usage of arch_enter/leave_lazy_mmu() because
set_pte is not supposed to be used when updating a pte entry. Powerpc
architecture uses this rule to skip the expensive tlb 

Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2023-10-18 Thread Pingfan Liu
On Tue, Oct 17, 2023 at 6:39 PM Hari Bathini  wrote:
>
>
>
> On 17/10/23 7:58 am, Pingfan Liu wrote:
> > *** Idea ***
> > For kexec -p, the boot cpu may not be cpu0, which causes a problem
> > when allocating memory for paca_ptrs[]. However, in theory, there is no
> > requirement to assign a cpu's logical id according to its sequence in
> > the device tree. But there are helpers such as cpu_first_thread_sibling()
> > which make assumptions about the mapping inside a core. Hence partially
> > loosen the mapping, i.e. unbind the mapping of cores while keeping the
> > mapping inside a core.
> >
> > *** Implement ***
> > At this early stage, there is plenty of memory to utilize. Hence, this
> > patch allocates interim memory to link the cpu info on a list, then
> > reorders cpus by changing the list head. As a result, there is a rotational
> > shift between the sequence number in dt and the cpu logical number.
> >
> > *** Result ***
> > After this patch, a boot-cpu's logical id will always be mapped into the
> > range [0,threads_per_core).
> >
> > Besides this, at this phase, all threads in the boot core are forced to
> > be onlined. This restriction will be lifted in a later patch with
> > extra effort.
> >
> > Signed-off-by: Pingfan Liu 
> > Cc: Michael Ellerman 
> > Cc: Nicholas Piggin 
> > Cc: Christophe Leroy 
> > Cc: Mahesh Salgaonkar 
> > Cc: Wen Xiong 
> > Cc: Baoquan He 
> > Cc: Ming Lei 
> > Cc: Sourabh Jain 
> > Cc: Hari Bathini 
> > Cc: ke...@lists.infradead.org
> > To: linuxppc-dev@lists.ozlabs.org
>
> Thanks for working on this, Pingfan.
> Looks good to me.
>
> Acked-by: Hari Bathini 
>

Thank you for kindly reviewing. I hope that after all these years, we
have accomplished the objective.

Best Regards,

Pingfan



Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Hans Verkuil
On 18/10/2023 09:40, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:
>>
>> On 18/10/2023 09:23, Shengjiu Wang wrote:
>>> On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  
>>> wrote:

 On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
>
> On 17/10/2023 15:11, Shengjiu Wang wrote:
>> On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  wrote:
>>>
>>> Hi Shengjiu,
>>>
>>> On 13/10/2023 10:31, Shengjiu Wang wrote:
 Fixed point controls are used by the user to configure
 the audio sample rate to driver.

 Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
 new IDs for ASRC rate control.

 Signed-off-by: Shengjiu Wang 
 ---
  .../userspace-api/media/v4l/common.rst|  1 +
  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 +++
  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
  .../media/v4l/vidioc-queryctrl.rst|  7 
  .../media/videodev2.h.rst.exceptions  |  1 +
  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
  include/media/v4l2-ctrls.h|  2 ++
  include/uapi/linux/v4l2-controls.h| 13 +++
  include/uapi/linux/videodev2.h|  3 ++
  10 files changed, 76 insertions(+)
  create mode 100644 
 Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst

 diff --git a/Documentation/userspace-api/media/v4l/common.rst 
 b/Documentation/userspace-api/media/v4l/common.rst
 index ea0435182e44..35707edffb13 100644
 --- a/Documentation/userspace-api/media/v4l/common.rst
 +++ b/Documentation/userspace-api/media/v4l/common.rst
 @@ -52,6 +52,7 @@ applicable to all devices.
  ext-ctrls-fm-rx
  ext-ctrls-detect
  ext-ctrls-colorimetry
 +ext-ctrls-fixed-point
>>>
>>> Rename this to ext-ctrls-audio-m2m.
>>>
  fourcc
  format
  planar-apis
 diff --git 
 a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
 b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
 new file mode 100644
 index ..2ef6e250580c
 --- /dev/null
 +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
 @@ -0,0 +1,36 @@
 +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
 +
 +.. _fixed-point-controls:
 +
 +***
 +Fixed Point Control Reference
>>>
>>> This is for audio controls. "Fixed Point" is just the type, and it 
>>> doesn't make
>>> sense to group fixed point controls. But it does make sense to group 
>>> the audio
>>> controls.
>>>
>>> V4L2 controls can be grouped into classes. Basically it is a way to put 
>>> controls
>>> into categories, and for each category there is also a control that 
>>> gives a
>>> description of the class (see 2.15.15 in
>>> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
>>>
>>> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will 
>>> see that
>>> they are grouped based on what class of control they are.
>>>
>>> So I think it would be a good idea to create a new control class for 
>>> M2M audio controls,
>>> instead of just adding them to the catch-all 'User Controls' class.
>>>
>>> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
>>> V4L2_CID_COLORIMETRY_CLASS to see how
>>> it is done.
>>>
>>> M2M_AUDIO would probably be a good name for the class.
>>>
 +***
 +
 +These controls are intended to support an asynchronous sample
 +rate converter.
>>>
>>> Add ' (ASRC).' at the end to indicate the common abbreviation for
>>> that.
>>>
 +
 +.. _v4l2-audio-asrc:
 +
 +``V4L2_CID_ASRC_SOURCE_RATE``
 +sets the resampler source rate.
 +
 +``V4L2_CID_ASRC_DEST_RATE``
 +sets the resampler destination rate.
>>>
>>> Document the unit (Hz) for these two controls.
>>>
 +
 +.. c:type:: v4l2_ctrl_fixed_point
 +
 +.. cssclass:: longtable
 +
 +.. tabularcolumns:: |p{1.5cm}|p{5.8cm}|p{10.0cm}|
 +
 +.. flat-table:: struct v4l2_ctrl_fixed_point
 +:header-rows:  0
 +:stub-columns: 0
 +:widths:   1 1 2
 +
 +* - __u32
>>>
>>> Hmm, shouldn't this be __s32?
>>>
 +  - ``integer``
 +  - integer part of fixed point value.
 +* 

Re: [PATCH 0/6] powerpc/qspinlock: Fix yield latency bug and other

2023-10-18 Thread Shrikanth Hegde



On 10/16/23 6:12 PM, Nicholas Piggin wrote:
> This fixes a long-standing latency bug in the powerpc qspinlock
> implementation that quite a few people have reported and helped
> out with debugging.
> 
> The first patch is a minimal fix that avoids the problem. The
> other patches are streamlining and improvements after the fix.
> 

Hi Nick, Thanks for the fix. This issue has been happening in various
scenarios when there was vCPU contention.

Tested this on a Power10 shared processor LPAR (SPLPAR) based on PowerVM.
The system has two SPLPARs: the scenarios below were run on LPAR1 while
LPAR2 ran constant stress-ng threads consuming 100% of its CPU.
LPAR1 has 96VP, 64EC and LPAR2 has 32VP, 32EC.

lscpu of LPAR1:
Architecture:ppc64le
  Byte Order:Little Endian
CPU(s):  768


  On-line CPU(s) list:   0-767


Model name:  POWER10 (architected), altivec supported
  Model: 2.0 (pvr 0080 0200)
  Thread(s) per core:8

Scenarios tried on LPAR1:
1. run ppc64_cpu --smt=1 and ppc64_cpu --smt=8 to switch between SMT=1
   and SMT=8
2. create a cgroup, assign 5% quota to it and run same number of
   stress-ng as number of CPUs within that cgroup.
3. Run a suite of microbenchmarks such as unixbench, schbench, hackbench
   stress-ng with perf enabled.

baseline was tip/master at 84ab57184ff4 (origin/master, origin/HEAD)
Merge branch into tip/master: 'x86/tdx'

Hard lockup was SEEN in each of the above scenarios with the baseline.
With this patch series applied, hard lockup was NOT SEEN in any of
the above scenarios.

So,
Tested-by: Shrikanth Hegde 

> Thanks,
> Nick
> 
> Nicholas Piggin (6):
>   powerpc/qspinlock: Fix stale propagated yield_cpu
>   powerpc/qspinlock: stop queued waiters trying to set lock sleepy
>   powerpc/qspinlock: propagate owner preemptedness rather than CPU
> number
>   powerpc/qspinlock: don't propagate the not-sleepy state
>   powerpc/qspinlock: Propagate sleepy if previous waiter is preempted
>   powerpc/qspinlock: Rename yield_propagate_owner tunable
> 
>  arch/powerpc/lib/qspinlock.c | 119 +++
>  1 file changed, 52 insertions(+), 67 deletions(-)
> 


Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Shengjiu Wang
On Wed, Oct 18, 2023 at 3:31 PM Hans Verkuil  wrote:
>
> On 18/10/2023 09:23, Shengjiu Wang wrote:
> > On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  
> > wrote:
> >>
> >> On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
> >>>
> >>> On 17/10/2023 15:11, Shengjiu Wang wrote:
>  On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  wrote:
> >
> > Hi Shengjiu,
> >
> > On 13/10/2023 10:31, Shengjiu Wang wrote:
> >> Fixed point controls are used by the user to configure
> >> the audio sample rate to driver.
> >>
> >> Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
> >> new IDs for ASRC rate control.
> >>
> >> Signed-off-by: Shengjiu Wang 
> >> ---
> >>  .../userspace-api/media/v4l/common.rst|  1 +
> >>  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 +++
> >>  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
> >>  .../media/v4l/vidioc-queryctrl.rst|  7 
> >>  .../media/videodev2.h.rst.exceptions  |  1 +
> >>  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
> >>  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
> >>  include/media/v4l2-ctrls.h|  2 ++
> >>  include/uapi/linux/v4l2-controls.h| 13 +++
> >>  include/uapi/linux/videodev2.h|  3 ++
> >>  10 files changed, 76 insertions(+)
> >>  create mode 100644 
> >> Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >>
> >> diff --git a/Documentation/userspace-api/media/v4l/common.rst 
> >> b/Documentation/userspace-api/media/v4l/common.rst
> >> index ea0435182e44..35707edffb13 100644
> >> --- a/Documentation/userspace-api/media/v4l/common.rst
> >> +++ b/Documentation/userspace-api/media/v4l/common.rst
> >> @@ -52,6 +52,7 @@ applicable to all devices.
> >>  ext-ctrls-fm-rx
> >>  ext-ctrls-detect
> >>  ext-ctrls-colorimetry
> >> +ext-ctrls-fixed-point
> >
> > Rename this to ext-ctrls-audio-m2m.
> >
> >>  fourcc
> >>  format
> >>  planar-apis
> >> diff --git 
> >> a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
> >> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >> new file mode 100644
> >> index ..2ef6e250580c
> >> --- /dev/null
> >> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> >> @@ -0,0 +1,36 @@
> >> +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
> >> +
> >> +.. _fixed-point-controls:
> >> +
> >> +***
> >> +Fixed Point Control Reference
> >
> > This is for audio controls. "Fixed Point" is just the type, and it 
> > doesn't make
> > sense to group fixed point controls. But it does make sense to group 
> > the audio
> > controls.
> >
> > V4L2 controls can be grouped into classes. Basically it is a way to put 
> > controls
> > into categories, and for each category there is also a control that 
> > gives a
> > description of the class (see 2.15.15 in
> > https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
> >
> > If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will 
> > see that
> > they are grouped based on what class of control they are.
> >
> > So I think it would be a good idea to create a new control class for 
> > M2M audio controls,
> > instead of just adding them to the catch-all 'User Controls' class.
> >
> > Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> > V4L2_CID_COLORIMETRY_CLASS to see how
> > it is done.
> >
> > M2M_AUDIO would probably be a good name for the class.
> >
> >> +***
> >> +
> >> +These controls are intended to support an asynchronous sample
> >> +rate converter.
> >
> > Add ' (ASRC).' at the end to indicate the common abbreviation for
> > that.
> >
> >> +
> >> +.. _v4l2-audio-asrc:
> >> +
> >> +``V4L2_CID_ASRC_SOURCE_RATE``
> >> +sets the resampler source rate.
> >> +
> >> +``V4L2_CID_ASRC_DEST_RATE``
> >> +sets the resampler destination rate.
> >
> > Document the unit (Hz) for these two controls.
> >
> >> +
> >> +.. c:type:: v4l2_ctrl_fixed_point
> >> +
> >> +.. cssclass:: longtable
> >> +
> >> +.. tabularcolumns:: |p{1.5cm}|p{5.8cm}|p{10.0cm}|
> >> +
> >> +.. flat-table:: struct v4l2_ctrl_fixed_point
> >> +:header-rows:  0
> >> +:stub-columns: 0
> >> +:widths:   1 1 2
> >> +
> >> +* - __u32
> >
> > Hmm, shouldn't this be __s32?
> >
> >> +  - ``integer``
> >> +  - integer part of fixed point value.
> >> +* - __s32
> >
> > and this __u32?
> 

Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Hans Verkuil
On 18/10/2023 09:23, Shengjiu Wang wrote:
> On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  
> wrote:
>>
>> On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
>>>
>>> On 17/10/2023 15:11, Shengjiu Wang wrote:
 On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  wrote:
>
> Hi Shengjiu,
>
> On 13/10/2023 10:31, Shengjiu Wang wrote:
>> Fixed point controls are used by the user to configure
>> the audio sample rate to driver.
>>
>> Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
>> new IDs for ASRC rate control.
>>
>> Signed-off-by: Shengjiu Wang 
>> ---
>>  .../userspace-api/media/v4l/common.rst|  1 +
>>  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 +++
>>  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
>>  .../media/v4l/vidioc-queryctrl.rst|  7 
>>  .../media/videodev2.h.rst.exceptions  |  1 +
>>  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
>>  include/media/v4l2-ctrls.h|  2 ++
>>  include/uapi/linux/v4l2-controls.h| 13 +++
>>  include/uapi/linux/videodev2.h|  3 ++
>>  10 files changed, 76 insertions(+)
>>  create mode 100644 
>> Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>>
>> diff --git a/Documentation/userspace-api/media/v4l/common.rst 
>> b/Documentation/userspace-api/media/v4l/common.rst
>> index ea0435182e44..35707edffb13 100644
>> --- a/Documentation/userspace-api/media/v4l/common.rst
>> +++ b/Documentation/userspace-api/media/v4l/common.rst
>> @@ -52,6 +52,7 @@ applicable to all devices.
>>  ext-ctrls-fm-rx
>>  ext-ctrls-detect
>>  ext-ctrls-colorimetry
>> +ext-ctrls-fixed-point
>
> Rename this to ext-ctrls-audio-m2m.
>
>>  fourcc
>>  format
>>  planar-apis
>> diff --git 
>> a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
>> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>> new file mode 100644
>> index ..2ef6e250580c
>> --- /dev/null
>> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
>> @@ -0,0 +1,36 @@
>> +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
>> +
>> +.. _fixed-point-controls:
>> +
>> +***
>> +Fixed Point Control Reference
>
> This is for audio controls. "Fixed Point" is just the type, and it 
> doesn't make
> sense to group fixed point controls. But it does make sense to group the 
> audio
> controls.
>
> V4L2 controls can be grouped into classes. Basically it is a way to put 
> controls
> into categories, and for each category there is also a control that gives 
> a
> description of the class (see 2.15.15 in
> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
>
> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will see 
> that
> they are grouped based on what class of control they are.
>
> So I think it would be a good idea to create a new control class for M2M 
> audio controls,
> instead of just adding them to the catch-all 'User Controls' class.
>
> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> V4L2_CID_COLORIMETRY_CLASS to see how
> it is done.
>
> M2M_AUDIO would probably be a good name for the class.
>
>> +***
>> +
>> +These controls are intended to support an asynchronous sample
>> +rate converter.
>
> Add ' (ASRC).' at the end to indicate the common abbreviation for
> that.
>
>> +
>> +.. _v4l2-audio-asrc:
>> +
>> +``V4L2_CID_ASRC_SOURCE_RATE``
>> +sets the resampler source rate.
>> +
>> +``V4L2_CID_ASRC_DEST_RATE``
>> +sets the resampler destination rate.
>
> Document the unit (Hz) for these two controls.
>
>> +
>> +.. c:type:: v4l2_ctrl_fixed_point
>> +
>> +.. cssclass:: longtable
>> +
>> +.. tabularcolumns:: |p{1.5cm}|p{5.8cm}|p{10.0cm}|
>> +
>> +.. flat-table:: struct v4l2_ctrl_fixed_point
>> +:header-rows:  0
>> +:stub-columns: 0
>> +:widths:   1 1 2
>> +
>> +* - __u32
>
> Hmm, shouldn't this be __s32?
>
>> +  - ``integer``
>> +  - integer part of fixed point value.
>> +* - __s32
>
> and this __u32?
>
> You want to be able to use this generic type as a signed value.
>
>> +  - ``fractional``
>> +  - fractional part of fixed point value, which is Q31.
>> diff --git 
>> a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst 
>> 

Re: [RFC PATCH v6 09/11] media: uapi: Add audio rate controls support

2023-10-18 Thread Shengjiu Wang
On Wed, Oct 18, 2023 at 10:27 AM Shengjiu Wang  wrote:
>
> On Tue, Oct 17, 2023 at 9:37 PM Hans Verkuil  wrote:
> >
> > On 17/10/2023 15:11, Shengjiu Wang wrote:
> > > On Mon, Oct 16, 2023 at 9:16 PM Hans Verkuil  wrote:
> > >>
> > >> Hi Shengjiu,
> > >>
> > >> On 13/10/2023 10:31, Shengjiu Wang wrote:
> > >>> Fixed point controls are used by the user to configure
> > >>> the audio sample rate to driver.
> > >>>
> > >>> Add V4L2_CID_ASRC_SOURCE_RATE and V4L2_CID_ASRC_DEST_RATE
> > >>> new IDs for ASRC rate control.
> > >>>
> > >>> Signed-off-by: Shengjiu Wang 
> > >>> ---
> > >>>  .../userspace-api/media/v4l/common.rst|  1 +
> > >>>  .../media/v4l/ext-ctrls-fixed-point.rst   | 36 +++
> > >>>  .../media/v4l/vidioc-g-ext-ctrls.rst  |  4 +++
> > >>>  .../media/v4l/vidioc-queryctrl.rst|  7 
> > >>>  .../media/videodev2.h.rst.exceptions  |  1 +
> > >>>  drivers/media/v4l2-core/v4l2-ctrls-core.c |  5 +++
> > >>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c |  4 +++
> > >>>  include/media/v4l2-ctrls.h|  2 ++
> > >>>  include/uapi/linux/v4l2-controls.h| 13 +++
> > >>>  include/uapi/linux/videodev2.h|  3 ++
> > >>>  10 files changed, 76 insertions(+)
> > >>>  create mode 100644 
> > >>> Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> > >>>
> > >>> diff --git a/Documentation/userspace-api/media/v4l/common.rst 
> > >>> b/Documentation/userspace-api/media/v4l/common.rst
> > >>> index ea0435182e44..35707edffb13 100644
> > >>> --- a/Documentation/userspace-api/media/v4l/common.rst
> > >>> +++ b/Documentation/userspace-api/media/v4l/common.rst
> > >>> @@ -52,6 +52,7 @@ applicable to all devices.
> > >>>  ext-ctrls-fm-rx
> > >>>  ext-ctrls-detect
> > >>>  ext-ctrls-colorimetry
> > >>> +ext-ctrls-fixed-point
> > >>
> > >> Rename this to ext-ctrls-audio-m2m.
> > >>
> > >>>  fourcc
> > >>>  format
> > >>>  planar-apis
> > >>> diff --git 
> > >>> a/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst 
> > >>> b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> > >>> new file mode 100644
> > >>> index ..2ef6e250580c
> > >>> --- /dev/null
> > >>> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-fixed-point.rst
> > >>> @@ -0,0 +1,36 @@
> > >>> +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
> > >>> +
> > >>> +.. _fixed-point-controls:
> > >>> +
> > >>> +***
> > >>> +Fixed Point Control Reference
> > >>
> > >> This is for audio controls. "Fixed Point" is just the type, and it 
> > >> doesn't make
> > >> sense to group fixed point controls. But it does make sense to group the 
> > >> audio
> > >> controls.
> > >>
> > >> V4L2 controls can be grouped into classes. Basically it is a way to put 
> > >> controls
> > >> into categories, and for each category there is also a control that 
> > >> gives a
> > >> description of the class (see 2.15.15 in
> > >> https://linuxtv.org/downloads/v4l-dvb-apis-new/driver-api/v4l2-controls.html#introduction)
> > >>
> > >> If you use e.g. 'v4l2-ctl -l' to list all the controls, then you will 
> > >> see that
> > >> they are grouped based on what class of control they are.
> > >>
> > >> So I think it would be a good idea to create a new control class for M2M 
> > >> audio controls,
> > >> instead of just adding them to the catch-all 'User Controls' class.
> > >>
> > >> Search e.g. for V4L2_CTRL_CLASS_COLORIMETRY and 
> > >> V4L2_CID_COLORIMETRY_CLASS to see how
> > >> it is done.
> > >>
> > >> M2M_AUDIO would probably be a good name for the class.
> > >>
> > >>> +***
> > >>> +
> > >>> +These controls are intended to support an asynchronous sample
> > >>> +rate converter.
> > >>
> > >> Add ' (ASRC).' at the end to indicate the common abbreviation for
> > >> that.
> > >>
> > >>> +
> > >>> +.. _v4l2-audio-asrc:
> > >>> +
> > >>> +``V4L2_CID_ASRC_SOURCE_RATE``
> > >>> +sets the resampler source rate.
> > >>> +
> > >>> +``V4L2_CID_ASRC_DEST_RATE``
> > >>> +sets the resampler destination rate.
> > >>
> > >> Document the unit (Hz) for these two controls.
> > >>
> > >>> +
> > >>> +.. c:type:: v4l2_ctrl_fixed_point
> > >>> +
> > >>> +.. cssclass:: longtable
> > >>> +
> > >>> +.. tabularcolumns:: |p{1.5cm}|p{5.8cm}|p{10.0cm}|
> > >>> +
> > >>> +.. flat-table:: struct v4l2_ctrl_fixed_point
> > >>> +:header-rows:  0
> > >>> +:stub-columns: 0
> > >>> +:widths:   1 1 2
> > >>> +
> > >>> +* - __u32
> > >>
> > >> Hmm, shouldn't this be __s32?
> > >>
> > >>> +  - ``integer``
> > >>> +  - integer part of fixed point value.
> > >>> +* - __s32
> > >>
> > >> and this __u32?
> > >>
> > >> You want to be able to use this generic type as a signed value.
> > >>
> > >>> +  - ``fractional``
> > >>> +  - fractional part of fixed point value, which is Q31.
> > >>> diff --git 
> > >>> 

Re: [PATCH] powerpc/mm: Update set_ptes to call pte_filter for all the ptes

2023-10-18 Thread Aneesh Kumar K V
On 10/18/23 11:25 AM, Christophe Leroy wrote:
> 
> 
> Le 18/10/2023 à 06:55, Aneesh Kumar K.V a écrit :
>> With commit 9fee28baa601 ("powerpc: implement the new page table range
>> API") we added set_ptes to powerpc architecture but the implementation
>> missed calling the pte filter for all the ptes we are setting in the
>> range. set_pte_filter can be used to filter pte values and on some
>> platforms which don't support coherent icache it clears the exec bit so
>> that we can flush the icache on exec fault
>>
>> The patch also removes the usage of arch_enter/leave_lazy_mmu() because
>> set_pte is not supposed to be used when updating a pte entry. Powerpc
>> architecture uses this rule to skip the expensive tlb invalidate which
>> is not needed when you are setting up the pte for the first time. See
>> commit 56eecdb912b5 ("mm: Use ptep/pmdp_set_numa() for updating
>> _PAGE_NUMA bit") for more details
>>
>> Fixes: 9fee28baa601 ("powerpc: implement the new page table range API")
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>   arch/powerpc/mm/pgtable.c | 33 -
>>   1 file changed, 20 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index 3ba9fe411604..95ab20cca2da 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -191,28 +191,35 @@ void set_ptes(struct mm_struct *mm, unsigned long 
>> addr, pte_t *ptep,
>>  pte_t pte, unsigned int nr)
>>   {
>>  /*
>> - * Make sure hardware valid bit is not set. We don't do
>> - * tlb flush for this update.
>> + * We don't need to call arch_enter/leave_lazy_mmu_mode()
>> + * because we expect set_ptes to be only be used on not present
>> + * and not hw_valid ptes. Hence there is not translation cache flush
>> + * involved that need to be batched.
>>   */
>> -VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
>> +for (;;) {
>>   
>> -/* Note: mm->context.id might not yet have been assigned as
>> - * this context might not have been activated yet when this
>> - * is called.
>> - */
>> -pte = set_pte_filter(pte);
>> +/*
>> + * Make sure hardware valid bit is not set. We don't do
>> + * tlb flush for this update.
>> + */
>> +VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
>>   
>> -/* Perform the setting of the PTE */
>> -arch_enter_lazy_mmu_mode();
>> -for (;;) {
>> +/* Note: mm->context.id might not yet have been assigned as
>> + * this context might not have been activated yet when this
>> + * is called.
>> + */
>> +pte = set_pte_filter(pte);
> 
> Why do you need to call set_pte_filter() inside the loop ?
> The only difference between previous pte and next pte is the RPN, other 
> flags remain untouched so I can't see why you need to call 
> set_pte_filter() again.
> 

I missed the fact that we use the filtered pte for all the ptes in the range.
One other detail that made me look at calling the filter in the loop was that
we are clearing the struct page->flags. The only flag we care about right now
is PG_dcache_clean, and that has moved to the folio. So we might be good here.
Maybe we should add a comment in set_pte_filter saying it can operate only on
folio->flags?

>> +
>> +/* Perform the setting of the PTE */
>>  __set_pte_at(mm, addr, ptep, pte, 0);
>>  if (--nr == 0)
>>  break;
>>  ptep++;
>> -pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT));
>>  addr += PAGE_SIZE;
>> +/* increment the pfn */
>> +pte = __pte(pte_val(pte) + PAGE_SIZE);
> 
> PAGE_SIZE doesn't work on all platforms, see for instance e500.
> 
> see comment at 
> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/32/pgtable.h#L147
> 
> And then you see 
> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/pte-e500.h#L63
> 

Didn't know that. I actually wanted to do pfn_pte(pte_pfn(pte) + 1), but that
needs a pgprot_t. I can move it back to PTE_RPN_SHIFT, with the above details
documented.

>> +
>>  }
>> -arch_leave_lazy_mmu_mode();
>>   }
>>   
>>   void unmap_kernel_page(unsigned long va)
> 
> Christophe

-aneesh


Re: [PATCH v2 0/3] Add generic data patching functions

2023-10-18 Thread Christophe Leroy


Le 17/10/2023 à 08:56, Benjamin Gray a écrit :
> On 17/10/23 5:39 pm, Christophe Leroy wrote:
>> Le 16/10/2023 à 07:01, Benjamin Gray a écrit :
>>> Currently patch_instruction() bases the write length on the value being
>>> written. If the value looks like a prefixed instruction it writes 8 
>>> bytes,
>>> otherwise it writes 4 bytes. This makes it potentially buggy to use for
>>> writing arbitrary data, as if you want to write 4 bytes but it 
>>> decides to
>>> write 8 bytes it may clobber the following memory or be unaligned and
>>> trigger an oops if it tries to cross a page boundary.
>>>
>>> To solve this, this series pulls out the size parameter to the 'top' of
>>> the text patching logic, and propagates it through the various 
>>> functions.
>>>
>>> The two sizes supported are int and long; this allows for patching
>>> instructions and pointers on both ppc32 and ppc64. On ppc32 these are 
>>> the
>>> same size, so care is taken to only use the size parameter on static
>>> functions, so the compiler can optimise it out entirely. Unfortunately
>>> GCC trips over its own feet here and won't optimise in a way that is
>>> optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX
>>> (pmac32_defconfig).
>>>
>>> In the first case, patch_memory() is very large and can only be inlined
>>> if a single function calls it. While the source only calls it in
>>> patch_instruction(), an earlier optimisation pass inlined
>>> patch_instruction() into patch_branch(), so now there are 'two' 
>>> references
>>> to patch_memory() and it cannot be inlined into patch_instruction().
>>> Instead patch_instruction() becomes a single branch directly to
>>> patch_memory().
>>>
>>> We can fix this by marking patch_instruction() as noinline, but this
>>> prevents patch_memory() from being directly inlined into patch_branch()
>>> when RWX is disabled and patch_memory() is very small.
>>>
>>> It may be possible to avoid this by merging together patch_instruction()
>>> and patch_memory() on ppc32, but the only way I can think to do this
>>> without duplicating the implementation involves using the preprocessor
>>> to change if is_dword is a parameter or a local variable, which is 
>>> gross.
>>
>> What about:
>>
>> diff --git a/arch/powerpc/include/asm/code-patching.h
>> b/arch/powerpc/include/asm/code-patching.h
>> index 7c6056bb1706..af89ef450c93 100644
>> --- a/arch/powerpc/include/asm/code-patching.h
>> +++ b/arch/powerpc/include/asm/code-patching.h
>> @@ -72,7 +72,7 @@ static inline int create_branch(ppc_inst_t *instr,
>> const u32 *addr,
>>    int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>>   unsigned long target, int flags);
>>    int patch_branch(u32 *addr, unsigned long target, int flags);
>> -int patch_instruction(u32 *addr, ppc_inst_t instr);
>> +int patch_memory(void *addr, unsigned long val, bool is_dword);
>>    int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
>>
>>    /*
>> @@ -87,24 +87,28 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t 
>> instr);
>>
>>    #ifdef CONFIG_PPC64
>>
>> -int patch_uint(void *addr, unsigned int val);
>> -int patch_ulong(void *addr, unsigned long val);
>> +int patch_instruction(u32 *addr, ppc_inst_t instr);
>>
>>    #define patch_u64 patch_ulong
>>
>>    #else
>>
>> -static inline int patch_uint(u32 *addr, unsigned int val)
>> +static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
>>    {
>> -    return patch_instruction(addr, ppc_inst(val));
>> +    return patch_memory(addr, ppc_inst_val(instr), false);
>>    }
>>
>> +#endif
>> +
>>    static inline int patch_ulong(void *addr, unsigned long val)
>>    {
>> -    return patch_instruction(addr, ppc_inst(val));
>> +    return patch_memory(addr, val, true);
>>    }
>>
>> -#endif
>> +static inline int patch_uint(void *addr, unsigned int val)
>> +{
>> +    return patch_memory(addr, val, false);
>> +}
>>
>>    #define patch_u32 patch_uint
>>
>> diff --git a/arch/powerpc/lib/code-patching.c
>> b/arch/powerpc/lib/code-patching.c
>> index 60289332412f..77418b2a4aa4 100644
>> --- a/arch/powerpc/lib/code-patching.c
>> +++ b/arch/powerpc/lib/code-patching.c
>> @@ -355,7 +355,7 @@ static int __do_patch_memory(void *addr, unsigned
>> long val, bool is_dword)
>>    return err;
>>    }
>>
>> -static int patch_memory(void *addr, unsigned long val, bool is_dword)
>> +int patch_memory(void *addr, unsigned long val, bool is_dword)
>>    {
>>    int err;
>>    unsigned long flags;
>> @@ -378,6 +378,7 @@ static int patch_memory(void *addr, unsigned long
>> val, bool is_dword)
>>
>>    return err;
>>    }
>> +NOKPROBE_SYMBOL(patch_memory)
>>
>>    #ifdef CONFIG_PPC64
>>
>> @@ -390,26 +391,6 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>>    }
>>    NOKPROBE_SYMBOL(patch_instruction)
>>
>> -int patch_uint(void *addr, unsigned int val)
>> -{
>> -    return patch_memory(addr, val, false);
>> -}
>> -NOKPROBE_SYMBOL(patch_uint)
>> -
>> -int patch_ulong(void *addr, unsigned long val)
>> -{
>>