Re: [PATCH v2 1/4] KVM: Refactor stats descriptor generation macros

2023-04-05 Thread Anup Patel
On Tue, Mar 7, 2023 at 12:32 AM David Matlack  wrote:
>
> Refactor the various KVM stats macros to reduce the amount of duplicate
> macro code. This change also improves readability by spelling out
> "CUMULATIVE", "INSTANT", and "PEAK" instead of the previous short-hands
> which were less clear ("COUNTER", "ICOUNTER", and "PCOUNTER").
>
> No functional change intended.
>
> Suggested-by: Sean Christopherson 
> Signed-off-by: David Matlack 

For KVM RISC-V:
Acked-by: Anup Patel 

Regards,
Anup

> ---
>  arch/arm64/kvm/guest.c|  14 +--
>  arch/mips/kvm/mips.c  |  54 +--
>  arch/powerpc/kvm/book3s.c |  62 ++--
>  arch/powerpc/kvm/booke.c  |  48 -
>  arch/riscv/kvm/vcpu.c |  16 +--
>  arch/s390/kvm/kvm-s390.c  | 198 +++---
>  arch/x86/kvm/x86.c|  94 +-
>  include/linux/kvm_host.h  |  95 ++
>  8 files changed, 272 insertions(+), 309 deletions(-)
>
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 07444fa22888..890ed444c237 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -44,13 +44,13 @@ const struct kvm_stats_header kvm_vm_stats_header = {
>
>  const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
> KVM_GENERIC_VCPU_STATS(),
> -   STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
> -   STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
> -   STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
> -   STATS_DESC_COUNTER(VCPU, mmio_exit_user),
> -   STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
> -   STATS_DESC_COUNTER(VCPU, signal_exits),
> -   STATS_DESC_COUNTER(VCPU, exits)
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, hvc_exit_stat),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, wfe_exit_stat),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, wfi_exit_stat),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, mmio_exit_user),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, mmio_exit_kernel),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, signal_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, exits)
>  };
>
>  const struct kvm_stats_header kvm_vcpu_stats_header = {
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 36c8991b5d39..b7b2fa400bcf 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -53,34 +53,34 @@ const struct kvm_stats_header kvm_vm_stats_header = {
>
>  const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
> KVM_GENERIC_VCPU_STATS(),
> -   STATS_DESC_COUNTER(VCPU, wait_exits),
> -   STATS_DESC_COUNTER(VCPU, cache_exits),
> -   STATS_DESC_COUNTER(VCPU, signal_exits),
> -   STATS_DESC_COUNTER(VCPU, int_exits),
> -   STATS_DESC_COUNTER(VCPU, cop_unusable_exits),
> -   STATS_DESC_COUNTER(VCPU, tlbmod_exits),
> -   STATS_DESC_COUNTER(VCPU, tlbmiss_ld_exits),
> -   STATS_DESC_COUNTER(VCPU, tlbmiss_st_exits),
> -   STATS_DESC_COUNTER(VCPU, addrerr_st_exits),
> -   STATS_DESC_COUNTER(VCPU, addrerr_ld_exits),
> -   STATS_DESC_COUNTER(VCPU, syscall_exits),
> -   STATS_DESC_COUNTER(VCPU, resvd_inst_exits),
> -   STATS_DESC_COUNTER(VCPU, break_inst_exits),
> -   STATS_DESC_COUNTER(VCPU, trap_inst_exits),
> -   STATS_DESC_COUNTER(VCPU, msa_fpe_exits),
> -   STATS_DESC_COUNTER(VCPU, fpe_exits),
> -   STATS_DESC_COUNTER(VCPU, msa_disabled_exits),
> -   STATS_DESC_COUNTER(VCPU, flush_dcache_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_gpsi_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_gsfc_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_hc_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_grr_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_gva_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_ghfc_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_gpa_exits),
> -   STATS_DESC_COUNTER(VCPU, vz_resvd_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, wait_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, cache_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, signal_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, int_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, cop_unusable_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmod_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmiss_ld_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmiss_st_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, addrerr_st_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, addrerr_ld_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, syscall_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, resvd_inst_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, break_inst_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, trap_inst_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, msa_fpe_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, fpe_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, msa_disabled_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, flush_dcache_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_gpsi_exits),
> +   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_gsfc_exits),

[PATCH v2 7/7] selftests/powerpc/dscr: Restore timeout to DSCR selftests

2023-04-05 Thread Benjamin Gray
Reducing the time taken by dscr_sysfs_test.c allows restoring the
default timeout, which was removed in
commit 850507f30c38 ("selftests/powerpc: Turn off timeout setting for
benchmarks, dscr, signal, tm") because that test took too long.

Signed-off-by: Benjamin Gray 
---
 tools/testing/selftests/powerpc/dscr/Makefile | 2 --
 tools/testing/selftests/powerpc/dscr/settings | 1 -
 2 files changed, 3 deletions(-)
 delete mode 100644 tools/testing/selftests/powerpc/dscr/settings

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index b29a8863a734..9289d5febe1e 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,8 +3,6 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test 
dscr_user_test   \
  dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test  \
  dscr_sysfs_thread_test
 
-TEST_FILES := settings
-
 top_srcdir = ../../../../..
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/dscr/settings 
b/tools/testing/selftests/powerpc/dscr/settings
deleted file mode 100644
index e7b9417537fb..
--- a/tools/testing/selftests/powerpc/dscr/settings
+++ /dev/null
@@ -1 +0,0 @@
-timeout=0
-- 
2.39.2



[PATCH v2 5/7] selftests/powerpc/dscr: Improve DSCR explicit random test case

2023-04-05 Thread Benjamin Gray
The tests currently have a single writer thread updating the system
DSCR with a 1/1000 chance looped only 100 times. So only around one in
10 runs actually do anything.

* Add multiple threads to the dscr_explicit_random_test case.
* Use a barrier to make all the threads start work as simultaneously as
  possible.
* Use a rwlock and make all threads have a reasonable chance to write to
  the DSCR on each iteration.
  PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP is used to prevent
  writers from starving while all the other threads keep reading.
  Logging the reads/writes shows a decent mix across the whole test.
* Allow all threads a chance to write.
* Make the chance of writing more likely.

Signed-off-by: Benjamin Gray 
---
 tools/testing/selftests/powerpc/dscr/dscr.h   |   4 -
 .../powerpc/dscr/dscr_default_test.c  | 140 --
 .../powerpc/dscr/dscr_explicit_test.c |  84 +++
 3 files changed, 113 insertions(+), 115 deletions(-)

diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h 
b/tools/testing/selftests/powerpc/dscr/dscr.h
index 2c54998d4715..b281659071e8 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr.h
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -86,8 +86,4 @@ void set_default_dscr(unsigned long val)
}
 }
 
-double uniform_deviate(int seed)
-{
-   return seed * (1.0 / (RAND_MAX + 1.0));
-}
 #endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 18e533d46c9a..60ab02525b79 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -69,105 +69,85 @@ int dscr_default_lockstep_test(void)
return 0;
 }
 
-static unsigned long dscr; /* System DSCR default */
-static unsigned long sequence;
-static unsigned long result[THREADS];
-
-static void *do_test(void *in)
+struct random_thread_args {
+   pthread_t thread_id;
+   unsigned long *expected_system_dscr;
+   pthread_rwlock_t *rw_lock;
+   pthread_barrier_t *barrier;
+};
+
+static void *dscr_default_random_thread(void *in)
 {
-   unsigned long thread = (unsigned long)in;
-   unsigned long i;
-
-   for (i = 0; i < COUNT; i++) {
-   unsigned long d, cur_dscr, cur_dscr_usr;
-   unsigned long s1, s2;
-
-   s1 = READ_ONCE(sequence);
-   if (s1 & 1)
-   continue;
-   rmb();
-
-   d = dscr;
-   cur_dscr = get_dscr();
-   cur_dscr_usr = get_dscr_usr();
-
-   rmb();
-   s2 = sequence;
+   struct random_thread_args *args = (struct random_thread_args *)in;
+   unsigned long *expected_dscr_p = args->expected_system_dscr;
+   pthread_rwlock_t *rw_lock = args->rw_lock;
+   int err;
 
-   if (s1 != s2)
-   continue;
+   srand(gettid());
 
-   if (cur_dscr != d) {
-   fprintf(stderr, "thread %ld kernel DSCR should be %ld "
-   "but is %ld\n", thread, d, cur_dscr);
-   result[thread] = 1;
-   pthread_exit(&result[thread]);
-   }
+   err = pthread_barrier_wait(args->barrier);
+   FAIL_IF_EXIT(err != 0 && err != PTHREAD_BARRIER_SERIAL_THREAD);
 
-   if (cur_dscr_usr != d) {
-   fprintf(stderr, "thread %ld user DSCR should be %ld "
-   "but is %ld\n", thread, d, cur_dscr_usr);
-   result[thread] = 1;
-   pthread_exit(&result[thread]);
+   for (int i = 0; i < COUNT; i++) {
+   unsigned long expected_dscr;
+   unsigned long current_dscr;
+   unsigned long current_dscr_usr;
+
+   FAIL_IF_EXIT(pthread_rwlock_rdlock(rw_lock));
+   expected_dscr = *expected_dscr_p;
+   current_dscr = get_dscr();
+   current_dscr_usr = get_dscr_usr();
+   FAIL_IF_EXIT(pthread_rwlock_unlock(rw_lock));
+
+   FAIL_IF_EXIT(current_dscr != expected_dscr);
+   FAIL_IF_EXIT(current_dscr_usr != expected_dscr);
+
+   if (rand() % 10 == 0) {
+   unsigned long next_dscr;
+
+   FAIL_IF_EXIT(pthread_rwlock_wrlock(rw_lock));
+   next_dscr = (*expected_dscr_p + 1) % DSCR_MAX;
+   set_default_dscr(next_dscr);
+   *expected_dscr_p = next_dscr;
+   FAIL_IF_EXIT(pthread_rwlock_unlock(rw_lock));
}
}
-   result[thread] = 0;
-   pthread_exit(&result[thread]);
+
+   pthread_exit((void *)0);
 }
 
 int dscr_default_random_test(void)
 {
-   pthread_t threads[THREADS];
-   unsigned long i, *status[THREADS];
+   struct 

[PATCH v2 6/7] selftests/powerpc/dscr: Speed up DSCR sysfs tests

2023-04-05 Thread Benjamin Gray
This test case is extremely slow, taking around a minute compared to
most of the other DSCR tests taking a second at most. Perf shows most
time is spent by the kernel switching to each CPU it reads in
/sys/devices/system/cpu. This switching is an unavoidable consequence
of reading all the .../cpuN/dscr values.

Remove the outer iteration loop from this test case, reducing the reads
from 1600 to 16. This still updates the DSCR 16 times and verifies on
every CPU each time, so I do not expect the lower coverage to be
meaningful. The speedup is significant: back down to ~1 second like the
other tests.

Signed-off-by: Benjamin Gray 
---
 .../testing/selftests/powerpc/dscr/dscr_sysfs_test.c  | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 4f1fef6198fc..e7cd0d6b1fad 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -67,17 +67,14 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
 int dscr_sysfs(void)
 {
unsigned long orig_dscr_default;
-   int i, j;
 
SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
 
orig_dscr_default = get_default_dscr();
-   for (i = 0; i < COUNT; i++) {
-   for (j = 0; j < DSCR_MAX; j++) {
-   set_default_dscr(j);
-   if (check_all_cpu_dscr_defaults(j))
-   goto fail;
-   }
+   for (int i = 0; i < DSCR_MAX; i++) {
+   set_default_dscr(i);
+   if (check_all_cpu_dscr_defaults(i))
+   goto fail;
}
set_default_dscr(orig_dscr_default);
return 0;
-- 
2.39.2



[PATCH v2 3/7] selftests/powerpc: Allow bind_to_cpu() to automatically pick CPU

2023-04-05 Thread Benjamin Gray
All current users of bind_to_cpu() don't care _which_ CPU they get, just
that they are bound to a single free one. So alter the interface to

1. Accept a BIND_CPU_ANY value that tells it to automatically
   pick a CPU
2. Return the picked CPU

And convert all these users to bind_to_cpu(BIND_CPU_ANY).

Signed-off-by: Benjamin Gray 

---

v2: * New in v2
---
 tools/testing/selftests/powerpc/include/utils.h |  2 ++
 .../powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c  |  3 +--
 .../powerpc/pmu/ebb/cpu_event_vs_ebb_test.c |  3 +--
 .../powerpc/pmu/ebb/ebb_vs_cpu_event_test.c |  3 +--
 .../powerpc/pmu/ebb/multi_ebb_procs_test.c  |  6 ++
 tools/testing/selftests/powerpc/pmu/lib.c   |  6 ++
 tools/testing/selftests/powerpc/utils.c | 13 -
 7 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/utils.h 
b/tools/testing/selftests/powerpc/include/utils.h
index d3589e16a20f..44bfd48b93d6 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -31,6 +31,8 @@ int read_auxv(char *buf, ssize_t buf_size);
 void *find_auxv_entry(int type, char *auxv);
 void *get_auxv_entry(int type);
 
+#define BIND_CPU_ANY   (-1)
+
 int pick_online_cpu(void);
 int bind_to_cpu(int cpu);
 
diff --git 
a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
index 3cd33eb51e5e..fab7f34d7ce1 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -45,9 +45,8 @@ int cpu_event_pinned_vs_ebb(void)
 
SKIP_IF(!ebb_is_supported());
 
-   cpu = pick_online_cpu();
+   cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
-   FAIL_IF(bind_to_cpu(cpu));
 
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
index 8466ef9d7de8..7c54c262036e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -43,9 +43,8 @@ int cpu_event_vs_ebb(void)
 
SKIP_IF(!ebb_is_supported());
 
-   cpu = pick_online_cpu();
+   cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
-   FAIL_IF(bind_to_cpu(cpu));
 
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
index 4d822cb3589c..d7064b54c64f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -43,9 +43,8 @@ int ebb_vs_cpu_event(void)
 
SKIP_IF(!ebb_is_supported());
 
-   cpu = pick_online_cpu();
+   cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
-   FAIL_IF(bind_to_cpu(cpu));
 
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index 9b0f70d59702..4ac22b2e774f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -75,13 +75,11 @@ static int cycles_child(void)
 int multi_ebb_procs(void)
 {
pid_t pids[NR_CHILDREN];
-   int cpu, rc, i;
+   int rc, i;
 
SKIP_IF(!ebb_is_supported());
 
-   cpu = pick_online_cpu();
-   FAIL_IF(cpu < 0);
-   FAIL_IF(bind_to_cpu(cpu));
+   FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
 
for (i = 0; i < NR_CHILDREN; i++) {
pids[i] = fork();
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c 
b/tools/testing/selftests/powerpc/pmu/lib.c
index 144f90a78d69..321357987408 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -103,12 +103,10 @@ static int eat_cpu_child(union pipe read_pipe, union pipe 
write_pipe)
 pid_t eat_cpu(int (test_function)(void))
 {
union pipe read_pipe, write_pipe;
-   int cpu, rc;
+   int rc;
pid_t pid;
 
-   cpu = pick_online_cpu();
-   FAIL_IF(cpu < 0);
-   FAIL_IF(bind_to_cpu(cpu));
+   FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
 
if (pipe(read_pipe.fds) == -1)
return -1;
diff --git a/tools/testing/selftests/powerpc/utils.c 
b/tools/testing/selftests/powerpc/utils.c
index cdb996dba703..252fb4a95e90 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -455,13 +455,24 

[PATCH v2 1/7] selftests/powerpc/dscr: Correct typos

2023-04-05 Thread Benjamin Gray
Correct a couple of typos while working on other improvements to the
DSCR tests.

Signed-off-by: Benjamin Gray 
---
 tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c | 4 ++--
 tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c  | 4 ++--
 tools/testing/selftests/powerpc/dscr/dscr_user_test.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index 32fcf2b324b1..5659d98cf340 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -7,8 +7,8 @@
  * privilege state SPR and the problem state SPR for this purpose.
  *
  * When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
  * directly without any emulation if the HW supports them. Else
  * they also get emulated by the kernel.
  *
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index f9dfd3d3c2d5..68ce328e813e 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -7,8 +7,8 @@
  * value using mfspr.
  *
  * When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
  * directly without any emulation if the HW supports them. Else
  * they also get emulated by the kernel.
  *
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index e09072446dd3..67bb872a246a 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -8,8 +8,8 @@
  * numbers.
  *
  * When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
  * directly without any emulation if the HW supports them. Else
  * they also get emulated by the kernel.
  *
-- 
2.39.2



[PATCH v2 2/7] selftests/powerpc: Move bind_to_cpu() to utils.h

2023-04-05 Thread Benjamin Gray
This function will be useful in the DSCR test patches later in this
series, so promote it to be shared by all powerpc selftests.

Signed-off-by: Benjamin Gray 

---

v2: * New in v2
---
 tools/testing/selftests/powerpc/include/utils.h |  1 +
 tools/testing/selftests/powerpc/pmu/lib.c   | 13 -
 tools/testing/selftests/powerpc/pmu/lib.h   |  1 -
 tools/testing/selftests/powerpc/utils.c | 12 
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/utils.h 
b/tools/testing/selftests/powerpc/include/utils.h
index eed7dd7582b2..d3589e16a20f 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -32,6 +32,7 @@ void *find_auxv_entry(int type, char *auxv);
 void *get_auxv_entry(int type);
 
 int pick_online_cpu(void);
+int bind_to_cpu(int cpu);
 
 int parse_intmax(const char *buffer, size_t count, intmax_t *result, int base);
 int parse_uintmax(const char *buffer, size_t count, uintmax_t *result, int 
base);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c 
b/tools/testing/selftests/powerpc/pmu/lib.c
index 719f94f10d41..144f90a78d69 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -14,19 +14,6 @@
 #include "utils.h"
 #include "lib.h"
 
-
-int bind_to_cpu(int cpu)
-{
-   cpu_set_t mask;
-
-   printf("Binding to cpu %d\n", cpu);
-
-   CPU_ZERO(&mask);
-   CPU_SET(cpu, &mask);
-
-   return sched_setaffinity(0, sizeof(mask), &mask);
-}
-
 #define PARENT_TOKEN   0xAA
 #define CHILD_TOKEN0x55
 
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h 
b/tools/testing/selftests/powerpc/pmu/lib.h
index bf1bec013bbb..1d62403ae6ea 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -20,7 +20,6 @@ union pipe {
int fds[2];
 };
 
-extern int bind_to_cpu(int cpu);
 extern int kill_child_and_wait(pid_t child_pid);
 extern int wait_for_child(pid_t child_pid);
 extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
diff --git a/tools/testing/selftests/powerpc/utils.c 
b/tools/testing/selftests/powerpc/utils.c
index 7c8cfedb012a..cdb996dba703 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -452,6 +452,18 @@ int pick_online_cpu(void)
return cpu;
 }
 
+int bind_to_cpu(int cpu)
+{
+   cpu_set_t mask;
+
+   printf("Binding to cpu %d\n", cpu);
+
+   CPU_ZERO(&mask);
+   CPU_SET(cpu, &mask);
+
+   return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
 bool is_ppc64le(void)
 {
struct utsname uts;
-- 
2.39.2



[PATCH v2 0/7] Update DSCR tests

2023-04-05 Thread Benjamin Gray
The randomness based DSCR tests currently have a low probability of doing
any writes to the DSCR, making them inefficient in uncovering bugs.

This series adds lockstep variants to these RNG tests, to ensure the happy
path is always tested, and improves the randomness and size of the RNG
tests.

It also removes many iterations of the sysfs DSCR test, allowing the default
timeout to be re-enabled.

v2: * Pull bind_to_cpu() out to utils.c and allow an automatically
  determined CPU selection.

Previous versions:
v1: https://lore.kernel.org/all/20230307005515.174362-1-bg...@linux.ibm.com/

Benjamin Gray (7):
  selftests/powerpc/dscr: Correct typos
  selftests/powerpc: Move bind_to_cpu() to utils.h
  selftests/powerpc: Allow bind_to_cpu() to automatically pick CPU
  selftests/powerpc/dscr: Add lockstep test cases to DSCR explicit tests
  selftests/powerpc/dscr: Improve DSCR explicit random test case
  selftests/powerpc/dscr: Speed up DSCR sysfs tests
  selftests/powerpc/dscr: Restore timeout to DSCR selftests

 tools/testing/selftests/powerpc/dscr/Makefile |   3 +-
 tools/testing/selftests/powerpc/dscr/dscr.h   |   4 -
 .../powerpc/dscr/dscr_default_test.c  | 207 +++---
 .../powerpc/dscr/dscr_explicit_test.c | 169 +++---
 .../powerpc/dscr/dscr_inherit_test.c  |   4 +-
 .../selftests/powerpc/dscr/dscr_sysfs_test.c  |  11 +-
 .../selftests/powerpc/dscr/dscr_user_test.c   |   4 +-
 tools/testing/selftests/powerpc/dscr/settings |   1 -
 .../testing/selftests/powerpc/include/utils.h |   3 +
 .../pmu/ebb/cpu_event_pinned_vs_ebb_test.c|   3 +-
 .../powerpc/pmu/ebb/cpu_event_vs_ebb_test.c   |   3 +-
 .../powerpc/pmu/ebb/ebb_vs_cpu_event_test.c   |   3 +-
 .../powerpc/pmu/ebb/multi_ebb_procs_test.c|   6 +-
 tools/testing/selftests/powerpc/pmu/lib.c |  19 +-
 tools/testing/selftests/powerpc/pmu/lib.h |   1 -
 tools/testing/selftests/powerpc/utils.c   |  23 ++
 16 files changed, 303 insertions(+), 161 deletions(-)
 delete mode 100644 tools/testing/selftests/powerpc/dscr/settings

--
2.39.2


[PATCH v2 4/7] selftests/powerpc/dscr: Add lockstep test cases to DSCR explicit tests

2023-04-05 Thread Benjamin Gray
Add new cases to the relevant tests that use explicitly synchronized
threads to test the behaviour across context switches with less
randomness. By locking the participants to the same CPU we guarantee a
context switch occurs each time they make progress, which is a likely
failure point if the kernel is not tracking the thread local DSCR
correctly.

The random case is left in to keep exercising potential edge cases.

Signed-off-by: Benjamin Gray 

---

v2: * Use the bind_to_cpu() added in the previous 2 new patches
  instead of reimplementing it
---
 tools/testing/selftests/powerpc/dscr/Makefile |  1 +
 .../powerpc/dscr/dscr_default_test.c  | 87 ---
 .../powerpc/dscr/dscr_explicit_test.c | 85 +-
 3 files changed, 159 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index 845db6273a1b..b29a8863a734 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -9,5 +9,6 @@ top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+$(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index e76611e608af..18e533d46c9a 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -9,8 +9,66 @@
  * Copyright 2012, Anton Blanchard, IBM Corporation.
  * Copyright 2015, Anshuman Khandual, IBM Corporation.
  */
+
+#define _GNU_SOURCE
+
 #include "dscr.h"
 
+#include 
+#include 
+#include 
+
+static void *dscr_default_lockstep_writer(void *arg)
+{
+   sem_t *reader_sem = (sem_t *)arg;
+   sem_t *writer_sem = (sem_t *)arg + 1;
+   unsigned long expected_dscr = 0;
+
+   for (int i = 0; i < COUNT; i++) {
+   FAIL_IF_EXIT(sem_wait(writer_sem));
+
+   set_default_dscr(expected_dscr);
+   expected_dscr = (expected_dscr + 1) % DSCR_MAX;
+
+   FAIL_IF_EXIT(sem_post(reader_sem));
+   }
+
+   return NULL;
+}
+
+int dscr_default_lockstep_test(void)
+{
+   pthread_t writer;
+   sem_t rw_semaphores[2];
+   sem_t *reader_sem = &rw_semaphores[0];
+   sem_t *writer_sem = &rw_semaphores[1];
+   unsigned long expected_dscr = 0;
+
+   SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
+   FAIL_IF(sem_init(reader_sem, 0, 0));
+   FAIL_IF(sem_init(writer_sem, 0, 1));  /* writer starts first */
+   FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
+   FAIL_IF(pthread_create(&writer, NULL, dscr_default_lockstep_writer, 
(void *)rw_semaphores));
+
+   for (int i = 0; i < COUNT ; i++) {
+   FAIL_IF(sem_wait(reader_sem));
+
+   FAIL_IF(get_dscr() != expected_dscr);
+   FAIL_IF(get_dscr_usr() != expected_dscr);
+
+   expected_dscr = (expected_dscr + 1) % DSCR_MAX;
+
+   FAIL_IF(sem_post(writer_sem));
+   }
+
+   FAIL_IF(pthread_join(writer, NULL));
+   FAIL_IF(sem_destroy(reader_sem));
+   FAIL_IF(sem_destroy(writer_sem));
+
+   return 0;
+}
+
 static unsigned long dscr; /* System DSCR default */
 static unsigned long sequence;
 static unsigned long result[THREADS];
@@ -57,16 +115,13 @@ static void *do_test(void *in)
pthread_exit(&result[thread]);
 }
 
-int dscr_default(void)
+int dscr_default_random_test(void)
 {
pthread_t threads[THREADS];
unsigned long i, *status[THREADS];
-   unsigned long orig_dscr_default;
 
SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
 
-   orig_dscr_default = get_default_dscr();
-
/* Initial DSCR default */
dscr = 1;
set_default_dscr(dscr);
@@ -75,7 +130,7 @@ int dscr_default(void)
for (i = 0; i < THREADS; i++) {
if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
perror("pthread_create() failed");
-   goto fail;
+   return 1;
}
}
 
@@ -104,23 +159,31 @@ int dscr_default(void)
for (i = 0; i < THREADS; i++) {
if (pthread_join(threads[i], (void **)&(status[i]))) {
perror("pthread_join() failed");
-   goto fail;
+   return 1;
}
 
if (*status[i]) {
printf("%ldth thread failed to join with %ld status\n",
i, *status[i]);
-   goto fail;
+   return 1;
}
}
-   set_default_dscr(orig_dscr_default);
return 0;
-fail:
-   set_default_dscr(orig_dscr_default);
-   return 1;
 }
 
 int main(int argc, char *argv[])
 {
-   

Re: [PATCH] macintosh/windfarm_smu_sat: Add missing of_node_put()

2023-04-05 Thread Michael Ellerman
On Thu, 30 Mar 2023 11:35:58 +0800, Liang He wrote:
> We call of_node_get() in wf_sat_probe() after sat is created,
> so we need the of_node_put() before *kfree(sat)*.
> 
> 

Applied to powerpc/next.

[1/1] macintosh/windfarm_smu_sat: Add missing of_node_put()
  https://git.kernel.org/powerpc/c/631cf002826007ab7415258ee647dcaf8845ad5a

cheers


Re: [PATCH v3] powerpc: Use of_address_to_resource()

2023-04-05 Thread Michael Ellerman
On Wed, 29 Mar 2023 17:03:36 -0500, Rob Herring wrote:
> Replace open coded reading of "reg" or of_get_address()/
> of_translate_address() calls with a single call to
> of_address_to_resource().
> 
> 

Applied to powerpc/next.

[1/1] powerpc: Use of_address_to_resource()
  https://git.kernel.org/powerpc/c/2500763dd3db37fad94d9b506907c59c2f5e97c6

cheers


Re: [PATCH] powerpc: xics: Use of_address_count()

2023-04-05 Thread Michael Ellerman
On Mon, 27 Mar 2023 17:30:56 -0500, Rob Herring wrote:
> icp_native_init_one_node() only needs the number of entries in "reg".
> Replace the open coded "reg" parsing with of_address_count() to get the
> number of "reg" entries.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: xics: Use of_address_count()
  https://git.kernel.org/powerpc/c/037c47a436eab2d336d5e131ab1c1394f223a57b

cheers


Re: [PATCH] powerpc: Use of_property_read_bool() for boolean properties

2023-04-05 Thread Michael Ellerman
On Fri, 10 Mar 2023 08:46:57 -0600, Rob Herring wrote:
> It is preferred to use typed property access functions (i.e.
> of_property_read_ functions) rather than low-level
> of_get_property/of_find_property functions for reading properties.
> Convert reading boolean properties to of_property_read_bool().
> 
> 

Applied to powerpc/next.

[1/1] powerpc: Use of_property_read_bool() for boolean properties
  https://git.kernel.org/powerpc/c/4d57e3515e3838b12eccbeb5e0e52f053e3f638a

cheers


Re: [PATCH] powerpc: Use of_property_present() for testing DT property presence

2023-04-05 Thread Michael Ellerman
On Fri, 10 Mar 2023 08:46:56 -0600, Rob Herring wrote:
> It is preferred to use typed property access functions (i.e.
> of_property_read_ functions) rather than low-level
> of_get_property/of_find_property functions for reading properties. As
> part of this, convert of_get_property/of_find_property calls to the
> recently added of_property_present() helper when we just want to test
> for presence of a property and nothing more.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Use of_property_present() for testing DT property presence
  https://git.kernel.org/powerpc/c/857d423c74228cfa064f79ff3a16b163fdb8d542

cheers


Re: [PATCH] powerpc: Use of_address_to_resource()

2023-04-05 Thread Michael Ellerman
On Sun, 19 Mar 2023 11:31:53 -0500, Rob Herring wrote:
> Replace open coded reading of "reg" or of_get_address()/
> of_translate_address() calls with a single call to
> of_address_to_resource().
> 
> 

Applied to powerpc/next.

[1/1] powerpc: Use of_address_to_resource()
  https://git.kernel.org/powerpc/c/2500763dd3db37fad94d9b506907c59c2f5e97c6

cheers


Re: [PATCH] powerpc: usbgecko: Use of_iomap()

2023-04-05 Thread Michael Ellerman
On Mon, 27 Mar 2023 17:31:09 -0500, Rob Herring wrote:
> Replace of_get_property()+of_translate_address()+ioremap() with a call
> to of_iomap() which does all those steps.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: usbgecko: Use of_iomap()
  https://git.kernel.org/powerpc/c/83a8fe569ef84d6eefcb99420a731cb87508f004

cheers


Re: [PATCH] powerpc: isa-bridge: Remove open coded "ranges" parsing

2023-04-05 Thread Michael Ellerman
On Mon, 27 Mar 2023 17:30:45 -0500, Rob Herring wrote:
> "ranges" is a standard property with common parsing functions. Users
> shouldn't be implementing their own parsing of it. Reimplement the
> ISA bridge "ranges" parsing using the common ranges iterator functions.
> 
> The common routines are flexible enough to work on PCI and non-PCI to
> ISA bridges, so refactor pci_process_ISA_OF_ranges() and
> isa_bridge_init_non_pci() into a single implementation.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: isa-bridge: Remove open coded "ranges" parsing
  https://git.kernel.org/powerpc/c/e4ab08be5b4902e5b350b0e1e1a3c25eb21d76d4

cheers


Re: [PATCH] powerpc: fsl_rio: Use of_iomap()

2023-04-05 Thread Michael Ellerman
On Mon, 27 Mar 2023 17:31:02 -0500, Rob Herring wrote:
> Replace of_address_to_resource()+ioremap() with a call to of_iomap()
> which does both of those steps.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: fsl_rio: Use of_iomap()
  https://git.kernel.org/powerpc/c/de8d11bc6ec412a498acf795911c8597ae37d4e7

cheers


Re: [PATCH] macintosh: Use of_property_present() for testing DT property presence

2023-04-05 Thread Michael Ellerman
On Fri, 10 Mar 2023 08:47:35 -0600, Rob Herring wrote:
> It is preferred to use typed property access functions (i.e.
> of_property_read_ functions) rather than low-level
> of_get_property/of_find_property functions for reading properties. As
> part of this, convert of_get_property/of_find_property calls to the
> recently added of_property_present() helper when we just want to test
> for presence of a property and nothing more.
> 
> [...]

Applied to powerpc/next.

[1/1] macintosh: Use of_property_present() for testing DT property presence
  https://git.kernel.org/powerpc/c/87b626a66dd4ab7d5caf5199d98ec0b5953d73f8

cheers


Re: [PATCH v2] macintosh: via-pmu-led: requires ATA to be set

2023-04-05 Thread Michael Ellerman
On Wed, 22 Feb 2023 17:42:41 -0800, Randy Dunlap wrote:
> LEDS_TRIGGER_DISK depends on ATA, so selecting LEDS_TRIGGER_DISK
> when ATA is not set/enabled causes a Kconfig warning:
> 
> WARNING: unmet direct dependencies detected for LEDS_TRIGGER_DISK
>   Depends on [n]: NEW_LEDS [=y] && LEDS_TRIGGERS [=y] && ATA [=n]
>   Selected by [y]:
>   - ADB_PMU_LED_DISK [=y] && MACINTOSH_DRIVERS [=y] && ADB_PMU_LED [=y] && 
> LEDS_CLASS [=y]
> 
> [...]

Applied to powerpc/next.

[1/1] macintosh: via-pmu-led: requires ATA to be set
  https://git.kernel.org/powerpc/c/05dce4ba125336875cd3eed3c1503fa81cd2f691

cheers


Re: [PATCH] powerpc/atomics: Remove unused function

2023-04-05 Thread Michael Ellerman
On Fri, 24 Feb 2023 16:09:40 +0530, Nysal Jan K.A wrote:
> Remove arch_atomic_try_cmpxchg_lock function as it is no longer used
> since commit 9f61521c7a28 ("powerpc/qspinlock: powerpc qspinlock
> implementation")
> 
> 

Applied to powerpc/next.

[1/1] powerpc/atomics: Remove unused function
  https://git.kernel.org/powerpc/c/b0bbe5a2915201e3231e788d716d39dc54493b03

cheers


Re: [PATCH v3] powerpc: Implement arch_within_stack_frames

2023-04-05 Thread Michael Ellerman
On Tue, 28 Feb 2023 05:43:55 +0000, Nicholas Miehlbradt wrote:
> Walks the stack when copy_{to,from}_user address is in the stack to
> ensure that the object being copied is entirely a single stack frame and
> does not contain stack metadata.
> 
> Substantially similar to the x86 implementation. The back chain is used
> to traverse the stack and identify stack frame boundaries.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Implement arch_within_stack_frames
  https://git.kernel.org/powerpc/c/ac9c8901cb10aab043bf3599d19eebacfcda2858

cheers


Re: [PATCH] powerpc/perf: Properly detect mpc7450 family

2023-04-05 Thread Michael Ellerman
On Mon, 27 Feb 2023 16:54:45 +0100, Christophe Leroy wrote:
> Unlike PVR_POWER8, etc , PVR_7450 represents a full PVR
> value and not a family value.
> 
> To avoid confusion, do like E500 family and define the relevant
> PVR_VER_ values for the 7450 family:
>   0x8000 ==> 7450
>   0x8001 ==> 7455
>   0x8002 ==> 7447
>   0x8003 ==> 7447A
>   0x8004 ==> 7448
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/perf: Properly detect mpc7450 family
  https://git.kernel.org/powerpc/c/e7299f961fe5e4496db0bfaa9e819f5e97f3846b

cheers


Re: (subset) [PATCH 0/8] RTAS changes for 6.4

2023-04-05 Thread Michael Ellerman
On Mon, 06 Mar 2023 15:33:39 -0600, Nathan Lynch wrote:
> Proposed changes for the RTAS subsystem and client code.
> 
> Fixes that are subject to backporting are at the front of the queue,
> followed by documentation and cleanups, with enhancements at the end.
> 
> Noteworthy changes:
> * Change sys_rtas() to consume -2/990x statuses instead of returning
>   them to user space.
> * Lockdep annotations for invariants in rtas.c.
> 
> [...]

Applied to powerpc/next.

[6/8] powerpc/rtas: lockdep annotations
  https://git.kernel.org/powerpc/c/af8bc68263b2184e63ee67ca70cecff4636f7901

cheers


Re: (subset) [PATCH 0/3] COVER: Remove memcpy_page_flushcache()

2023-04-05 Thread Michael Ellerman
On Wed, 15 Mar 2023 16:20:53 -0700, Ira Weiny wrote:
> Commit 21b56c847753 ("iov_iter: get rid of separate bvec and xarray
> callbacks") removed the calls to memcpy_page_flushcache().
> 
> kmap_atomic() is deprecated and used in the x86 version of
> memcpy_page_flushcache().
> 
> Remove the unnecessary memcpy_page_flushcache() call from all arch's.
> 
> [...]

Patch 2 applied to powerpc/next.

[2/3] powerpc: Remove memcpy_page_flushcache()
  https://git.kernel.org/powerpc/c/0398abca61482ae47a41ae8f2401338aea366327

cheers


Re: [PATCH] powerpc/pseries: Add spaces around / operator

2023-04-05 Thread Michael Ellerman
On Fri, 24 Mar 2023 23:00:41 +0100, Petr Vaněk wrote:
> This is follow up change after 14b5d59a261b ("powerpc/pseries: Fix
> formatting to make code look more beautiful") to conform to kernel
> coding style.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/pseries: Add spaces around / operator
  https://git.kernel.org/powerpc/c/2747fd26f801c98d0a8177278b4f5c91b8de9c94

cheers


Re: [PATCH 0/3] Clean up PowerPC selftest stderr output

2023-04-05 Thread Michael Ellerman
On Tue, 28 Feb 2023 11:07:06 +1100, Benjamin Gray wrote:
> There are several messages being logged to stderr when building the PowerPC
> selftests:
> 
>   $ make -j$(nproc) O=build -C tools/testing/selftests \
> INSTALL_PATH="$PWD"/out/selftests TARGETS=powerpc install > /dev/null
> 
>   Makefile:50: warning: overriding recipe for target 'clean'
>   ../../lib.mk:124: warning: ignoring old recipe for target 'clean'
>   1+0 records in
>   1+0 records out
>   65536 bytes (66 kB, 64 KiB) copied, 7.71e-05 s, 850 MB/s
>   Makefile:50: warning: overriding recipe for target 'clean'
>   ../../lib.mk:124: warning: ignoring old recipe for target 'clean'
>   make[2]: warning: jobserver unavailable: using -j1.  Add '+' to parent make 
> rule.
>   ...
>   make[2]: warning: jobserver unavailable: using -j1.  Add '+' to parent make 
> rule.
>   Makefile:50: warning: overriding recipe for target 'clean'
>   ../../lib.mk:124: warning: ignoring old recipe for target 'clean'
>   make[2]: warning: jobserver unavailable: using -j1.  Add '+' to parent make 
> rule.
>   ...
>   make[2]: warning: jobserver unavailable: using -j1.  Add '+' to parent make 
> rule.
> 
> [...]

Applied to powerpc/next.

[1/3] selftests/powerpc: Use CLEAN macro to fix make warning
  https://git.kernel.org/powerpc/c/69608683a65be5322ef44091eaeb9890472b2eea
[2/3] selftests/powerpc: Pass make context to children
  https://git.kernel.org/powerpc/c/4ecd0868c5138238dec8a1549bb6ff8e5b48208b
[3/3] selftests/powerpc: Make dd output quiet
  https://git.kernel.org/powerpc/c/d3cf1662b665f20444a08bff52b6daae912e0d1d

cheers


Re: [PATCH] KVM: PPC: Mark three local functions "static"

2023-04-05 Thread Michael Ellerman
On Wed, 08 Mar 2023 15:24:37 -0800, Sean Christopherson wrote:
> Tag a few functions that are local and don't have a previous prototype as
> "static".
> 
> No functional change intended.
> 
> 

Applied to powerpc/topic/ppc-kvm.

[1/1] KVM: PPC: Mark three local functions "static"
  https://git.kernel.org/powerpc/c/e83ca8cfa286c9fc78b585b0e66df7f542bcbcf2

cheers


Re: [PATCH 0/3] powerpc/kvm: Enable HV KVM guests to use prefixed instructions to access emulated MMIO

2023-04-05 Thread Michael Ellerman
On Wed, 08 Mar 2023 17:33:43 +1100, Paul Mackerras wrote:
> This series changes the powerpc KVM code so that HV KVM can fetch
> prefixed instructions from the guest in those situations where there
> is a need to emulate an instruction, which for HV KVM means emulating
> loads and stores to emulated MMIO devices.  (Prefixed instructions
> were introduced with POWER10 and Power ISA v3.1, and consist of two
> 32-bit words, called the prefix and the suffix.)
> 
> [...]

Applied to powerpc/topic/ppc-kvm.

[1/3] powerpc/kvm: Make kvmppc_get_last_inst() produce a ppc_inst_t
  https://git.kernel.org/powerpc/c/acf17878da680a0c11c0bcb8a54b4f676ff39c80
[2/3] powerpc/kvm: Fetch prefixed instructions from the guest
  https://git.kernel.org/powerpc/c/953e37397fb61be61f095d36972188bac5235021
[3/3] powerpc/kvm: Enable prefixed instructions for HV KVM and disable for PR 
KVM
  https://git.kernel.org/powerpc/c/a3800ef9c48c4497dafe5ede1b65d91d9ef9cf1e

cheers


Re: [PATCH v2 0/2] KVM: PPC: Book3S HV: Injected interrupt SRR1

2023-04-05 Thread Michael Ellerman
On Thu, 30 Mar 2023 20:32:22 +1000, Nicholas Piggin wrote:
> I missed this in my earlier review and testing, but I think we need
> these in the prefix instruction enablement series before the final patch
> that enables HFSCR[PREFIX] for guests.
> 
> Thanks,
> Nick
> 
> [...]

Applied to powerpc/topic/ppc-kvm.

[1/2] KVM: PPC: Permit SRR1 flags in more injected interrupt types
  https://git.kernel.org/powerpc/c/460ba21d83fef766a5d34260e464c9ab8f10aa05
[2/2] KVM: PPC: Book3S HV: Set SRR1[PREFIX] bit on injected interrupts
  https://git.kernel.org/powerpc/c/6cd5c1db9983600f1848822e86e4906377b4a899

cheers


Re: [PATCH v2] KVM: PPC: BookE: Fix W=1 warnings

2023-04-05 Thread Michael Ellerman
On Mon, 03 Apr 2023 14:53:14 +1000, Michael Ellerman wrote:
> Fix various W=1 warnings in booke.c:
> 
>   arch/powerpc/kvm/booke.c:1008:5: error: no previous prototype for 
> ‘kvmppc_handle_exit’ [-Werror=missing-prototypes]
>1008 | int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
> | ^~
>   arch/powerpc/kvm/booke.c:1009: warning: Function parameter or member 'vcpu' 
> not described in 'kvmppc_handle_exit'
>   arch/powerpc/kvm/booke.c:1009: warning: Function parameter or member 
> 'exit_nr' not described in 'kvmppc_handle_exit'
> 
> [...]

Applied to powerpc/topic/ppc-kvm.

[1/1] KVM: PPC: BookE: Fix W=1 warnings
  https://git.kernel.org/powerpc/c/43d05c6123ca1ace5982ca326c156502e735b7d5

cheers


Re: [PATCH v5] KVM: PPC: Book3S HV: kvmppc_hv_entry: remove .global scope

2023-04-05 Thread Michael Ellerman
On Mon, 27 Mar 2023 07:33:20 -0400, Kautuk Consul wrote:
> kvmppc_hv_entry isn't called from anywhere other than
> book3s_hv_rmhandlers.S itself. Removing .global scope for
> this function and annotating it with SYM_CODE_START_LOCAL
> and SYM_CODE_END.
> 
> 

Applied to powerpc/topic/ppc-kvm.

[1/1] KVM: PPC: Book3S HV: kvmppc_hv_entry: remove .global scope
  https://git.kernel.org/powerpc/c/5f4f53d28cde2cc7be96f657229c8603da578500

cheers


Re: [PATCH] powerpc/mm: Fix false detection of read faults

2023-04-05 Thread Michael Ellerman
On Fri, 10 Mar 2023 16:08:34 +1100, Russell Currey wrote:
> To support detection of read faults with Radix execute-only memory, the
> vma_is_accessible() check in access_error() (which checks for PROT_NONE)
> was replaced with a check to see if VM_READ was missing, and if so,
> returns true to assert the fault was caused by a bad read.
> 
> This is incorrect, as it ignores that both VM_WRITE and VM_EXEC imply
> read on powerpc, as defined in protection_map[].  This causes mappings
> containing VM_WRITE or VM_EXEC without VM_READ to misreport the cause of
> page faults, since the MMU is still allowing reads.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/mm: Fix false detection of read faults
  https://git.kernel.org/powerpc/c/f2c7e3562b4c4f1699acc1538ebf3e75f5cced35

cheers


Re: [PATCH v2] powerpc/pseries/vas: Ignore VAS update for DLPAR if copy/paste is not enabled

2023-04-05 Thread Michael Ellerman
On Mon, 20 Mar 2023 19:50:08 -0700, Haren Myneni wrote:
> The hypervisor supports user-mode NX from Power10. pseries_vas_dlpar_cpu()
> is called from lparcfg_write() to update VAS windows for DLPAR event in
> shared processor mode and the kernel gets -ENOTSUPP for HCALLs if the
> user-mode NX is not supported. The current VAS implementation also
> supports only with Radix page tables. Whereas in dedicated processor
> mode, pseries_vas_notifier() is registered only if the copy/paste
> feature is enabled. So instead of displaying HCALL error messages,
> update VAS capabilities if the copy/paste feature is available.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/pseries/vas: Ignore VAS update for DLPAR if copy/paste is not 
enabled
  https://git.kernel.org/powerpc/c/eca9f6e6f83b6725b84e1c76fdde19b003cff0eb

cheers


Re: [PATCH v2] powerpc/64s: Fix __pte_needs_flush() false positive warning

2023-04-05 Thread Michael Ellerman
On Fri, 03 Mar 2023 09:59:47 +1100, Benjamin Gray wrote:
> Userspace PROT_NONE ptes set _PAGE_PRIVILEGED, triggering a false
> positive debug assertion that __pte_flags_need_flush() is not called
> on a kernel mapping.
> 
> Detect when it is a userspace PROT_NONE page by checking the required
> bits of PAGE_NONE are set, and none of the RWX bits are set.
> pte_protnone() is insufficient here because it always returns 0 when
> CONFIG_NUMA_BALANCING=n.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/64s: Fix __pte_needs_flush() false positive warning
  https://git.kernel.org/powerpc/c/1abce0580b89464546ae06abd5891ebec43c9470

cheers


Re: [PATCH] powerpc: don't try to copy ppc for task with NULL pt_regs

2023-04-05 Thread Michael Ellerman
On Sun, 26 Mar 2023 16:15:57 -0600, Jens Axboe wrote:
> Powerpc sets up PF_KTHREAD and PF_IO_WORKER with a NULL pt_regs, which
> from my (arguably very short) checking is not commonly done for other
> archs. This is fine, except when PF_IO_WORKER's have been created and
> the task does something that causes a coredump to be generated. Then we
> get this crash:
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc: don't try to copy ppc for task with NULL pt_regs
  https://git.kernel.org/powerpc/c/fd7276189450110ed835eb0a334e62d2f1c4e3be

cheers


Re: [PATCH v2] powerpc/papr_scm: Update the NUMA distance table for the target node

2023-04-05 Thread Michael Ellerman
On Tue, 04 Apr 2023 09:44:33 +0530, Aneesh Kumar K.V wrote:
> platform device helper routines won't update the NUMA distance table
> while creating a platform device, even if the device is present on
> a NUMA node that doesn't have memory or CPU. This is especially true
> for pmem devices. If the target node of the pmem device is not online, we
> find the nearest online node to the device and associate the pmem
> device with that online node. To find the nearest online node, we should
> have the numa distance table updated correctly. Update the distance
> information during the device probe.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/papr_scm: Update the NUMA distance table for the target node
  https://git.kernel.org/powerpc/c/b277fc793daf258877b4c0744b52f69d6e6ba22e

cheers


[PATCH v8 7/7] PCI: Work around PCIe link training failures

2023-04-05 Thread Maciej W. Rozycki
Attempt to handle cases such as with a downstream port of the ASMedia 
ASM2824 PCIe switch where link training never completes and the link 
continues switching between speeds indefinitely with the data link layer 
never reaching the active state.

It has been observed with a downstream port of the ASMedia ASM2824 Gen 3 
switch wired to the upstream port of the Pericom PI7C9X2G304 Gen 2 
switch, using a Delock Riser Card PCI Express x1 > 2 x PCIe x1 device, 
P/N 41433, wired to a SiFive HiFive Unmatched board.  In this setup the 
switches are supposed to negotiate the link speed of preferably 5.0GT/s, 
falling back to 2.5GT/s.

Instead the link continues oscillating between the two speeds, at the 
rate of 34-35 times per second, with link training reported repeatedly 
active ~84% of the time.  Forcibly limiting the target link speed to 
2.5GT/s with the upstream ASM2824 device however makes the two switches 
communicate correctly.  Removing the speed restriction afterwards makes 
the two devices switch to 5.0GT/s then.

Make use of these observations then and detect the inability to train 
the link, by checking for the Data Link Layer Link Active status bit 
being off while the Link Bandwidth Management Status indicating that 
hardware has changed the link speed or width in an attempt to correct 
unreliable link operation.

Restrict the speed to 2.5GT/s then with the Target Link Speed field, 
request a retrain and wait 200ms for the data link to go up.  If this 
turns out successful, then lift the restriction, letting the devices 
negotiate a higher speed.

Also check for a 2.5GT/s speed restriction the firmware may have already 
arranged and lift it too with ports of devices known to continue working 
afterwards, currently the ASM2824 only, that already report their data 
link being up.

Signed-off-by: Maciej W. Rozycki 
Link: 
https://lore.kernel.org/r/alpine.deb.2.21.2203022037020.56...@angie.orcam.me.uk/
Link: https://source.denx.de/u-boot/u-boot/-/commit/a398a51ccc68
---
No changes from v7.

Changes from v6:

- Regenerate against 6.3-rc5.

- Shorten the lore.kernel.org archive link in the change description.

Changes from v5:

- Move from a quirk into PCI core and call at device probing, hot-plug,
  reset and resume.  Keep the ASMedia part under CONFIG_PCI_QUIRKS.

- Rely on `dev->link_active_reporting' rather than re-retrieving the 
  capability.

Changes from v4:

- Remove  inclusion no longer needed.

- Make the quirk generic based on probing device features rather than 
  specific to the ASM2824 part only; take the Retrain Link bit erratum 
  into account.

- Still lift the 2.5GT/s speed restriction with the ASM2824 only.

- Increase retrain timeout from 200ms to 1s (PCIE_LINK_RETRAIN_TIMEOUT).

- Remove retrain success notification.

- Use PCIe helpers rather than generic PCI functions throughout.

- Trim down and update the wording of the change description for the 
  switch from an ASM2824-specific to a generic fixup.

Changes from v3:

- Remove the  entry for the ASM2824.

Changes from v2:

- Regenerate for 5.17-rc2 for a merge conflict.

- Replace BUG_ON for a missing PCI Express capability with WARN_ON and an
  early return.

Changes from v1:

- Regenerate for a merge conflict.
---
 drivers/pci/pci.c   |  154 ++--
 drivers/pci/pci.h   |1 
 drivers/pci/probe.c |2 
 3 files changed, 152 insertions(+), 5 deletions(-)

linux-pcie-asm2824-manual-retrain.diff
Index: linux-macro/drivers/pci/pci.c
===
--- linux-macro.orig/drivers/pci/pci.c
+++ linux-macro/drivers/pci/pci.c
@@ -859,6 +859,132 @@ int pci_wait_for_pending(struct pci_dev
return 0;
 }
 
+/*
+ * Retrain the link of a downstream PCIe port by hand if necessary.
+ *
+ * This is needed at least where a downstream port of the ASMedia ASM2824
+ * Gen 3 switch is wired to the upstream port of the Pericom PI7C9X2G304
+ * Gen 2 switch, and observed with the Delock Riser Card PCI Express x1 >
+ * 2 x PCIe x1 device, P/N 41433, plugged into the SiFive HiFive Unmatched
+ * board.
+ *
+ * In such a configuration the switches are supposed to negotiate the link
+ * speed of preferably 5.0GT/s, falling back to 2.5GT/s.  However the link
+ * continues switching between the two speeds indefinitely and the data
+ * link layer never reaches the active state, with link training reported
+ * repeatedly active ~84% of the time.  Forcing the target link speed to
+ * 2.5GT/s with the upstream ASM2824 device makes the two switches talk to
+ * each other correctly however.  And more interestingly retraining with a
+ * higher target link speed afterwards lets the two successfully negotiate
+ * 5.0GT/s.
+ *
+ * With the ASM2824 we can rely on the otherwise optional Data Link Layer
+ * Link Active status bit and in the failed link training scenario it will
+ * be off along with the Link Bandwidth Management Status indicating that
+ * 

[PATCH v8 6/7] net/mlx5: Rely on `link_active_reporting'

2023-04-05 Thread Maciej W. Rozycki
Use `link_active_reporting' to determine whether Data Link Layer Link 
Active Reporting is available rather than re-retrieving the capability.

Signed-off-by: Maciej W. Rozycki 
---
NB this has been compile-tested only with PPC64LE and x86-64 
configurations.

Changes from v7:

- Reorder from 5/7.

Changes from v6:

- Regenerate against 6.3-rc5.

New change in v6.
---
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c |8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

linux-pcie-link-active-reporting-mlx5.diff
Index: linux-macro/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
===
--- linux-macro.orig/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ linux-macro/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -307,7 +307,6 @@ static int mlx5_pci_link_toggle(struct m
unsigned long timeout;
struct pci_dev *sdev;
int cap, err;
-   u32 reg32;
 
/* Check that all functions under the pci bridge are PFs of
 * this device otherwise fail this function.
@@ -346,11 +345,8 @@ static int mlx5_pci_link_toggle(struct m
return err;
 
/* Check link */
-   err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
-   if (err)
-   return err;
-   if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
-   mlx5_core_warn(dev, "No PCI link reporting capability 
(0x%08x)\n", reg32);
+   if (!bridge->link_active_reporting) {
+   mlx5_core_warn(dev, "No PCI link reporting capability\n");
msleep(1000);
goto restore;
}


[PATCH v8 5/7] powerpc/eeh: Rely on `link_active_reporting'

2023-04-05 Thread Maciej W. Rozycki
Use `link_active_reporting' to determine whether Data Link Layer Link 
Active Reporting is available rather than re-retrieving the capability.

Signed-off-by: Maciej W. Rozycki 
---
NB this has been compile-tested only with a PPC64LE configuration.

Changes from v7:

- Reorder from 4/7.

No change from v6.

New change in v6.
---
 arch/powerpc/kernel/eeh_pe.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

linux-pcie-link-active-reporting-eeh.diff
Index: linux-macro/arch/powerpc/kernel/eeh_pe.c
===
--- linux-macro.orig/arch/powerpc/kernel/eeh_pe.c
+++ linux-macro/arch/powerpc/kernel/eeh_pe.c
@@ -671,9 +671,8 @@ static void eeh_bridge_check_link(struct
eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
 
/* Check link */
-   eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
-   if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
-   eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", 
val);
+   if (!edev->pdev->link_active_reporting) {
+   eeh_edev_dbg(edev, "No link reporting capability\n");
msleep(1000);
return;
}


[PATCH v8 4/7] PCI: Initialize `link_active_reporting' earlier

2023-04-05 Thread Maciej W. Rozycki
Determine whether Data Link Layer Link Active Reporting is available 
ahead of calling any fixups so that the cached value can be used there 
and later on.

Signed-off-by: Maciej W. Rozycki 
---
Changes from v7:

- Reorder from 3/7.

Changes from v6:

- Regenerate against 6.3-rc5.

New change in v6.
---
 drivers/pci/probe.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

linux-pcie-link-active-reporting-early.diff
Index: linux-macro/drivers/pci/probe.c
===
--- linux-macro.orig/drivers/pci/probe.c
+++ linux-macro/drivers/pci/probe.c
@@ -820,7 +820,6 @@ static void pci_set_bus_speed(struct pci
 
pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
bus->max_bus_speed = pcie_link_speed[linkcap & 
PCI_EXP_LNKCAP_SLS];
-   bridge->link_active_reporting = !!(linkcap & 
PCI_EXP_LNKCAP_DLLLARC);
 
pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
pcie_update_link_speed(bus, linksta);
@@ -1829,6 +1828,7 @@ int pci_setup_device(struct pci_dev *dev
int pos = 0;
struct pci_bus_region region;
struct resource *res;
+   u32 linkcap;
 
hdr_type = pci_hdr_type(dev);
 
@@ -1876,6 +1876,10 @@ int pci_setup_device(struct pci_dev *dev
/* "Unknown power state" */
dev->current_state = PCI_UNKNOWN;
 
+   /* Set it early to make it available to fixups, etc.  */
+   pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
+   dev->link_active_reporting = !!(linkcap & PCI_EXP_LNKCAP_DLLLARC);
+
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
 


[PATCH v8 3/7] PCI: Execute `quirk_enable_clear_retrain_link' earlier

2023-04-05 Thread Maciej W. Rozycki
Make `quirk_enable_clear_retrain_link' `pci_fixup_early' so that any later 
fixups can rely on `clear_retrain_link' to have been already initialised.

Signed-off-by: Maciej W. Rozycki 
---
Changes from v7:

- Reorder from 2/7.

No change from v6.

No change from v5.

New change in v5.
---
 drivers/pci/quirks.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

linux-pcie-clear-retrain-link-early.diff
Index: linux-macro/drivers/pci/quirks.c
===
--- linux-macro.orig/drivers/pci/quirks.c
+++ linux-macro/drivers/pci/quirks.c
@@ -2407,9 +2407,9 @@ static void quirk_enable_clear_retrain_l
dev->clear_retrain_link = 1;
pci_info(dev, "Enable PCIe Retrain Link quirk\n");
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe110, 
quirk_enable_clear_retrain_link);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe111, 
quirk_enable_clear_retrain_link);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe130, 
quirk_enable_clear_retrain_link);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe110, 
quirk_enable_clear_retrain_link);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe111, 
quirk_enable_clear_retrain_link);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe130, 
quirk_enable_clear_retrain_link);
 
 static void fixup_rev1_53c810(struct pci_dev *dev)
 {


[PATCH v8 2/7] PCI: Export PCI link retrain timeout

2023-04-05 Thread Maciej W. Rozycki
Rename LINK_RETRAIN_TIMEOUT to PCIE_LINK_RETRAIN_TIMEOUT and make it
available via "pci.h" for PCI drivers to use.

Signed-off-by: Maciej W. Rozycki 
---
Changes from v7:

- Reorder from 1/7.

No change from v6.

No change from v5.

New change in v5.
---
 drivers/pci/pci.h   |2 ++
 drivers/pci/pcie/aspm.c |4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

linux-pcie-link-retrain-timeout.diff
Index: linux-macro/drivers/pci/pci.h
===
--- linux-macro.orig/drivers/pci/pci.h
+++ linux-macro/drivers/pci/pci.h
@@ -11,6 +11,8 @@
 
 #define PCI_VSEC_ID_INTEL_TBT  0x1234  /* Thunderbolt */
 
+#define PCIE_LINK_RETRAIN_TIMEOUT HZ
+
 extern const unsigned char pcie_link_speed[];
 extern bool pci_early_dump;
 
Index: linux-macro/drivers/pci/pcie/aspm.c
===
--- linux-macro.orig/drivers/pci/pcie/aspm.c
+++ linux-macro/drivers/pci/pcie/aspm.c
@@ -90,8 +90,6 @@ static const char *policy_str[] = {
[POLICY_POWER_SUPERSAVE] = "powersupersave"
 };
 
-#define LINK_RETRAIN_TIMEOUT HZ
-
 /*
  * The L1 PM substate capability is only implemented in function 0 in a
  * multi function device.
@@ -213,7 +211,7 @@ static bool pcie_retrain_link(struct pci
}
 
/* Wait for link training end. Break out after waiting for timeout */
-   end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
+   end_jiffies = jiffies + PCIE_LINK_RETRAIN_TIMEOUT;
do {
pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16);
if (!(reg16 & PCI_EXP_LNKSTA_LT))


[PATCH v8 1/7] PCI: pciehp: Rely on `link_active_reporting'

2023-04-05 Thread Maciej W. Rozycki
Use `link_active_reporting' to determine whether Data Link Layer Link 
Active Reporting is available rather than re-retrieving the capability.

Signed-off-by: Maciej W. Rozycki 
Reviewed-by: Lukas Wunner 
---
NB this has been compile-tested only with PPC64LE and x86-64
configurations.

Changes from v7:

- Add Reviewed-by: tag by Lukas Wunner.

- Reorder from 6/7.

No change from v6.

New change in v6.
---
 drivers/pci/hotplug/pciehp_hpc.c |7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

linux-pcie-link-active-reporting-hpc.diff
Index: linux-macro/drivers/pci/hotplug/pciehp_hpc.c
===
--- linux-macro.orig/drivers/pci/hotplug/pciehp_hpc.c
+++ linux-macro/drivers/pci/hotplug/pciehp_hpc.c
@@ -984,7 +984,7 @@ static inline int pcie_hotplug_depth(str
 struct controller *pcie_init(struct pcie_device *dev)
 {
struct controller *ctrl;
-   u32 slot_cap, slot_cap2, link_cap;
+   u32 slot_cap, slot_cap2;
u8 poweron;
struct pci_dev *pdev = dev->port;
struct pci_bus *subordinate = pdev->subordinate;
@@ -1030,9 +1030,6 @@ struct controller *pcie_init(struct pcie
if (dmi_first_match(inband_presence_disabled_dmi_table))
ctrl->inband_presence_disabled = 1;
 
-   /* Check if Data Link Layer Link Active Reporting is implemented */
-   pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
-
/* Clear all remaining event bits in Slot Status register. */
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
@@ -1051,7 +1048,7 @@ struct controller *pcie_init(struct pcie
FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD),
-   FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC),
+   FLAG(pdev->link_active_reporting, true),
pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : "");
 
/*


[PATCH v8 0/7] pci: Work around ASMedia ASM2824 PCIe link training failures

2023-04-05 Thread Maciej W. Rozycki
Hi,

 This is v8 of the change to work around a PCIe link training phenomenon 
where a pair of devices both capable of operating at a link speed above 
2.5GT/s seems unable to negotiate the link speed and continues training 
indefinitely with the Link Training bit switching on and off repeatedly 
and the data link layer never reaching the active state.

 This version adds a Reviewed-by: tag by Lukas Wunner accidentally missed 
from 6/7 in v7 and reorders said change to the front of the series.

 Last two iterations: 

.

  Maciej


Re: [kvm-unit-tests v3 10/13] powerpc: Add support for more interrupts including HV interrupts

2023-04-05 Thread Joel Stanley
Hi Nick,

On Mon, 27 Mar 2023 at 12:55, Nicholas Piggin  wrote:
>
> Interrupt vectors were not being populated for all architected
> interrupt types, which could lead to crashes rather than a message for
> unhandled interrupts.
>
> 0x20 sized vectors require some reworking of the code to fit. This
> also adds support for HV / HSRR type interrupts which will be used in
> a later change.
>
> Signed-off-by: Nicholas Piggin 
> ---
>  powerpc/cstart64.S | 79 ++
>  1 file changed, 65 insertions(+), 14 deletions(-)
>
> diff --git a/powerpc/cstart64.S b/powerpc/cstart64.S

> +handler_trampoline:
> +   mfctr   r0
> +   std r0,_CTR(r1)
> +
> +   ld  r0, P_HANDLER(0)
> +   mtctr   r0
> +
> +   /* nip and msr */
> +   mfsrr0  r0

I tried building the tests on a power8 box with binutils 2.34 and gas complains:

powerpc/cstart64.S: Assembler messages:
powerpc/cstart64.S:337: Error: unrecognized opcode: `mfhsrr0'
powerpc/cstart64.S:340: Error: unrecognized opcode: `mfhsrr1'

It appears this mnemonic is only supported for power10 (and were only
added in binutils 2.36):

$ git grep -i mfhsrr
opcodes/ppc-opc.c:{"mfhsrr0",   XSPR(31,339,314), XSPR_MASK, POWER10,
 EXT,{RS}},
opcodes/ppc-opc.c:{"mfhsrr1",   XSPR(31,339,315), XSPR_MASK, POWER10,
 EXT,{RS}},

I replaced it with mfspr and the tests ran fine:

@@ -334,10 +338,10 @@ handler_htrampoline:
mtctr   r0

/* nip and msr */
-   mfhsrr0 r0
+   mfspr   r0, SPRN_HSRR0
std r0, _NIP(r1)

-   mfhsrr1 r0
+   mfspr   r0, SPRN_HSRR1
std r0, _MSR(r1)

Cheers,

Joel


RE: [PATCH v4] Kconfig: introduce HAS_IOPORT option and select it as necessary

2023-04-05 Thread David Laight
From: Linuxppc-dev Arnd Bergmann
> Sent: 05 April 2023 21:32
> 
> On Wed, Apr 5, 2023, at 22:00, H. Peter Anvin wrote:
> > On April 5, 2023 8:12:38 AM PDT, Niklas Schnelle  
> > wrote:
> >>On Thu, 2023-03-23 at 17:33 +0100, Niklas Schnelle wrote:
> >>> We introduce a new HAS_IOPORT Kconfig option to indicate support for I/O
> >>> Port access. In a future patch HAS_IOPORT=n will disable compilation of
> >>> the I/O accessor functions inb()/outb() and friends on architectures
> >>> which can not meaningfully support legacy I/O spaces such as s390.
> >>> >>
> >>Gentle ping. As far as I can tell this hasn't been picked to any tree
> >>so far but also hasn't seen complaints so I'm wondering if I should send
> >>a new version of the combined series of this patch plus the added
> >>HAS_IOPORT dependencies per subsystem or wait until this is picked up.
> >
> > You need this on a system supporting not just ISA but also PCI.
> >
> > Typically on non-x86 architectures this is simply mapped into a memory 
> > window.
> 
> I'm pretty confident that the list is correct here, as the HAS_IOPORT
> symbol is enabled exactly for the architectures that have a way to
> map the I/O space. PCIe generally works fine without I/O space, the
> only exception are drivers for devices that were around as early PCI.

Isn't there a difference between cpu that have inb()/outb() (probably
only x86?) and architectures (well computer designs) that can generate
PCI 'I/O' cycles by some means.
It isn't even just PCI I/O cycles, I've used an ARM cpu (SA1100)
that mapped a chunk of physical address space onto PCMCIA I/O cycles.

If the hardware can map a PCI 'IO' bar into normal kernel address
space then the bar and accesses can be treated exactly like a memory bar.
This probably leaves x86 as the outlier where you need (IIRC) io_readl()
and friends that can generate in/out instructions for those accesses.

There are also all the x86 ISA devices which need in/out instructions.
But (with the likely exception of the UART) they are pretty much
platform specific.

So, to my mind at least, HAS_IOPORT is just the wrong question.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [PATCH v4] Kconfig: introduce HAS_IOPORT option and select it as necessary

2023-04-05 Thread Arnd Bergmann
On Wed, Apr 5, 2023, at 22:00, H. Peter Anvin wrote:
> On April 5, 2023 8:12:38 AM PDT, Niklas Schnelle  
> wrote:
>>On Thu, 2023-03-23 at 17:33 +0100, Niklas Schnelle wrote:
>>> We introduce a new HAS_IOPORT Kconfig option to indicate support for I/O
>>> Port access. In a future patch HAS_IOPORT=n will disable compilation of
>>> the I/O accessor functions inb()/outb() and friends on architectures
>>> which can not meaningfully support legacy I/O spaces such as s390.
>>> >>
>>Gentle ping. As far as I can tell this hasn't been picked to any tree
>>so far but also hasn't seen complaints so I'm wondering if I should send
>>a new version of the combined series of this patch plus the added
>>HAS_IOPORT dependencies per subsystem or wait until this is picked up.
>
> You need this on a system supporting not just ISA but also PCI.
>
> Typically on non-x86 architectures this is simply mapped into a memory window.

I'm pretty confident that the list is correct here, as the HAS_IOPORT
symbol is enabled exactly for the architectures that have a way to
map the I/O space. PCIe generally works fine without I/O space, the
only exception are drivers for devices that were around as early PCI.

  Arnd


Re: [PATCH v4] Kconfig: introduce HAS_IOPORT option and select it as necessary

2023-04-05 Thread Arnd Bergmann
On Wed, Apr 5, 2023, at 17:12, Niklas Schnelle wrote:
> On Thu, 2023-03-23 at 17:33 +0100, Niklas Schnelle wrote:
>
> Gentle ping. As far as I can tell this hasn't been picked to any tree
> so far but also hasn't seen complaints so I'm wondering if I should send
> a new version of the combined series of this patch plus the added
> HAS_IOPORT dependencies per subsystem or wait until this is picked up.

My bad, I've created an 'asm-generic-io' branch in the asm-generic
tree now and merged that into the master branch for 6.4.

If anyone wants to merge the later patches for 6.4, feel free to
pull in

https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git 
asm-generic-io

as a stable base.

   Arnd


Re: [PATCH v4] Kconfig: introduce HAS_IOPORT option and select it as necessary

2023-04-05 Thread H. Peter Anvin
On April 5, 2023 8:12:38 AM PDT, Niklas Schnelle  wrote:
>On Thu, 2023-03-23 at 17:33 +0100, Niklas Schnelle wrote:
>> We introduce a new HAS_IOPORT Kconfig option to indicate support for I/O
>> Port access. In a future patch HAS_IOPORT=n will disable compilation of
>> the I/O accessor functions inb()/outb() and friends on architectures
>> which can not meaningfully support legacy I/O spaces such as s390.
>> 
>> The following architectures do not select HAS_IOPORT:
>> 
>> * ARC
>> * C-SKY
>> * Hexagon
>> * Nios II
>> * OpenRISC
>> * s390
>> * User-Mode Linux
>> * Xtensa
>> 
>> All other architectures select HAS_IOPORT at least conditionally.
>> 
>> The "depends on" relations on HAS_IOPORT in drivers as well as ifdefs
>> for HAS_IOPORT specific sections will be added in subsequent patches on
>> a per subsystem basis.
>> 
>> Co-developed-by: Arnd Bergmann 
>> Signed-off-by: Arnd Bergmann 
>> Acked-by: Johannes Berg  # for ARCH=um
>> Acked-by: Geert Uytterhoeven 
>> Signed-off-by: Niklas Schnelle 
>> ---
>> Note: This patch is the initial patch of a larger series[0]. This patch
>> introduces the HAS_IOPORT config option while the rest of the series adds
>> driver dependencies and the final patch removes inb() / outb() and friends on
>> platforms that don't support them. 
>> 
>> Thus each of the per-subsystem patches is independent from each other but
>> depends on this patch while the final patch depends on the whole series. Thus
>> splitting this initial patch off allows the per-subsystem HAS_IOPORT 
>> dependency
>> addition be merged separately via different trees without breaking the build.
>> 
>> [0] 
>> https://lore.kernel.org/lkml/20230314121216.413434-1-schne...@linux.ibm.com/
>> 
>> Changes since v3:
>> - List archs without HAS_IOPORT in commit message (Arnd)
>> - Select HAS_IOPORT for LoongArch (Arnd)
>> - Use "select HAS_IOPORT if (E)ISA || .." instead of a "depends on" for 
>> (E)ISA
>>   for m68k and parisc
>> - Select HAS_IOPORT with config GSC on parisc (Arnd)
>> - Drop "depends on HAS_IOPORT" for um's config ISA (Johannes)
>> - Drop "depends on HAS_IOPORT" for config ISA on x86 and parisc where it is
>>   always selected (Arnd)
>> 
>
>Gentle ping. As far as I can tell this hasn't been picked to any tree
>so far but also hasn't seen complaints so I'm wondering if I should send
>a new version of the combined series of this patch plus the added
>HAS_IOPORT dependencies per subsystem or wait until this is picked up.
>
>Thanks,
>Niklas
>
>

You need this on a system supporting not just ISA but also PCI.

Typically on non-x86 architectures this is simply mapped into a memory window.


Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-04-05 Thread Bjorn Helgaas
On Wed, Apr 05, 2023 at 11:28:27AM +0300, Andy Shevchenko wrote:
> On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> > On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > > Provide two new helper macros to iterate over PCI device resources and
> > > convert users.
> > > 
> > > Looking at it, refactor existing pci_bus_for_each_resource() and convert
> > > users accordingly.

> > Applied 2-7 to pci/resource for v6.4, thanks, I really like this!
> 
> Btw, can you actually drop patch 7, please?

Done.

> > I omitted
> > 
> >   [1/7] kernel.h: Split out COUNT_ARGS() and CONCATENATE()"
> > 
> > only because it's not essential to this series and has only a trivial
> > one-line impact on include/linux/pci.h.
> 
> I'm not sure I understood what exactly "essentiality" means to you, but
> I included that because it makes the split which can be used later by
> others and not including kernel.h in the header is the objective I want
> to achieve. Without this patch the achievement is going to be deferred.
> Yet, this, as you have noticed, allows to compile and use the macros in
> the rest of the patches.

I haven't followed the kernel.h splitting, and I try to avoid
incidental changes outside of the files I maintain, so I just wanted
to keep this series purely PCI and avoid any possible objections to a
new include file or discussion about how it should be done.


Re: [PATCH v8 5/7] PCI: Allow pci_bus_for_each_resource() to take less arguments

2023-04-05 Thread Bjorn Helgaas
On Wed, Apr 05, 2023 at 02:50:47PM +0300, Andy Shevchenko wrote:
> On Thu, Mar 30, 2023 at 07:24:32PM +0300, Andy Shevchenko wrote:
> > Refactor pci_bus_for_each_resource() in the same way as it's done in
> > pci_dev_for_each_resource() case. This will allow to hide iterator
> > inside the loop, where it's not used otherwise.
> > 
> > No functional changes intended.
> 
> Bjorn, this has wrong author in your tree:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=resource=46dbad19a59e0dd8f1e7065e5281345797fbb365

I botched it, sorry, should be fixed now.

Bjorn


Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Peter Zijlstra
On Wed, Apr 05, 2023 at 04:43:14PM -0300, Marcelo Tosatti wrote:

> Two points:
> 
> 1) For a virtualized system, the overhead is not only of executing the
> IPI but:
> 
>   VM-exit
>   run VM-exit code in host
>   handle IPI
>   run VM-entry code in host
>   VM-entry

I thought we could do IPIs without VMexit these days? Also virt... /me
walks away.

> 2) Depends on the application and the definition of "occasional".
> 
> For certain types of applications (for example PLC software or
> RAN processing), upon occurrence of an event, it is necessary to
> complete a certain task in a maximum amount of time (deadline).

If the application is properly NOHZ_FULL and never does a kernel entry,
it will never get that IPI. If it is a pile of shit and does kernel
entries while it pretends to be NOHZ_FULL it gets to keep the pieces and
no amount of crying will get me to care.


Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Peter Zijlstra
On Wed, Apr 05, 2023 at 04:45:32PM -0300, Marcelo Tosatti wrote:
> On Wed, Apr 05, 2023 at 01:10:07PM +0200, Frederic Weisbecker wrote:
> > On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:
> > > On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > > > +   int state = atomic_read(&ct->state);
> > > > +   /* will return true only for cpus in kernel space */
> > > > +   return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > > > +}
> > > 
> > > Also note that this doesn't strictly prevent userspace from being 
> > > interrupted.
> > > You may well observe the CPU in kernel but it may receive the IPI later 
> > > after
> > > switching to userspace.
> > > 
> > > We could arrange for avoiding that with marking ct->state with a pending 
> > > work bit
> > > to flush upon user entry/exit but that's a bit more overhead so I first 
> > > need to
> > > know about your expectations here, ie: can you tolerate such an occasional
> > > interruption or not?
> > 
> > Bah, actually what can we do to prevent from that racy IPI? Not much I 
> > fear...
> 
> Use a different mechanism other than an IPI to ensure in progress
> __get_free_pages_fast() has finished execution.
> 
> Isn't this codepath slow path enough that it can use
> synchronize_rcu_expedited?

To actually hit this path you're doing something really dodgy.


Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Marcelo Tosatti
On Wed, Apr 05, 2023 at 01:10:07PM +0200, Frederic Weisbecker wrote:
> On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:
> > On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > > + int state = atomic_read(&ct->state);
> > > + /* will return true only for cpus in kernel space */
> > > + return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > > +}
> > 
> > Also note that this doesn't strictly prevent userspace from being 
> > interrupted.
> > You may well observe the CPU in kernel but it may receive the IPI later 
> > after
> > switching to userspace.
> > 
> > We could arrange for avoiding that with marking ct->state with a pending 
> > work bit
> > to flush upon user entry/exit but that's a bit more overhead so I first 
> > need to
> > know about your expectations here, ie: can you tolerate such an occasional
> > interruption or not?
> 
> Bah, actually what can we do to prevent from that racy IPI? Not much I fear...

Use a different mechanism other than an IPI to ensure in progress
__get_free_pages_fast() has finished execution.

Isn't this codepath slow path enough that it can use
synchronize_rcu_expedited?



Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Marcelo Tosatti
On Wed, Apr 05, 2023 at 12:43:58PM +0200, Frederic Weisbecker wrote:
> On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > @@ -191,6 +192,20 @@ static void tlb_remove_table_smp_sync(void *arg)
> > /* Simply deliver the interrupt */
> >  }
> >  
> > +
> > +#ifdef CONFIG_CONTEXT_TRACKING
> > +static bool cpu_in_kernel(int cpu, void *info)
> > +{
> > +   struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
> 
> Like Peter said, an smp_mb() is required here before the read (unless there is
> already one between the page table modification and that ct->state read?).
> 
> So that you have this pairing:
> 
> 
>WRITE page_table  WRITE ct->state
>  smp_mb()  smp_mb() // implied by 
> atomic_fetch_or
>READ ct->stateREAD page_table
> 
> > +   int state = atomic_read(&ct->state);
> > +   /* will return true only for cpus in kernel space */
> > +   return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > +}
> 
> Also note that this doesn't strictly prevent userspace from being interrupted.
> You may well observe the CPU in kernel but it may receive the IPI later after
> switching to userspace.
> 
> We could arrange for avoiding that with marking ct->state with a pending work 
> bit
> to flush upon user entry/exit but that's a bit more overhead so I first need 
> to
> know about your expectations here, ie: can you tolerate such an occasional
> interruption or not?

Two points:

1) For a virtualized system, the overhead is not only of executing the
IPI but:

VM-exit
run VM-exit code in host
handle IPI
run VM-entry code in host
VM-entry

2) Depends on the application and the definition of "occasional".

For certain types of applications (for example PLC software or
RAN processing), upon occurrence of an event, it is necessary to
complete a certain task in a maximum amount of time (deadline).

One way to express this requirement is with a pair of numbers,
deadline time and execution time, where:

* deadline time: length of time between event and deadline.
* execution time: length of time it takes for processing of event
  to occur on a particular hardware platform
  (uninterrupted).





Re: [PATCH v2 0/5] locking: Introduce local{,64}_try_cmpxchg

2023-04-05 Thread Uros Bizjak
On Wed, Apr 5, 2023 at 6:37 PM Dave Hansen  wrote:
>
> On 4/5/23 07:17, Uros Bizjak wrote:
> > Add generic and target specific support for local{,64}_try_cmpxchg
> > and wire up support for all targets that use local_t infrastructure.
>
> I feel like I'm missing some context.
>
> What are the actual end user visible effects of this series?  Is there a
> measurable decrease in perf overhead?  Why go to all this trouble for
> perf?  Who else will use local_try_cmpxchg()?

This functionality was requested by perf people [1], so perhaps Steven
can give us some concrete examples. In general, apart from the removal
of unneeded compare instruction on x86, usage of try_cmpxchg also
results in slightly better code on non-x86 targets [2], since the code
now correctly identifies fast-path through the cmpxchg loop.

Also important is that try_cmpxchg code reuses the result of cmpxchg
instruction in the loop, so a read from the memory in the loop is
eliminated. When reviewing the cmpxchg usage sites, I found numerous
places where unnecessary read from memory was present in the loop, two
examples can be seen in the last patch of this series.

Also, using try_cmpxchg prevents inconsistencies of the cmpxchg loop,
where the result of the cmpxchg is compared with the wrong "old" value
- one such bug is still lurking in x86 APIC code, please see [3].

Please note that apart from perf subsystem, event subsystem can also
be improved by using local_try_cmpxchg. This is the reason that the
last patch includes a change in events/core.c.

> I'm all for improving things, and perf is an important user.  But, if
> the goal here is improving performance, it would be nice to see at least
> a stab at quantifying the performance delta.

[1] https://lore.kernel.org/lkml/20230301131831.6c8d4...@gandalf.local.home/
[2] 
https://lore.kernel.org/lkml/yo91omfdzttgx...@fvff77s0q05n.cambridge.arm.com/
[3] https://lore.kernel.org/lkml/20230227160917.107820-1-ubiz...@gmail.com/

Uros.


Re: [PATCH 01/18] fbdev: Prepare generic architecture helpers

2023-04-05 Thread Thomas Zimmermann

Hi

Am 05.04.23 um 17:53 schrieb Arnd Bergmann:

On Wed, Apr 5, 2023, at 17:05, Thomas Zimmermann wrote:

Generic implementations of fb_pgprotect() and fb_is_primary_device()
have been in the source code for a long time. Prepare the header file
to make use of them.

Improve the code by using an inline function for fb_pgprotect() and
by removing include statements.

Symbols are protected by preprocessor guards. Architectures that
provide a symbol need to define a preprocessor token of the same
name and value. Otherwise the header file will provide a generic
implementation. This pattern has been taken from .

Signed-off-by: Thomas Zimmermann 


Moving this into generic code is good, but I'm not sure
about the default for fb_pgprotect():


+
+#ifndef fb_pgprotect
+#define fb_pgprotect fb_pgprotect
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+   unsigned long off)
+{ }
+#endif


I think most architectures will want the version we have on
arc, arm, arm64, loongarch, and sh already:

static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
 unsigned long off)
{
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
}

so I'd suggest making that version the default, and treating the
empty ones (m68knommu, sparc32) as architecture specific
workarounds.


Make sense, thanks for the feedback. I'll send out an update soon.

Best regards
Thomas



I see that sparc64 and parisc use pgprot_uncached here, but as
they don't define a custom pgprot_writecombine, this ends up being
the same, and they can use the above definition as well.

mips defines pgprot_writecombine but uses pgprot_noncached
in fb_pgprotect(), which is probably a mistake and should have
been updated as part of commit 4b050ba7a66c ("MIPS: pgtable.h:
Implement the pgprot_writecombine function for MIPS").

 Arnd


--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH v2 0/5] locking: Introduce local{,64}_try_cmpxchg

2023-04-05 Thread Dave Hansen
On 4/5/23 07:17, Uros Bizjak wrote:
> Add generic and target specific support for local{,64}_try_cmpxchg
> and wire up support for all targets that use local_t infrastructure.

I feel like I'm missing some context.

What are the actual end user visible effects of this series?  Is there a
measurable decrease in perf overhead?  Why go to all this trouble for
perf?  Who else will use local_try_cmpxchg()?

I'm all for improving things, and perf is an important user.  But, if
the goal here is improving performance, it would be nice to see at least
a stab at quantifying the performance delta.


Re: [PATCH 01/18] fbdev: Prepare generic architecture helpers

2023-04-05 Thread Daniel Vetter
On Wed, Apr 05, 2023 at 05:53:03PM +0200, Arnd Bergmann wrote:
> On Wed, Apr 5, 2023, at 17:05, Thomas Zimmermann wrote:
> > Generic implementations of fb_pgprotect() and fb_is_primary_device()
> > have been in the source code for a long time. Prepare the header file
> > to make use of them.
> >
> > Improve the code by using an inline function for fb_pgprotect() and
> > by removing include statements.
> >
> > Symbols are protected by preprocessor guards. Architectures that
> > provide a symbol need to define a preprocessor token of the same
> > name and value. Otherwise the header file will provide a generic
> > implementation. This pattern has been taken from .
> >
> > Signed-off-by: Thomas Zimmermann 
> 
> Moving this into generic code is good, but I'm not sure
> about the default for fb_pgprotect():
> 
> > +
> > +#ifndef fb_pgprotect
> > +#define fb_pgprotect fb_pgprotect
> > +static inline void fb_pgprotect(struct file *file, struct vm_area_struct 
> > *vma,
> > +   unsigned long off)
> > +{ }
> > +#endif
> 
> I think most architectures will want the version we have on
> arc, arm, arm64, loongarch, and sh already:
> 
> static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
> unsigned long off)
> {
>vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> }
> 
> so I'd suggest making that version the default, and treating the
> empty ones (m68knommu, sparc32) as architecture specific
> workarounds.

Yeah I was about to type the same suggestion :-)
-Daniel

 
> I see that sparc64 and parisc use pgprot_uncached here, but as
> they don't define a custom pgprot_writecombine, this ends up being
> the same, and they can use the above definition as well.
> 
> mips defines pgprot_writecombine but uses pgprot_noncached
> in fb_pgprotect(), which is probably a mistake and should have
> been updated as part of commit 4b050ba7a66c ("MIPS: pgtable.h:
> Implement the pgprot_writecombine function for MIPS").
> 
> Arnd

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 01/18] fbdev: Prepare generic architecture helpers

2023-04-05 Thread Arnd Bergmann
On Wed, Apr 5, 2023, at 17:05, Thomas Zimmermann wrote:
> Generic implementations of fb_pgprotect() and fb_is_primary_device()
> have been in the source code for a long time. Prepare the header file
> to make use of them.
>
> Improve the code by using an inline function for fb_pgprotect() and
> by removing include statements.
>
> Symbols are protected by preprocessor guards. Architectures that
> provide a symbol need to define a preprocessor token of the same
> name and value. Otherwise the header file will provide a generic
> implementation. This pattern has been taken from .
>
> Signed-off-by: Thomas Zimmermann 

Moving this into generic code is good, but I'm not sure
about the default for fb_pgprotect():

> +
> +#ifndef fb_pgprotect
> +#define fb_pgprotect fb_pgprotect
> +static inline void fb_pgprotect(struct file *file, struct vm_area_struct 
> *vma,
> + unsigned long off)
> +{ }
> +#endif

I think most architectures will want the version we have on
arc, arm, arm64, loongarch, and sh already:

static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
{
   vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
}

so I'd suggest making that version the default, and treating the
empty ones (m68knommu, sparc32) as architecture specific
workarounds.

I see that sparc64 and parisc use pgprot_uncached here, but as
they don't define a custom pgprot_writecombine, this ends up being
the same, and they can use the above definition as well.

mips defines pgprot_writecombine but uses pgprot_noncached
in fb_pgprotect(), which is probably a mistake and should have
been updated as part of commit 4b050ba7a66c ("MIPS: pgtable.h:
Implement the pgprot_writecombine function for MIPS").

Arnd


Re: [PATCH v4] Kconfig: introduce HAS_IOPORT option and select it as necessary

2023-04-05 Thread Niklas Schnelle
On Thu, 2023-03-23 at 17:33 +0100, Niklas Schnelle wrote:
> We introduce a new HAS_IOPORT Kconfig option to indicate support for I/O
> Port access. In a future patch HAS_IOPORT=n will disable compilation of
> the I/O accessor functions inb()/outb() and friends on architectures
> which can not meaningfully support legacy I/O spaces such as s390.
> 
> The following architectures do not select HAS_IOPORT:
> 
> * ARC
> * C-SKY
> * Hexagon
> * Nios II
> * OpenRISC
> * s390
> * User-Mode Linux
> * Xtensa
> 
> All other architectures select HAS_IOPORT at least conditionally.
> 
> The "depends on" relations on HAS_IOPORT in drivers as well as ifdefs
> for HAS_IOPORT specific sections will be added in subsequent patches on
> a per subsystem basis.
> 
> Co-developed-by: Arnd Bergmann 
> Signed-off-by: Arnd Bergmann 
> Acked-by: Johannes Berg  # for ARCH=um
> Acked-by: Geert Uytterhoeven 
> Signed-off-by: Niklas Schnelle 
> ---
> Note: This patch is the initial patch of a larger series[0]. This patch
> introduces the HAS_IOPORT config option while the rest of the series adds
> driver dependencies and the final patch removes inb() / outb() and friends on
> platforms that don't support them. 
> 
> Thus each of the per-subsystem patches is independent from each other but
> depends on this patch while the final patch depends on the whole series. Thus
> splitting this initial patch off allows the per-subsystem HAS_IOPORT dependency
> addition be merged separately via different trees without breaking the build.
> 
> [0] 
> https://lore.kernel.org/lkml/20230314121216.413434-1-schne...@linux.ibm.com/
> 
> Changes since v3:
> - List archs without HAS_IOPORT in commit message (Arnd)
> - Select HAS_IOPORT for LoongArch (Arnd)
> - Use "select HAS_IOPORT if (E)ISA || .." instead of a "depends on" for (E)ISA
>   for m68k and parisc
> - Select HAS_IOPORT with config GSC on parisc (Arnd)
> - Drop "depends on HAS_IOPORT" for um's config ISA (Johannes)
> - Drop "depends on HAS_IOPORT" for config ISA on x86 and parisc where it is
>   always selected (Arnd)
> 

Gentle ping. As far as I can tell this hasn't been picked to any tree
so far but also hasn't seen complaints so I'm wondering if I should send
a new version of the combined series of this patch plus the added
HAS_IOPORT dependencies per subsystem or wait until this is picked up.

Thanks,
Niklas



[PATCH 18/18] arch/x86: Implement with generic helpers

2023-04-05 Thread Thomas Zimmermann
Include <asm-generic/fb.h> and set the required preprocessor tokens
correctly. x86 now implements its own set of fb helpers, but still
follows the overall pattern.

Signed-off-by: Thomas Zimmermann 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: "H. Peter Anvin" 
---
 arch/x86/include/asm/fb.h | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
index ab4c960146e3..a3fb801f12f1 100644
--- a/arch/x86/include/asm/fb.h
+++ b/arch/x86/include/asm/fb.h
@@ -2,10 +2,11 @@
 #ifndef _ASM_X86_FB_H
 #define _ASM_X86_FB_H
 
-#include 
-#include 
 #include 
 
+struct fb_info;
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
@@ -16,7 +17,11 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
pgprot_val(vma->vm_page_prot) =
prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
 }
+#define fb_pgprotect fb_pgprotect
+
+int fb_is_primary_device(struct fb_info *info);
+#define fb_is_primary_device fb_is_primary_device
 
-extern int fb_is_primary_device(struct fb_info *info);
+#include 
 
 #endif /* _ASM_X86_FB_H */
-- 
2.40.0



[PATCH 16/18] arch/sparc: Implement fb_is_primary_device() in source file

2023-04-05 Thread Thomas Zimmermann
Other architectures implement fb_is_primary_device() in a source
file. Do the same on sparc. No functional changes, but it allows
removing several include statements from <asm/fb.h>.

Signed-off-by: Thomas Zimmermann 
Cc: "David S. Miller" 
---
 arch/sparc/Makefile |  1 +
 arch/sparc/include/asm/fb.h | 22 +-
 arch/sparc/video/Makefile   |  3 +++
 arch/sparc/video/fbdev.c| 24 
 4 files changed, 33 insertions(+), 17 deletions(-)
 create mode 100644 arch/sparc/video/Makefile
 create mode 100644 arch/sparc/video/fbdev.c

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index a4ea5b05f288..95a9211e48e3 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -60,6 +60,7 @@ libs-y += arch/sparc/prom/
 libs-y += arch/sparc/lib/
 
 drivers-$(CONFIG_PM) += arch/sparc/power/
+drivers-$(CONFIG_FB) += arch/sparc/video/
 
 boot := arch/sparc/boot
 
diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h
index f699962e9ddf..e4ef1955b2b6 100644
--- a/arch/sparc/include/asm/fb.h
+++ b/arch/sparc/include/asm/fb.h
@@ -1,12 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _SPARC_FB_H_
 #define _SPARC_FB_H_
-#include 
-#include 
+
 #include 
+
 #include 
 #include 
 
+struct fb_info;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
@@ -15,20 +17,6 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
 #endif
 }
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   struct device *dev = info->device;
-   struct device_node *node;
-
-   if (console_set_on_cmdline)
-   return 0;
-
-   node = dev->of_node;
-   if (node &&
-   node == of_console_device)
-   return 1;
-
-   return 0;
-}
+int fb_is_primary_device(struct fb_info *info);
 
 #endif /* _SPARC_FB_H_ */
diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile
new file mode 100644
index ..6baddbd58e4d
--- /dev/null
+++ b/arch/sparc/video/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_FB) += fbdev.o
diff --git a/arch/sparc/video/fbdev.c b/arch/sparc/video/fbdev.c
new file mode 100644
index ..dadd5799fbb3
--- /dev/null
+++ b/arch/sparc/video/fbdev.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+int fb_is_primary_device(struct fb_info *info)
+{
+   struct device *dev = info->device;
+   struct device_node *node;
+
+   if (console_set_on_cmdline)
+   return 0;
+
+   node = dev->of_node;
+   if (node && node == of_console_device)
+   return 1;
+
+   return 0;
+}
+EXPORT_SYMBOL(fb_is_primary_device);
-- 
2.40.0



[PATCH 17/18] arch/sparc: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_pgprotect() with the generic one
from <asm-generic/fb.h> on 32-bit builds. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: "David S. Miller" 
---
 arch/sparc/include/asm/fb.h | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h
index e4ef1955b2b6..da411d77bafb 100644
--- a/arch/sparc/include/asm/fb.h
+++ b/arch/sparc/include/asm/fb.h
@@ -5,18 +5,22 @@
 #include 
 
 #include 
-#include 
 
 struct fb_info;
+struct file;
 
+#ifdef CONFIG_SPARC64
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
-#ifdef CONFIG_SPARC64
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#endif
 }
+#define fb_pgprotect fb_pgprotect
+#endif
 
 int fb_is_primary_device(struct fb_info *info);
+#define fb_is_primary_device fb_is_primary_device
+
+#include 
 
 #endif /* _SPARC_FB_H_ */
-- 
2.40.0



[PATCH 14/18] arch/powerpc: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
---
 arch/powerpc/include/asm/fb.h | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h
index 6541ab77c5b9..5f1a2e5f7654 100644
--- a/arch/powerpc/include/asm/fb.h
+++ b/arch/powerpc/include/asm/fb.h
@@ -2,8 +2,8 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
 #include 
+
 #include 
 
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
@@ -13,10 +13,8 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
 vma->vm_end - vma->vm_start,
 vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 09/18] video: Remove trailing whitespaces

2023-04-05 Thread Thomas Zimmermann
Fix trailing whitespaces. No functional changes.

Signed-off-by: Thomas Zimmermann 
---
 drivers/video/console/sticon.c  |   4 +-
 drivers/video/console/sticore.c | 102 ++---
 drivers/video/fbdev/sticore.h   |  14 +--
 drivers/video/fbdev/stifb.c | 156 
 4 files changed, 138 insertions(+), 138 deletions(-)

diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index 2cea69418a83..89ad7ade6cf9 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -282,7 +282,7 @@ static void sticon_init(struct vc_data *c, int init)
 vc_cols = sti_onscreen_x(sti) / sti->font->width;
 vc_rows = sti_onscreen_y(sti) / sti->font->height;
 c->vc_can_do_color = 1;
-
+
 if (init) {
c->vc_cols = vc_cols;
c->vc_rows = vc_rows;
@@ -374,7 +374,7 @@ static const struct consw sti_con = {
.con_font_set   = sticon_font_set,
.con_font_default   = sticon_font_default,
.con_build_attr = sticon_build_attr,
-   .con_invert_region  = sticon_invert_region, 
+   .con_invert_region  = sticon_invert_region,
 };
 
 
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index db568f67e4dc..6ea9596a3c4b 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -6,12 +6,12 @@
  * Copyright (C) 2000 Philipp Rumpf 
  * Copyright (C) 2001-2020 Helge Deller 
  * Copyright (C) 2001-2002 Thomas Bogendoerfer 
- * 
+ *
  * TODO:
  * - call STI in virtual mode rather than in real mode
- * - screen blanking with state_mgmt() in text mode STI ? 
+ * - screen blanking with state_mgmt() in text mode STI ?
  * - try to make it work on m68k hp workstations ;)
- * 
+ *
  */
 
 #define pr_fmt(fmt) "%s: " fmt, KBUILD_MODNAME
@@ -66,12 +66,12 @@ static const u8 col_trans[8] = {
 #define c_index(sti, c) ((c) & 0xff)
 
 static const struct sti_init_flags default_init_flags = {
-   .wait   = STI_WAIT, 
+   .wait   = STI_WAIT,
.reset  = 1,
-   .text   = 1, 
+   .text   = 1,
.nontext = 1,
-   .no_chg_bet = 1, 
-   .no_chg_bei = 1, 
+   .no_chg_bet = 1,
+   .no_chg_bei = 1,
.init_cmap_tx = 1,
 };
 
@@ -104,7 +104,7 @@ static int sti_init_graph(struct sti_struct *sti)
pr_err("STI init_graph failed (ret %d, errno %d)\n", ret, err);
return -1;
}
-   
+
return 0;
 }
 
@@ -120,7 +120,7 @@ static void sti_inq_conf(struct sti_struct *sti)
s32 ret;
 
outptr->ext_ptr = STI_PTR(>sti_data->inq_outptr_ext);
-   
+
do {
spin_lock_irqsave(>lock, flags);
memset(inptr, 0, sizeof(*inptr));
@@ -162,9 +162,9 @@ sti_putc(struct sti_struct *sti, int c, int y, int x,
 }
 
 static const struct sti_blkmv_flags clear_blkmv_flags = {
-   .wait   = STI_WAIT, 
-   .color  = 1, 
-   .clear  = 1, 
+   .wait   = STI_WAIT,
+   .color  = 1,
+   .clear  = 1,
 };
 
 void
@@ -185,7 +185,7 @@ sti_set(struct sti_struct *sti, int src_y, int src_x,
struct sti_blkmv_outptr *outptr = >sti_data->blkmv_outptr;
s32 ret;
unsigned long flags;
-   
+
do {
spin_lock_irqsave(>lock, flags);
*inptr = inptr_default;
@@ -224,7 +224,7 @@ sti_clear(struct sti_struct *sti, int src_y, int src_x,
 }
 
 static const struct sti_blkmv_flags default_blkmv_flags = {
-   .wait = STI_WAIT, 
+   .wait = STI_WAIT,
 };
 
 void
@@ -291,14 +291,14 @@ static int __init sti_setup(char *str)
 {
if (str)
strscpy(default_sti_path, str, sizeof(default_sti_path));
-   
+
return 1;
 }
 
 /* Assuming the machine has multiple STI consoles (=graphic cards) which
  * all get detected by sticon, the user may define with the linux kernel
  * parameter sti= which of them will be the initial boot-console.
- *  is a number between 0 and MAX_STI_ROMS, with 0 as the default 
+ *  is a number between 0 and MAX_STI_ROMS, with 0 as the default
  * STI screen.
  */
 __setup("sti=", sti_setup);
@@ -341,13 +341,13 @@ static int sti_font_setup(char *str)
  * should be used by the sticon driver to draw characters to the screen.
  * Possible values are:
  * - sti_font=:
- *  is the name of one of the linux-kernel built-in 
- * framebuffer font names (e.g. VGA8x16, SUN22x18). 
- * This is only available if the fonts have been statically 
compiled 
+ *  is the name of one of the linux-kernel built-in
+ * framebuffer font names (e.g. VGA8x16, SUN22x18).
+ * This is only available if the fonts have been statically 
compiled
  * in with e.g. the CONFIG_FONT_8x16 or CONFIG_FONT_SUN12x22 
options.
  * - sti_font= ( = 1,2,3,...)
  * most STI ROMs have built-in HP specific fonts, which 

[PATCH 08/18] arch/mips: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Thomas Bogendoerfer 
---
 arch/mips/include/asm/fb.h | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/fb.h b/arch/mips/include/asm/fb.h
index bd3f68c9ddfc..6bda0a81d8ca 100644
--- a/arch/mips/include/asm/fb.h
+++ b/arch/mips/include/asm/fb.h
@@ -1,19 +1,17 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 04/18] arch/arm64: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/include/asm/fb.h | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h
index bdc735ee1f67..fc31a5d1f48a 100644
--- a/arch/arm64/include/asm/fb.h
+++ b/arch/arm64/include/asm/fb.h
@@ -5,19 +5,17 @@
 #ifndef __ASM_FB_H_
 #define __ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* __ASM_FB_H_ */
-- 
2.40.0



[PATCH 02/18] arch/arc: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Vineet Gupta 
---
 arch/arc/include/asm/fb.h | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arc/include/asm/fb.h b/arch/arc/include/asm/fb.h
index dc2e303cdbbb..dff149eaecaf 100644
--- a/arch/arc/include/asm/fb.h
+++ b/arch/arc/include/asm/fb.h
@@ -1,20 +1,19 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 12/18] arch/parisc: Implement fb_is_primary_device() under arch/parisc

2023-04-05 Thread Thomas Zimmermann
Move PARISC's implementation of fb_is_primary_device() into the
architecture directory. This is the place of the declaration and
where other architectures implement this function. No functional
changes.

Signed-off-by: Thomas Zimmermann 
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
---
 arch/parisc/Makefile |  2 ++
 arch/parisc/include/asm/fb.h |  2 +-
 arch/parisc/video/Makefile   |  3 +++
 arch/parisc/video/fbdev.c| 27 +++
 drivers/video/sticore.c  | 19 ---
 include/video/sticore.h  |  2 ++
 6 files changed, 35 insertions(+), 20 deletions(-)
 create mode 100644 arch/parisc/video/Makefile
 create mode 100644 arch/parisc/video/fbdev.c

diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 0d049a6f6a60..968ebe17494c 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -119,6 +119,8 @@ export LIBGCC
 
 libs-y += arch/parisc/lib/ $(LIBGCC)
 
+drivers-y += arch/parisc/video/
+
 boot   := arch/parisc/boot
 
 PALO := $(shell if (which palo 2>&1); then : ; \
diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h
index 55d29c4f716e..0b9a38ced5c8 100644
--- a/arch/parisc/include/asm/fb.h
+++ b/arch/parisc/include/asm/fb.h
@@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
 }
 
-#if defined(CONFIG_FB_STI)
+#if defined(CONFIG_STI_CORE)
 int fb_is_primary_device(struct fb_info *info);
 #else
 static inline int fb_is_primary_device(struct fb_info *info)
diff --git a/arch/parisc/video/Makefile b/arch/parisc/video/Makefile
new file mode 100644
index ..16a73cce4661
--- /dev/null
+++ b/arch/parisc/video/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_STI_CORE) += fbdev.o
diff --git a/arch/parisc/video/fbdev.c b/arch/parisc/video/fbdev.c
new file mode 100644
index ..4a0ae08fc75b
--- /dev/null
+++ b/arch/parisc/video/fbdev.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2000 Philipp Rumpf 
+ * Copyright (C) 2001-2020 Helge Deller 
+ * Copyright (C) 2001-2002 Thomas Bogendoerfer 
+ */
+
+#include 
+
+#include 
+
+#include 
+
+int fb_is_primary_device(struct fb_info *info)
+{
+   struct sti_struct *sti;
+
+   sti = sti_get_rom(0);
+
+   /* if no built-in graphics card found, allow any fb driver as default */
+   if (!sti)
+   return true;
+
+   /* return true if it's the default built-in framebuffer driver */
+   return (sti->info == info);
+}
+EXPORT_SYMBOL(fb_is_primary_device);
diff --git a/drivers/video/sticore.c b/drivers/video/sticore.c
index f8aaedea437d..7eb925f2ba9c 100644
--- a/drivers/video/sticore.c
+++ b/drivers/video/sticore.c
@@ -30,7 +30,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 
@@ -1148,24 +1147,6 @@ int sti_call(const struct sti_struct *sti, unsigned long 
func,
return ret;
 }
 
-#if defined(CONFIG_FB_STI)
-/* check if given fb_info is the primary device */
-int fb_is_primary_device(struct fb_info *info)
-{
-   struct sti_struct *sti;
-
-   sti = sti_get_rom(0);
-
-   /* if no built-in graphics card found, allow any fb driver as default */
-   if (!sti)
-   return true;
-
-   /* return true if it's the default built-in framebuffer driver */
-   return (sti->info == info);
-}
-EXPORT_SYMBOL(fb_is_primary_device);
-#endif
-
 MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer");
 MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP 
PARISC machines");
 MODULE_LICENSE("GPL v2");
diff --git a/include/video/sticore.h b/include/video/sticore.h
index c0879352cde4..fbb78d7e7565 100644
--- a/include/video/sticore.h
+++ b/include/video/sticore.h
@@ -2,6 +2,8 @@
 #ifndef STICORE_H
 #define STICORE_H
 
+struct fb_info;
+
 /* generic STI structures & functions */
 
 #define MAX_STI_ROMS 4 /* max no. of ROMs which this driver handles */
-- 
2.40.0



[PATCH 05/18] arch/ia64: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
---
 arch/ia64/include/asm/fb.h | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/include/asm/fb.h b/arch/ia64/include/asm/fb.h
index 5f95782bfa46..0208f64a0da0 100644
--- a/arch/ia64/include/asm/fb.h
+++ b/arch/ia64/include/asm/fb.h
@@ -2,11 +2,12 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
+
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
@@ -15,10 +16,8 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
else
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 06/18] arch/loongarch: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Huacai Chen 
Cc: WANG Xuerui 
---
 arch/loongarch/include/asm/fb.h | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/loongarch/include/asm/fb.h b/arch/loongarch/include/asm/fb.h
index 3116bde8772d..d1c9dd1c6e2e 100644
--- a/arch/loongarch/include/asm/fb.h
+++ b/arch/loongarch/include/asm/fb.h
@@ -5,19 +5,17 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 15/18] arch/sh: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: John Paul Adrian Glaubitz 
---
 arch/sh/include/asm/fb.h | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/sh/include/asm/fb.h b/arch/sh/include/asm/fb.h
index 9a0bca2686fd..1e7b1cfd5b5e 100644
--- a/arch/sh/include/asm/fb.h
+++ b/arch/sh/include/asm/fb.h
@@ -2,19 +2,17 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 13/18] arch/parisc: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h> on systems without CONFIG_STI_CORE. No
functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
---
 arch/parisc/include/asm/fb.h | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h
index 0b9a38ced5c8..66bb401c0cda 100644
--- a/arch/parisc/include/asm/fb.h
+++ b/arch/parisc/include/asm/fb.h
@@ -2,23 +2,24 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
+#include 
+
+struct fb_info;
+struct file;
 
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
 }
+#define fb_pgprotect fb_pgprotect
 
 #if defined(CONFIG_STI_CORE)
 int fb_is_primary_device(struct fb_info *info);
-#else
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#define fb_is_primary_device fb_is_primary_device
 #endif
 
+#include 
+
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 11/18] arch/parisc: Remove trailing whitespaces

2023-04-05 Thread Thomas Zimmermann
Fix trailing whitespaces. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
---
 arch/parisc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index a2d8600521f9..0d049a6f6a60 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -11,7 +11,7 @@
 # Copyright (C) 1994 by Linus Torvalds
 # Portions Copyright (C) 1999 The Puffin Group
 #
-# Modified for PA-RISC Linux by Paul Lahaie, Alex deVries, 
+# Modified for PA-RISC Linux by Paul Lahaie, Alex deVries,
 # Mike Shaver, Helge Deller and Martin K. Petersen
 #
 
-- 
2.40.0



[PATCH 07/18] arch/m68k: Implement <asm/fb.h> with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes. Also use the
generic helper for fb_pgprotect() on systems without MMU.

Signed-off-by: Thomas Zimmermann 
Cc: Geert Uytterhoeven 
---
 arch/m68k/include/asm/fb.h | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/m68k/include/asm/fb.h b/arch/m68k/include/asm/fb.h
index b86c6e2e26dd..f15a14e36826 100644
--- a/arch/m68k/include/asm/fb.h
+++ b/arch/m68k/include/asm/fb.h
@@ -2,8 +2,8 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
 #include 
+
 #include 
 #include 
 
@@ -27,13 +27,9 @@ static inline void fb_pgprotect(struct file *file, struct 
vm_area_struct *vma,
}
 }
 #endif /* CONFIG_SUN3 */
-#else
-#define fb_pgprotect(...) do {} while (0)
+#define fb_pgprotect fb_pgprotect
 #endif /* CONFIG_MMU */
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include 
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH 10/18] video: Move HP PARISC STI core code to shared location

2023-04-05 Thread Thomas Zimmermann
STI core files have been located in console and fbdev code. Move
the source code and header to the directories for video helpers.
Also update the config and build rules such that the code depends
on the config symbol CONFIG_STI_CORE, which STI console and STI
framebuffer select automatically.

Cleans up the console makefile and prepares PARISC to implement
fb_is_primary_device() within the arch/ directory. No functional
changes.

Signed-off-by: Thomas Zimmermann 
---
 drivers/video/Kconfig| 7 +++
 drivers/video/Makefile   | 1 +
 drivers/video/console/Kconfig| 1 +
 drivers/video/console/Makefile   | 4 +---
 drivers/video/console/sticon.c   | 2 +-
 drivers/video/fbdev/Kconfig  | 3 +--
 drivers/video/fbdev/stifb.c  | 2 +-
 drivers/video/{console => }/sticore.c| 2 +-
 {drivers/video/fbdev => include/video}/sticore.h | 0
 9 files changed, 14 insertions(+), 8 deletions(-)
 rename drivers/video/{console => }/sticore.c (99%)
 rename {drivers/video/fbdev => include/video}/sticore.h (100%)

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index bf05363d8906..8b2b9ac37c3d 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -11,6 +11,13 @@ config APERTURE_HELPERS
  Support tracking and hand-over of aperture ownership. Required
  by graphics drivers for firmware-provided framebuffers.
 
+config STI_CORE
+   bool
+   depends on PARISC
+   help
+ STI refers to the HP "Standard Text Interface" which is a set of
+ BIOS routines contained in a ROM chip in HP PA-RISC based machines.
+
 config VIDEO_CMDLINE
bool
 
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 831c9fa57a6c..6bbc03950899 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_APERTURE_HELPERS)+= aperture.o
+obj-$(CONFIG_STI_CORE)+= sticore.o
 obj-$(CONFIG_VGASTATE)+= vgastate.o
 obj-$(CONFIG_VIDEO_CMDLINE)   += cmdline.o
 obj-$(CONFIG_VIDEO_NOMODESET) += nomodeset.o
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 22cea5082ac4..a2a88d42edf0 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -141,6 +141,7 @@ config STI_CONSOLE
depends on PARISC && HAS_IOMEM
select FONT_SUPPORT
select CRC32
+   select STI_CORE
default y
help
  The STI console is the builtin display/keyboard on HP-PARISC
diff --git a/drivers/video/console/Makefile b/drivers/video/console/Makefile
index db07b784bd2c..fd79016a0d95 100644
--- a/drivers/video/console/Makefile
+++ b/drivers/video/console/Makefile
@@ -5,8 +5,6 @@
 
 obj-$(CONFIG_DUMMY_CONSOLE)   += dummycon.o
 obj-$(CONFIG_SGI_NEWPORT_CONSOLE) += newport_con.o
-obj-$(CONFIG_STI_CONSOLE) += sticon.o sticore.o
+obj-$(CONFIG_STI_CONSOLE) += sticon.o
 obj-$(CONFIG_VGA_CONSOLE) += vgacon.o
 obj-$(CONFIG_MDA_CONSOLE) += mdacon.o
-
-obj-$(CONFIG_FB_STI)  += sticore.o
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index 89ad7ade6cf9..d11cfd2d68b5 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -50,7 +50,7 @@
 
 #include 
 
-#include "../fbdev/sticore.h"
+#include 
 
 /* switching to graphics mode */
 #define BLANK 0
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 96e91570cdd3..485e8c35d5c6 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -551,10 +551,9 @@ config FB_STI
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+   select STI_CORE
default y
help
- STI refers to the HP "Standard Text Interface" which is a set of
- BIOS routines contained in a ROM chip in HP PA-RISC based machines.
  Enabling this option will implement the linux framebuffer device
  using calls to the STI BIOS routines for initialisation.
 
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 6bc7e6d9..baca6974e288 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -69,7 +69,7 @@
 #include   /* for HP-UX compatibility */
 #include 
 
-#include "sticore.h"
+#include 
 
 /* REGION_BASE(fb_info, index) returns the virtual address for region  
*/
 #define REGION_BASE(fb_info, index) \
diff --git a/drivers/video/console/sticore.c b/drivers/video/sticore.c
similarity index 99%
rename from drivers/video/console/sticore.c
rename to drivers/video/sticore.c
index 6ea9596a3c4b..f8aaedea437d 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/sticore.c
@@ -32,7 +32,7 @@
 #include 
 #include 
 
-#include "../fbdev/sticore.h"
+#include 
 
 #define STI_DRIVERVERSION "Version 

[PATCH 00/18] arch: Consolidate <asm/fb.h>

2023-04-05 Thread Thomas Zimmermann
Various architectures provide <asm/fb.h> with helpers for fbdev
framebuffer devices. Share the contained code where possible. There
is already <asm-generic/fb.h>, which implements generic (as in
'empty') functions of the fbdev helpers. The header was added in
commit aafe4dbed0bf ("asm-generic: add generic versions of common
headers"), but never used.

Each per-architecture header file declares and/or implements fbdev
helpers and defines a preprocessor token for each. The generic
header then provides the remaining helpers. It works like the I/O
helpers in <asm/io.h>.

For PARISC, the architecture helpers are mixed up with helpers
for the system's STI graphics firmware. We first move the STI code
to appropriate locations under video/ and then move the architecture
helper under arch/parisc.

For Sparc, there's an additional patch that moves the implementation
from the header into a source file. This allows to avoid some include
statements in the header file.

Built on arm, arm64, m68k, mips, parisc, powerpc, sparc and x86.

Thomas Zimmermann (18):
  fbdev: Prepare generic architecture helpers
  arch/arc: Implement <asm/fb.h> with generic helpers
  arch/arm: Implement <asm/fb.h> with generic helpers
  arch/arm64: Implement <asm/fb.h> with generic helpers
  arch/ia64: Implement <asm/fb.h> with generic helpers
  arch/loongarch: Implement <asm/fb.h> with generic helpers
  arch/m68k: Implement <asm/fb.h> with generic helpers
  arch/mips: Implement <asm/fb.h> with generic helpers
  video: Remove trailing whitespaces
  video: Move HP PARISC STI core code to shared location
  arch/parisc: Remove trailing whitespaces
  arch/parisc: Implement fb_is_primary_device() under arch/parisc
  arch/parisc: Implement <asm/fb.h> with generic helpers
  arch/powerpc: Implement <asm/fb.h> with generic helpers
  arch/sh: Implement <asm/fb.h> with generic helpers
  arch/sparc: Implement fb_is_primary_device() in source file
  arch/sparc: Implement <asm/fb.h> with generic helpers
  arch/x86: Implement <asm/fb.h> with generic helpers

 arch/arc/include/asm/fb.h |  11 +-
 arch/arm/include/asm/fb.h |  10 +-
 arch/arm64/include/asm/fb.h   |  10 +-
 arch/ia64/include/asm/fb.h|  11 +-
 arch/loongarch/include/asm/fb.h   |  10 +-
 arch/m68k/include/asm/fb.h|  10 +-
 arch/mips/include/asm/fb.h|  10 +-
 arch/parisc/Makefile  |   4 +-
 arch/parisc/include/asm/fb.h  |  17 +-
 arch/parisc/video/Makefile|   3 +
 arch/parisc/video/fbdev.c |  27 +++
 arch/powerpc/include/asm/fb.h |   8 +-
 arch/sh/include/asm/fb.h  |  10 +-
 arch/sparc/Makefile   |   1 +
 arch/sparc/include/asm/fb.h   |  30 ++--
 arch/sparc/video/Makefile |   3 +
 arch/sparc/video/fbdev.c  |  24 +++
 arch/x86/include/asm/fb.h |  11 +-
 drivers/video/Kconfig |   7 +
 drivers/video/Makefile|   1 +
 drivers/video/console/Kconfig |   1 +
 drivers/video/console/Makefile|   4 +-
 drivers/video/console/sticon.c|   6 +-
 drivers/video/fbdev/Kconfig   |   3 +-
 drivers/video/fbdev/stifb.c   | 158 +-
 drivers/video/{console => }/sticore.c | 123 ++
 include/asm-generic/fb.h  |  20 ++-
 .../video/fbdev => include/video}/sticore.h   |  16 +-
 28 files changed, 297 insertions(+), 252 deletions(-)
 create mode 100644 arch/parisc/video/Makefile
 create mode 100644 arch/parisc/video/fbdev.c
 create mode 100644 arch/sparc/video/Makefile
 create mode 100644 arch/sparc/video/fbdev.c
 rename drivers/video/{console => }/sticore.c (95%)
 rename {drivers/video/fbdev => include/video}/sticore.h (99%)


base-commit: a7180debb9c631375684f4d717466cfb9f238660
-- 
2.40.0



[PATCH 01/18] fbdev: Prepare generic architecture helpers

2023-04-05 Thread Thomas Zimmermann
Generic implementations of fb_pgprotect() and fb_is_primary_device()
have been in the source code for a long time. Prepare the header file
to make use of them.

Improve the code by using an inline function for fb_pgprotect() and
by removing include statements.

Symbols are protected by preprocessor guards. Architectures that
provide a symbol need to define a preprocessor token of the same
name and value. Otherwise the header file will provide a generic
implementation. This pattern has been taken from <asm/io.h>.

Signed-off-by: Thomas Zimmermann 
---
 include/asm-generic/fb.h | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/fb.h b/include/asm-generic/fb.h
index f9f18101ed36..cb42166e7e11 100644
--- a/include/asm-generic/fb.h
+++ b/include/asm-generic/fb.h
@@ -1,13 +1,29 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
 #ifndef __ASM_GENERIC_FB_H_
 #define __ASM_GENERIC_FB_H_
-#include 
 
-#define fb_pgprotect(...) do {} while (0)
+/*
+ * Only include this header file from your architecture's <asm/fb.h>.
+ */
+
+struct fb_info;
+struct file;
+struct vm_area_struct;
+
+#ifndef fb_pgprotect
+#define fb_pgprotect fb_pgprotect
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+   unsigned long off)
+{ }
+#endif
 
+#ifndef fb_is_primary_device
+#define fb_is_primary_device fb_is_primary_device
 static inline int fb_is_primary_device(struct fb_info *info)
 {
return 0;
 }
+#endif
 
 #endif /* __ASM_GENERIC_FB_H_ */
-- 
2.40.0



[PATCH 03/18] arch/arm: Implement with generic helpers

2023-04-05 Thread Thomas Zimmermann
Replace the architecture's fb_is_primary_device() with the generic
one from <asm-generic/fb.h>. No functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Russell King 
---
 arch/arm/include/asm/fb.h | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/fb.h b/arch/arm/include/asm/fb.h
index d92e99cd8c8a..a341d76e6d8f 100644
--- a/arch/arm/include/asm/fb.h
+++ b/arch/arm/include/asm/fb.h
@@ -1,19 +1,17 @@
 #ifndef _ASM_FB_H_
 #define _ASM_FB_H_
 
-#include 
-#include 
 #include 
 
+struct file;
+
 static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
 {
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 }
+#define fb_pgprotect fb_pgprotect
 
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-   return 0;
-}
+#include <asm-generic/fb.h>
 
 #endif /* _ASM_FB_H_ */
-- 
2.40.0



[PATCH v2 5/5] events: Illustrate the transition to local{,64}_try_cmpxchg

2023-04-05 Thread Uros Bizjak
This patch illustrates the transition to local{,64}_try_cmpxchg.
It is not intended to be merged as-is.

Signed-off-by: Uros Bizjak 
---
 arch/x86/events/core.c  | 9 -
 kernel/events/ring_buffer.c | 5 +++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..d9310e9363f1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -129,13 +129,12 @@ u64 x86_perf_event_update(struct perf_event *event)
 * exchange a new raw count - then add that new-prev delta
 * count to the generic event atomically:
 */
-again:
prev_raw_count = local64_read(>prev_count);
-   rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 
-   if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-   new_raw_count) != prev_raw_count)
-   goto again;
+   do {
+   rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+   } while (!local64_try_cmpxchg(&hwc->prev_count, &prev_raw_count,
+ new_raw_count));
 
/*
 * Now we have the new raw value and have updated the prev
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 273a0fe7910a..111ab85ee97d 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -191,9 +191,10 @@ __perf_output_begin(struct perf_output_handle *handle,
 
perf_output_get_handle(handle);
 
+   offset = local_read(&rb->head);
do {
tail = READ_ONCE(rb->user_page->data_tail);
-   offset = head = local_read(&rb->head);
+   head = offset;
if (!rb->overwrite) {
if (unlikely(!ring_buffer_has_space(head, tail,
perf_data_size(rb),
@@ -217,7 +218,7 @@ __perf_output_begin(struct perf_output_handle *handle,
head += size;
else
head -= size;
-   } while (local_cmpxchg(&rb->head, offset, head) != offset);
+   } while (!local_try_cmpxchg(&rb->head, &offset, head));
 
if (backward) {
offset = head;
-- 
2.39.2



[PATCH v2 4/5] locking/x86: Define arch_try_cmpxchg_local

2023-04-05 Thread Uros Bizjak
Define target specific arch_try_cmpxchg_local. This
definition overrides the generic arch_try_cmpxchg_local
fallback definition and enables target-specific
implementation of try_cmpxchg_local.

Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: "H. Peter Anvin" 
Signed-off-by: Uros Bizjak 
---
 arch/x86/include/asm/cmpxchg.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 94fbe6ae7431..540573f515b7 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -221,9 +221,15 @@ extern void __add_wrong_size(void)
 #define __try_cmpxchg(ptr, pold, new, size)\
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
 
+#define __try_cmpxchg_local(ptr, pold, new, size)  \
+   __raw_try_cmpxchg((ptr), (pold), (new), (size), "")
+
 #define arch_try_cmpxchg(ptr, pold, new)   \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
 
+#define arch_try_cmpxchg_local(ptr, pold, new) \
+   __try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
+
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
  * value of "*ptr".
-- 
2.39.2



[PATCH v2 3/5] locking/arch: Wire up local_try_cmpxchg

2023-04-05 Thread Uros Bizjak
Implement target specific support for local_try_cmpxchg
and local_cmpxchg using typed C wrappers that call their
_local counterpart and provide additional checking of
their input arguments.

Cc: Richard Henderson 
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: Huacai Chen 
Cc: WANG Xuerui 
Cc: Jiaxun Yang 
Cc: Jun Yi 
Cc: Thomas Bogendoerfer 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: "H. Peter Anvin" 
Signed-off-by: Uros Bizjak 
---
 arch/alpha/include/asm/local.h | 12 ++--
 arch/loongarch/include/asm/local.h | 13 +++--
 arch/mips/include/asm/local.h  | 13 +++--
 arch/powerpc/include/asm/local.h   | 11 +++
 arch/x86/include/asm/local.h   | 13 +++--
 5 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h
index fab26a1c93d5..0fcaad642cc3 100644
--- a/arch/alpha/include/asm/local.h
+++ b/arch/alpha/include/asm/local.h
@@ -52,8 +52,16 @@ static __inline__ long local_sub_return(long i, local_t * l)
return result;
 }
 
-#define local_cmpxchg(l, o, n) \
-   (cmpxchg_local(&((l)->a.counter), (o), (n)))
+static __inline__ long local_cmpxchg(local_t *l, long old, long new)
+{
+   return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+   return try_cmpxchg_local(&l->a.counter, (s64 *)old, new);
+}
+
 #define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
 
 /**
diff --git a/arch/loongarch/include/asm/local.h 
b/arch/loongarch/include/asm/local.h
index 65fbbae9fc4d..83e995b30e47 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -56,8 +56,17 @@ static inline long local_sub_return(long i, local_t *l)
return result;
 }
 
-#define local_cmpxchg(l, o, n) \
-   ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+static inline long local_cmpxchg(local_t *l, long old, long new)
+{
+   return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+   typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
+   return try_cmpxchg_local(&l->a.counter, __old, new);
+}
+
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
 
 /**
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index 08366b1fd273..5daf6fe8e3e9 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -94,8 +94,17 @@ static __inline__ long local_sub_return(long i, local_t * l)
return result;
 }
 
-#define local_cmpxchg(l, o, n) \
-   ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+static __inline__ long local_cmpxchg(local_t *l, long old, long new)
+{
+   return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+   typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
+   return try_cmpxchg_local(&l->a.counter, __old, new);
+}
+
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
 
 /**
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index bc4bd19b7fc2..45492fb5bf22 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -90,6 +90,17 @@ static __inline__ long local_cmpxchg(local_t *l, long o, 
long n)
return t;
 }
 
+static __inline__ bool local_try_cmpxchg(local_t *l, long *po, long n)
+{
+   long o = *po, r;
+
+   r = local_cmpxchg(l, o, n);
+   if (unlikely(r != o))
+   *po = r;
+
+   return likely(r == o);
+}
+
 static __inline__ long local_xchg(local_t *l, long n)
 {
long t;
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 349a47acaa4a..56d4ef604b91 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -120,8 +120,17 @@ static inline long local_sub_return(long i, local_t *l)
 #define local_inc_return(l)  (local_add_return(1, l))
 #define local_dec_return(l)  (local_sub_return(1, l))
 
-#define local_cmpxchg(l, o, n) \
-   (cmpxchg_local(&((l)->a.counter), (o), (n)))
+static inline long local_cmpxchg(local_t *l, long old, long new)
+{
+   return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+   typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
+   return try_cmpxchg_local(&l->a.counter, __old, new);
+}
+
 /* Always has a lock prefix */
 #define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
 
-- 
2.39.2



[PATCH v2 0/5] locking: Introduce local{,64}_try_cmpxchg

2023-04-05 Thread Uros Bizjak
Add generic and target specific support for local{,64}_try_cmpxchg
and wire up support for all targets that use local_t infrastructure.

The patch enables x86 targets to emit special instruction for
local_try_cmpxchg and also local64_try_cmpxchg for x86_64.

The last patch changes __perf_output_begin in events/ring_buffer
to use new locking primitive and improves code from

 4b3:   48 8b 82 e8 00 00 00mov0xe8(%rdx),%rax
 4ba:   48 8b b8 08 04 00 00mov0x408(%rax),%rdi
 4c1:   8b 42 1cmov0x1c(%rdx),%eax
 4c4:   48 8b 4a 28 mov0x28(%rdx),%rcx
 4c8:   85 c0   test   %eax,%eax
 ...
 4ef:   48 89 c8mov%rcx,%rax
 4f2:   48 0f b1 7a 28  cmpxchg %rdi,0x28(%rdx)
 4f7:   48 39 c1cmp%rax,%rcx
 4fa:   75 b7   jne4b3 <...>

to

 4b2:   48 8b 4a 28 mov0x28(%rdx),%rcx
 4b6:   48 8b 82 e8 00 00 00mov0xe8(%rdx),%rax
 4bd:   48 8b b0 08 04 00 00mov0x408(%rax),%rsi
 4c4:   8b 42 1cmov0x1c(%rdx),%eax
 4c7:   85 c0   test   %eax,%eax
 ...
 4d4:   48 89 c8mov%rcx,%rax
 4d7:   48 0f b1 72 28  cmpxchg %rsi,0x28(%rdx)
 4dc:   0f 85 d0 00 00 00   jne5b2 <...>
 ...
 5b2:   48 89 c1mov%rax,%rcx
 5b5:   e9 fc fe ff ff  jmp4b6 <...>

Please note that in addition to removed compare, the load from
0x28(%rdx) gets moved out of the loop and the code is rearranged
according to likely/unlikely tags in the source.
---
v2:

Implement target specific support for local_try_cmpxchg and
local_cmpxchg using typed C wrappers that call their _local
counterpart and provide additional checking of their input
arguments.

Cc: Richard Henderson 
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: Huacai Chen 
Cc: WANG Xuerui 
Cc: Thomas Bogendoerfer 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: Dave Hansen 
Cc: "H. Peter Anvin" 
Cc: Arnd Bergmann 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Mark Rutland 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Ian Rogers 
Cc: Will Deacon 
Cc: Boqun Feng 
Cc: Jiaxun Yang 
Cc: Jun Yi 

Uros Bizjak (5):
  locking/atomic: Add generic try_cmpxchg{,64}_local support
  locking/generic: Wire up local{,64}_try_cmpxchg
  locking/arch: Wire up local_try_cmpxchg
  locking/x86: Define arch_try_cmpxchg_local
  events: Illustrate the transition to local{,64}_try_cmpxchg

 arch/alpha/include/asm/local.h  | 12 +--
 arch/loongarch/include/asm/local.h  | 13 +--
 arch/mips/include/asm/local.h   | 13 +--
 arch/powerpc/include/asm/local.h| 11 ++
 arch/x86/events/core.c  |  9 
 arch/x86/include/asm/cmpxchg.h  |  6 ++
 arch/x86/include/asm/local.h| 13 +--
 include/asm-generic/local.h |  1 +
 include/asm-generic/local64.h   | 12 ++-
 include/linux/atomic/atomic-arch-fallback.h | 24 -
 include/linux/atomic/atomic-instrumented.h  | 20 -
 kernel/events/ring_buffer.c |  5 +++--
 scripts/atomic/gen-atomic-fallback.sh   |  4 
 scripts/atomic/gen-atomic-instrumented.sh   |  2 +-
 14 files changed, 126 insertions(+), 19 deletions(-)

-- 
2.39.2



[PATCH v2 2/5] locking/generic: Wire up local{,64}_try_cmpxchg

2023-04-05 Thread Uros Bizjak
Implement generic support for local{,64}_try_cmpxchg.

Redirect to the atomic_ family of functions when the target
does not provide its own local.h definitions.

For 64-bit targets, implement local64_try_cmpxchg and
local64_cmpxchg using typed C wrappers that call local_
family of functions and provide additional checking
of their input arguments.

Cc: Arnd Bergmann 
Signed-off-by: Uros Bizjak 
---
 include/asm-generic/local.h   |  1 +
 include/asm-generic/local64.h | 12 +++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h
index fca7f1d84818..7f97018df66f 100644
--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -42,6 +42,7 @@ typedef struct
 #define local_inc_return(l) atomic_long_inc_return(&(l)->a)
 
 #define local_cmpxchg(l, o, n) atomic_long_cmpxchg((&(l)->a), (o), (n))
+#define local_try_cmpxchg(l, po, n) atomic_long_try_cmpxchg((&(l)->a), (po), 
(n))
 #define local_xchg(l, n) atomic_long_xchg((&(l)->a), (n))
 #define local_add_unless(l, _a, u) atomic_long_add_unless((&(l)->a), (_a), (u))
 #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
index 765be0b7d883..14963a7a6253 100644
--- a/include/asm-generic/local64.h
+++ b/include/asm-generic/local64.h
@@ -42,7 +42,16 @@ typedef struct {
 #define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
 #define local64_inc_return(l)  local_inc_return(&(l)->a)
 
-#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+static inline s64 local64_cmpxchg(local64_t *l, s64 old, s64 new)
+{
+   return local_cmpxchg(&l->a, old, new);
+}
+
+static inline bool local64_try_cmpxchg(local64_t *l, s64 *old, s64 new)
+{
+   return local_try_cmpxchg(&l->a, (long *)old, new);
+}
+
 #define local64_xchg(l, n) local_xchg((&(l)->a), (n))
 #define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
 #define local64_inc_not_zero(l)local_inc_not_zero(&(l)->a)
@@ -81,6 +90,7 @@ typedef struct {
 #define local64_inc_return(l)  atomic64_inc_return(&(l)->a)
 
 #define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_try_cmpxchg(l, po, n) atomic64_try_cmpxchg((&(l)->a), (po), 
(n))
 #define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n))
 #define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
 #define local64_inc_not_zero(l)atomic64_inc_not_zero(&(l)->a)
-- 
2.39.2



[PATCH v2 1/5] locking/atomic: Add generic try_cmpxchg{,64}_local support

2023-04-05 Thread Uros Bizjak
Add generic support for try_cmpxchg{,64}_local and their fallbacks.

These provide the generic try_cmpxchg_local family of functions
from the arch_ prefixed version, also adding explicit instrumentation.

Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Boqun Feng 
Cc: Mark Rutland 
Signed-off-by: Uros Bizjak 
---
 include/linux/atomic/atomic-arch-fallback.h | 24 -
 include/linux/atomic/atomic-instrumented.h  | 20 -
 scripts/atomic/gen-atomic-fallback.sh   |  4 
 scripts/atomic/gen-atomic-instrumented.sh   |  2 +-
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/include/linux/atomic/atomic-arch-fallback.h 
b/include/linux/atomic/atomic-arch-fallback.h
index 77bc5522e61c..36c92851cdee 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -217,6 +217,28 @@
 
 #endif /* arch_try_cmpxchg64_relaxed */
 
+#ifndef arch_try_cmpxchg_local
+#define arch_try_cmpxchg_local(_ptr, _oldp, _new) \
+({ \
+   typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+   ___r = arch_cmpxchg_local((_ptr), ___o, (_new)); \
+   if (unlikely(___r != ___o)) \
+   *___op = ___r; \
+   likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_local */
+
+#ifndef arch_try_cmpxchg64_local
+#define arch_try_cmpxchg64_local(_ptr, _oldp, _new) \
+({ \
+   typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+   ___r = arch_cmpxchg64_local((_ptr), ___o, (_new)); \
+   if (unlikely(___r != ___o)) \
+   *___op = ___r; \
+   likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_local */
+
 #ifndef arch_atomic_read_acquire
 static __always_inline int
 arch_atomic_read_acquire(const atomic_t *v)
@@ -2456,4 +2478,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
 #endif
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// b5e87bdd5ede61470c29f7a7e4de781af3770f09
+// 1f49bd4895a4b7a5383906649027205c52ec80ab
diff --git a/include/linux/atomic/atomic-instrumented.h 
b/include/linux/atomic/atomic-instrumented.h
index 7a139ec030b0..14a9212cc987 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -2066,6 +2066,24 @@ atomic_long_dec_if_positive(atomic_long_t *v)
arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \
 })
 
+#define try_cmpxchg_local(ptr, oldp, ...) \
+({ \
+   typeof(ptr) __ai_ptr = (ptr); \
+   typeof(oldp) __ai_oldp = (oldp); \
+   instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+   instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+   arch_try_cmpxchg_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_local(ptr, oldp, ...) \
+({ \
+   typeof(ptr) __ai_ptr = (ptr); \
+   typeof(oldp) __ai_oldp = (oldp); \
+   instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+   instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+   arch_try_cmpxchg64_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
 #define cmpxchg_double(ptr, ...) \
 ({ \
typeof(ptr) __ai_ptr = (ptr); \
@@ -2083,4 +2101,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 })
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
+// 456e206c7e4e681126c482e4edcc6f46921ac731
diff --git a/scripts/atomic/gen-atomic-fallback.sh 
b/scripts/atomic/gen-atomic-fallback.sh
index 3a07695e3c89..6e853f0dad8d 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -225,6 +225,10 @@ for cmpxchg in "cmpxchg" "cmpxchg64"; do
gen_try_cmpxchg_fallbacks "${cmpxchg}"
 done
 
+for cmpxchg in "cmpxchg_local" "cmpxchg64_local"; do
+   gen_try_cmpxchg_fallback "${cmpxchg}" ""
+done
+
 grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "atomic" "int" ${args}
 done
diff --git a/scripts/atomic/gen-atomic-instrumented.sh 
b/scripts/atomic/gen-atomic-instrumented.sh
index 77c06526a574..c8165e9431bf 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -173,7 +173,7 @@ for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg" 
"try_cmpxchg64"; do
done
 done
 
-for xchg in "cmpxchg_local" "cmpxchg64_local" "sync_cmpxchg"; do
+for xchg in "cmpxchg_local" "cmpxchg64_local" "sync_cmpxchg" 
"try_cmpxchg_local" "try_cmpxchg64_local" ; do
gen_xchg "${xchg}" "" ""
printf "\n"
 done
-- 
2.39.2



Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Valentin Schneider
On 05/04/23 14:05, Frederic Weisbecker wrote:
>  static void smp_call_function_many_cond(const struct cpumask *mask,
>   smp_call_func_t func, void *info,
> @@ -946,10 +948,13 @@ static void smp_call_function_many_cond(const struct 
> cpumask *mask,
>  #endif
>   cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, 
> CFD_SEQ_QUEUE);
>   if (llist_add(>node.llist, 
> _cpu(call_single_queue, cpu))) {
> - __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
> - nr_cpus++;
> - last_cpu = cpu;
> -
> + if (!(scf_flags & SCF_NO_USER) ||
> + !IS_ENABLED(CONFIG_GENERIC_ENTRY) ||
> +  ct_state_cpu(cpu) != CONTEXT_USER) {
> + __cpumask_set_cpu(cpu, 
> cfd->cpumask_ipi);
> + nr_cpus++;
> + last_cpu = cpu;
> + }

I've been hacking on something like this (CSD deferral for NOHZ-full),
and unfortunately this uses the CPU-local cfd_data storage thing, which
means any further smp_call_function() from the same CPU to the same
destination will spin on csd_lock_wait(), waiting for the target CPU to
come out of userspace and flush the queue - and we've just spent extra
effort into *not* disturbing it, so that'll take a while :(

I don't have much that is in a shareable state yet (though I'm supposed to
talk some more about it at OSPM in <2 weeks, so I'll have to get there),
but ATM I'm playing with
o a bitmask (like in [1]) for coalescable stuff such as do_sync_core() for
  x86 instruction patching
o a CSD-like queue for things that need to pass data around, using
  statically-allocated storage (so with a limit on how much it can be used) - 
the
  alternative being allocating a struct on sending, since you don't have a
  bound on how much crap you can queue on an undisturbed NOHZ-full CPU...

[1]: https://lore.kernel.org/all/20210929152429.067060...@infradead.org/



Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Frederic Weisbecker
On Wed, Apr 05, 2023 at 02:05:13PM +0200, Frederic Weisbecker wrote:
> On Wed, Apr 05, 2023 at 01:41:48PM +0200, Peter Zijlstra wrote:
> 1) It has the advantage to check context tracking _after_ the llist_add(), so
>it really can't be misused ordering-wise.
> 
> 2) The IPI callback is always enqueued and then executed upon return
>from userland. The ordering makes sure it will either IPI or execute
>upon return to userspace.

*from userspace


Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Frederic Weisbecker
On Wed, Apr 05, 2023 at 01:41:48PM +0200, Peter Zijlstra wrote:
> On Wed, Apr 05, 2023 at 01:10:07PM +0200, Frederic Weisbecker wrote:
> > On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:
> > > On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > > > +   int state = atomic_read(>state);
> > > > +   /* will return true only for cpus in kernel space */
> > > > +   return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > > > +}
> > > 
> > > Also note that this doesn't strictly prevent userspace from being 
> > > interrupted.
> > > You may well observe the CPU in kernel but it may receive the IPI later 
> > > after
> > > switching to userspace.
> > > 
> > > We could arrange for avoiding that with marking ct->state with a pending 
> > > work bit
> > > to flush upon user entry/exit but that's a bit more overhead so I first 
> > > need to
> > > know about your expectations here, ie: can you tolerate such an occasional
> > > interruption or not?
> > 
> > Bah, actually what can we do to prevent from that racy IPI? Not much I 
> > fear...
> 
> Yeah, so I don't think that's actually a problem. The premise is that
> *IFF* NOHZ_FULL stays in userspace, then it will never observe the IPI.
> 
> If it violates this by doing syscalls or other kernel entries; it gets
> to keep the pieces.

Ok so how about the following (only build tested)?

Two things:

1) It has the advantage to check context tracking _after_ the llist_add(), so
   it really can't be misused ordering-wise.

2) The IPI callback is always enqueued and then executed upon return
   from userland. The ordering makes sure it will either IPI or execute
   upon return to userspace.

diff --git a/include/linux/context_tracking_state.h 
b/include/linux/context_tracking_state.h
index 4a4d56f77180..dc4b56da1747 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -137,10 +137,23 @@ static __always_inline int ct_state(void)
return ret;
 }
 
+static __always_inline int ct_state_cpu(int cpu)
+{
+   struct context_tracking *ct;
+
+   if (!context_tracking_enabled())
+   return CONTEXT_DISABLED;
+
+   ct = per_cpu_ptr(&context_tracking, cpu);
+
+   return atomic_read(&ct->state) & CT_STATE_MASK;
+}
+
 #else
 static __always_inline bool context_tracking_enabled(void) { return false; }
 static __always_inline bool context_tracking_enabled_cpu(int cpu) { return 
false; }
 static __always_inline bool context_tracking_enabled_this_cpu(void) { return 
false; }
+static inline int ct_state_cpu(int cpu) { return CONTEXT_DISABLED; }
 #endif /* CONFIG_CONTEXT_TRACKING_USER */
 
 #endif
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 846add8394c4..cdc7e8a59acc 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 
+#include "../kernel/sched/smp.h"
 #include "common.h"
 
 #define CREATE_TRACE_POINTS
@@ -27,6 +28,10 @@ static __always_inline void __enter_from_user_mode(struct 
pt_regs *regs)
instrumentation_begin();
kmsan_unpoison_entry_regs(regs);
trace_hardirqs_off_finish();
+
+   /* Flush delayed IPI queue on nohz_full */
+   if (context_tracking_enabled_this_cpu())
+   flush_smp_call_function_queue();
instrumentation_end();
 }
 
diff --git a/kernel/smp.c b/kernel/smp.c
index 06a413987a14..14b25d25ef3a 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -878,6 +878,8 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
  */
 #define SCF_WAIT   (1U << 0)
 #define SCF_RUN_LOCAL  (1U << 1)
+#define SCF_NO_USER(1U << 2)
+
 
 static void smp_call_function_many_cond(const struct cpumask *mask,
smp_call_func_t func, void *info,
@@ -946,10 +948,13 @@ static void smp_call_function_many_cond(const struct 
cpumask *mask,
 #endif
cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, 
CFD_SEQ_QUEUE);
if (llist_add(>node.llist, 
_cpu(call_single_queue, cpu))) {
-   __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
-   nr_cpus++;
-   last_cpu = cpu;
-
+   if (!(scf_flags & SCF_NO_USER) ||
+   !IS_ENABLED(CONFIG_GENERIC_ENTRY) ||
+ct_state_cpu(cpu) != CONTEXT_USER) {
+   __cpumask_set_cpu(cpu, 
cfd->cpumask_ipi);
+   nr_cpus++;
+   last_cpu = cpu;
+   }
cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, 
CFD_SEQ_IPI);
} else {
cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, 
CFD_SEQ_NOIPI);
@@ -1121,6 +1126,24 @@ void __init smp_init(void)
smp_cpus_done(setup_max_cpus);
 }
 
+static void 

Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread David Hildenbrand

On 05.04.23 13:41, Peter Zijlstra wrote:

On Wed, Apr 05, 2023 at 01:10:07PM +0200, Frederic Weisbecker wrote:

On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:

On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:

+   int state = atomic_read(&ct->state);
+   /* will return true only for cpus in kernel space */
+   return state & CT_STATE_MASK == CONTEXT_KERNEL;
+}


Also note that this doesn't strictly prevent userspace from being interrupted.
You may well observe the CPU in kernel but it may receive the IPI later after
switching to userspace.

We could arrange for avoiding that with marking ct->state with a pending work 
bit
to flush upon user entry/exit but that's a bit more overhead so I first need to
know about your expectations here, ie: can you tolerate such an occasional
interruption or not?


Bah, actually what can we do to prevent from that racy IPI? Not much I fear...


Yeah, so I don't think that's actually a problem. The premise is that
*IFF* NOHZ_FULL stays in userspace, then it will never observe the IPI.

If it violates this by doing syscalls or other kernel entries; it gets
to keep the pieces.


Yair is currently on vacation, so I'm replying on his behalf.

Indeed, RT userspace is supposed to not call into the kernel, that's the 
premise.


--
Thanks,

David / dhildenb



Re: [PATCH v8 5/7] PCI: Allow pci_bus_for_each_resource() to take less arguments

2023-04-05 Thread Andy Shevchenko
On Thu, Mar 30, 2023 at 07:24:32PM +0300, Andy Shevchenko wrote:
> Refactor pci_bus_for_each_resource() in the same way as it's done in
> pci_dev_for_each_resource() case. This will allow to hide iterator
> inside the loop, where it's not used otherwise.
> 
> No functional changes intended.

Bjorn, this has wrong author in your tree:

https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=resource&id=46dbad19a59e0dd8f1e7065e5281345797fbb365

Or did I misinterpret something?

-- 
With Best Regards,
Andy Shevchenko




Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Peter Zijlstra
On Wed, Apr 05, 2023 at 01:10:07PM +0200, Frederic Weisbecker wrote:
> On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:
> > On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > > + int state = atomic_read(&ct->state);
> > > + /* will return true only for cpus in kernel space */
> > > + return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > > +}
> > 
> > Also note that this doesn't strictly prevent userspace from being 
> > interrupted.
> > You may well observe the CPU in kernel but it may receive the IPI later 
> > after
> > switching to userspace.
> > 
> > We could arrange for avoiding that with marking ct->state with a pending 
> > work bit
> > to flush upon user entry/exit but that's a bit more overhead so I first 
> > need to
> > know about your expectations here, ie: can you tolerate such an occasional
> > interruption or not?
> 
> Bah, actually what can we do to prevent from that racy IPI? Not much I fear...

Yeah, so I don't think that's actually a problem. The premise is that
*IFF* NOHZ_FULL stays in userspace, then it will never observe the IPI.

If it violates this by doing syscalls or other kernel entries; it gets
to keep the pieces.




Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Frederic Weisbecker
On Wed, Apr 05, 2023 at 12:44:04PM +0200, Frederic Weisbecker wrote:
> On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> > +   int state = atomic_read(&ct->state);
> > +   /* will return true only for cpus in kernel space */
> > +   return state & CT_STATE_MASK == CONTEXT_KERNEL;
> > +}
> 
> Also note that this doesn't strictly prevent userspace from being interrupted.
> You may well observe the CPU in kernel but it may receive the IPI later after
> switching to userspace.
> 
> We could arrange for avoiding that with marking ct->state with a pending work 
> bit
> to flush upon user entry/exit but that's a bit more overhead so I first need 
> to
> know about your expectations here, ie: can you tolerate such an occasional
> interruption or not?

Bah, actually what can we do to prevent that racy IPI? Not much, I fear...



Re: [PATCH] powerpc/64: Always build with 128-bit long double

2023-04-05 Thread Segher Boessenkool
Hi!

On Wed, Apr 05, 2023 at 03:32:21PM +1000, Michael Ellerman wrote:
> Segher Boessenkool  writes:
> > On Tue, Apr 04, 2023 at 08:28:47PM +1000, Michael Ellerman wrote:
> >> The amdgpu driver builds some of its code with hard-float enabled,
> >> whereas the rest of the kernel is built with soft-float.
> >> 
> >> When building with 64-bit long double, if soft-float and hard-float
> >> objects are linked together, the build fails due to incompatible ABI
> >> tags.
> >
> >> Currently those build errors are avoided because the amdgpu driver is
> >> gated on 128-bit long double being enabled. But that's not a detail the
> >> amdgpu driver should need to be aware of, and if another driver starts
> >> using hard-float the same problem would occur.
> >
> > Well.  The kernel driver either has no business using long double (or
> > any other floating point even) at all, or it should know exactly what is
> > used: double precision, double-double, or quadruple precision.  Both of
> > the latter two are 128 bits.
> 
> In a perfect world ... :)

Well, without it knowing what exactly it calculates, does this code have
any business running in kernel space?  Is it acceptable to just do
random things in the kernel?  I don't know the kernel code that uses
long double at all (and I'm afraid to look for fear of going blind), but
all this sounds like the 64-bit IEEE double precision floating point is
not good enough for some certain calculation, but 80-bit extended double
precision as used on x86 is.  That does make it likely that both of our
128-bit formats would work, but there are lots and lots of "buts".  To
start with, what does that code require wrt fp contraction (so, floating
multiply-add)?

All of this suggests that there should not be floating point code here
*at all*, it is harder to use it in any acceptable way than to just do
things in fixed point or scaled integer or whatever.

> >> All versions of the 64-bit ABI specify that long-double is 128-bits.
> >> However some compilers, notably the kernel.org ones, are built to use
> >> 64-bit long double by default.
> >
> > Mea culpa, I suppose?  But buildall doesn't force 64 bit explicitly.
> > I wonder how this happened?  Is it maybe a problem in the powerpc64le
> > config in GCC itself?
> 
> Not blaming anyone, just one of those things that happens.

Oh I didn't say anyone is blaming me.  I want to fix the problem, that
is all :-)

> The
> toolchains the distros (Ubuntu/Fedora) build all seem to use 128, but
> possibly that's because someone told them to configure them that way at
> some point.

No, or yes, depending on how you look at it?  Default configurations all
have 128-bit long double.  But buildall uses (almost) the same
configuration on all targets, namely:

$GCC_SRC/configure \
--target=$TARGET --enable-targets=all --prefix=$PREFIX \
--enable-languages=c --without-headers --disable-bootstrap \
--disable-nls --disable-threads --disable-shared \
--disable-libmudflap --disable-libssp --disable-libgomp \
--disable-decimal-float --disable-libquadmath \
--disable-libatomic --disable-libcc1 --disable-libmpx

All of this is perfectly reasonable imnsho, but I guess the
--enable-targets=all causes the problem here?  That makes no sense, but
it is still my best guess.

> > I have a patch from summer last year (Arnd's
> > toolchains are built without it) that does
> > +   powerpc64le-*)  TARGET_GCC_CONF=--with-long-double-128
> > Unfortunately I don't remember why I did that, and I never investigated
> > what the deeper problem is :-/
> 
> Last summer (aka winter)

Oh right.  Last July :-)

> is when we first discovered this issue with the
> long double size being implicated.
> 
> See:
>   https://git.kernel.org/torvalds/c/c653c591789b3acfa4bf6ae45d5af4f330e50a91
> 
> So I guess that's what prompted your patch?

It was one day before my patch, maybe less than 12h even, so that could
be.  I don't update the kernel source automatically though (there are
50 to 100 build breaks every year, when things are in decent state I
tend to keep it for a while).  But it may have been our patches are due
to the same cause, and mine is no longer needed?  That would be nice.  I
never committed that patch (or there would be more context, sigh).

I'll dig, there is a real problem in the compiler it seems.  Thanks for
the help so far!


Segher


Re: [PATCH 3/3] mm/mmu_gather: send tlb_remove_table_smp_sync IPI only to CPUs in kernel mode

2023-04-05 Thread Frederic Weisbecker
On Tue, Apr 04, 2023 at 04:42:24PM +0300, Yair Podemsky wrote:
> @@ -191,6 +192,20 @@ static void tlb_remove_table_smp_sync(void *arg)
>   /* Simply deliver the interrupt */
>  }
>  
> +
> +#ifdef CONFIG_CONTEXT_TRACKING
> +static bool cpu_in_kernel(int cpu, void *info)
> +{
> + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);

Like Peter said, an smp_mb() is required here before the read (unless there is
already one between the page table modification and that ct->state read?).

So that you have this pairing:


   WRITE page_table          WRITE ct->state
   smp_mb()                  smp_mb() // implied by atomic_fetch_or
   READ ct->state            READ page_table

> + int state = atomic_read(&ct->state);
> + /* will return true only for cpus in kernel space */
> + return state & CT_STATE_MASK == CONTEXT_KERNEL;
> +}

Also note that this doesn't strictly prevent userspace from being interrupted.
You may well observe the CPU in kernel but it may receive the IPI later after
switching to userspace.

We could arrange for avoiding that with marking ct->state with a pending work 
bit
to flush upon user entry/exit but that's a bit more overhead so I first need to
know about your expectations here, ie: can you tolerate such an occasional
interruption or not?

Thanks.



[powerpc:next] BUILD SUCCESS b0bbe5a2915201e3231e788d716d39dc54493b03

2023-04-05 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next
branch HEAD: b0bbe5a2915201e3231e788d716d39dc54493b03  powerpc/atomics: Remove 
unused function

elapsed time: 1250m

configs tested: 332
configs skipped: 28

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alphaallyesconfig   gcc  
alphabuildonly-randconfig-r001-20230403   gcc  
alpha   defconfig   gcc  
alpharandconfig-r002-20230403   gcc  
alpharandconfig-r005-20230403   gcc  
alpharandconfig-r015-20230403   gcc  
alpharandconfig-r016-20230403   gcc  
alpharandconfig-r021-20230403   gcc  
alpharandconfig-r023-20230403   gcc  
alpharandconfig-r024-20230403   gcc  
alpharandconfig-r025-20230403   gcc  
alpharandconfig-r026-20230403   gcc  
alpharandconfig-r034-20230403   gcc  
alpharandconfig-r036-20230403   gcc  
arc  allyesconfig   gcc  
arc  buildonly-randconfig-r006-20230403   gcc  
arc defconfig   gcc  
arc  randconfig-r002-20230403   gcc  
arc  randconfig-r004-20230403   gcc  
arc  randconfig-r006-20230403   gcc  
arc  randconfig-r011-20230403   gcc  
arc  randconfig-r013-20230403   gcc  
arc  randconfig-r016-20230403   gcc  
arc  randconfig-r021-20230403   gcc  
arc  randconfig-r022-20230403   gcc  
arc  randconfig-r025-20230403   gcc  
arc  randconfig-r031-20230404   gcc  
arc  randconfig-r032-20230403   gcc  
arc  randconfig-r032-20230404   gcc  
arc  randconfig-r033-20230403   gcc  
arc  randconfig-r034-20230403   gcc  
arc  randconfig-r034-20230404   gcc  
arc  randconfig-r036-20230404   gcc  
arc  randconfig-r043-20230403   gcc  
arm  allmodconfig   gcc  
arm  allyesconfig   gcc  
arm  buildonly-randconfig-r003-20230403   clang
arm defconfig   gcc  
arm  randconfig-r021-20230403   clang
arm  randconfig-r031-20230403   gcc  
arm  randconfig-r033-20230403   gcc  
arm  randconfig-r034-20230403   gcc  
arm  randconfig-r035-20230403   gcc  
arm  randconfig-r046-20230403   clang
armrealview_defconfig   gcc  
arm   sama5_defconfig   gcc  
armshmobile_defconfig   gcc  
arm   spear13xx_defconfig   clang
arm wpcm450_defconfig   gcc  
arm64allyesconfig   gcc  
arm64buildonly-randconfig-r001-20230404   gcc  
arm64buildonly-randconfig-r004-20230403   clang
arm64   defconfig   gcc  
arm64randconfig-r004-20230403   clang
arm64randconfig-r012-20230403   gcc  
arm64randconfig-r015-20230403   gcc  
arm64randconfig-r021-20230403   gcc  
arm64randconfig-r024-20230403   gcc  
arm64randconfig-r026-20230403   gcc  
arm64randconfig-r033-20230403   clang
csky buildonly-randconfig-r002-20230403   gcc  
cskydefconfig   gcc  
csky randconfig-r006-20230403   gcc  
csky randconfig-r012-20230403   gcc  
csky randconfig-r021-20230403   gcc  
csky randconfig-r031-20230403   gcc  
csky randconfig-r033-20230403   gcc  
csky randconfig-r034-20230403   gcc  
csky randconfig-r035-20230403   gcc  
hexagon  buildonly-randconfig-r001-20230403   clang
hexagon  buildonly-randconfig-r006-20230403   clang
hexagon  buildonly-randconfig-r006-20230404   clang
hexagon  randconfig-r005-20230403   clang
hexagon  randconfig-r006-20230403   clang
hexagon  randconfig-r024-20230403   clang
hexagon  randconfig-r041-20230403   clang
hexagon  randconfig-r045-20230403   clang
i386 allyesconfig   gcc  
i386 buildonly-randconfig-r002-20230403   clang
i386 debian-10.3-func   gcc  
i386   debian-10.3-kselftests   gcc  
i386debian-10.3-kunit   gcc  
i386  debian-10.3-kvm   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
i386 

Re: [PATCH v8 7/7] pcmcia: Convert to use less arguments in pci_bus_for_each_resource()

2023-04-05 Thread Andy Shevchenko
On Thu, Mar 30, 2023 at 07:24:34PM +0300, Andy Shevchenko wrote:

...

> @@ -960,12 +960,9 @@ static int nonstatic_autoadd_resources(struct 
> pcmcia_socket *s)
>*/
>   if (s->cb_dev->bus->number == 0)
>   return -EINVAL;
> -
> - for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
> - res = s->cb_dev->bus->resource[i];
> -#else
> - pci_bus_for_each_resource(s->cb_dev->bus, res, i) {
>  #endif
> +
> + pci_bus_for_each_resource(s->cb_dev->bus, res) {
>   if (!res)
>   continue;

As pointed out in the reply to Bjorn's email, this hunk needs to be revisited:
since writing the documentation for the above call I have started to understand
the reasoning behind this special treatment of the x86 case.

-- 
With Best Regards,
Andy Shevchenko




Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-04-05 Thread Andy Shevchenko
On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > Provide two new helper macros to iterate over PCI device resources and
> > convert users.
> > 
> > Looking at it, refactor existing pci_bus_for_each_resource() and convert
> > users accordingly.
> > 
> > Note, the amount of lines grew due to the documentation update.
> > 
> > Changelog v8:
> > - fixed issue with pci_bus_for_each_resource() macro (LKP)
> > - due to above added a new patch to document how it works
> > - moved the last patch to be #2 (Philippe)
> > - added tags (Philippe)
> > 
> > Changelog v7:
> > - made both macros to share same name (Bjorn)
> 
> I didn't actually request the same name for both; I would have had no
> idea how to even do that :)
> 
> v6 had:
> 
>   pci_dev_for_each_resource_p(dev, res)
>   pci_dev_for_each_resource(dev, res, i)
> 
> and I suggested:
> 
>   pci_dev_for_each_resource(dev, res)
>   pci_dev_for_each_resource_idx(dev, res, i)
> 
> because that pattern is used elsewhere.

Ah, sorry I misinterpreted your suggestion (I thought that at the end of
the day you wanted the macro to be less intrusive, so we change less code,
that's why I interpreted it the way described in the Changelog).

> But you figured out how to do
> it, and having one name is even better, so thanks for that extra work!

You are welcome!

> > - split out the pci_resource_n() conversion (Bjorn)
> > 
> > Changelog v6:
> > - dropped unused variable in PPC code (LKP)
> > 
> > Changelog v5:
> > - renamed loop variable to minimize the clash (Keith)
> > - addressed smatch warning (Dan)
> > - addressed 0-day bot findings (LKP)
> > 
> > Changelog v4:
> > - rebased on top of v6.3-rc1
> > - added tag (Krzysztof)
> > 
> > Changelog v3:
> > - rebased on top of v2 by Mika, see above
> > - added tag to pcmcia patch (Dominik)
> > 
> > Changelog v2:
> > - refactor to have two macros
> > - refactor existing pci_bus_for_each_resource() in the same way and
> >   convert users
> > 
> > Andy Shevchenko (6):
> >   kernel.h: Split out COUNT_ARGS() and CONCATENATE()
> >   PCI: Introduce pci_resource_n()
> >   PCI: Document pci_bus_for_each_resource() to avoid confusion
> >   PCI: Allow pci_bus_for_each_resource() to take less arguments
> >   EISA: Convert to use less arguments in pci_bus_for_each_resource()
> >   pcmcia: Convert to use less arguments in pci_bus_for_each_resource()

...

> Applied 2-7 to pci/resource for v6.4, thanks, I really like this!

Btw, can you actually drop patch 7, please?
After updating the documentation I have realised why the first
chunk is invalid. It needs a more careful check and rework.

> I omitted
> 
>   [1/7] kernel.h: Split out COUNT_ARGS() and CONCATENATE()"
> 
> only because it's not essential to this series and has only a trivial
> one-line impact on include/linux/pci.h.

I'm not sure I understood what exactly "essentiality" means to you, but
I included that because it makes the split which can be used later by
others and not including kernel.h in the header is the objective I want
to achieve. Without this patch the achievement is going to be deferred.
Yet, this, as you have noticed, allows to compile and use the macros in
the rest of the patches.

P.S. Thank you for the review and application of the rest!

-- 
With Best Regards,
Andy Shevchenko