[RFC PATCH] KVM: PPC: Update MAINTAINERS

2023-04-07 Thread Nicholas Piggin
Michael is maintaining KVM PPC with the powerpc tree at the moment, but
doesn't necessarily have time to be across all of KVM. I think that's
okay: from the mechanics of how patches flow upstream, he is the
maintainer. And it probably makes a bit more sense to people who need
to look at the MAINTAINERS file if we have some contacts listed there.

So add mpe as KVM PPC maintainer and myself as a reviewer. Split out the
subarchs that don't get much attention.

Thanks,
Nick
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 90abe83c02f3..c6283280683e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11292,8 +11292,15 @@ F: arch/mips/include/uapi/asm/kvm*
 F: arch/mips/kvm/
 
 KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
+M: Michael Ellerman 
+R: Nicholas Piggin 
 L: linuxppc-dev@lists.ozlabs.org
+L: k...@vger.kernel.org
+S: Maintained (Book3S 64-bit HV)
+S: Odd fixes (Book3S 64-bit PR)
+S: Orphan (Book3E and 32-bit)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git topic/ppc-kvm
+S: Maintained
 F: arch/powerpc/include/asm/kvm*
 F: arch/powerpc/include/uapi/asm/kvm*
 F: arch/powerpc/kernel/kvm*
-- 
2.40.0



[PATCH v2 4/4] KVM: PPC: selftests: add selftests sanity tests

2023-04-07 Thread Nicholas Piggin
Add tests that exercise very basic functions of the kvm selftests
framework: guest creation, ucalls, hcalls, copying data between guest
and host, interrupts, and page faults.

These don't stress KVM so much as they are useful when developing
selftests support for powerpc.

Acked-by: Michael Ellerman  (powerpc)
Signed-off-by: Nicholas Piggin 
---
 tools/testing/selftests/kvm/Makefile  |   2 +
 .../selftests/kvm/include/powerpc/hcall.h |   2 +
 .../testing/selftests/kvm/powerpc/null_test.c | 166 ++
 .../selftests/kvm/powerpc/rtas_hcall.c| 146 +++
 4 files changed, 316 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/powerpc/null_test.c
 create mode 100644 tools/testing/selftests/kvm/powerpc/rtas_hcall.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 908602a9f513..d4052eccaaee 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -181,6 +181,8 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += kvm_binary_stats_test
 
+TEST_GEN_PROGS_powerpc += powerpc/null_test
+TEST_GEN_PROGS_powerpc += powerpc/rtas_hcall
 TEST_GEN_PROGS_powerpc += demand_paging_test
 TEST_GEN_PROGS_powerpc += dirty_log_test
 TEST_GEN_PROGS_powerpc += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/include/powerpc/hcall.h b/tools/testing/selftests/kvm/include/powerpc/hcall.h
index ba119f5a3fef..04c7d2d13020 100644
--- a/tools/testing/selftests/kvm/include/powerpc/hcall.h
+++ b/tools/testing/selftests/kvm/include/powerpc/hcall.h
@@ -12,6 +12,8 @@
 #define UCALL_R4_UCALL 0x5715 // regular ucall, r5 contains ucall pointer
#define UCALL_R4_SIMPLE 0x // simple exit usable by asm with no ucall data
 
+#define H_RTAS 0xf000
+
 int64_t hcall0(uint64_t token);
 int64_t hcall1(uint64_t token, uint64_t arg1);
 int64_t hcall2(uint64_t token, uint64_t arg1, uint64_t arg2);
diff --git a/tools/testing/selftests/kvm/powerpc/null_test.c b/tools/testing/selftests/kvm/powerpc/null_test.c
new file mode 100644
index ..31db0b6becd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/powerpc/null_test.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tests for guest creation, run, ucall, interrupt, and vm dumping.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "processor.h"
+#include "helpers.h"
+
+extern void guest_code_asm(void);
+asm(".global guest_code_asm");
+asm(".balign 4");
+asm("guest_code_asm:");
+asm("li 3,0"); // H_UCALL
+asm("li 4,0"); // UCALL_R4_SIMPLE
+asm("sc 1");
+
+static void test_asm(void)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_vm *vm;
+
+   vm = vm_create_with_one_vcpu(&vcpu, guest_code_asm);
+
+   vcpu_run(vcpu);
+   handle_ucall(vcpu, UCALL_NONE);
+
+   kvm_vm_free(vm);
+}
+
+static void guest_code_ucall(void)
+{
+   GUEST_DONE();
+}
+
+static void test_ucall(void)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_vm *vm;
+
+   vm = vm_create_with_one_vcpu(&vcpu, guest_code_ucall);
+
+   vcpu_run(vcpu);
+   handle_ucall(vcpu, UCALL_DONE);
+
+   kvm_vm_free(vm);
+}
+
+static void trap_handler(struct ex_regs *regs)
+{
+   GUEST_SYNC(1);
+   regs->nia += 4;
+}
+
+static void guest_code_trap(void)
+{
+   GUEST_SYNC(0);
+   asm volatile("trap");
+   GUEST_DONE();
+}
+
+static void test_trap(void)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_vm *vm;
+
+   vm = vm_create_with_one_vcpu(&vcpu, guest_code_trap);
+   vm_install_exception_handler(vm, 0x700, trap_handler);
+
+   vcpu_run(vcpu);
+   host_sync(vcpu, 0);
+   vcpu_run(vcpu);
+   host_sync(vcpu, 1);
+   vcpu_run(vcpu);
+   handle_ucall(vcpu, UCALL_DONE);
+
+   vm_install_exception_handler(vm, 0x700, NULL);
+
+   kvm_vm_free(vm);
+}
+
+static void dsi_handler(struct ex_regs *regs)
+{
+   GUEST_SYNC(1);
+   regs->nia += 4;
+}
+
+static void guest_code_dsi(void)
+{
+   GUEST_SYNC(0);
+   asm volatile("stb %r0,0(0)");
+   GUEST_DONE();
+}
+
+static void test_dsi(void)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_vm *vm;
+
+   vm = vm_create_with_one_vcpu(&vcpu, guest_code_dsi);
+   vm_install_exception_handler(vm, 0x300, dsi_handler);
+
+   vcpu_run(vcpu);
+   host_sync(vcpu, 0);
+   vcpu_run(vcpu);
+   host_sync(vcpu, 1);
+   vcpu_run(vcpu);
+   handle_ucall(vcpu, UCALL_DONE);
+
+   vm_install_exception_handler(vm, 0x300, NULL);
+
+   kvm_vm_free(vm);
+}
+
+static void test_dump(void)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_vm *vm;
+
+   vm = vm_create_with_one_vcpu(&vcpu, guest_code_ucall);
+
+   vcpu_run(vcpu);
+   handle_ucall(vcpu, UCALL_DONE);
+
+   printf("Testing 

[PATCH v2 3/4] KVM: PPC: selftests: add support for powerpc

2023-04-07 Thread Nicholas Piggin
Implement KVM selftests support for powerpc (Book3S-64).

ucalls are implemented with an unsupported PAPR hcall number, which causes
KVM to exit to userspace.

For now virtual memory is only implemented for the radix MMU, and only
the base page size is supported.

Guest interrupts are taken in real-mode, so require a real-mode page at
gRA 0. Interrupt entry requires some tricky code because gVA:gRA is not
1:1 mapped like the kernel is, so we can't just switch MMU on and off.
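
[Editor's sketch: the ucall round trip described above, in rough C.
hcall2(), UCALL_R4_UCALL, and the r5-carries-pointer convention are from
this series' hcall.h; H_UCALL's definition is not shown in these
excerpts, and the host-side field names follow KVM's KVM_EXIT_PAPR_HCALL
interface, so treat the details as assumptions rather than the patch's
exact code.]

/* Guest side: a ucall is an hcall with a token KVM does not implement,
 * so KVM exits to userspace instead of handling it in the kernel. */
static void guest_ucall(uint64_t ucall_struct_gva)
{
	hcall2(H_UCALL, UCALL_R4_UCALL, ucall_struct_gva); /* r5 = pointer */
}

/* Host side: recognize the resulting exit on return from vcpu_run(). */
static bool is_ucall_exit(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	return run->exit_reason == KVM_EXIT_PAPR_HCALL &&
	       run->papr_hcall.nr == H_UCALL;
}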

Acked-by: Michael Ellerman  (powerpc)
Signed-off-by: Nicholas Piggin 
---
 MAINTAINERS   |   2 +
 tools/testing/selftests/kvm/Makefile  |  13 +
 .../selftests/kvm/include/kvm_util_base.h |  20 +
 .../selftests/kvm/include/powerpc/hcall.h |  19 +
 .../selftests/kvm/include/powerpc/ppc_asm.h   |  32 ++
 .../selftests/kvm/include/powerpc/processor.h |  33 ++
 tools/testing/selftests/kvm/lib/guest_modes.c |   3 +
 tools/testing/selftests/kvm/lib/kvm_util.c|  12 +
 .../selftests/kvm/lib/powerpc/handlers.S  |  93 
 .../testing/selftests/kvm/lib/powerpc/hcall.c |  45 ++
 .../selftests/kvm/lib/powerpc/processor.c | 429 ++
 .../testing/selftests/kvm/lib/powerpc/ucall.c |  30 ++
 tools/testing/selftests/kvm/powerpc/helpers.h |  46 ++
 13 files changed, 777 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/hcall.h
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/ppc_asm.h
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/processor.h
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/handlers.S
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/hcall.c
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/processor.c
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/ucall.c
 create mode 100644 tools/testing/selftests/kvm/powerpc/helpers.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c6283280683e..d353cbd5416c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11305,6 +11305,8 @@ F:  arch/powerpc/include/asm/kvm*
 F: arch/powerpc/include/uapi/asm/kvm*
 F: arch/powerpc/kernel/kvm*
 F: arch/powerpc/kvm/
+F: tools/testing/selftests/kvm/*/powerpc/
+F: tools/testing/selftests/kvm/powerpc/
 
 KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
 M: Anup Patel 
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 84a627c43795..908602a9f513 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -55,6 +55,11 @@ LIBKVM_s390x += lib/s390x/ucall.c
 LIBKVM_riscv += lib/riscv/processor.c
 LIBKVM_riscv += lib/riscv/ucall.c
 
+LIBKVM_powerpc += lib/powerpc/handlers.S
+LIBKVM_powerpc += lib/powerpc/processor.c
+LIBKVM_powerpc += lib/powerpc/ucall.c
+LIBKVM_powerpc += lib/powerpc/hcall.c
+
 # Non-compiled test targets
 TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
 
@@ -176,6 +181,14 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += kvm_binary_stats_test
 
+TEST_GEN_PROGS_powerpc += demand_paging_test
+TEST_GEN_PROGS_powerpc += dirty_log_test
+TEST_GEN_PROGS_powerpc += kvm_create_max_vcpus
+TEST_GEN_PROGS_powerpc += kvm_page_table_test
+TEST_GEN_PROGS_powerpc += rseq_test
+TEST_GEN_PROGS_powerpc += set_memory_region_test
+TEST_GEN_PROGS_powerpc += kvm_binary_stats_test
+
 TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 8a27bd4111ff..b59566869f65 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -105,6 +105,7 @@ struct kvm_vm {
bool pgd_created;
vm_paddr_t ucall_mmio_addr;
vm_paddr_t pgd;
+   vm_paddr_t prtb; // powerpc process table
vm_vaddr_t gdt;
vm_vaddr_t tss;
vm_vaddr_t idt;
@@ -160,6 +161,8 @@ enum vm_guest_mode {
VM_MODE_PXXV48_4K,  /* For 48bits VA but ANY bits PA */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
+   VM_MODE_P52V52_4K,
+   VM_MODE_P52V52_64K,
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
@@ -197,6 +200,23 @@ extern enum vm_guest_mode vm_mode_default;
 #define MIN_PAGE_SHIFT 12U
 #define ptes_per_page(page_size)   ((page_size) / 8)
 
+#elif defined(__powerpc64__)
+
+/* Radix guest EA and RA are 52-bit on POWER9 and POWER10 */
+#define VM_MODE_DEFAULT VM_MODE_P52V52_64K
+/*
+ * XXX: This is a hack to allocate more memory for page tables because we
+ * don't pack "fragments" well with 64K page sizes. Should rework generic
+ * code to allow more flexible page table memory estimation (and fix our
+ * page table allocation).
+ */
+#define 

[PATCH v2 2/4] KVM: selftests: Add aligned guest physical page allocator

2023-04-07 Thread Nicholas Piggin
powerpc will require this to allocate MMU tables in guest memory that
are aligned and larger than the base page size.
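
[Editor's sketch of the intended call pattern, not from the patch: with
4K guest pages, a 64K-sized and 64K-aligned radix page directory becomes
16 pages aligned to a 16-page boundary. The constants here mirror how
the other architectures' processor.c files allocate page tables; the
exact numbers are an assumption.]

	/* 16 x 4K pages, aligned to a 16-page (64K) boundary */
	vm->pgd = vm_phy_pages_alloc_align(vm, 16, 16,
					   KVM_GUEST_PAGE_TABLE_MIN_PADDR,
					   vm->memslots[MEM_REGION_PT]);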

Signed-off-by: Nicholas Piggin 
---
 .../selftests/kvm/include/kvm_util_base.h |  2 +
 tools/testing/selftests/kvm/lib/kvm_util.c| 44 ---
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 16425da16861..8a27bd4111ff 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -679,6 +679,8 @@ const char *exit_reason_str(unsigned int exit_reason);
 
 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc_align(struct kvm_vm *vm, size_t num, size_t align,
+ vm_paddr_t paddr_min, uint32_t memslot);
 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
  vm_paddr_t paddr_min, uint32_t memslot);
 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8ec20ac33de0..4f158f5e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1898,6 +1898,7 @@ const char *exit_reason_str(unsigned int exit_reason)
  * Input Args:
  *   vm - Virtual Machine
  *   num - number of pages
+ *   align - pages alignment
  *   paddr_min - Physical address minimum
  *   memslot - Memory region to allocate page from
  *
@@ -1911,7 +1912,7 @@ const char *exit_reason_str(unsigned int exit_reason)
  * and their base address is returned. A TEST_ASSERT failure occurs if
  * not enough pages are available at or above paddr_min.
  */
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+vm_paddr_t vm_phy_pages_alloc_align(struct kvm_vm *vm, size_t num, size_t align,
  vm_paddr_t paddr_min, uint32_t memslot)
 {
struct userspace_mem_region *region;
@@ -1925,24 +1926,27 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
paddr_min, vm->page_size);
 
region = memslot2region(vm, memslot);
-   base = pg = paddr_min >> vm->page_shift;
-
-   do {
-   for (; pg < base + num; ++pg) {
-   if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
-   base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
-   break;
+   base = paddr_min >> vm->page_shift;
+
+again:
+   base = (base + align - 1) & ~(align - 1);
+   for (pg = base; pg < base + num; ++pg) {
+   if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+   base = sparsebit_next_set(region->unused_phy_pages, pg);
+   if (!base) {
+   fprintf(stderr, "No guest physical pages "
+   "available, paddr_min: 0x%lx "
+   "page_size: 0x%x memslot: %u "
+   "num_pages: %lu align: %lu\n",
+   paddr_min, vm->page_size, memslot,
+   num, align);
+   fputs(" vm dump \n", stderr);
+   vm_dump(stderr, vm, 2);
+   TEST_ASSERT(false, "false");
+   abort();
}
+   goto again;
}
-   } while (pg && pg != base + num);
-
-   if (pg == 0) {
-   fprintf(stderr, "No guest physical page available, "
-   "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
-   paddr_min, vm->page_size, memslot);
-   fputs(" vm dump \n", stderr);
-   vm_dump(stderr, vm, 2);
-   abort();
}
 
for (pg = base; pg < base + num; ++pg)
@@ -1951,6 +1955,12 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
return base * vm->page_size;
 }
 
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+   return vm_phy_pages_alloc_align(vm, num, 1, paddr_min, memslot);
+}
+
 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 uint32_t memslot)
 {
-- 
2.40.0



[PATCH v2 1/4] KVM: selftests: Move pgd_created check into virt_pgd_alloc

2023-04-07 Thread Nicholas Piggin
The virt_arch_pgd_alloc() implementations all do the same test-and-set of pgd_created. Move
this into common code.

Signed-off-by: Nicholas Piggin 
---
 tools/testing/selftests/kvm/include/kvm_util_base.h | 5 +
 tools/testing/selftests/kvm/lib/aarch64/processor.c | 4 
 tools/testing/selftests/kvm/lib/riscv/processor.c   | 4 
 tools/testing/selftests/kvm/lib/s390x/processor.c   | 4 
 tools/testing/selftests/kvm/lib/x86_64/processor.c  | 7 ++-
 5 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index fbc2a79369b8..16425da16861 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -821,7 +821,12 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm);
 
 static inline void virt_pgd_alloc(struct kvm_vm *vm)
 {
+   if (vm->pgd_created)
+   return;
+
virt_arch_pgd_alloc(vm);
+
+   vm->pgd_created = true;
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 5972a23b2765..76edd988178b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -79,13 +79,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
 
-   if (vm->pgd_created)
-   return;
-
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
 vm->memslots[MEM_REGION_PT]);
-   vm->pgd_created = true;
 }
 
 static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index d146ca71e0c0..7695ba2cd369 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -57,13 +57,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
 
-   if (vm->pgd_created)
-   return;
-
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
 vm->memslots[MEM_REGION_PT]);
-   vm->pgd_created = true;
 }
 
 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 15945121daf1..358e03f09c7a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -17,16 +17,12 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
 
-   if (vm->pgd_created)
-   return;
-
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
   KVM_GUEST_PAGE_TABLE_MIN_PADDR,
   vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
 
vm->pgd = paddr;
-   vm->pgd_created = true;
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index c39a4353ba19..d49068045bdf 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -126,11 +126,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
-   /* If needed, create page map l4 table. */
-   if (!vm->pgd_created) {
-   vm->pgd = vm_alloc_page_table(vm);
-   vm->pgd_created = true;
-   }
+   /* Create page map l4 table. */
+   vm->pgd = vm_alloc_page_table(vm);
 }
 
 static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
-- 
2.40.0



[PATCH v2 0/4] KVM: selftests: add powerpc support

2023-04-07 Thread Nicholas Piggin
This series adds initial KVM selftests support for powerpc
(64-bit, BookS, radix MMU).

Since v1:
- Update MAINTAINERS KVM PPC entry to include kvm selftests.
- Fixes and cleanups from Sean's review including new patch 1.
- Add 4K guest page support requiring new patch 2.

Thanks,
Nick

Nicholas Piggin (4):
  KVM: selftests: Move pgd_created check into virt_pgd_alloc
  KVM: selftests: Add aligned guest physical page allocator
  KVM: PPC: selftests: add support for powerpc
  KVM: PPC: selftests: add selftests sanity tests

 MAINTAINERS   |   2 +
 tools/testing/selftests/kvm/Makefile  |  15 +
 .../selftests/kvm/include/kvm_util_base.h |  27 ++
 .../selftests/kvm/include/powerpc/hcall.h |  21 +
 .../selftests/kvm/include/powerpc/ppc_asm.h   |  32 ++
 .../selftests/kvm/include/powerpc/processor.h |  33 ++
 .../selftests/kvm/lib/aarch64/processor.c |   4 -
 tools/testing/selftests/kvm/lib/guest_modes.c |   3 +
 tools/testing/selftests/kvm/lib/kvm_util.c|  56 ++-
 .../selftests/kvm/lib/powerpc/handlers.S  |  93 
 .../testing/selftests/kvm/lib/powerpc/hcall.c |  45 ++
 .../selftests/kvm/lib/powerpc/processor.c | 429 ++
 .../testing/selftests/kvm/lib/powerpc/ucall.c |  30 ++
 .../selftests/kvm/lib/riscv/processor.c   |   4 -
 .../selftests/kvm/lib/s390x/processor.c   |   4 -
 .../selftests/kvm/lib/x86_64/processor.c  |   7 +-
 tools/testing/selftests/kvm/powerpc/helpers.h |  46 ++
 .../testing/selftests/kvm/powerpc/null_test.c | 166 +++
 .../selftests/kvm/powerpc/rtas_hcall.c| 146 ++
 19 files changed, 1129 insertions(+), 34 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/hcall.h
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/ppc_asm.h
 create mode 100644 tools/testing/selftests/kvm/include/powerpc/processor.h
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/handlers.S
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/hcall.c
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/processor.c
 create mode 100644 tools/testing/selftests/kvm/lib/powerpc/ucall.c
 create mode 100644 tools/testing/selftests/kvm/powerpc/helpers.h
 create mode 100644 tools/testing/selftests/kvm/powerpc/null_test.c
 create mode 100644 tools/testing/selftests/kvm/powerpc/rtas_hcall.c

-- 
2.40.0



Re: [PATCH] powerpc/boot: Fix crt0.S current address branch form

2023-04-07 Thread Michael Ellerman
Nicholas Piggin  writes:
> Use the preferred form of branch-and-link for finding the current
> address so objtool doesn't think it is an unannotated intra-function
> call.

We don't run objtool on this code in mainline AFAIK. Because BOOTAS
doesn't call it.

Did you actually see a warning, or are you just anticipating that it
would warn about it?

This diff would run it on boot asm and seems to build OK, so maybe we
should do that.

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 08071bac056d..5d3a4c5354d7 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -223,7 +223,7 @@ quiet_cmd_bootcc = BOOTCC  $@
   cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
 
 quiet_cmd_bootas = BOOTAS  $@
-  cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
+  cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< $(cmd_objtool)
 
 quiet_cmd_bootar = BOOTAR  $@
  cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@. $(real-prereqs); mv $@. $@


cheers


[PATCH 6/6] powerpc/64: modules support building with PCREL addressing

2023-04-07 Thread Nicholas Piggin
Build modules using PCREL addressing when CONFIG_PPC_KERNEL_PCREL=y.

- The module loader must handle several new relocation types:

  * R_PPC64_REL24_NOTOC is a function call handled like R_PPC_REL24, but
does not restore r2 upon return. The external function call stub is
changed to use pcrel addressing to load the function pointer rather
than based on the module TOC.

  * R_PPC64_GOT_PCREL34 is a reference to external data. A GOT table
must be built by hand, because the linker adds this during the final
link (which is not done for kernel modules). The GOT table is built
similarly to the way the external function call stub table is. This
section is called .mygot because .got has a special meaning for the
linker, which can become upset.

  * R_PPC64_PCREL34 is used for local data addressing, but there is a
    special case where the percpu section is moved at load-time to the
    percpu area, which is out of range of this relocation. This requires
    that the PCREL34 relocations be converted to GOT_PCREL34 addressing
    (a sketch of the basic PCREL34 fixup follows this list).
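
[Editor's sketch of the in-range R_PPC64_PCREL34 fixup mentioned above,
derived from the ISA's prefixed-instruction immediate layout rather than
this patch's exact code: the signed 34-bit offset is split as 18 bits in
the prefix word and 16 bits in the suffix word. Kernel context assumed
(u32 from <linux/types.h>, -ERANGE from <linux/errno.h>); the helper
name is illustrative, module_64.c has its own equivalent.]

static int patch_pcrel34(u32 *location, unsigned long value)
{
	long offset = (long)(value - (unsigned long)location);

	/* Must fit in a signed 34-bit immediate. */
	if (offset < -(1L << 33) || offset >= (1L << 33))
		return -ERANGE;

	/* Prefix word carries imm bits 33:16, suffix word bits 15:0. */
	location[0] = (location[0] & ~0x3ffff) | ((offset >> 16) & 0x3ffff);
	location[1] = (location[1] & ~0xffff) | (offset & 0xffff);
	return 0;
}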

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Makefile   |   5 +-
 arch/powerpc/include/asm/module.h   |  10 +-
 arch/powerpc/include/asm/ppc_asm.h  |   6 +-
 arch/powerpc/include/uapi/asm/elf.h |   4 +
 arch/powerpc/kernel/module_64.c | 318 +---
 5 files changed, 309 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index d99fdd0f111a..85c1c2b23e7a 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -107,9 +107,7 @@ LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext
 LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y)
 
 ifdef CONFIG_PPC64
-ifdef CONFIG_PPC_KERNEL_PCREL
-   KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-pcrel)
-endif
+ifndef CONFIG_PPC_KERNEL_PCREL
 ifeq ($(call cc-option-yn,-mcmodel=medium),y)
# -mcmodel=medium breaks modules because it uses 32bit offsets from
# the TOC pointer to create pointers where possible. Pointers into the
@@ -124,6 +122,7 @@ else
export NO_MINIMAL_TOC := -mno-minimal-toc
 endif
 endif
+endif
 
 CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
 ifndef CONFIG_CC_IS_CLANG
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 09e2ffd360bb..ac53606c2594 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -27,8 +27,13 @@ struct ppc_plt_entry {
 struct mod_arch_specific {
 #ifdef __powerpc64__
unsigned int stubs_section; /* Index of stubs section in module */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+   unsigned int got_section;   /* What section is the GOT? */
+   unsigned int pcpu_section;  /* .data..percpu section */
+#else
unsigned int toc_section;   /* What section is the TOC? */
bool toc_fixed; /* Have we fixed up .TOC.? */
+#endif
 
/* For module function descriptor dereference */
unsigned long start_opd;
@@ -52,12 +57,15 @@ struct mod_arch_specific {
 
 /*
  * Select ELF headers.
- * Make empty section for module_frob_arch_sections to expand.
+ * Make empty sections for module_frob_arch_sections to expand.
  */
 
 #ifdef __powerpc64__
 #ifdef MODULE
asm(".section .stubs,\"ax\",@nobits; .align 3; .previous");
+#ifdef CONFIG_PPC_KERNEL_PCREL
+   asm(".section .mygot,\"a\",@nobits; .align 3; .previous");
+#endif
 #endif
 #else
 #ifdef MODULE
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 9315f007d010..1a00523559e7 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -183,7 +183,7 @@
 /*
  * Used to name C functions called from asm
  */
-#if defined(CONFIG_PPC_KERNEL_PCREL) && !defined(MODULE)
+#ifdef CONFIG_PPC_KERNEL_PCREL
 #define CFUNC(name) name@notoc
 #else
 #define CFUNC(name) name
@@ -216,7 +216,7 @@
.globl name; \
 name:
 
-#if defined(CONFIG_PPC_KERNEL_PCREL) && !defined(MODULE)
+#ifdef CONFIG_PPC_KERNEL_PCREL
 #define _GLOBAL_TOC _GLOBAL
 #else
 #define _GLOBAL_TOC(name) \
@@ -379,7 +379,7 @@ GLUE(.,name):
ori reg, reg, (expr)@l; \
rldimi  reg, tmp, 32, 0
 
-#if defined(CONFIG_PPC_KERNEL_PCREL) && !defined(MODULE)
+#ifdef CONFIG_PPC_KERNEL_PCREL
 #define LOAD_REG_ADDR(reg,name)\
pla reg,name@pcrel
 
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 308857123a08..dbc4a5b8d02d 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -279,8 +279,12 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
 #define R_PPC64_TLSLD  108
 #define R_PPC64_TOCSAVE109
 
+#define R_PPC64_REL24_NOTOC 116
 #define R_PPC64_ENTRY  118
 
+#define R_PPC64_PCREL34 132
+#define R_PPC64_GOT_PCREL34 133
+
 #define R_PPC64_REL16  

[PATCH 5/6] powerpc/64: vmlinux support building with PCREL addressing

2023-04-07 Thread Nicholas Piggin
PC-Relative or PCREL addressing is an extension to the ELF ABI which
uses Power ISA v3.1 PC-relative instructions to calculate addresses,
rather than the traditional TOC scheme.
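
[Editor's illustration of the two schemes, per the ELFv2 ABI rather than
this patch; actual code generation varies with the symbol and model:]

/*
 * int x;
 * int get_x(void) { return x; }
 *
 * TOC (medium code model):      PCREL (-mpcrel, ISA v3.1):
 *   addis r9,r2,x@toc@ha          plwz  r3,x@pcrel(0),1
 *   lwz   r3,x@toc@l(r9)
 *
 * The TOC form depends on r2 holding the TOC pointer; the pcrel form
 * addresses relative to the instruction itself, which is what allows
 * r2 to be poisoned below.
 */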

Add an option to build vmlinux using pcrel addressing. Modules continue
to use TOC addressing.

- TOC address helpers and r2 are poisoned with -1 when running vmlinux.
  r2 could be used for something useful once things are ironed out.

- Assembly must call C functions with @notoc annotation, or the linker
  complains about a missing nop after the call. This is done with the
  CFUNC macro introduced earlier.

- Boot: with the exception of prom_init, the execution branches to the
  kernel virtual address early in boot, before any addresses are
  generated, which ensures 34-bit pcrel addressing does not miss the
  high PAGE_OFFSET bits. TOC relative addressing has a similar
  requirement. prom_init does not go to the virtual address and its
  addresses should not carry over to the post-prom kernel.

- Ftrace trampolines are converted from TOC addressing to pcrel
  addressing, including module ftrace trampolines that currently use the
  kernel TOC to find ftrace target functions.

- BPF function prologue and function calling generation are converted
  from TOC to pcrel.

- copypage_64.S has an interesting problem, prefixed instructions have
  alignment restrictions so the linker can add padding, which makes the
  assembler treat the difference between two local labels as
  non-constant even if alignment is arranged so padding is not required.
  This may need toolchain help to solve nicely, for now move the prefix
  instruction out of the alternate patch section to work around it.

This reduces kernel text size by about 6%.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig   |  3 ++
 arch/powerpc/Makefile  |  7 +++
 arch/powerpc/include/asm/paca.h|  2 +
 arch/powerpc/include/asm/ppc-opcode.h  |  8 
 arch/powerpc/include/asm/ppc_asm.h | 19 
 arch/powerpc/include/asm/sections.h|  5 +++
 arch/powerpc/kernel/asm-offsets.c  |  2 +
 arch/powerpc/kernel/head_64.S  | 14 ++
 arch/powerpc/kernel/irq.c  |  8 
 arch/powerpc/kernel/module_64.c| 60 +++---
 arch/powerpc/kernel/paca.c |  2 +
 arch/powerpc/kernel/trace/ftrace.c | 50 -
 arch/powerpc/kernel/vector.S   |  6 +++
 arch/powerpc/kernel/vmlinux.lds.S  |  6 +++
 arch/powerpc/lib/copypage_64.S | 10 +
 arch/powerpc/net/bpf_jit.h | 10 +++--
 arch/powerpc/net/bpf_jit_comp64.c  | 35 +++
 arch/powerpc/platforms/Kconfig.cputype | 18 
 arch/powerpc/xmon/xmon.c   |  2 +
 19 files changed, 228 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 19c13733a4ed..08df2cba8b9d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -7,6 +7,9 @@ config CC_HAS_ELFV2
 config CC_HAS_PREFIXED
def_bool PPC64 && $(cc-option, -mcpu=power10 -mprefixed)
 
+config CC_HAS_PCREL
+   def_bool PPC64 && $(cc-option, -mcpu=power10 -mpcrel)
+
 config 32BIT
bool
default y if PPC32
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index de8bb40b73c0..d99fdd0f111a 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -107,6 +107,9 @@ LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext
 LDFLAGS_vmlinux:= $(LDFLAGS_vmlinux-y)
 
 ifdef CONFIG_PPC64
+ifdef CONFIG_PPC_KERNEL_PCREL
+   KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-pcrel)
+endif
 ifeq ($(call cc-option-yn,-mcmodel=medium),y)
# -mcmodel=medium breaks modules because it uses 32bit offsets from
# the TOC pointer to create pointers where possible. Pointers into the
@@ -186,7 +189,11 @@ KBUILD_CFLAGS += $(call cc-option,-mprefixed)
 else
 KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
 endif
+ifdef CONFIG_PPC_KERNEL_PCREL
+KBUILD_CFLAGS += $(call cc-option,-mpcrel)
+else
 KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
+endif
 
 # No AltiVec or VSX or MMA instructions when building kernel
 KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 0ab3511a47d7..da0377f46597 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -88,7 +88,9 @@ struct paca_struct {
u16 lock_token; /* Constant 0x8000, used in locks */
 #endif
 
+#ifndef CONFIG_PPC_KERNEL_PCREL
u64 kernel_toc; /* Kernel TOC address */
+#endif
u64 kernelbase; /* Base address of kernel */
u64 kernel_msr; /* MSR while running in kernel */
void *emergency_sp; /* pointer to emergency stack */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 21e33e46f4b8..ca5a0da7df4e 100644
--- 

[PATCH 4/6] powerpc: add CFUNC assembly label annotation

2023-04-07 Thread Nicholas Piggin
This macro is to be used in assembly where C functions are called.
pcrel addressing mode requires branches to functions with a
localentry value of 1 to have either a trailing nop or @notoc.
This macro permits the latter without changing callers.
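
[Editor's note on the underlying ABI rule, as an illustration rather
than patch content; some_c_function is a placeholder:]

/*
 * A function built pcrel carries st_other localentry == 1: a single
 * entry point that neither requires nor preserves r2. A caller must
 * therefore either mark the call as needing no TOC restore:
 *
 *     bl  some_c_function@notoc
 *
 * or leave a nop slot after the call for the linker to patch:
 *
 *     bl  some_c_function
 *     nop
 *
 * CFUNC() expands to the @notoc form when the kernel is built pcrel
 * and to the bare name otherwise, so call sites stay unchanged.
 */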

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/ppc_asm.h  |   5 ++
 arch/powerpc/kernel/exceptions-64s.S| 112 
 arch/powerpc/kernel/head_64.S   |  12 +--
 arch/powerpc/kernel/interrupt_64.S  |  28 +++---
 arch/powerpc/kernel/misc_64.S   |   2 +-
 arch/powerpc/kernel/vdso/gettimeofday.S |   6 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  16 ++--
 arch/powerpc/lib/copypage_power7.S  |   4 +-
 arch/powerpc/lib/copyuser_power7.S  |   8 +-
 arch/powerpc/lib/hweight_64.S   |   8 +-
 arch/powerpc/lib/memcmp_64.S|   4 +-
 arch/powerpc/lib/memcpy_power7.S|   6 +-
 arch/powerpc/platforms/pseries/hvCall.S |   4 +-
 13 files changed, 112 insertions(+), 103 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index d2f44612f4b0..9f64f9a6a897 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -180,6 +180,11 @@
 
 #ifdef __KERNEL__
 
+/*
+ * Used to name C functions called from asm
+ */
+#define CFUNC(name) name
+
 /*
  * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
  * version below in the else case of the ifdef.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6441a1ba57ac..c33c8ebf8641 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_BODY system_reset
 
addir3,r1,STACK_INT_FRAME_REGS
-   bl  system_reset_exception
+   bl  CFUNC(system_reset_exception)
 
/* Clear MSR_RI before setting SRR0 and SRR1. */
li  r9,0
@@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
addir3,r1,STACK_INT_FRAME_REGS
 BEGIN_FTR_SECTION
-   bl  machine_check_early_boot
+   bl  CFUNC(machine_check_early_boot)
 END_FTR_SECTION(0, 1) // nop out after boot
-   bl  machine_check_early
+   bl  CFUNC(machine_check_early)
std r3,RESULT(r1)   /* Save result */
ld  r12,_MSR(r1)
 
@@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 * Queue up the MCE event so that we can log it later, while
 * returning from kernel or opal call.
 */
-   bl  machine_check_queue_event
+   bl  CFUNC(machine_check_queue_event)
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_KERNEL
 
@@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common)
 */
GEN_COMMON machine_check
addir3,r1,STACK_INT_FRAME_REGS
-   bl  machine_check_exception_async
+   bl  CFUNC(machine_check_exception_async)
b   interrupt_return_srr
 
 
@@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common)
  * done. Queue the event then call the idle code to do the wake up.
  */
 EXC_COMMON_BEGIN(machine_check_idle_common)
-   bl  machine_check_queue_event
+   bl  CFUNC(machine_check_queue_event)
 
/*
 * GPR-loss wakeups are relatively straightforward, because the
@@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce)
 BEGIN_FTR_SECTION
li  r10,0 /* clear MSR_RI */
mtmsrd  r10,1
-   bl  disable_machine_check
+   bl  CFUNC(disable_machine_check)
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld  r10,PACAKMSR(r13)
li  r3,MSR_ME
@@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 * the early handler which is a true NMI.
 */
addir3,r1,STACK_INT_FRAME_REGS
-   bl  machine_check_exception
+   bl  CFUNC(machine_check_exception)
 
/*
 * We will not reach here. Even if we did, there is no way out.
 * Call unrecoverable_exception and die.
 */
addir3,r1,STACK_INT_FRAME_REGS
-   bl  unrecoverable_exception
+   bl  CFUNC(unrecoverable_exception)
b   .
 
 
@@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common)
bne-1f
 #ifdef CONFIG_PPC_64S_HASH_MMU
 BEGIN_MMU_FTR_SECTION
-   bl  do_hash_fault
+   bl  CFUNC(do_hash_fault)
 MMU_FTR_SECTION_ELSE
-   bl  do_page_fault
+   bl  CFUNC(do_page_fault)
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 #else
-   bl  do_page_fault
+   bl  CFUNC(do_page_fault)
 #endif
b   interrupt_return_srr
 
-1: bl  do_break
+1: bl  CFUNC(do_break)
/*
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated 

[PATCH 3/6] powerpc/64: Add support to build with prefixed instructions

2023-04-07 Thread Nicholas Piggin
Add an option to build kernel and module with prefixed instructions if
the CPU and toolchain support it.

This is not related to kernel support for userspace execution of
prefixed instructions.

Building with prefixed instructions breaks some extended inline asm
memory addressing; for example, the compiler may provide immediates that
exceed the range of a simple load/store displacement. Whether this is a
toolchain or a kernel asm problem remains to be seen. For now, such
operands are replaced with simpler and less efficient direct register
addressing when compiling with prefixed instructions.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig   |  3 +++
 arch/powerpc/Makefile  |  4 +++
 arch/powerpc/include/asm/atomic.h  | 24 ++---
 arch/powerpc/include/asm/io.h  | 37 ++
 arch/powerpc/include/asm/uaccess.h | 28 +--
 arch/powerpc/kernel/trace/ftrace.c |  2 ++
 arch/powerpc/platforms/Kconfig.cputype | 20 ++
 7 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a6c4407d3ec8..19c13733a4ed 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -4,6 +4,9 @@ source "arch/powerpc/platforms/Kconfig.cputype"
 config CC_HAS_ELFV2
def_bool PPC64 && $(cc-option, -mabi=elfv2)
 
+config CC_HAS_PREFIXED
+   def_bool PPC64 && $(cc-option, -mcpu=power10 -mprefixed)
+
 config 32BIT
bool
default y if PPC32
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e91d7e91347d..de8bb40b73c0 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -181,7 +181,11 @@ ifdef CONFIG_476FPE_ERR46
 endif
 
 # No prefix or pcrel
+ifdef CONFIG_PPC_KERNEL_PREFIXED
+KBUILD_CFLAGS += $(call cc-option,-mprefixed)
+else
 KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
+endif
 KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
 
 # No AltiVec or VSX or MMA instructions when building kernel
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 486ab7889121..50212c44be2a 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -27,14 +27,22 @@ static __inline__ int arch_atomic_read(const atomic_t *v)
 {
int t;
 
-   __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+   /* -mprefixed can generate offsets beyond range, fall back hack */
+   if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+   __asm__ __volatile__("lwz %0,0(%1)" : "=r"(t) : 
"b"(>counter));
+   else
+   __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : 
"m<>"(v->counter));
 
return t;
 }
 
 static __inline__ void arch_atomic_set(atomic_t *v, int i)
 {
-   __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+   /* -mprefixed can generate offsets beyond range, fall back hack */
+   if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+   __asm__ __volatile__("stw %1,0(%2)" : "=m"(v->counter) : 
"r"(i), "b"(>counter));
+   else
+   __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : 
"r"(i));
 }
 
 #define ATOMIC_OP(op, asm_op, suffix, sign, ...)   \
@@ -226,14 +234,22 @@ static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
 {
s64 t;
 
-   __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+   /* -mprefixed can generate offsets beyond range, fall back hack */
+   if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+   __asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : 
"b"(>counter));
+   else
+   __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : 
"m<>"(v->counter));
 
return t;
 }
 
 static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
 {
-   __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+   /* -mprefixed can generate offsets beyond range, fall back hack */
+   if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+   __asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : 
"r"(i), "b"(>counter));
+   else
+   __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : 
"r"(i));
 }
 
 #define ATOMIC64_OP(op, asm_op) \
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index fc112a91d0c2..f1e657c9bbe8 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -97,6 +97,42 @@ extern bool isa_io_special;
  *
  */
 
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define DEF_MMIO_IN_X(name, size, insn) \
+static inline u##size name(const volatile u##size __iomem *addr)   \
+{  \
+   u##size ret;\
+   __asm__ 

[PATCH 2/6] powerpc/64s: Run at the kernel virtual address earlier in boot

2023-04-07 Thread Nicholas Piggin
This mostly consolidates the Book3E and Book3S behaviour in boot with
respect to executing from the physical or virtual address.

Book3E sets up kernel virtual linear map in start_initialization_book3e
and runs from the virtual linear alias after that. This change makes
Book3S begin to execute from the virtual alias at the same point. Book3S
can not use its MMU for that at this point, but when the MMU is disabled,
the virtual linear address correctly aliases to physical memory because
the top bits of the address are ignored with MMU disabled.

Secondaries execute from the virtual address similarly early.

This reduces the differences between subarchs, but the main motivation
was to enable the PC-relative addressing ABI for Book3S, where pointer
calculations must execute from the virtual address or the top bits of
the pointer will be lost. This is similar to the requirement the TOC
relative addressing already has that the TOC pointer use its virtual
address.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/head_64.S | 82 +++
 1 file changed, 44 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 5b2d607cd1e8..66c21061036b 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -76,6 +76,13 @@
  *   2. The kernel is entered at __start
  */
 
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry run at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
 OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
 USE_FIXED_SECTION(first_256B)
/*
@@ -303,13 +310,11 @@ _GLOBAL(fsl_secondary_thread_init)
/* turn on 64-bit mode */
bl  enable_64b_mode
 
-   /* get a valid TOC pointer, wherever we're mapped at */
-   bl  relative_toc
-   tovirt(r2,r2)
-
/* Book3E initialization */
mr  r3,r24
bl  book3e_secondary_thread_init
+   bl  relative_toc
+
b   generic_secondary_common_init
 
 #endif /* CONFIG_PPC_BOOK3E_64 */
@@ -331,16 +336,12 @@ _GLOBAL(generic_secondary_smp_init)
/* turn on 64-bit mode */
bl  enable_64b_mode
 
-   /* get a valid TOC pointer, wherever we're mapped at */
-   bl  relative_toc
-   tovirt(r2,r2)
-
 #ifdef CONFIG_PPC_BOOK3E_64
/* Book3E initialization */
mr  r3,r24
mr  r4,r25
bl  book3e_secondary_core_init
-
+   /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
 /*
  * After common core init has finished, check if the current thread is the
  * one we wanted to boot. If not, start the specified thread and stop the
@@ -378,6 +379,16 @@ _GLOBAL(generic_secondary_smp_init)
 10:
b   10b
 20:
+#else
+   /* Now the MMU is off, can branch to our PAGE_OFFSET address */
+   bcl 20,31,$+4
+1: mflrr11
+   addir11,r11,(2f - 1b)
+   tovirt(r11, r11)
+   mtctr   r11
+   bctr
+2:
+   bl  relative_toc
 #endif
 
 generic_secondary_common_init:
@@ -492,6 +503,8 @@ SYM_FUNC_START_LOCAL(start_initialization_book3s)
/* Switch off MMU if not already off */
bl  __mmu_off
 
+   /* Now the MMU is off, can return to our PAGE_OFFSET address */
+   tovirt(r25,r25)
mtlrr25
blr
 SYM_FUNC_END(start_initialization_book3s)
@@ -531,16 +544,19 @@ __start_initialization_multiplatform:
mr  r29,r9
 #endif
 
+   /* These functions return to the virtual (PAGE_OFFSET) address */
 #ifdef CONFIG_PPC_BOOK3E_64
bl  start_initialization_book3e
 #else
bl  start_initialization_book3s
 #endif /* CONFIG_PPC_BOOK3E_64 */
 
-   /* Get TOC pointer */
+   /* Get TOC pointer, virtual */
bl  relative_toc
 
/* find out where we are now */
+
+   /* OPAL doesn't pass base address in r4, have to derive it. */
bcl 20,31,$+4
 0: mflrr26 /* r26 = runtime addr here */
addis   r26,r26,(_stext - 0b)@ha
@@ -551,7 +567,7 @@ __start_initialization_multiplatform:
 __REF
 __boot_from_prom:
 #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-   /* Get TOC pointer */
+   /* Get TOC pointer, non-virtual */
bl  relative_toc
 
/* find out where we are now */
@@ -600,18 +616,11 @@ __boot_from_prom:
 __after_prom_start:
 #ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
-   lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
-   sldir25,r25,32
-#if defined(CONFIG_PPC_BOOK3E_64)
-   tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
-#if defined(CONFIG_PPC_BOOK3E_64)
-   tophys(r26,r26)
-#endif
cmplwi  cr0,r7,1/* flagged to 

[PATCH 0/6] powerpc/64: Build with PC-Relative addressing

2023-04-07 Thread Nicholas Piggin
This won't see a lot of real use until POWER10 is the oldest supported
CPU for distros, but being as we're quite a unique user of toolchain I'd
like to start ironing things out earlier rather than later. I'm making a
list of observations here, https://github.com/linuxppc/issues/issues/455
and will take them to toolchain developers after the kernel work is a bit
further along.

The series is working pretty well for me on pseries and powernv systems.
Since RFC I've tidied things a bit and fixed the remaining hacks and
known issues, mainly in the module loader.

The first two patches are already posted to the list, boot code changes
that aren't too interesting to toolchain.

Thanks,
Nick

Nicholas Piggin (6):
  powerpc/64: Move initial base and TOC pointer calculation
  powerpc/64s: Run at the kernel virtual address earlier in boot
  powerpc/64: Add support to build with prefixed instructions
  powerpc: add CFUNC assembly label annotation
  powerpc/64: vmlinux support building with PCREL addressing
  powerpc/64: modules support building with PCREL addressing

 arch/powerpc/Kconfig|   6 +
 arch/powerpc/Makefile   |  10 +
 arch/powerpc/include/asm/atomic.h   |  24 +-
 arch/powerpc/include/asm/io.h   |  37 +++
 arch/powerpc/include/asm/module.h   |  10 +-
 arch/powerpc/include/asm/paca.h |   2 +
 arch/powerpc/include/asm/ppc-opcode.h   |   8 +
 arch/powerpc/include/asm/ppc_asm.h  |  24 ++
 arch/powerpc/include/asm/sections.h |   5 +
 arch/powerpc/include/asm/uaccess.h  |  28 +-
 arch/powerpc/include/uapi/asm/elf.h |   4 +
 arch/powerpc/kernel/asm-offsets.c   |   2 +
 arch/powerpc/kernel/exceptions-64s.S| 112 +++
 arch/powerpc/kernel/head_64.S   | 130 
 arch/powerpc/kernel/interrupt_64.S  |  28 +-
 arch/powerpc/kernel/irq.c   |   8 +
 arch/powerpc/kernel/misc_64.S   |   2 +-
 arch/powerpc/kernel/module_64.c | 376 +---
 arch/powerpc/kernel/paca.c  |   2 +
 arch/powerpc/kernel/trace/ftrace.c  |  52 +++-
 arch/powerpc/kernel/vdso/gettimeofday.S |   6 +-
 arch/powerpc/kernel/vector.S|   6 +
 arch/powerpc/kernel/vmlinux.lds.S   |   6 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  16 +-
 arch/powerpc/lib/copypage_64.S  |  10 +
 arch/powerpc/lib/copypage_power7.S  |   4 +-
 arch/powerpc/lib/copyuser_power7.S  |   8 +-
 arch/powerpc/lib/hweight_64.S   |   8 +-
 arch/powerpc/lib/memcmp_64.S|   4 +-
 arch/powerpc/lib/memcpy_power7.S|   6 +-
 arch/powerpc/net/bpf_jit.h  |  10 +-
 arch/powerpc/net/bpf_jit_comp64.c   |  35 ++-
 arch/powerpc/platforms/Kconfig.cputype  |  38 +++
 arch/powerpc/platforms/pseries/hvCall.S |   4 +-
 arch/powerpc/xmon/xmon.c|   2 +
 35 files changed, 814 insertions(+), 219 deletions(-)

-- 
2.40.0



[PATCH 1/6] powerpc/64: Move initial base and TOC pointer calculation

2023-04-07 Thread Nicholas Piggin
A later change moves the non-prom case to run at the virtual address
earlier, which calls for virtual TOC and kernel base. Split these two
calculations for prom and non-prom to make that change simpler.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/head_64.S | 28 +++-
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1febb56ebaeb..5b2d607cd1e8 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -515,15 +515,6 @@ __start_initialization_multiplatform:
/* Zero r13 (paca) so early program check / mce don't use it */
li  r13,0
 
-   /* Get TOC pointer (current runtime address) */
-   bl  relative_toc
-
-   /* find out where we are now */
-   bcl 20,31,$+4
-0: mflrr26 /* r26 = runtime addr here */
-   addis   r26,r26,(_stext - 0b)@ha
-   addir26,r26,(_stext - 0b)@l /* current runtime base addr */
-
/*
 * Are we booted from a PROM Of-type client-interface ?
 */
@@ -545,11 +536,30 @@ __start_initialization_multiplatform:
 #else
bl  start_initialization_book3s
 #endif /* CONFIG_PPC_BOOK3E_64 */
+
+   /* Get TOC pointer */
+   bl  relative_toc
+
+   /* find out where we are now */
+   bcl 20,31,$+4
+0: mflrr26 /* r26 = runtime addr here */
+   addis   r26,r26,(_stext - 0b)@ha
+   addir26,r26,(_stext - 0b)@l /* current runtime base addr */
+
b   __after_prom_start
 
 __REF
 __boot_from_prom:
 #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+   /* Get TOC pointer */
+   bl  relative_toc
+
+   /* find out where we are now */
+   bcl 20,31,$+4
+0: mflrr26 /* r26 = runtime addr here */
+   addis   r26,r26,(_stext - 0b)@ha
+   addir26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/* Save parameters */
mr  r31,r3
mr  r30,r4
-- 
2.40.0



Re: [PATCHv2 pci-next 2/2] PCI/AER: Rate limit the reporting of the correctable errors

2023-04-07 Thread Grant Grundler
[reposting in plain text mode]

On Fri, Apr 7, 2023 at 12:46 PM Bjorn Helgaas  wrote:
>
> On Fri, Apr 07, 2023 at 11:53:27AM -0700, Grant Grundler wrote:
> > On Thu, Apr 6, 2023 at 12:50 PM Bjorn Helgaas  wrote:
> > > On Fri, Mar 17, 2023 at 10:51:09AM -0700, Grant Grundler wrote:
> > > > From: Rajat Khandelwal 
> > > >
> > > > There are many instances where correctable errors tend to inundate
> > > > the message buffer. We observe such instances during thunderbolt PCIe
> > > > tunneling.
> > ...
>
> > > >   if (info->severity == AER_CORRECTABLE)
> > > > - pci_info(dev, "   [%2d] %-22s%s\n", i, errmsg,
> > > > - info->first_error == i ? " (First)" : "");
> > > > + pci_info_ratelimited(dev, "   [%2d] %-22s%s\n", 
> > > > i, errmsg,
> > > > +  info->first_error == i ? " 
> > > > (First)" : "");
> > >
> > > I don't think this is going to reliably work the way we want.  We have
> > > a bunch of pci_info_ratelimited() calls, and each caller has its own
> > > ratelimit_state data.  Unless we call pci_info_ratelimited() exactly
> > > the same number of times for each error, the ratelimit counters will
> > > get out of sync and we'll end up printing fragments from error A mixed
> > > with fragments from error B.
> >
> > Ok - what I'm reading between the lines here is the output should be
> > emitted in one step, not multiple pci_info_ratelimited() calls. if the
> > code built an output string (using sprintnf()), and then called
> > pci_info_ratelimited() exactly once at the bottom, would that be
> > sufficient?
> >
> > > I think we need to explicitly manage the ratelimiting ourselves,
> > > similar to print_hmi_event_info() or print_extlog_rcd().  Then we can
> > > have a *single* ratelimit_state, and we can check it once to determine
> > > whether to log this correctable error.
> >
> > Is the rate limiting per call location or per device? From above, I
> > understood rate limiting is "per call location".  If the code only
> > has one call location, it should achieve the same goal, right?
>
> Rate-limiting is per call location, so yes, if we only have one call
> location, that would solve it.  It would also have the nice property
> that all the output would be atomic so it wouldn't get mixed with
> other stuff, and it might encourage us to be a little less wordy in
> the output.

+1 to all of those reasons. Especially reducing the number of lines output.

I'm going to be out for the next week. If someone else (Rajat
Khandelwal maybe?) wants to rework this to use one call location it
should be fairly straight forward. If not, I'll tackle this when I'm
back (in 2 weeks essentially).

>
> But I don't think we need output in a single step; we just need a
> single instance of ratelimit_state (or one for CPER path and another
> for native AER path), and that can control all the output for a single
> error.  E.g., print_hmi_event_info() looks like this:
>
>   static void print_hmi_event_info(...)
>   {
> static DEFINE_RATELIMIT_STATE(rs, ...);
>
> if (__ratelimit()) {
>   printk("%s%s Hypervisor Maintenance interrupt ...");
>   printk("%s Error detail: %s\n", ...);
>   printk("%s  HMER: %016llx\n", ...);
> }
>   }
>
> I think it's nice that the struct ratelimit_state is explicit and
> there's no danger of breaking it when adding another printk later.

True. But a single call to a "well documented" API is my preference
(assuming this is my choice).

> It *could* be per pci_dev, too, but I suspect it's not worth spending
> 40ish bytes per device for the ratelimit data.

Good - I don't think we need to make this per device - I had assumed
it was but also currently don't see a need for this.

cheers,
grant
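
[Editor's sketch of the single-ratelimit_state shape Bjorn describes,
applied to the correctable-error path. The aer_err_info fields match
drivers/pci/pcie/aer.c, but aer_error_string() is a stand-in for the
driver's real strings table, and the whole function is illustrative,
not the eventual patch.]

static void pci_print_aer_correctable(struct pci_dev *dev,
				      struct aer_err_info *info)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	int i;

	/* One gate for the whole error: all lines print, or none do. */
	if (!__ratelimit(&rs))
		return;

	for (i = 0; i < 32; i++) {
		if (!(info->status & BIT(i)))
			continue;
		pci_info(dev, "   [%2d] %-22s%s\n", i, aer_error_string(i),
			 info->first_error == i ? " (First)" : "");
	}
}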



Re: [PATCH v2 0/4] KVM: Refactor KVM stats macros and enable custom stat names

2023-04-07 Thread Sean Christopherson
On Mon, Mar 06, 2023, David Matlack wrote:
> David Matlack (4):
>   KVM: Refactor stats descriptor generation macros
>   KVM: Refactor designated initializer macros for struct _kvm_stats_desc
>   KVM: Allow custom names for KVM_STAT()
>   KVM: x86: Drop union for pages_{4k,2m,1g} stats
> 
>  arch/arm64/kvm/guest.c  |  14 +--
>  arch/mips/kvm/mips.c|  54 -
>  arch/powerpc/kvm/book3s.c   |  62 +-
>  arch/powerpc/kvm/booke.c|  48 
>  arch/riscv/kvm/vcpu.c   |  16 +--
>  arch/s390/kvm/kvm-s390.c| 198 
>  arch/x86/include/asm/kvm_host.h |   9 +-
>  arch/x86/kvm/x86.c  |  94 +++
>  include/linux/kvm_host.h| 179 +++--
>  9 files changed, 314 insertions(+), 360 deletions(-)

For the series,

Reviewed-by: Sean Christopherson 


Re: [PATCH] powerpc/32: Include thread_info.h in head_booke.h

2023-04-07 Thread Nick Desaulniers
On Thu, Apr 06, 2023 at 10:51:30AM -0700, Nathan Chancellor wrote:
> When building with W=1 after commit 80b6093b55e3 ("kbuild: add -Wundef
> to KBUILD_CPPFLAGS for W=1 builds"), the following warning occurs.
> 
>   In file included from arch/powerpc/kvm/bookehv_interrupts.S:26:
>   arch/powerpc/kvm/../kernel/head_booke.h:20:6: warning: "THREAD_SHIFT" is not defined, evaluates to 0 [-Wundef]
>  20 | #if (THREAD_SHIFT < 15)
> |  ^~~~
> 
> THREAD_SHIFT is defined in thread_info.h but it is not directly included
> in head_booke.h, so it is possible for THREAD_SHIFT to be undefined. Add
> the include to ensure that THREAD_SHIFT is always defined.
> 
> Reported-by: kernel test robot 
> Link: https://lore.kernel.org/202304050954.yskldczh-...@intel.com/
> Signed-off-by: Nathan Chancellor 

Thanks for the patch!
Reviewed-by: Nick Desaulniers 

> ---
>  arch/powerpc/kernel/head_booke.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> index 37d43c172676..b6b5b01a173c 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -5,6 +5,7 @@
>  #include <asm/ptrace.h>   /* for STACK_FRAME_REGS_MARKER */
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_booke_hv_asm.h>
> +#include <asm/thread_info.h>  /* for THREAD_SHIFT */
>  
>  #ifdef __ASSEMBLY__
>  
> 
> ---
> base-commit: b0bbe5a2915201e3231e788d716d39dc54493b03
> change-id: 20230406-wundef-thread_shift_booke-e08d806ed656
> 
> Best regards,
> -- 
> Nathan Chancellor 
> 
> 
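
As a standalone illustration of why -Wundef catches this class of bug
(FOO is a stand-in macro, not kernel code): in an #if expression, an
undefined identifier silently evaluates to 0, so the wrong branch is
selected with no diagnostic at all unless -Wundef is enabled.

	/* build with: cc -Wundef -c example.c */
	#if (FOO < 15)			/* warning: "FOO" is not defined */
	int frame_variant = 1;		/* taken, because FOO == 0 here */
	#else
	int frame_variant = 2;
	#endif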


Re: arch/powerpc/kvm/../kernel/head_booke.h:20:6: warning: "THREAD_SHIFT" is not defined, evaluates to 0

2023-04-07 Thread Nathan Chancellor
On Fri, Apr 07, 2023 at 04:08:43PM -0700, Nick Desaulniers wrote:
> On Tue, Apr 4, 2023 at 6:29 PM kernel test robot  wrote:
> >
> > Hi Masahiro,
> >
> > FYI, the error/warning still remains.
> >
> > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
> > head:   76f598ba7d8e2bfb4855b5298caedd5af0c374a8
> > commit: 80b6093b55e31c2c40ff082fb32523d4e852954f kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds
> > date:   4 months ago
> > config: powerpc-buildonly-randconfig-r003-20230405 (https://download.01.org/0day-ci/archive/20230405/202304050954.yskldczh-...@intel.com/config)
> > compiler: powerpc-linux-gcc (GCC) 12.1.0
> > reproduce (this is a W=1 build):
> > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> > chmod +x ~/bin/make.cross
> > # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=80b6093b55e31c2c40ff082fb32523d4e852954f
> > git remote add linus https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> > git fetch --no-tags linus master
> > git checkout 80b6093b55e31c2c40ff082fb32523d4e852954f
> > # save the config file
> > mkdir build_dir && cp config build_dir/.config
> > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc olddefconfig
> > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/ arch/powerpc/kvm/ virt/
> >
> > If you fix the issue, kindly add following tag where applicable
> > | Reported-by: kernel test robot 
> > | Link: https://lore.kernel.org/oe-kbuild-all/202304050954.yskldczh-...@intel.com/
> >
> > All warnings (new ones prefixed by >>):
> >
> >In file included from arch/powerpc/kvm/bookehv_interrupts.S:26:
> > >> arch/powerpc/kvm/../kernel/head_booke.h:20:6: warning: "THREAD_SHIFT" is not defined, evaluates to 0 [-Wundef]
> >   20 | #if (THREAD_SHIFT < 15)
> >  |  ^~~~
> 
> Should arch/powerpc/kernel/head_booke.h be #include'ing
> asm/thread_info.h before using THREAD_SHIFT?

I think so, sorry for not cc'ing you on
https://lore.kernel.org/linuxppc-dev/20230406-wundef-thread_shift_booke-v1-1-8deffa4d8...@kernel.org/

> >
> >
> > vim +/THREAD_SHIFT +20 arch/powerpc/kvm/../kernel/head_booke.h
> >
> > 1a4b739bbb4f88 Christophe Leroy 2019-04-30  10
> > 63dafe5728e735 Becky Bruce      2006-01-14  11  /*
> > 63dafe5728e735 Becky Bruce      2006-01-14  12   * Macros used for common Book-e exception handling
> > 63dafe5728e735 Becky Bruce      2006-01-14  13   */
> > 63dafe5728e735 Becky Bruce      2006-01-14  14
> > 63dafe5728e735 Becky Bruce      2006-01-14  15  #define SET_IVOR(vector_number, vector_label)   \
> > 63dafe5728e735 Becky Bruce      2006-01-14  16          li      r26,vector_label@l; \
> > 63dafe5728e735 Becky Bruce      2006-01-14  17          mtspr   SPRN_IVOR##vector_number,r26;   \
> > 63dafe5728e735 Becky Bruce      2006-01-14  18          sync
> > 63dafe5728e735 Becky Bruce      2006-01-14  19
> > e12401222f749c Yuri Tikhonov    2009-01-29 @20  #if (THREAD_SHIFT < 15)
> > e12401222f749c Yuri Tikhonov    2009-01-29  21  #define ALLOC_STACK_FRAME(reg, val) \
> > e12401222f749c Yuri Tikhonov    2009-01-29  22          addi reg,reg,val
> > e12401222f749c Yuri Tikhonov    2009-01-29  23  #else
> > e12401222f749c Yuri Tikhonov    2009-01-29  24  #define ALLOC_STACK_FRAME(reg, val) \
> > e12401222f749c Yuri Tikhonov    2009-01-29  25          addis   reg,reg,val@ha; \
> > e12401222f749c Yuri Tikhonov    2009-01-29  26          addi    reg,reg,val@l
> > e12401222f749c Yuri Tikhonov    2009-01-29  27  #endif
> > e12401222f749c Yuri Tikhonov    2009-01-29  28
> >
> > :: The code at line 20 was first introduced by commit
> > :: e12401222f749c37277a313d631dc024bbfd3b00 powerpc/44x: Support for 256KB PAGE_SIZE
> >
> > :: TO: Yuri Tikhonov 
> > :: CC: Josh Boyer 
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://github.com/intel/lkp-tests
> 
> 
> 
> -- 
> Thanks,
> ~Nick Desaulniers


Re: arch/powerpc/kvm/../kernel/head_booke.h:20:6: warning: "THREAD_SHIFT" is not defined, evaluates to 0

2023-04-07 Thread Nick Desaulniers
On Tue, Apr 4, 2023 at 6:29 PM kernel test robot  wrote:
>
> Hi Masahiro,
>
> FYI, the error/warning still remains.
>
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
> head:   76f598ba7d8e2bfb4855b5298caedd5af0c374a8
> commit: 80b6093b55e31c2c40ff082fb32523d4e852954f kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds
> date:   4 months ago
> config: powerpc-buildonly-randconfig-r003-20230405 (https://download.01.org/0day-ci/archive/20230405/202304050954.yskldczh-...@intel.com/config)
> compiler: powerpc-linux-gcc (GCC) 12.1.0
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=80b6093b55e31c2c40ff082fb32523d4e852954f
> git remote add linus https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> git fetch --no-tags linus master
> git checkout 80b6093b55e31c2c40ff082fb32523d4e852954f
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc olddefconfig
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kernel/ arch/powerpc/kvm/ virt/
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot 
> | Link: https://lore.kernel.org/oe-kbuild-all/202304050954.yskldczh-...@intel.com/
>
> All warnings (new ones prefixed by >>):
>
>In file included from arch/powerpc/kvm/bookehv_interrupts.S:26:
> >> arch/powerpc/kvm/../kernel/head_booke.h:20:6: warning: "THREAD_SHIFT" is not defined, evaluates to 0 [-Wundef]
>   20 | #if (THREAD_SHIFT < 15)
>  |  ^~~~

Should arch/powerpc/kernel/head_booke.h be #include'ing
asm/thread_info.h before using THREAD_SHIFT?

>
>
> vim +/THREAD_SHIFT +20 arch/powerpc/kvm/../kernel/head_booke.h
>
> 1a4b739bbb4f88 Christophe Leroy 2019-04-30  10
> 63dafe5728e735 Becky Bruce      2006-01-14  11  /*
> 63dafe5728e735 Becky Bruce      2006-01-14  12   * Macros used for common Book-e exception handling
> 63dafe5728e735 Becky Bruce      2006-01-14  13   */
> 63dafe5728e735 Becky Bruce      2006-01-14  14
> 63dafe5728e735 Becky Bruce      2006-01-14  15  #define SET_IVOR(vector_number, vector_label)   \
> 63dafe5728e735 Becky Bruce      2006-01-14  16          li      r26,vector_label@l; \
> 63dafe5728e735 Becky Bruce      2006-01-14  17          mtspr   SPRN_IVOR##vector_number,r26;   \
> 63dafe5728e735 Becky Bruce      2006-01-14  18          sync
> 63dafe5728e735 Becky Bruce      2006-01-14  19
> e12401222f749c Yuri Tikhonov    2009-01-29 @20  #if (THREAD_SHIFT < 15)
> e12401222f749c Yuri Tikhonov    2009-01-29  21  #define ALLOC_STACK_FRAME(reg, val) \
> e12401222f749c Yuri Tikhonov    2009-01-29  22          addi reg,reg,val
> e12401222f749c Yuri Tikhonov    2009-01-29  23  #else
> e12401222f749c Yuri Tikhonov    2009-01-29  24  #define ALLOC_STACK_FRAME(reg, val) \
> e12401222f749c Yuri Tikhonov    2009-01-29  25          addis   reg,reg,val@ha; \
> e12401222f749c Yuri Tikhonov    2009-01-29  26          addi    reg,reg,val@l
> e12401222f749c Yuri Tikhonov    2009-01-29  27  #endif
> e12401222f749c Yuri Tikhonov    2009-01-29  28
>
> :: The code at line 20 was first introduced by commit
> :: e12401222f749c37277a313d631dc024bbfd3b00 powerpc/44x: Support for 256KB PAGE_SIZE
>
> :: TO: Yuri Tikhonov 
> :: CC: Josh Boyer 
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests



-- 
Thanks,
~Nick Desaulniers


Re: [PATCHv2 pci-next 2/2] PCI/AER: Rate limit the reporting of the correctable errors

2023-04-07 Thread Bjorn Helgaas
On Fri, Apr 07, 2023 at 11:53:27AM -0700, Grant Grundler wrote:
> On Thu, Apr 6, 2023 at 12:50 PM Bjorn Helgaas  wrote:
> > On Fri, Mar 17, 2023 at 10:51:09AM -0700, Grant Grundler wrote:
> > > From: Rajat Khandelwal 
> > >
> > > There are many instances where correctable errors tend to inundate
> > > the message buffer. We observe such instances during thunderbolt PCIe
> > > tunneling.
> ...

> > >   if (info->severity == AER_CORRECTABLE)
> > > - pci_info(dev, "   [%2d] %-22s%s\n", i, errmsg,
> > > - info->first_error == i ? " (First)" : "");
> > > + pci_info_ratelimited(dev, "   [%2d] %-22s%s\n", i, errmsg,
> > > +  info->first_error == i ? " (First)" : "");
> >
> > I don't think this is going to reliably work the way we want.  We have
> > a bunch of pci_info_ratelimited() calls, and each caller has its own
> > ratelimit_state data.  Unless we call pci_info_ratelimited() exactly
> > the same number of times for each error, the ratelimit counters will
> > get out of sync and we'll end up printing fragments from error A mixed
> > with fragments from error B.
> 
> Ok - what I'm reading between the lines here is the output should be
> emitted in one step, not multiple pci_info_ratelimited() calls. If the
> code built an output string (using snprintf()), and then called
> pci_info_ratelimited() exactly once at the bottom, would that be
> sufficient?
>
> > I think we need to explicitly manage the ratelimiting ourselves,
> > similar to print_hmi_event_info() or print_extlog_rcd().  Then we can
> > have a *single* ratelimit_state, and we can check it once to determine
> > whether to log this correctable error.
> 
> Is the rate limiting per call location or per device? From above, I
> understood rate limiting is "per call location".  If the code only
> has one call location, it should achieve the same goal, right?

Rate-limiting is per call location, so yes, if we only have one call
location, that would solve it.  It would also have the nice property
that all the output would be atomic so it wouldn't get mixed with
other stuff, and it might encourage us to be a little less wordy in
the output.

But I don't think we need output in a single step; we just need a
single instance of ratelimit_state (or one for CPER path and another
for native AER path), and that can control all the output for a single
error.  E.g., print_hmi_event_info() looks like this:

  static void print_hmi_event_info(...)
  {
static DEFINE_RATELIMIT_STATE(rs, ...);

if (__ratelimit(&rs)) {
  printk("%s%s Hypervisor Maintenance interrupt ...");
  printk("%s Error detail: %s\n", ...);
  printk("%s  HMER: %016llx\n", ...);
}
  }

I think it's nice that the struct ratelimit_state is explicit and
there's no danger of breaking it when adding another printk later.

It *could* be per pci_dev, too, but I suspect it's not worth spending
40ish bytes per device for the ratelimit data.

Bjorn
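
A minimal sketch of the structure Bjorn describes, adapted to the AER
correctable path; the function name and message text are placeholders,
not the eventual patch:

	static void aer_print_correctable(struct pci_dev *dev,
					  struct aer_err_info *info)
	{
		/* One static ratelimit_state covers every line of the
		 * report, so the whole dump is either printed or
		 * suppressed; fragments can no longer get out of sync
		 * across the individual printk calls.
		 */
		static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);
		int i;

		if (!__ratelimit(&rs))
			return;

		pci_info(dev, "PCIe Bus Error: severity=Corrected\n");
		for (i = 0; i < 32; i++)
			if (info->status & (1 << i))
				pci_info(dev, "   [%2d]%s\n", i,
					 info->first_error == i ? " (First)" : "");
	}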


Re: [PATCHv2 pci-next 2/2] PCI/AER: Rate limit the reporting of the correctable errors

2023-04-07 Thread Grant Grundler
On Thu, Apr 6, 2023 at 12:50 PM Bjorn Helgaas  wrote:
>
> On Fri, Mar 17, 2023 at 10:51:09AM -0700, Grant Grundler wrote:
> > From: Rajat Khandelwal 
> >
> > There are many instances where correctable errors tend to inundate
> > the message buffer. We observe such instances during thunderbolt PCIe
> > tunneling.
> >
> > It's true that they are mitigated by the hardware and are non-fatal
> > but we shouldn't be spamming the logs with such correctable errors as it
> > confuses other kernel developers less familiar with PCI errors, support
> > staff, and users who happen to look at the logs, hence rate limit them.
> >
> > A typical example log inside an HP TBT4 dock:
> > [54912.661142] pcieport :00:07.0: AER: Multiple Corrected error received: :2b:00.0
> > [54912.661194] igc :2b:00.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
> > [54912.661203] igc :2b:00.0:   device [8086:5502] error status/mask=1100/2000
> > [54912.661211] igc :2b:00.0:[ 8] Rollover
> > [54912.661219] igc :2b:00.0:[12] Timeout
> > [54982.838760] pcieport :00:07.0: AER: Corrected error received: :2b:00.0
> > [54982.838798] igc :2b:00.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
> > [54982.838808] igc :2b:00.0:   device [8086:5502] error status/mask=1000/2000
> > [54982.838817] igc :2b:00.0:[12] Timeout
>
> The timestamps don't contribute to understanding the problem, so we
> can omit them.

Ok.

> > This gets repeated continuously, thus inundating the buffer.
> >
> > Signed-off-by: Rajat Khandelwal 
> > Signed-off-by: Grant Grundler 
> > ---
> >  drivers/pci/pcie/aer.c | 42 ++++++++++++++++++++++++++++--------------
> >  1 file changed, 28 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index cb6b96233967..b592cea8bffe 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -706,8 +706,8 @@ static void __aer_print_error(struct pci_dev *dev,
> >   errmsg = "Unknown Error Bit";
> >
> >   if (info->severity == AER_CORRECTABLE)
> > - pci_info(dev, "   [%2d] %-22s%s\n", i, errmsg,
> > - info->first_error == i ? " (First)" : "");
> > + pci_info_ratelimited(dev, "   [%2d] %-22s%s\n", i, errmsg,
> > +  info->first_error == i ? " (First)" : "");
>
> I don't think this is going to reliably work the way we want.  We have
> a bunch of pci_info_ratelimited() calls, and each caller has its own
> ratelimit_state data.  Unless we call pci_info_ratelimited() exactly
> the same number of times for each error, the ratelimit counters will
> get out of sync and we'll end up printing fragments from error A mixed
> with fragments from error B.

Ok - what I'm reading between the lines here is the output should be
emitted in one step, not multiple pci_info_ratelimited() calls. If the
code built an output string (using snprintf()), and then called
pci_info_ratelimited() exactly once at the bottom, would that be
sufficient?

> I think we need to explicitly manage the ratelimiting ourselves,
> similar to print_hmi_event_info() or print_extlog_rcd().  Then we can
> have a *single* ratelimit_state, and we can check it once to determine
> whether to log this correctable error.

Is the rate limiting per call location or per device? From above, I
understood rate limiting is "per call location".
If the code only has one call location, it should achieve the same goal, right?

cheers,
grant
>
> >   else
> >   pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
> >   info->first_error == i ? " (First)" : "");
> > @@ -719,7 +719,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
> >  {
> >   int layer, agent;
> >   int id = ((dev->bus->number << 8) | dev->devfn);
> > - const char *level;
> >
> >   if (!info->status) {
> >   pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> > @@ -730,14 +729,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
> >   layer = AER_GET_LAYER_ERROR(info->severity, info->status);
> >   agent = AER_GET_AGENT(info->severity, info->status);
> >
> > - level = (info->severity == AER_CORRECTABLE) ? KERN_INFO : KERN_ERR;
> > + if (info->severity == AER_CORRECTABLE) {
> > + pci_info_ratelimited(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> > +  aer_error_severity_string[info->severity],
> > +  aer_error_layer[layer], aer_agent_string[agent]);
> >
> > - pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> > -

Re: [PATCH] KVM: PPC: BOOK3S: book3s_hv_nested.c: improve branch prediction for k.alloc

2023-04-07 Thread Sean Christopherson
On Fri, Apr 07, 2023, Bagas Sanjaya wrote:
> On Fri, Apr 07, 2023 at 05:31:47AM -0400, Kautuk Consul wrote:
> > I used the unlikely() macro on the return values of the k.alloc
> > calls and found that it changes the code generation a bit.
> > Optimize all return paths of k.alloc calls by improving
> > branch prediction on return value of k.alloc.

Nit, this is improving code generation, not branch prediction.

> What about below?
> 
> "Improve branch prediction on kmalloc() and kzalloc() call by using
> unlikely() macro to optimize their return paths."

Another nit, using unlikely() doesn't necessarily provide a measurable
optimization.  As above, it does often improve code generation for the happy
path, but that doesn't always equate to improved performance, e.g. if the CPU
can easily predict the branch and/or there is no impact on the cache footprint.
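
For context, unlikely() is only a static layout hint to the compiler.
A rough illustration (my_unlikely and example_alloc are made-up names
for the sketch; the real macro lives in include/linux/compiler.h):

	#include <linux/slab.h>

	/* Roughly what the kernel's unlikely() expands to: a hint that
	 * the condition is almost always false, so the compiler can move
	 * the error path out of line and let the happy path fall through
	 * without a taken branch.  It does not program the CPU's dynamic
	 * branch predictor.
	 */
	#define my_unlikely(x)	__builtin_expect(!!(x), 0)

	static void *example_alloc(size_t n)
	{
		void *p = kzalloc(n, GFP_KERNEL);

		if (my_unlikely(!p))
			return NULL;	/* cold path, typically out of line */
		return p;
	}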


Re: [PATCH] KVM: PPC: BOOK3S: book3s_hv_nested.c: improve branch prediction for k.alloc

2023-04-07 Thread Bagas Sanjaya
On Fri, Apr 07, 2023 at 05:31:47AM -0400, Kautuk Consul wrote:
> I used the unlikely() macro on the return values of the k.alloc
> calls and found that it changes the code generation a bit.
> Optimize all return paths of k.alloc calls by improving
> branch prediction on return value of k.alloc.

What about below?

"Improve branch prediction on kmalloc() and kzalloc() call by using
unlikely() macro to optimize their return paths."

That is, try to avoid first-person construct (I).

Thanks.

-- 
An old man doll... just what I always wanted! - Clara




[PATCH] KVM: PPC: BOOK3S: book3s_hv_nested.c: improve branch prediction for k.alloc

2023-04-07 Thread Kautuk Consul
I used the unlikely() macro on the return values of the k.alloc
calls and found that it changes the code generation a bit.
Optimize all return paths of k.alloc calls by improving
branch prediction on return value of k.alloc.

Signed-off-by: Kautuk Consul 
---
 arch/powerpc/kvm/book3s_hv_nested.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 5a64a1341e6f..dbf2dd073e1f 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -446,7 +446,7 @@ long kvmhv_nested_init(void)
ptb_order = 12;
pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
   GFP_KERNEL);
-   if (!pseries_partition_tb) {
+   if (unlikely(!pseries_partition_tb)) {
pr_err("kvm-hv: failed to allocated nested partition table\n");
return -ENOMEM;
}
@@ -575,7 +575,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
return H_PARAMETER;
 
buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
-   if (!buf)
+   if (unlikely(!buf))
return H_NO_MEM;
 
gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
@@ -689,7 +689,7 @@ static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int
long shadow_lpid;
 
gp = kzalloc(sizeof(*gp), GFP_KERNEL);
-   if (!gp)
+   if (unlikely(!gp))
return NULL;
gp->l1_host = kvm;
gp->l1_lpid = lpid;
@@ -1633,7 +1633,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
/* 4. Insert the pte into our shadow_pgtable */
 
n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
-   if (!n_rmap)
+   if (unlikely(!n_rmap))
return RESUME_GUEST; /* Let the guest try again */
n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
-- 
2.39.2



Re: [PATCH] powerpc/bpf: populate extable entries only during the last pass

2023-04-07 Thread Christophe Leroy


On 06/04/2023 at 09:35, Hari Bathini wrote:
> Since commit 85e031154c7c ("powerpc/bpf: Perform complete extra passes
> to update addresses"), two additional passes are performed to avoid
> space and CPU time wastage on powerpc. But these extra passes led to
> WARN_ON_ONCE() hits in bpf_add_extable_entry(). Fix it by not adding
> extable entries during the extra pass.

Are you sure this change is correct?
During the extra pass the code can get shrunk or expanded (within the
limits of the size of the preliminary pass). Shouldn't extable entries
be populated during the last pass?

Christophe
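
To make the concern concrete, a rough self-contained sketch of a
multi-pass JIT flow; every name below is a simplified stand-in, not the
actual powerpc JIT code:

	struct jit_ctx { int idx; /* instructions emitted so far */ };

	/* Stand-ins for the real emit/record steps: */
	static void emit_body(struct jit_ctx *ctx) { ctx->idx += 100; }
	static void record_extable_entries(struct jit_ctx *ctx) { (void)ctx; }

	static void jit_compile(struct jit_ctx *ctx, int nr_passes)
	{
		for (int pass = 0; pass < nr_passes; pass++) {
			ctx->idx = 0;
			emit_body(ctx);	/* may shrink or expand vs. the previous pass */

			/* Offsets are only stable in the final pass;
			 * extable entries recorded in an earlier pass
			 * could point at instructions that have moved.
			 */
			if (pass == nr_passes - 1)
				record_extable_entries(ctx);
		}
	}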

> 
> Fixes: 85e031154c7c ("powerpc/bpf: Perform complete extra passes to update 
> addresses")
> Signed-off-by: Hari Bathini 
> ---
>   arch/powerpc/net/bpf_jit_comp32.c | 2 +-
>   arch/powerpc/net/bpf_jit_comp64.c | 2 +-
>   2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 7f91ea064c08..e788b1fbeee6 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -977,7 +977,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   if (size != BPF_DW && !fp->aux->verifier_zext)
>   EMIT(PPC_RAW_LI(dst_reg_h, 0));
>   
> - if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + if (BPF_MODE(code) == BPF_PROBE_MEM && !extra_pass) {
>   int insn_idx = ctx->idx - 1;
>   int jmp_off = 4;
>   
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 8dd3cabaa83a..1cc2777ec846 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -921,7 +921,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
>   addrs[++i] = ctx->idx * 4;
>   
> - if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + if (BPF_MODE(code) == BPF_PROBE_MEM && !extra_pass) {
>   ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
>   4, dst_reg);
>   if (ret)