[PATCH v4 2/2] LoongArch: Add steal time support in guest side

2024-05-24 Thread Bibo Mao
Per-CPU struct kvm_steal_time is added here; its size is 64 bytes and it is
also aligned to 64 bytes, so that the whole structure sits in one physical
page.

When a vcpu is onlined, function pv_enable_steal_time() is called. This
function passes the guest physical address of struct kvm_steal_time to the
hypervisor and tells it to enable steal time. When a vcpu goes offline, the
physical address is set to 0, which tells the hypervisor to disable steal
time.
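
As a sketch of both directions (the enable path mirrors pv_enable_steal_time()
in the patch below; pv_disable_steal_time() and its exact placement in the cpu
hotplug path are illustrative assumptions, not necessarily this patch's code):

static int pv_enable_steal_time(void)
{
	unsigned long addr;

	if (!has_steal_clock)
		return -EPERM;

	/* Physical address of this cpu's kvm_steal_time area, low bit = valid */
	addr = per_cpu_ptr_to_phys(this_cpu_ptr(&steal_time));
	addr |= KVM_STEAL_PHYS_VALID;
	kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
	return 0;
}

static void pv_disable_steal_time(void)
{
	/* Address 0 tells the hypervisor to stop updating this vcpu's area */
	if (has_steal_clock)
		kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0);
}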

Here is the output of vmstat on the guest when there is workload on both the
host and the guest. It shows the steal time (st) statistics.

procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free  inact active   si   so    bi    bo   in   cs us sy id wa st
15  1  0 7583616 184112  72208200  162   52 31  6 43  0 20
17  0  0 7583616 184704  721920 0 6318 6885  5 60  8  5 22
16  0  0 7583616 185392  721440 0 1766 1081  0 49  0  1 50
16  0  0 7583616 184816  723040 0 6300 6166  4 62 12  2 20
18  0  0 7583632 184480  722400 0 2814 1754  2 58  4  1 35

Signed-off-by: Bibo Mao 
---
 .../admin-guide/kernel-parameters.txt |   2 +-
 arch/loongarch/Kconfig|  11 ++
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/kernel/paravirt.c  | 133 ++
 arch/loongarch/kernel/time.c  |   2 +
 5 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ef25e06ec08c..3435eaab392b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4057,7 +4057,7 @@
prediction) vulnerability. System may allow data
leaks with this option.
 
-	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,EARLY] Disable
+	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY] Disable
			paravirtualized steal time accounting. steal time is
			computed, but won't influence scheduler behaviour
 
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 4bda59525a13..a1c4b0080039 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -641,6 +641,17 @@ config PARAVIRT
  over full virtualization.  However, when run without a hypervisor
  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_TIME_ACCOUNTING
+   bool "Paravirtual steal time accounting"
+   select PARAVIRT
+   help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
 endmenu
 
 config ARCH_SELECT_MEMORY_MODEL
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index 0965710f47f2..dddec49671ae 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -18,6 +18,7 @@ static inline u64 paravirt_steal_clock(int cpu)
 }
 
 int __init pv_ipi_init(void);
+int __init pv_time_init(void);
 
 #else
 
@@ -26,5 +27,9 @@ static inline int pv_ipi_init(void)
return 0;
 }
 
+static inline int pv_time_init(void)
+{
+   return 0;
+}
 #endif // CONFIG_PARAVIRT
 #endif
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 1633ed4f692f..68f3cd75862a 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -4,11 +4,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock;
 
 static u64 native_steal_clock(int cpu)
 {
@@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu)
 
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+   steal_acc = false;
+   return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
+static u64 para_steal_clock(int cpu)
+{
+   u64 steal;
+   struct kvm_steal_time *src;
+   int version;
+
+   src = &per_cpu(steal_time, cpu);
+   do {
+
+   version = src->version;
+   /* Make sure that the version is read before the steal */
+   virt_rmb();
+   steal = src->steal;
+   /* Make sure that the steal is read before the next version */
+   virt_rmb();
+
+   } while ((version & 1) || (version != src->version));
+   return steal;
+}
+
+static int pv_enable_steal_time(void)
+{
+   int cpu = smp_processor_id();
+   struct kvm_steal_time *st;

[PATCH v4 1/2] LoongArch: KVM: Add steal time support in kvm side

2024-05-24 Thread Bibo Mao
Steal time feature is added here on the kvm side. The VM can query the
supported features provided by the KVM hypervisor; feature
KVM_FEATURE_STEAL_TIME is added here. Like x86, the steal time structure is
kept in guest memory, and one hypercall function KVM_HCALL_FUNC_NOTIFY is
added for the guest to notify KVM to enable the feature.

One vcpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save
and restore the base address of the steal time structure when the VM is
migrated.
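
For illustration, a VMM could save and restore this base address across
migration roughly as below, using the generic KVM device-attr ioctls on the
vcpu fd (a sketch; error handling is elided and the helper names are made up):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Source side: read the current steal time GPA out of the vcpu. */
static uint64_t pvtime_save_gpa(int vcpu_fd)
{
	uint64_t gpa = 0;
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(unsigned long)&gpa,
	};

	ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
	return gpa;
}

/* Destination side: write the saved GPA back before resuming the guest. */
static void pvtime_restore_gpa(int vcpu_fd, uint64_t gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(unsigned long)&gpa,
	};

	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}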

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/Kconfig |   1 +
 arch/loongarch/kvm/exit.c  |  38 +++-
 arch/loongarch/kvm/vcpu.c  | 124 +
 8 files changed, 187 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index c87b6ea0ec47..2eb2f7572023 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
+#define KVM_REQ_STEAL_UPDATE   KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_SW_BP_MASK\
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
@@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
struct kvm_mp_state mp_state;
/* cpucfg */
u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+   /* paravirt steal time */
+   struct {
+   u64 guest_addr;
+   u64 last_steal;
+   struct gfn_to_hva_cache cache;
+   } st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 4ba2312e5f8c..a9ba8185d4af 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -14,6 +14,7 @@
 
 #define KVM_HCALL_SERVICE	HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
 #define  KVM_HCALL_FUNC_IPI	1
+#define  KVM_HCALL_FUNC_NOTIFY	2
 
 #define KVM_HCALL_SWDBG	HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
@@ -24,6 +25,15 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+#define KVM_STEAL_PHYS_VALID   BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASKGENMASK_ULL(63, 6)
+struct kvm_steal_time {
+   __u64 steal;
+   __u32 version;
+   __u32 flags;
+   __u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 590a92cb5416..d7e51300a89f 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val)
vcpu->arch.gprs[num] = val;
 }
 
+static inline bool kvm_pvtime_supported(void)
+{
+   return !!sched_info_on();
+}
 #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index eb09adda54b7..7a4633ef284b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -169,6 +169,7 @@
 #define  KVM_SIGNATURE "KVM\0"
 #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #define  KVM_FEATURE_IPI   BIT(1)
+#define  KVM_FEATURE_STEAL_TIMEBIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index f9abef382317..ddc5cab0ffd0 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -81,7 +81,11 @@ struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)	(TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
 #define KVM_IOC_CPUCFG(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG  0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
+#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index c4ef2b4d9797..248744b4d086 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
select KVM_MMIO
select HAVE_KVM_READONLY_MEM
select KVM_XFER_TO_GUEST_WORK
+   select SCHED_INFO
help
  Support hosting virtualized guest machines using
  hardware virtualization extensions. You will need
diff --git a/arch/loongarch/kvm/exit.c b/arch/l

[PATCH v4 0/2] LoongArch: Add steal time support

2024-05-24 Thread Bibo Mao
The para-virt feature steal time is added on both the kvm and guest kernel
sides. It is similar to other architectures: the steal time structure comes
from guest memory, and a pseudo register is used to save/restore the base
address of the steal time structure, so that when the VM is migrated, the
kvm module on the target machine knows the base address of steal time.

---
v3 ... v4:
  1. To resolve a compile dependency problem, select the SCHED_INFO option
when KVM is enabled.
  2. Put the newly added option PARAVIRT_TIME_ACCOUNTING in the same submenu
as PARAVIRT in file arch/loongarch/Kconfig.

v2 ... v3:
  1. Resolve code conflicts based on kernel 6.9.0
  2. Add kernel parameter no-steal-acc support on LoongArch in file
Documentation/admin-guide/kernel-parameters.txt
  3. Add strict checking of the pv steal time gpa address in functions
kvm_save_notify() and kvm_loongarch_pvtime_set_attr()

v1 ... v2:
  1. Add the PARAVIRT_TIME_ACCOUNTING kconfig option in file
arch/loongarch/Kconfig
  2. Rename functions, such as replacing pv_register_steal_time() with
pv_enable_steal_time() etc.

---

Bibo Mao (2):
  LoongArch: KVM: Add steal time support in kvm side
  LoongArch: Add steal time support in guest side

 .../admin-guide/kernel-parameters.txt |   2 +-
 arch/loongarch/Kconfig|  11 ++
 arch/loongarch/include/asm/kvm_host.h |   7 +
 arch/loongarch/include/asm/kvm_para.h |  10 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   4 +
 arch/loongarch/include/asm/loongarch.h|   1 +
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/include/uapi/asm/kvm.h |   4 +
 arch/loongarch/kernel/paravirt.c  | 133 ++
 arch/loongarch/kernel/time.c  |   2 +
 arch/loongarch/kvm/Kconfig|   1 +
 arch/loongarch/kvm/exit.c |  38 -
 arch/loongarch/kvm/vcpu.c | 124 
 13 files changed, 339 insertions(+), 3 deletions(-)


base-commit: 6e51b4b5bbc07e52b226017936874715629932d1
-- 
2.39.3




[PATCH v3 2/2] LoongArch: Add steal time support in guest side

2024-05-20 Thread Bibo Mao
Per-CPU struct kvm_steal_time is added here; its size is 64 bytes and it is
also aligned to 64 bytes, so that the whole structure sits in one physical
page.

When a vcpu is onlined, function pv_enable_steal_time() is called. This
function passes the guest physical address of struct kvm_steal_time to the
hypervisor and tells it to enable steal time. When a vcpu goes offline, the
physical address is set to 0, which tells the hypervisor to disable steal
time.

Here is the output of vmstat on the guest when there is workload on both the
host and the guest. It includes the steal time (st) statistics.

procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free  inact active   si   so    bi    bo   in   cs us sy id wa st
15  1  0 7583616 184112  72208200  162   52 31  6 43  0 20
17  0  0 7583616 184704  721920 0 6318 6885  5 60  8  5 22
16  0  0 7583616 185392  721440 0 1766 1081  0 49  0  1 50
16  0  0 7583616 184816  723040 0 6300 6166  4 62 12  2 20
18  0  0 7583632 184480  722400 0 2814 1754  2 58  4  1 35

Signed-off-by: Bibo Mao 
---
 .../admin-guide/kernel-parameters.txt |   2 +-
 arch/loongarch/Kconfig|  11 ++
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/kernel/paravirt.c  | 131 ++
 arch/loongarch/kernel/time.c  |   2 +
 5 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 45d95614ec44..502d98292692 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4049,7 +4049,7 @@
prediction) vulnerability. System may allow data
leaks with this option.
 
-	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,EARLY] Disable
+	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY] Disable
			paravirtualized steal time accounting. steal time is
			computed, but won't influence scheduler behaviour
 
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 42331d9a8dd7..76dd20bdf415 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -583,6 +583,17 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT_TIME_ACCOUNTING
+   bool "Paravirtual steal time accounting"
+   select PARAVIRT
+   help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index 0965710f47f2..dddec49671ae 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -18,6 +18,7 @@ static inline u64 paravirt_steal_clock(int cpu)
 }
 
 int __init pv_ipi_init(void);
+int __init pv_time_init(void);
 
 #else
 
@@ -26,5 +27,9 @@ static inline int pv_ipi_init(void)
return 0;
 }
 
+static inline int pv_time_init(void)
+{
+   return 0;
+}
 #endif // CONFIG_PARAVIRT
 #endif
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 1633ed4f692f..c14265fb9701 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -4,11 +4,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock;
 
 static u64 native_steal_clock(int cpu)
 {
@@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu)
 
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+   steal_acc = false;
+   return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
+static u64 para_steal_clock(int cpu)
+{
+   u64 steal;
+   struct kvm_steal_time *src;
+   int version;
+
+   src = &per_cpu(steal_time, cpu);
+   do {
+
+   version = src->version;
+   /* Make sure that the version is read before the steal */
+   virt_rmb();
+   steal = src->steal;
+   /* Make sure that the steal is read before the next version */
+   virt_rmb();
+
+   } while ((version & 1) || (version != src->version));
+   return steal;
+}
+
+static int pv_enable_steal_time(void)
+{
+   int cpu = smp_processor_id();
+   struct kvm_steal_time *st;
+   unsigned long addr;
+
+   if (!has_steal_clock)
+   return -EPERM;

[PATCH v3 0/2] LoongArch: Add steal time support

2024-05-20 Thread Bibo Mao
The para-virt feature steal time is added on both the kvm and guest kernel
sides. It is similar to other architectures: the steal time structure comes
from guest memory, and a pseudo register is used to save/restore the base
address of the steal time structure, so that VM migration is supported as
well.

---
v2 ... v3:
  1. Resolve code conflicts based on kernel 6.9.0
  2. Add kernel parameter no-steal-acc support on LoongArch in file
Documentation/admin-guide/kernel-parameters.txt
  3. Add strict checking of the pv steal time gpa address in functions
kvm_save_notify() and kvm_loongarch_pvtime_set_attr()

v1 ... v2:
  1. Add the PARAVIRT_TIME_ACCOUNTING kconfig option in file
arch/loongarch/Kconfig
  2. Rename functions, such as replacing pv_register_steal_time() with
pv_enable_steal_time() etc.

---
Bibo Mao (2):
  LoongArch: KVM: Add steal time support in kvm side
  LoongArch: Add steal time support in guest side

 .../admin-guide/kernel-parameters.txt |   2 +-
 arch/loongarch/Kconfig|  11 ++
 arch/loongarch/include/asm/kvm_host.h |   7 +
 arch/loongarch/include/asm/kvm_para.h |  10 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   4 +
 arch/loongarch/include/asm/loongarch.h|   1 +
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/include/uapi/asm/kvm.h |   4 +
 arch/loongarch/kernel/paravirt.c  | 131 ++
 arch/loongarch/kernel/time.c  |   2 +
 arch/loongarch/kvm/exit.c |  38 -
 arch/loongarch/kvm/vcpu.c | 124 +
 12 files changed, 336 insertions(+), 3 deletions(-)


base-commit: 3c999d1ae3c75991902a1a7dad0cb62c2a3008b4
-- 
2.39.3




[PATCH v3 1/2] LoongArch: KVM: Add steal time support in kvm side

2024-05-20 Thread Bibo Mao
Steal time feature is added here on the kvm side. The VM can query the
supported features provided by the KVM hypervisor; feature
KVM_FEATURE_STEAL_TIME is added here. Like x86, the steal time structure is
kept in guest memory, and one hypercall function KVM_HCALL_FUNC_NOTIFY is
added for the guest to notify KVM to enable the feature.

One vcpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save
and restore the base address of the steal time structure when the VM is
migrated.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/exit.c  |  38 +++-
 arch/loongarch/kvm/vcpu.c  | 124 +
 7 files changed, 186 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index c87b6ea0ec47..2eb2f7572023 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
+#define KVM_REQ_STEAL_UPDATE   KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_SW_BP_MASK\
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
@@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
struct kvm_mp_state mp_state;
/* cpucfg */
u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+   /* paravirt steal time */
+   struct {
+   u64 guest_addr;
+   u64 last_steal;
+   struct gfn_to_hva_cache cache;
+   } st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 4ba2312e5f8c..a9ba8185d4af 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -14,6 +14,7 @@
 
 #define KVM_HCALL_SERVICE  HYPERCALL_ENCODE(HYPERVISOR_KVM, 
KVM_HCALL_CODE_SERVICE)
 #define  KVM_HCALL_FUNC_IPI1
+#define  KVM_HCALL_FUNC_NOTIFY 2
 
 #define KVM_HCALL_SWDBG
HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
@@ -24,6 +25,15 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER-2UL
 
+#define KVM_STEAL_PHYS_VALID   BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASKGENMASK_ULL(63, 6)
+struct kvm_steal_time {
+   __u64 steal;
+   __u32 version;
+   __u32 flags;
+   __u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 590a92cb5416..d7e51300a89f 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val)
vcpu->arch.gprs[num] = val;
 }
 
+static inline bool kvm_pvtime_supported(void)
+{
+   return !!sched_info_on();
+}
 #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index eb09adda54b7..7a4633ef284b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -169,6 +169,7 @@
 #define  KVM_SIGNATURE "KVM\0"
 #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #define  KVM_FEATURE_IPI   BIT(1)
+#define  KVM_FEATURE_STEAL_TIMEBIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index f9abef382317..ddc5cab0ffd0 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -81,7 +81,11 @@ struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)(TYPE | KVM_REG_SIZE_U64 | (REG << 
LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, 
REG)
 #define KVM_IOC_CPUCFG(REG)
LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG  0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
+#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index c86e099af5ca..e2abd97fb13f 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -24,7 +24,7 @@
 static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
-   unsigned int index;
+   unsigned int index, ret;
 
if (inst.reg2_format.opcode != cpucfg_op)
return EMULATE_FAIL;
@@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 	vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;

[PATCH v2 1/2] LoongArch: KVM: Add steal time support in kvm side

2024-04-29 Thread Bibo Mao
Steal time feature is added here on the kvm side. The VM can query the
supported features provided by the KVM hypervisor; feature
KVM_FEATURE_STEAL_TIME is added here. Like x86, the steal time structure is
kept in guest memory, and one hypercall function KVM_HCALL_FUNC_NOTIFY is
added for the guest to notify KVM to enable the feature.

One vcpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save
and restore the base address of the steal time structure when the VM is
migrated.

Since it needs hypercall instruction emulation handling, it depends on this
patchset:
https://lore.kernel.org/all/20240428100518.1642324-1-maob...@loongson.cn/

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 +++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/exit.c  |  29 +-
 arch/loongarch/kvm/vcpu.c  | 120 +
 6 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 1921750d4b4c..30bda553c54d 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
+#define KVM_REQ_RECORD_STEAL   KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_VALID_MASK(KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP)
@@ -198,6 +199,12 @@ struct kvm_vcpu_arch {
struct kvm_mp_state mp_state;
/* cpucfg */
u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+   /* paravirt steal time */
+   struct {
+   u64 guest_addr;
+   u64 last_steal;
+   struct gfn_to_hva_cache cache;
+   } st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 56775554402a..5fb89e20432d 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -12,6 +12,7 @@
 #define KVM_HCALL_CODE_SWDBG   1
 #define KVM_HCALL_PV_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
 #define  KVM_HCALL_FUNC_PV_IPI	1
+#define  KVM_HCALL_FUNC_NOTIFY	2
 #define KVM_HCALL_SWDBG		HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
 /*
@@ -21,6 +22,15 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+#define KVM_STEAL_PHYS_VALID   BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASKGENMASK_ULL(63, 6)
+struct kvm_steal_time {
+   __u64 steal;
+   __u32 version;
+   __u32 flags;
+   __u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 0ad36704cb4b..ab6a5e93c280 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -168,6 +168,7 @@
 #define  KVM_SIGNATURE "KVM\0"
 #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #define  KVM_FEATURE_PV_IPIBIT(1)
+#define  KVM_FEATURE_STEAL_TIMEBIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index 8f78b23672ac..286b5ce93a57 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -80,7 +80,11 @@ struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)	(TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
 #define KVM_IOC_CPUCFG(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG  0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
+#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 680aeaa4aeeb..af0f1c46e4eb 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -209,7 +209,7 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
 static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
-   unsigned int index;
+   unsigned int index, ret;
 
rd = inst.reg2_format.rd;
rj = inst.reg2_format.rj;
@@ -232,7 +232,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
case CPUCFG_KVM_FEATURE:
-   vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+   ret = KVM_FEATURE_PV_IPI;
+   if (sched_info_on())
+           ret |= KVM_FEATURE_STEAL_TIME;

[PATCH v2 2/2] LoongArch: Add steal time support in guest side

2024-04-29 Thread Bibo Mao
Per-CPU struct kvm_steal_time is added here; its size is 64 bytes and it is
also aligned to 64 bytes, so that the whole structure sits in one physical
page.

When a vcpu is onlined, function pv_enable_steal_time() is called. This
function passes the guest physical address of struct kvm_steal_time to the
hypervisor and tells it to enable steal time. When a vcpu goes offline, the
physical address is set to 0, which tells the hypervisor to disable steal
time.

Here is the output of vmstat on the guest when there is workload on both the
host and the guest. It includes the steal time (st) statistics.

procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free  inact active   si   so    bi    bo   in   cs us sy id wa st
15  1  0 7583616 184112  72208200  162   52 31  6 43  0 20
17  0  0 7583616 184704  721920 0 6318 6885  5 60  8  5 22
16  0  0 7583616 185392  721440 0 1766 1081  0 49  0  1 50
16  0  0 7583616 184816  723040 0 6300 6166  4 62 12  2 20
18  0  0 7583632 184480  722400 0 2814 1754  2 58  4  1 35

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  11 +++
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/kernel/paravirt.c  | 131 ++
 arch/loongarch/kernel/time.c  |   2 +
 4 files changed, 149 insertions(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0a1540a8853e..f3a03c33a052 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -592,6 +592,17 @@ config PARAVIRT
  over full virtualization.  However, when run without a hypervisor
  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_TIME_ACCOUNTING
+   bool "Paravirtual steal time accounting"
+   select PARAVIRT
+   help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index 58f7b7b89f2c..fe27fb5e82b8 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -17,11 +17,16 @@ static inline u64 paravirt_steal_clock(int cpu)
 }
 
 int pv_ipi_init(void);
+int __init pv_time_init(void);
 #else
 static inline int pv_ipi_init(void)
 {
return 0;
 }
 
+static inline int pv_time_init(void)
+{
+   return 0;
+}
 #endif // CONFIG_PARAVIRT
 #endif
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 9044ed62045c..3f83afc7e2d2 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -5,10 +5,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock;
 
 static u64 native_steal_clock(int cpu)
 {
@@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu)
 
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+   steal_acc = false;
+   return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
+static u64 para_steal_clock(int cpu)
+{
+   u64 steal;
+   struct kvm_steal_time *src;
+   int version;
+
+   src = &per_cpu(steal_time, cpu);
+   do {
+
+   version = src->version;
+   /* Make sure that the version is read before the steal */
+   virt_rmb();
+   steal = src->steal;
+   /* Make sure that the steal is read before the next version */
+   virt_rmb();
+
+   } while ((version & 1) || (version != src->version));
+   return steal;
+}
+
+static int pv_enable_steal_time(void)
+{
+   int cpu = smp_processor_id();
+   struct kvm_steal_time *st;
+   unsigned long addr;
+
+   if (!has_steal_clock)
+   return -EPERM;
+
+   st = &per_cpu(steal_time, cpu);
+   addr = per_cpu_ptr_to_phys(st);
+
+   /* The whole structure kvm_steal_time should be one page */
+   if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) {
+   pr_warn("Illegal PV steal time addr %lx\n", addr);
+   return -EFAULT;
+   }
+
+   addr |= KVM_STEAL_PHYS_VALID;
+   kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
+   return 0;
+}
+
 #ifdef CONFIG_SMP
 static void pv_send_ipi_single(int cpu, unsigned int action)
 {
@@ -110,6 +164,32 @@ static void pv_init_ipi(void)
if (r < 0)
panic("SWI0 IRQ request failed\n");
 }
+
+static void pv

[PATCH v2 0/2] LoongArch: Add steal time support

2024-04-29 Thread Bibo Mao
The para-virt feature steal time is added on both the kvm and guest kernel
sides. It is similar to other architectures: the steal time structure comes
from guest memory, and a pseudo register is used to save/restore the base
address of the steal time structure, so that VM migration is supported as
well.

---
v2:
  1. Add the PARAVIRT_TIME_ACCOUNTING kconfig option in file
arch/loongarch/Kconfig
  2. Rename functions, such as replacing pv_register_steal_time() with
pv_enable_steal_time() etc.

---
Bibo Mao (2):
  LoongArch: KVM: Add steal time support in kvm side
  LoongArch: Add steal time support in guest side

 arch/loongarch/Kconfig |  11 +++
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/asm/paravirt.h  |   5 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kernel/paravirt.c   | 131 +
 arch/loongarch/kernel/time.c   |   2 +
 arch/loongarch/kvm/exit.c  |  29 +-
 arch/loongarch/kvm/vcpu.c  | 120 ++
 10 files changed, 318 insertions(+), 2 deletions(-)


base-commit: 2c8159388952f530bd260e097293ccc0209240be
-- 
2.39.3




[PATCH v8 6/6] LoongArch: Add pv ipi support on guest kernel side

2024-04-28 Thread Bibo Mao
The PARAVIRT option and pv ipi are added on the guest kernel side; function
pv_ipi_init() installs the ipi sending and ipi receiving hooks. This function
first checks whether the system runs in VM mode. If the kernel runs in VM
mode, it calls function kvm_para_available() to detect the current hypervisor
type. Only KVM type detection is supported now; the paravirt functions can
work only if the current hypervisor type is KVM, since KVM is the only
hypervisor supported on LoongArch now.

PV IPI uses a virtual IPI sender and a virtual IPI receiver. With the virtual
IPI sender, the ipi message is stored in DDR memory rather than in emulated
HW. IPI multicast is supported, and 128 vcpus can receive IPIs at the same
time, like the x86 KVM method. The hypercall method is used for IPI sending.

With the virtual IPI receiver, HW SW0 is used rather than real IPI HW. Since
a VCPU has its own HW SW0, like the HW timer, there is no trap in the IPI
interrupt acknowledge. And since the IPI message is stored in DDR, there is
no trap in getting the IPI message.
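
A condensed sketch of the sender path (it assumes all destination cpus fall
within 128 of the lowest one; the real pv_send_ipi_mask() splits larger masks,
and irq_stat.message is the DDR message buffer added by this patch):

static void pv_send_ipi_mask_sketch(const struct cpumask *mask, unsigned int action)
{
	unsigned int cpu, min;
	u64 bitmap[2] = { 0, 0 };

	if (cpumask_empty(mask))
		return;

	min = cpumask_first(mask);
	for_each_cpu(cpu, mask) {
		/* Record the action in the destination's DDR message buffer */
		atomic_or(BIT(action), &per_cpu(irq_stat, cpu).message);
		/* Bit N of the bitmap pair addresses cpu (min + N) */
		__set_bit(cpu - min, (unsigned long *)bitmap);
	}

	/* One trap for the whole multicast instead of one per destination */
	kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap[0], bitmap[1], min);
}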

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|   9 ++
 arch/loongarch/include/asm/hardirq.h  |   1 +
 arch/loongarch/include/asm/paravirt.h |  27 
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |   2 +-
 arch/loongarch/kernel/paravirt.c  | 151 ++
 arch/loongarch/kernel/smp.c   |   4 +-
 8 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 54ad04dacdee..0a1540a8853e 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -583,6 +583,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+  This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.  However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..b26d596a73aa 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t message ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..58f7b7b89f2c
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_ipi_init(void);
+#else
+static inline int pv_ipi_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3a7620b66bc6..c9bfeda89e40 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index ce36897d1e5a..4863e6c1b739 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
 			per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
 
-   set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+   set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
 }
diff --git a/arch/loongarch/kernel/paravir

[PATCH v8 5/6] LoongArch: KVM: Add pv ipi support on kvm side

2024-04-28 Thread Bibo Mao
On LoongArch systems, the ipi hw uses iocsr registers: there is one iocsr
register access on ipi sending, and two iocsr accesses on ipi receiving, in
the ipi interrupt handler. In VM mode, every iocsr access causes the VM to
trap into the hypervisor, so one ipi hw notification costs three traps.

PV ipi is added for the VM: the hypercall instruction is used by the ipi
sender, and the hypervisor injects SWI into the destination vcpu. In the SWI
interrupt handler, only the estat CSR register is written to clear the irq,
and estat CSR register access does not trap into the hypervisor. So with pv
ipi supported, there is one trap for the pv ipi sender and no trap for the
ipi receiver: only one trap per ipi notification.

This patch also adds ipi multicast support; the method is similar to x86.
With ipi multicast support, an ipi notification can be sent to at most 128
vcpus at one time, which greatly reduces the number of traps into the
hypervisor.
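
On the host side, the handler walks the two bitmap arguments and injects SWI0
into each addressed vcpu. A condensed sketch, using the kvm_get_vcpu_by_cpuid()
helper added in patch 4/6 (the real handler also updates hypercall statistics):

static int kvm_pv_send_ipi_sketch(struct kvm_vcpu *vcpu)
{
	unsigned long ipi_bitmap;
	unsigned int i, min, cpu;
	struct kvm_vcpu *dest;

	min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
	for (i = 0; i < 2; i++, min += BITS_PER_LONG) {
		ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
		if (!ipi_bitmap)
			continue;

		for_each_set_bit(cpu, &ipi_bitmap, BITS_PER_LONG) {
			dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
			if (!dest)
				continue;

			/* Inject SWI0 so the guest sees a normal ipi interrupt */
			kvm_queue_irq(dest, INT_SWI0);
			kvm_vcpu_kick(dest);
		}
	}

	return 0;
}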

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 129 +
 arch/loongarch/include/asm/kvm_vcpu.h  |  10 ++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kvm/exit.c  |  73 +-
 arch/loongarch/kvm/vcpu.c  |   1 +
 6 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 3ba16ef1fe69..0b96c6303cf7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hypercall_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index d48f993ae206..a5809a854bae 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,16 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT		8
+#define HYPERCALL_CODE(vendor, code)	((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HCALL_CODE_PV_SERVICE	0
+#define KVM_HCALL_PV_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI	1
+
 /*
  * LoongArch hypercall return code
  */
@@ -9,6 +19,125 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in a0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+   unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+   register unsigned long a3  asm("a3") = arg2;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+  

[PATCH v8 3/6] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-04-28 Thread Bibo Mao
The cpucfg instruction can be used to get processor features, and there is a
trap exception when it is executed in VM mode, so it can also be used to
provide cpu features to the VM. On real hardware, cpucfg area 0 - 20 is
used. Here a dedicated area 0x40000000 -- 0x400000ff is used by the KVM
hypervisor to provide PV features, and the area can be extended for other
hypervisors in the future. This area will never be used by real HW; it is
only used by software.
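
From the guest, hypervisor detection through this area then looks roughly like
the sketch below (read_cpucfg() is the existing LoongArch accessor; the probe
logic mirrors what the guest-side series does in kvm_para_available()):

static bool probe_kvm_hypervisor(void)
{
	/* Traps to the hypervisor in VM mode; reads 0 on real HW */
	return read_cpucfg(CPUCFG_KVM_SIG) == *(unsigned int *)KVM_SIGNATURE;
}

static unsigned int probe_kvm_features(void)
{
	/* One feature bit per service, e.g. BIT(1) for PV IPI in patch 5/6 */
	return read_cpucfg(CPUCFG_KVM_FEATURE);
}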

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 +
 arch/loongarch/kvm/exit.c  | 53 ++
 3 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x40000000 -- 0x400000ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE			0x40000000UL
+#define CPUCFG_KVM_SIZE			0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 923bbca9bd22..552a2fedbe44 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,44 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+   unsigned long plv;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*
+* Disable preemption since hw gcsr is accessed
+*/
+   preempt_disable();
+   plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT;
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   /* Cpucfg emulation between 0x40000000 -- 0x400000ff */
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   preempt_enable();
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +258,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from physical cpuid

2024-04-28 Thread Bibo Mao
The physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in CSR
register LOONGARCH_CSR_CPUID; it cannot be changed once the vcpu is created,
and the physical cpuids of two vcpus cannot be the same.

Different irqchips have different size declarations for the physical cpuid:
the max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max
cpuid supported by the IPI hardware is 1024, it is 256 for the extioi
irqchip, and 65536 for the MSI irqchip.

The smallest value among all the interrupt controllers is selected for now,
so the max cpuid size is defined as 256 by KVM, which comes from the extioi
irqchip.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..3ba16ef1fe69 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   boolenabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   spinlock_tphyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 3a8779065f73..b633fd28b8db 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+   map = vcpu->kvm->arch.phyid_map;
+   spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+           /* Discard duplicated cpuid set */
+           spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+           return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation */
+   

[PATCH v8 2/6] LoongArch: KVM: Add hypercall instruction emulation support

2024-04-28 Thread Bibo Mao
On LoongArch systems there is a hypercall instruction dedicated to
virtualization. When the system executes this instruction on the host side,
an illegal instruction exception is reported; however, it traps into the
host when it is executed in VM mode.

When the hypercall is emulated, the A0 register is set to the value
KVM_HCALL_INVALID_CODE rather than injecting an EXCCODE_INE invalid
instruction exception, so the VM can continue executing the following code.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 2dbec7853ae8..c862672ed953 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -26,4 +26,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..d48f993ae206
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_STATUS_SUCCESS   0
+#define KVM_HCALL_INVALID_CODE -1UL
+#define KVM_HCALL_INVALID_PARAMETER	-2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..923bbca9bd22 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as noop instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypercall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v8 0/6] LoongArch: Add pv ipi support on LoongArch VM

2024-04-28 Thread Bibo Mao
On a physical machine, the ipi HW uses IOCSR registers; however, there is a
trap into the hypervisor when a vcpu accesses IOCSR registers in VM mode.
SWI is an interrupt mechanism like SGI on ARM: software can send an interrupt
to a CPU, except that on LoongArch SWI can only be sent to the local CPU for
now. So SWI cannot be used for IPI on a real HW system; however, it can be
used in a VM when combined with the hypercall method. The IPI is sent with a
hypercall and an SWI interrupt is injected into the vcpu, and the vcpu treats
the SWI interrupt as an IPI.

With PV IPI supported, there is one trap for IPI sending and no trap for IPI
receiving. With the IOCSR HW ipi method, there is one trap for IPI sending
and two traps for ipi receiving.

IPI multicast support is also added for the VM; the idea comes from x86 PV
ipi. An IPI can be sent to 128 vcpus at one time. With IPI multicast support,
traps are reduced greatly.

Here is the microbenchmark data with the "perf bench futex wake" testcase on
a 3C5000 single-way machine; there are 16 cpus on a 3C5000 single-way
machine, and the VM has 16 vcpus as well. The benchmark reports the time in
ms to wake up 16 threads; smaller is better.

physical machine 0.0176 ms
VM original  0.1140 ms
VM with pv ipi patch 0.0481 ms

The VM boots successfully with 128/256 vcpus, and the runltp command from
package ltp-20230516 passes.

---
v7 --- v8:
 1. Remove kernel PLV mode checking with cpucfg emulation for hypervisor
feature inquiry.
 2. Remove the document about the loongarch hypercall ABI per request of
huacai; English/Chinese documentation will be added together later.

v6 --- v7:
  1. Refine the LoongArch virt document per review comments.
  2. Add functions kvm_read_reg()/kvm_write_reg() for hypercall emulation,
so that they can later be used for other trap emulations.

v5 --- v6:
  1. Add privilege checking when emulating cpucfg at index 0x40000000 --
0x400000FF; return 0 if it is not executed in kernel mode.
  2. Add a document about LoongArch pv ipi in the newly created directory
Documentation/virt/kvm/loongarch/
  3. Fix pv ipi handling in the kvm backend function kvm_pv_send_ipi(),
where min should be increased by BITS_PER_LONG for the second bitmap,
otherwise a VM with more than 64 vcpus fails to boot.
  4. Adjust the patch order and refine the code per review comments.

v4 --- v5:
  1. Refresh function/macro names per review comments.

v3 --- v4:
  1. Modify the pv ipi hook function names call_func_ipi() and
call_func_single_ipi() to send_ipi_mask()/send_ipi_single(), since pv
ipi is used for both remote function calls and reschedule notification.
  2. Refresh the changelog.

v2 --- v3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change the cpucfg base address from 0x10000000 to 0x40000000, in order
to avoid conflicts with future hw usage
  3. Adjust the patch order in this patchset, moving patch
Refine-ipi-ops-on-LoongArch-platform to the first position.

v1 --- v2:
  1. Add hw cpuid map support since ipi routing uses the hw cpuid
  2. Refine the changelog description
  3. Add hypercall statistics support for vcpu
  4. Align the percpu pv ipi message buffer to the cacheline
  5. Refine the pv ipi send logic: do not send an ipi message if there is
already a pending ipi message.
---
Bibo Mao (6):
  LoongArch/smp: Refine some ipi functions on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: KVM: Add vcpu search support from physical cpuid
  LoongArch: KVM: Add pv ipi support on kvm side
  LoongArch: Add pv ipi support on guest kernel side

 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 155 ++
 arch/loongarch/include/asm/kvm_vcpu.h |  11 ++
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 151 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/smp.c   |  62 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 132 +--
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 22 files changed, 690 insertions(+), 102 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create m

[PATCH v8 1/6] LoongArch/smp: Refine some ipi functions on LoongArch platform

2024-04-28 Thread Bibo Mao
This is a code refinement of ipi handling on the LoongArch platform; there
are three modifications.
1. Add generic function get_percpu_irq(), replacing some percpu irq
functions such as get_ipi_irq()/get_pmc_irq()/get_timer_irq() with
get_percpu_irq().

2. Change the definition of the parameter action used by functions
loongson_send_ipi_single() and loongson_send_ipi_mask(): it is now defined
in decimal encoding format on the ipi sender side. Normal decimal encoding
is used rather than binary bitmap encoding for the ipi action; the ipi hw
sender takes the decimal encoding code, while the ipi receiver gets binary
bitmap encoding, since the ipi hw converts the action into a bitmap in the
ipi message buffer (see the sketch after this list).

3. Add structure smp_ops on LoongArch platform so that pv ipi can be used
later.
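
As a rough sketch of that encoding convention (illustrative only; the
macros and IOCSR accessors exist in this tree, the surrounding code is not
part of the patch):

	/* The sender side passes the decimal action number ... */
	smp_ops.send_ipi_single(cpu, ACTION_CALL_FUNCTION);	/* value 2 */

	/* ... while the receiver side reads a bitmap from the ipi status
	 * register, where the same event shows up as BIT(2):
	 */
	u32 action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
	if (action & SMP_CALL_FUNCTION)	/* BIT(ACTION_CALL_FUNCTION) */
		generic_smp_call_function_interrupt();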

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..75d30529748c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*init_ipi)(void);
+   void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+   void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTION_BOOT_CPU    0
+#define ACTION_RESCHEDULE  1
+#define ACTION_CALL_FUNCTION   2
+#define SMP_BOOT_CPU   BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..ce36897d1e5a 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void

[PATCH 1/2] LoongArch: KVM: Add steal time support in kvm side

2024-03-26 Thread Bibo Mao
The steal time feature is added here on the kvm side. A VM can query the
supported features provided by the KVM hypervisor; feature
KVM_FEATURE_STEAL_TIME is added here. Like on x86, the steal time structure
is saved in guest memory, and one hypercall function KVM_HCALL_FUNC_NOTIFY
is added to notify KVM to enable the feature.

One cpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save
and restore the base address of the steal time structure when a VM is
migrated.
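
As a sketch, a VMM could save and restore that base address across
migration roughly like this (the attr group/number come from this patch;
the surrounding ioctl plumbing is illustrative):

	/* Illustrative userspace sketch for save/restore during migration */
	u64 gpa;
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (u64)&gpa,
	};

	ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);	/* save on the source */
	/* ... migrate ... */
	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	/* restore on the target */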

Since it needs hypercall instruction emulation handling, it depends on
this patchset:
https://lore.kernel.org/all/20240201031950.3225626-1-maob...@loongson.cn/

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 +++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/exit.c  |  35 ++--
 arch/loongarch/kvm/vcpu.c  | 120 +
 6 files changed, 172 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index c53946f8ef9f..1d1eaa124349 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_HALT_POLL_NS_DEFAULT   500000
+#define KVM_REQ_RECORD_STEAL   KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_VALID_MASK(KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP)
@@ -199,6 +200,12 @@ struct kvm_vcpu_arch {
struct kvm_mp_state mp_state;
/* cpucfg */
u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+   /* paravirt steal time */
+   struct {
+   u64 guest_addr;
+   u64 last_steal;
+   struct gfn_to_hva_cache cache;
+   } st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 56775554402a..5fb89e20432d 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -12,6 +12,7 @@
 #define KVM_HCALL_CODE_SWDBG   1
 #define KVM_HCALL_PV_SERVICE   HYPERCALL_CODE(HYPERVISOR_KVM, 
KVM_HCALL_CODE_PV_SERVICE)
 #define  KVM_HCALL_FUNC_PV_IPI 1
+#define  KVM_HCALL_FUNC_NOTIFY 2
 #define KVM_HCALL_SWDBG    HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
 /*
@@ -21,6 +22,15 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER-2UL
 
+#define KVM_STEAL_PHYS_VALID   BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASK    GENMASK_ULL(63, 6)
+struct kvm_steal_time {
+   __u64 steal;
+   __u32 version;
+   __u32 flags;
+   __u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 0ad36704cb4b..ab6a5e93c280 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -168,6 +168,7 @@
 #define  KVM_SIGNATURE "KVM\0"
 #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #define  KVM_FEATURE_PV_IPI    BIT(1)
+#define  KVM_FEATURE_STEAL_TIME    BIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h 
b/arch/loongarch/include/uapi/asm/kvm.h
index 8f78b23672ac..286b5ce93a57 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -80,7 +80,11 @@ struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)(TYPE | KVM_REG_SIZE_U64 | (REG << 
LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, 
REG)
 #define KVM_IOC_CPUCFG(REG)
LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG  0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
+#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index d71172e2568e..c774e5803f7f 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -209,7 +209,7 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
 static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
-   unsigned int index;
+   unsigned int index, ret;
unsigned long plv;
 
rd = inst.reg2_format.rd;
@@ -240,10 +240,13 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, 
larch_inst inst)
vcpu->arch.gprs[rd] = 0;
break;
case CPUCFG_KVM_FEATURE:
-   if ((plv & CSR_CRMD_PLV) == PLV_KERN)
-   vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
-   else
-   vcpu->arch.gprs[rd]

[PATCH 2/2] LoongArch: Add steal time support in guest side

2024-03-26 Thread Bibo Mao
A percpu struct kvm_steal_time is added here; its size is 64 bytes and it
is also defined as 64-byte aligned, so that the whole structure resides in
one physical page.

When a vcpu is onlined, function pv_register_steal_time() is called. This
function passes the physical address of struct kvm_steal_time and tells the
hypervisor to enable steal time. When a vcpu is offlined, the physical
address is set to 0, which tells the hypervisor to disable steal time.
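
The online/offline hooks could be wired up through the CPU hotplug state
machine, roughly as in this sketch (the dynamic state and name string are
illustrative; pv_cpu_online()/pv_cpu_down_prepare() are the callbacks added
by this patch):

	int r;

	/* Sketch: install the per-cpu steal time enable/disable callbacks */
	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "loongarch/pvtime:online",
			      pv_cpu_online, pv_cpu_down_prepare);
	if (r < 0)
		pr_err("failed to install pvtime hotplug callbacks\n");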

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/paravirt.h |   5 +
 arch/loongarch/kernel/paravirt.c  | 130 ++
 arch/loongarch/kernel/time.c  |   2 +
 3 files changed, 137 insertions(+)

diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
index 58f7b7b89f2c..fe27fb5e82b8 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -17,11 +17,16 @@ static inline u64 paravirt_steal_clock(int cpu)
 }
 
 int pv_ipi_init(void);
+int __init pv_time_init(void);
 #else
 static inline int pv_ipi_init(void)
 {
return 0;
 }
 
+static inline int pv_time_init(void)
+{
+   return 0;
+}
 #endif // CONFIG_PARAVIRT
 #endif
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 9044ed62045c..56182c64ab38 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -5,10 +5,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock;
 
 static u64 native_steal_clock(int cpu)
 {
@@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu)
 
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+   steal_acc = false;
+   return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
+static u64 para_steal_clock(int cpu)
+{
+   u64 steal;
+   struct kvm_steal_time *src;
+   int version;
+
+   src = &per_cpu(steal_time, cpu);
+   do {
+
+   version = src->version;
+   /* Make sure that the version is read before the steal */
+   virt_rmb();
+   steal = src->steal;
+   /* Make sure that the steal is read before the next version */
+   virt_rmb();
+
+   } while ((version & 1) || (version != src->version));
+   return steal;
+}
+
+static int pv_register_steal_time(void)
+{
+   int cpu = smp_processor_id();
+   struct kvm_steal_time *st;
+   unsigned long addr;
+
+   if (!has_steal_clock)
+   return -EPERM;
+
+   st = &per_cpu(steal_time, cpu);
+   addr = per_cpu_ptr_to_phys(st);
+
+   /* The whole structure kvm_steal_time should be one page */
+   if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) {
+   pr_warn("Illegal PV steal time addr %lx\n", addr);
+   return -EFAULT;
+   }
+
+   addr |= KVM_STEAL_PHYS_VALID;
+   kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
+   return 0;
+}
+
 #ifdef CONFIG_SMP
 static void pv_send_ipi_single(int cpu, unsigned int action)
 {
@@ -110,6 +164,32 @@ static void pv_init_ipi(void)
if (r < 0)
panic("SWI0 IRQ request failed\n");
 }
+
+static void pv_disable_steal_time(void)
+{
+   if (has_steal_clock)
+   kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 
0);
+}
+
+static int pv_cpu_online(unsigned int cpu)
+{
+   unsigned long flags;
+
+   local_irq_save(flags);
+   pv_register_steal_time();
+   local_irq_restore(flags);
+   return 0;
+}
+
+static int pv_cpu_down_prepare(unsigned int cpu)
+{
+   unsigned long flags;
+
+   local_irq_save(flags);
+   pv_disable_steal_time();
+   local_irq_restore(flags);
+   return 0;
+}
 #endif
 
 static bool kvm_para_available(void)
@@ -149,3 +229,53 @@ int __init pv_ipi_init(void)
 
return 1;
 }
+
+static void pv_cpu_reboot(void *unused)
+{
+   pv_disable_steal_time();
+}
+
+static int pv_reboot_notify(struct notifier_block *nb, unsigned long code,
+   void *unused)
+{
+   on_each_cpu(pv_cpu_reboot, NULL, 1);
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block pv_reboot_nb = {
+   .notifier_call  = pv_reboot_notify,
+};
+
+int __init pv_time_init(void)
+{
+   int feature;
+
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   return 0;
+
+   feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+   if (!(feature & KVM_FEATURE_STEAL_TIME))
+   return 0;
+
+   has_steal_clock = 1;
+   if (pv_register_steal_time()) {
+   has_steal_clock = 0;
+   return 0;
+   }
+
+   register_reboot_notifier(&pv_reb

[PATCH 0/2] LoongArch: Add steal time support

2024-03-26 Thread Bibo Mao
The para-virt feature steal time is added on both the kvm and the guest
kernel side. It is similar to other architectures: the steal time structure
resides in guest memory, and a pseudo register is used to save/restore the
base address of the steal time structure, so that vm migration is supported
as well.

Bibo Mao (2):
  LoongArch: KVM: Add steal time support in kvm side
  LoongArch: Add steal time support in guest side

 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/asm/paravirt.h  |   5 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kernel/paravirt.c   | 130 +
 arch/loongarch/kernel/time.c   |   2 +
 arch/loongarch/kvm/exit.c  |  35 ++-
 arch/loongarch/kvm/vcpu.c  | 120 +++
 9 files changed, 309 insertions(+), 5 deletions(-)


base-commit: 2ac2b1665d3fbec6ca709dd6ef3ea05f4a51ee4c
-- 
2.39.3




[PATCH v7 7/7] Documentation: KVM: Add hypercall for LoongArch

2024-03-15 Thread Bibo Mao
Add a documentation topic for using pv_virt when running as a guest
on the KVM hypervisor.
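
As a sketch, the detection flow this document describes boils down to the
following (read_cpucfg() and the constants exist in this series; the helper
name here is illustrative):

	/* Sketch: check the "KVM\0" signature before using hypercalls */
	static bool kvm_signature_found(void)
	{
		unsigned int sig = read_cpucfg(CPUCFG_KVM_SIG);

		return sig == *(unsigned int *)KVM_SIGNATURE;
	}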

Signed-off-by: Bibo Mao 
---
 Documentation/virt/kvm/index.rst  |  1 +
 .../virt/kvm/loongarch/hypercalls.rst | 82 +++
 Documentation/virt/kvm/loongarch/index.rst| 10 +++
 3 files changed, 93 insertions(+)
 create mode 100644 Documentation/virt/kvm/loongarch/hypercalls.rst
 create mode 100644 Documentation/virt/kvm/loongarch/index.rst

diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index ad13ec55ddfe..9ca5a45c2140 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -14,6 +14,7 @@ KVM
s390/index
ppc-pv
x86/index
+   loongarch/index
 
locking
vcpu-requests
diff --git a/Documentation/virt/kvm/loongarch/hypercalls.rst 
b/Documentation/virt/kvm/loongarch/hypercalls.rst
new file mode 100644
index 000000000000..0f61a49c31a9
--- /dev/null
+++ b/Documentation/virt/kvm/loongarch/hypercalls.rst
@@ -0,0 +1,82 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+The LoongArch paravirtual interface
+===================================
+
+KVM hypercalls use the HVCL instruction with code 0x100. The hypercall
+number is put in a0, up to five arguments may be placed in a1-a5, and the
+return value is placed in a0.
+
+The code for that interface can be found in arch/loongarch/kvm/*
+
+Querying for existence
+======================
+
+To find out if we're running on KVM or not, cpucfg can be used with index
+CPUCFG_KVM_BASE (0x40000000); the cpucfg range between 0x40000000 - 0x400000FF
+is marked as a specially reserved range. No existing or future processors
+will implement any features in this range.
+
+When Linux is running on KVM, cpucfg with index CPUCFG_KVM_BASE (0x40000000)
+returns the magic string "KVM\0".
+
+Once you have determined you're running under a PV-capable KVM, you can use
+the hypercalls described as follows.
+
+KVM hypercall ABI
+=================
+
+The hypercall ABI on KVM is simple: only one scratch register a0 is
+clobbered, and at most five generic registers are used as input parameters.
+FP registers and vector registers are not used for input registers and
+should not be modified during a hypercall. The hypercall function can be
+inlined since there is only one scratch register.
+
+The parameters are as follows:
+
+   ========   ==========================   ======================
+   Register   IN                           OUT
+   ========   ==========================   ======================
+   a0         function number (Required)   Return code (Required)
+   a1         1st parameter (Optional)     Keep Unmodified
+   a2         2nd parameter (Optional)     Keep Unmodified
+   a3         3rd parameter (Optional)     Keep Unmodified
+   a4         4th parameter (Optional)     Keep Unmodified
+   a5         5th parameter (Optional)     Keep Unmodified
+   ========   ==========================   ======================
+
+Return codes can be as follows:
+
+   ====   =========================
+   Code   Meaning
+   ====   =========================
+   0      Success
+   -1     Hypercall not implemented
+   -2     Hypercall parameter error
+   ====   =========================
+
+KVM Hypercalls Documentation
+============================
+
+The template for each hypercall is:
+1. Hypercall name
+2. Purpose
+
+1. KVM_HCALL_FUNC_PV_IPI
+------------------------
+
+:Purpose: Send IPIs to multiple vCPUs.
+
+- a0: KVM_HCALL_FUNC_PV_IPI
+- a1: lower part of the bitmap of destination physical cpu core id
+- a2: higher part of the bitmap of destination physical cpu core id
+- a3: the lowest physical cpu core id in bitmap
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+destinations per hypercall.  The destinations are represented by a bitmap
+contained in the first two arguments (a1 and a2). Bit 0 of a1 corresponds
+to the physical cpu core id in the third argument (a3), bit 1 corresponds
+to the physical cpu core id a3+1, and so on.
+
+It always returns success; if the current physical cpu core id is invalid,
+it skips that cpu and continues sending the ipi to the next cpu in the
+bitmap mask.
diff --git a/Documentation/virt/kvm/loongarch/index.rst 
b/Documentation/virt/kvm/loongarch/index.rst
new file mode 100644
index 000000000000..83387b4c5345
--- /dev/null
+++ b/Documentation/virt/kvm/loongarch/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=
+KVM for LoongArch systems
+=
+
+.. toctree::
+   :maxdepth: 2
+
+   hypercalls.rst
-- 
2.39.3




[PATCH v7 6/7] LoongArch: Add pv ipi support on guest kernel side

2024-03-15 Thread Bibo Mao
The PARAVIRT option and pv ipi are added on the guest kernel side; function
pv_ipi_init() adds the ipi sending and ipi receiving hooks. This function
first checks whether the system runs in VM mode. If the kernel runs in VM
mode, it calls function kvm_para_available() to detect the current
hypervisor type. Only KVM type detection is supported now; the paravirt
functions can work only if the current hypervisor type is KVM, since KVM is
the only hypervisor supported on LoongArch now.

PV IPI uses virtual IPI sender and virtual IPI receiver functions. With
the virtual IPI sender, the ipi message is stored in DDR memory rather than
in emulated HW. IPI multicast is supported, so 128 vcpus can receive IPIs
at the same time, like the x86 KVM method. The hypercall method is used for
IPI sending.

With the virtual IPI receiver, HW SW0 is used rather than the real IPI HW.
Since a VCPU has a separate HW SW0, like the HW timer, there is no trap in
the IPI interrupt acknowledge. And since the IPI message is stored in DDR,
there is no trap in getting the IPI message.
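
Conceptually, the hook installation amounts to the following sketch
(pv_init_ipi()/pv_send_ipi_single()/pv_send_ipi_mask() are the functions
added by this patch; error handling is omitted):

	/* Sketch: replace the IOCSR-based smp_ops with the PV variants */
	int __init pv_ipi_init(void)
	{
		if (!cpu_has_hypervisor || !kvm_para_available())
			return 0;

		smp_ops.init_ipi        = pv_init_ipi;
		smp_ops.send_ipi_single = pv_send_ipi_single;
		smp_ops.send_ipi_mask   = pv_send_ipi_mask;

		return 1;
	}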

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|   9 ++
 arch/loongarch/include/asm/hardirq.h  |   1 +
 arch/loongarch/include/asm/paravirt.h |  27 
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |   2 +-
 arch/loongarch/kernel/paravirt.c  | 151 ++
 arch/loongarch/kernel/smp.c   |   4 +-
 8 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index b274784c2e26..a1fccaf117aa 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -578,6 +578,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+  This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.  However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..b26d596a73aa 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t message ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index 000000000000..58f7b7b89f2c
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_ipi_init(void);
+#else
+static inline int pv_ipi_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index ce36897d1e5a..4863e6c1b739 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + 
IRQ_STACK_SIZE);
}
 
-   set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+   set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI 
| ECFGF_PMC);
 }
diff --git a/arch/loongarch/kernel/paravir

[PATCH v7 5/7] LoongArch: KVM: Add pv ipi support on kvm side

2024-03-15 Thread Bibo Mao
On LoongArch systems, the ipi hw uses iocsr registers: there is one iocsr
register access for ipi sending, and two iocsr accesses for ipi receiving
in the ipi interrupt handler. In VM mode, every iocsr access causes the VM
to trap into the hypervisor. So one ipi hw notification costs three traps.

PV ipi is added for the VM: the hypercall instruction is used on the ipi
sender side, and the hypervisor injects an SWI to the destination vcpu. In
the SWI interrupt handler, only the estat CSR register is written to clear
the irq, and estat CSR register access does not trap into the hypervisor.
So with pv ipi supported, there is one trap for the pv ipi sender and no
trap for the ipi receiver; there is only one trap per ipi notification.

This patch also adds ipi multicast support; the method is similar to x86.
With ipi multicast support, an ipi notification can be sent to at most 128
vcpus at one time. This greatly reduces the number of traps into the
hypervisor.
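
On the hypervisor side, decoding one multicast hypercall is conceptually
as follows (a sketch only; kvm_read_reg() and kvm_get_vcpu_by_cpuid()
appear elsewhere in this series, and SWI injection details are simplified):

	/* Sketch: walk both 64-bit bitmap halves and inject SWI per vcpu */
	int i, bit, min;
	unsigned long bitmap;
	struct kvm_vcpu *dest;

	min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3);
	for (i = 0; i < 2; i++, min += BITS_PER_LONG) {
		bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i);
		for_each_set_bit(bit, &bitmap, BITS_PER_LONG) {
			dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, min + bit);
			if (dest)
				kvm_queue_irq(dest, INT_SWI0);
		}
	}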

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 129 +
 arch/loongarch/include/asm/kvm_vcpu.h  |  10 ++
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kvm/exit.c  |  76 ++-
 arch/loongarch/kvm/vcpu.c  |   1 +
 6 files changed, 216 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 3ba16ef1fe69..0b96c6303cf7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hypercall_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index d48f993ae206..a5809a854bae 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,16 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT    8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + 
code)
+#define KVM_HCALL_CODE_PV_SERVICE  0
+#define KVM_HCALL_PV_SERVICE   HYPERCALL_CODE(HYPERVISOR_KVM, 
KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI 1
+
 /*
  * LoongArch hypercall return code
  */
@@ -9,6 +19,125 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER-2UL
 
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in a0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+   unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+   register long ret asm("a0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+   register unsigned long a3  asm("a3") = arg2;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+  

[PATCH v7 4/7] LoongArch: KVM: Add vcpu search support from physical cpuid

2024-03-15 Thread Bibo Mao
The physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in CSR
register LOONGARCH_CSR_CPUID; it cannot be changed once the vcpu is
created, and two vcpus cannot share the same physical cpuid.

Different irqchips declare different sizes for the physical cpuid: the max
cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max cpuid
supported by the IPI hardware is 1024, it is 256 for the extioi irqchip,
and 65536 for the MSI irqchip.

The smallest value across all the interrupt controllers is selected for
now, so the max cpuid size is defined as 256 by KVM, which comes from the
extioi irqchip.
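
With the map in place, resolving a physical cpuid to its vcpu becomes an
O(1) array access, roughly as below (a sketch consistent with the
structures added in this patch):

	/* Sketch: look up the vcpu registered for a physical cpuid */
	struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
	{
		struct kvm_phyid_map *map = kvm->arch.phyid_map;

		if (cpuid < 0 || cpuid >= KVM_MAX_PHYID)
			return NULL;

		return map->phys_map[cpuid].enabled ? map->phys_map[cpuid].vcpu
						    : NULL;
	}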

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..3ba16ef1fe69 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions of physical cpuid on different hardware.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   boolenabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   spinlock_t phyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 36106922b5d7..a1a1dc4a3cf2 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int 
id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+   map = vcpu->kvm->arch.phyid_map;
+   spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+/* Discard duplicated cpuid set */
+   spin_unlock(>kvm->arch.phyid_map_lock);
+   return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation*/
+   

[PATCH v7 3/7] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-03-15 Thread Bibo Mao
The cpucfg instruction can be used to get processor features, and a trap
exception is raised when it is executed in VM mode, so it can also be used
to provide cpu features to the VM. On real hardware, cpucfg areas 0 - 20
are used. Here one dedicated area 0x40000000 -- 0x400000ff is used for the
KVM hypervisor to provide PV features, and the area can be extended for
other hypervisors in the future. This area will never be used by real HW;
it is only used by software.
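
From the guest's point of view, a feature probe through this area reduces
to the following (a sketch; KVM_FEATURE_PV_IPI is defined later in this
series):

	/* Sketch: read the PV feature word from the reserved cpucfg area */
	unsigned int feature = read_cpucfg(CPUCFG_KVM_FEATURE);

	if (feature & KVM_FEATURE_PV_IPI)
		pv_ipi_init();	/* pv ipi may be used */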

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 +
 arch/loongarch/kvm/exit.c  | 59 +++---
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x40000000 -- 0x400000ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE    0x40000000UL
+#define CPUCFG_KVM_SIZE    0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 923bbca9bd22..a8d3b652d3ea 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,50 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+   unsigned long plv;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*
+* Disable preemption since hw gcsr is accessed
+*/
+   preempt_disable();
+   plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT;
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   /*
+* Cpucfg emulation between 0x40000000 -- 0x400000ff
+* Return value with 0 if executed in user mode
+*/
+   if ((plv & CSR_CRMD_PLV) == PLV_KERN)
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   else
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   preempt_enable();
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +264,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v7 0/7] LoongArch: Add pv ipi support on LoongArch VM

2024-03-15 Thread Bibo Mao
On a physical machine, the ipi HW uses IOCSR registers; however, there is
a trap into the hypervisor when a vcpu accesses IOCSR registers if the
system is in VM mode. SWI is an interrupt mechanism like SGI on ARM:
software can send an interrupt to a CPU, except that on LoongArch SWI can
only be sent to the local CPU now. So SWI cannot be used for IPI on a real
HW system; however, it can be used on a VM when combined with the hypercall
method. An IPI can be sent with the hypercall method and an SWI interrupt
is injected to the vcpu, so the vcpu can treat the SWI interrupt as an IPI.

With PV IPI supported, there is one trap for IPI sending and no trap for
IPI receiving. With the IOCSR HW IPI method, there is one trap for IPI
sending and two traps for IPI receiving.

Also, IPI multicast support is added for the VM; the idea comes from x86 PV
IPI. An IPI can be sent to up to 128 vcpus in one hypercall. With IPI
multicast support, the number of traps is reduced greatly.

Here is the microbenchmark data with the "perf bench futex wake" testcase on
a 3C5000 single-way machine; there are 16 cpus on the 3C5000 single-way
machine, and the VM has 16 vcpus as well. The benchmark result is the time
in ms to wake up 16 threads; the performance is better if the number is
smaller.

physical machine 0.0176 ms
VM original  0.1140 ms
VM with pv ipi patch 0.0481 ms

The VM boots successfully with 128/256 vcpus, and the runltp command from
package ltp-20230516 runs successfully as well.

---
Change in v7:
  1. Refine LoongArch virt document by review comments.
  2. Add function kvm_read_reg()/kvm_write_reg() in hypercall emulation,
and later it can be used for other trap emulations.

Change in V6:
  1. Add privilege checking when emulating cpucfg at index 0x40000000 --
0x400000FF; return 0 if not executed in kernel mode.
  2. Add document about LoongArch pv ipi within the newly created directory
Documentation/virt/kvm/loongarch/
  3. Fix pv ipi handling in kvm backend function kvm_pv_send_ipi(),
where min should be increased by BITS_PER_LONG for the second bitmap,
otherwise a VM with more than 64 vcpus fails to boot.
  4. Adjust patch order and code refine with review comments.

Change in V5:
  1. Refresh function/macro name from review comments.

Change in V4:
  1. Modify pv ipi hook function names call_func_ipi() and
call_func_single_ipi() to send_ipi_mask()/send_ipi_single(), since pv
ipi is used for both remote function call and reschedule notification.
  2. Refresh changelog.

Change in V3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change cpucfg base address from 0x10000000 to 0x40000000, in order
to avoid conflicts with future hw usage
  3. Adjust patch order in this patchset, move patch
Refine-ipi-ops-on-LoongArch-platform to the first one.

Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if there is
already a pending ipi message.
---
Bibo Mao (7):
  LoongArch/smp: Refine some ipi functions on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: KVM: Add vcpu search support from physical cpuid
  LoongArch: KVM: Add pv ipi support on kvm side
  LoongArch: Add pv ipi support on guest kernel side
  Documentation: KVM: Add hypercall for LoongArch

 Documentation/virt/kvm/index.rst  |   1 +
 .../virt/kvm/loongarch/hypercalls.rst |  82 +
 Documentation/virt/kvm/loongarch/index.rst|  10 ++
 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 155 ++
 arch/loongarch/include/asm/kvm_vcpu.h |  11 ++
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 151 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/smp.c   |  62 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 141 ++--
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 25 files changed, 792 insertions(+), 102 deletions(-)
 create mode 100644 Documentation/virt/kvm

[PATCH v7 1/7] LoongArch/smp: Refine some ipi functions on LoongArch platform

2024-03-15 Thread Bibo Mao
This is a code refinement of ipi handling on the LoongArch platform; there
are three modifications.
1. Add generic function get_percpu_irq(), replacing some percpu irq
functions such as get_ipi_irq()/get_pmc_irq()/get_timer_irq() with
get_percpu_irq().

2. Change the definition of the parameter action used by functions
loongson_send_ipi_single() and loongson_send_ipi_mask(): it is now defined
in decimal encoding format on the ipi sender side. Normal decimal encoding
is used rather than binary bitmap encoding for the ipi action; the ipi hw
sender takes the decimal encoding code, while the ipi receiver gets binary
bitmap encoding, since the ipi hw converts the action into a bitmap in the
ipi message buffer.

3. Add structure smp_ops on LoongArch platform so that pv ipi can be used
later.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..75d30529748c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*init_ipi)(void);
+   void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+   void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTION_BOOT_CPU    0
+#define ACTION_RESCHEDULE  1
+#define ACTION_CALL_FUNCTION   2
+#define SMP_BOOT_CPU   BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..ce36897d1e5a 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void

[PATCH v7 2/7] LoongArch: KVM: Add hypercall instruction emulation support

2024-03-15 Thread Bibo Mao
On LoongArch systems there is a hypercall instruction dedicated to
virtualization. When this instruction is executed on the host side, an
illegal instruction exception is reported; however, it traps into the host
when executed in VM mode.

When the hypercall is emulated, the A0 register is set to value
KVM_HCALL_INVALID_CODE rather than injecting an EXCCODE_INE invalid
instruction exception, so the VM can continue executing the following
code.
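
Seen from the guest, an unknown hypercall therefore behaves like a no-op
that returns an error code, e.g. (illustrative only; kvm_hypercall() is
introduced later in this series, and 0x1234 is a made-up function id):

	/* Sketch: an unsupported function id just yields an error in a0 */
	long ret = kvm_hypercall(0x1234);

	if (ret == KVM_HCALL_INVALID_CODE)
		pr_info("hypercall not handled, guest continues\n");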

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index 000000000000..d48f993ae206
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_STATUS_SUCCESS   0
+#define KVM_HCALL_INVALID_CODE -1UL
+#define KVM_HCALL_INVALID_PARAMETER-2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..923bbca9bd22 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a nop instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = 
{
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypercall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v6 7/7] Documentation: KVM: Add hypercall for LoongArch

2024-03-02 Thread Bibo Mao
Add a documentation topic about using pv_virt when running as a guest
on the KVM hypervisor.

Signed-off-by: Bibo Mao 
---
 Documentation/virt/kvm/index.rst  |  1 +
 .../virt/kvm/loongarch/hypercalls.rst | 79 +++
 Documentation/virt/kvm/loongarch/index.rst| 10 +++
 3 files changed, 90 insertions(+)
 create mode 100644 Documentation/virt/kvm/loongarch/hypercalls.rst
 create mode 100644 Documentation/virt/kvm/loongarch/index.rst

diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index ad13ec55ddfe..9ca5a45c2140 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -14,6 +14,7 @@ KVM
s390/index
ppc-pv
x86/index
+   loongarch/index
 
locking
vcpu-requests
diff --git a/Documentation/virt/kvm/loongarch/hypercalls.rst 
b/Documentation/virt/kvm/loongarch/hypercalls.rst
new file mode 100644
index ..1679e48d67d2
--- /dev/null
+++ b/Documentation/virt/kvm/loongarch/hypercalls.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+The LoongArch paravirtual interface
+===================================
+
+KVM hypercalls use the HVCL instruction with code 0x100. The hypercall
+number is put in a0, up to five arguments may be placed in a1-a5, and the
+return value is placed in v0 (an alias of a0).
+
+The code for that interface can be found in arch/loongarch/kvm/*
+
+Querying for existence
+======================
+
+To find out whether we are running on KVM, cpucfg can be used with index
+CPUCFG_KVM_BASE (0x4000). The cpucfg range 0x4000 - 0x40FF is marked as a
+specially reserved range; no existing or future processor will implement
+any feature in this range.
+
+When Linux is running on KVM, cpucfg with index CPUCFG_KVM_BASE (0x4000)
+returns the magic string "KVM\0".
+
+Once you have determined that you are running under a PV-capable KVM, you
+can use hypercalls as described below.
+
+KVM hypercall ABI
+=================
+
+The hypercall ABI on KVM is simple: only one scratch register, a0 (v0), is
+used, and at most five generic registers are used as input parameters. FP
+registers and vector registers are not used for input and should not be
+modified during a hypercall. A hypercall function can be inlined since
+there is only one scratch register.
+
+The parameters are as follows:
+
+   ========        ================        ===========
+   Register        IN                      OUT
+   ========        ================        ===========
+   a0              function number         Return code
+   a1              1st parameter           -
+   a2              2nd parameter           -
+   a3              3rd parameter           -
+   a4              4th parameter           -
+   a5              5th parameter           -
+   ========        ================        ===========
+
+Return codes can be as follows:
+
+   ====        =========================
+   Code        Meaning
+   ====        =========================
+   0           Success
+   -1          Hypercall not implemented
+   -2          Hypercall parameter error
+   ====        =========================
+
+KVM Hypercalls Documentation
+============================
+
+The template for each hypercall is:
+1. Hypercall name
+2. Purpose
+
+1. KVM_HCALL_FUNC_PV_IPI
+------------------------
+
+:Purpose: Send IPIs to multiple vCPUs.
+
+- a0: KVM_HCALL_FUNC_PV_IPI
+- a1: lower part of the bitmap of destination physical CPUIDs
+- a2: higher part of the bitmap of destination physical CPUIDs
+- a3: the lowest physical CPUID in bitmap
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+destinations per hypercall.  The destinations are represented by a bitmap
+contained in the first two arguments (a1 and a2). Bit 0 of a1 corresponds
+to the physical CPUID in the third argument (a3), bit 1 corresponds to the
+physical ID a3+1, and so on.
diff --git a/Documentation/virt/kvm/loongarch/index.rst 
b/Documentation/virt/kvm/loongarch/index.rst
new file mode 100644
index ..83387b4c5345
--- /dev/null
+++ b/Documentation/virt/kvm/loongarch/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+KVM for LoongArch systems
+=========================
+
+.. toctree::
+   :maxdepth: 2
+
+   hypercalls.rst
-- 
2.39.3
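
As a worked example of the bitmap encoding documented in hypercalls.rst
above (a sketch under stated assumptions: kvm_hypercall3() and
KVM_HCALL_FUNC_PV_IPI are the names used by the kernel-side patches in this
series):

/* Send an IPI to physical CPUIDs 3, 5 and 130. min (a3) is the lowest
 * destination CPUID; every destination is encoded as bit (cpuid - min)
 * in a 128-bit bitmap split across a1 (low 64 bits) and a2 (high).
 */
static void pv_ipi_bitmap_example(void)
{
	unsigned long low = 0, high = 0;
	int dest[] = { 3, 5, 130 };
	int min = 3, i, off;

	for (i = 0; i < 3; i++) {
		off = dest[i] - min;		/* 0, 2 and 127 */
		if (off < 64)
			low |= 1UL << off;
		else
			high |= 1UL << (off - 64);
	}

	/* a1 = 0x5, a2 = 1UL << 63, a3 = 3 */
	kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, low, high, min);
}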




[PATCH v6 5/7] LoongArch: KVM: Add pv ipi support on kvm side

2024-03-02 Thread Bibo Mao
On LoongArch systems, ipi hw uses iocsr registers: there is one iocsr
register access on ipi sending, and two iocsr accesses on ipi receiving,
in the ipi interrupt handler. In VM mode, every iocsr access causes the
VM to trap into the hypervisor, so one ipi hw notification costs three
traps.

PV ipi is added for the VM: the hypercall instruction is used on the ipi
sender side, and the hypervisor injects an SWI into the destination vcpu.
In the SWI interrupt handler only the estat CSR register is written to
clear the irq, and estat CSR accesses do not trap into the hypervisor. So
with pv ipi supported there is one trap on the sender side and no trap on
the receiver side; one ipi notification costs a single trap.

This patch also adds ipi multicast support; the method is similar to x86.
With ipi multicast support, an ipi notification can be sent to at most
128 vcpus at one time, which reduces traps into the hypervisor greatly.
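
The host-side dispatch this patch adds to exit.c is truncated in this
archive; the following is an illustrative sketch of the flow, not the exact
patch body (kvm_pv_send_ipi() is the backend function named in the cover
letter, and handle_pv_service_sketch() is a hypothetical name):

static int handle_pv_service_sketch(struct kvm_vcpu *vcpu)
{
	unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
	long ret;

	switch (func) {
	case KVM_HCALL_FUNC_PV_IPI:
		/* Read bitmap from a1/a2, base CPUID from a3, inject SWI */
		kvm_pv_send_ipi(vcpu);
		ret = KVM_HCALL_STATUS_SUCCESS;
		break;
	default:
		ret = KVM_HCALL_INVALID_CODE;
		break;
	}

	vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
	return RESUME_GUEST;
}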

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 130 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kvm/exit.c  |  76 ++-
 arch/loongarch/kvm/vcpu.c  |   1 +
 5 files changed, 207 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 3ba16ef1fe69..0b96c6303cf7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hypercall_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index d48f993ae206..a82bffbbf8a1 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,16 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HCALL_CODE_PV_SERVICE  0
+#define KVM_HCALL_PV_SERVICE   HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI 1
+
 /*
  * LoongArch hypercall return code
  */
@@ -9,6 +19,126 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER-2UL
 
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+   unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+   register unsigned long a3  asm("a3") = arg2;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+ 

[PATCH v6 6/7] LoongArch: Add pv ipi support on guest kernel side

2024-03-02 Thread Bibo Mao
The PARAVIRT option and pv ipi are added on the guest kernel side; function
pv_ipi_init() installs the ipi sending and ipi receiving hooks. This
function first checks whether the system runs in VM mode. If the kernel
runs in VM mode, it calls kvm_para_available() to detect the current
hypervisor type. Only KVM type detection is supported now, so the paravirt
functions can work only if the current hypervisor type is KVM, since KVM is
the only hypervisor supported on LoongArch at present.

PV IPI uses virtual IPI sender and virtual IPI receiver functions. With the
virtual IPI sender, the ipi message is stored in DDR memory rather than in
emulated HW. IPI multicast is supported, and 128 vcpus can receive IPIs at
the same time, like the x86 KVM method. The hypercall method is used for
IPI sending.

With the virtual IPI receiver, HW SW0 is used rather than the real IPI HW.
Since each vcpu has a separate HW SW0, like the HW timer, there is no trap
on IPI interrupt acknowledge. And since the IPI message is stored in DDR,
there is no trap on reading the IPI message.
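
A condensed sketch of the sender path described above (illustrative only;
irq_stat.message comes from the hardirq.h hunk below, while the helper name
pv_send_ipi_single_sketch() is hypothetical; note that the real patch also
skips the hypercall when a message is already pending, per the V2
changelog):

static void pv_send_ipi_single_sketch(int cpu, unsigned int action)
{
	irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
	int min = cpu_logical_map(cpu);

	/* No trap: the pending action is a plain store into DDR memory */
	atomic_or(BIT(action), &info->message);

	/* One trap: ask the hypervisor to inject SWI into that vcpu */
	kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1UL, 0, min);
}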

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|   9 ++
 arch/loongarch/include/asm/hardirq.h  |   1 +
 arch/loongarch/include/asm/paravirt.h |  27 
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |   2 +-
 arch/loongarch/kernel/paravirt.c  | 151 ++
 arch/loongarch/kernel/smp.c   |   4 +-
 8 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 929f68926b34..fdaae9a0435c 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -587,6 +587,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+  This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.  However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..b26d596a73aa 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t message ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..58f7b7b89f2c
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_ipi_init(void);
+#else
+static inline int pv_ipi_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index ce36897d1e5a..4863e6c1b739 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + 
IRQ_STACK_SIZE);
}
 
-   set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+   set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
 }
diff --git a/arch/loongarch/kernel/paravir

[PATCH v6 4/7] LoongArch: KVM: Add vcpu search support from physical cpuid

2024-03-02 Thread Bibo Mao
Physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in the
CSR register LOONGARCH_CSR_CPUID; it cannot be changed once the vcpu is
created, and the physical cpuids of two vcpus cannot be the same.

Different irqchips declare different physical cpuid sizes: the max cpuid
value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max cpuid supported
by the IPI hardware is 1024, it is 256 for the extioi irqchip, and 65536
for the MSI irqchip.

The smallest value among all interrupt controllers is selected for now, so
the max cpuid size is defined as 256 by KVM, which comes from the extioi
irqchip.
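
With the map below in place, resolving a physical cpuid to a vcpu becomes a
direct array lookup, roughly as follows (a sketch consistent with the
structures in this diff; the real kvm_get_vcpu_by_cpuid() may differ in
detail):

struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
{
	struct kvm_phyid_map *map = kvm->arch.phyid_map;

	if (cpuid < 0 || cpuid >= KVM_MAX_PHYID)
		return NULL;

	/* Only a slot claimed by kvm_set_cpuid() maps to a vcpu */
	if (map->phys_map[cpuid].enabled)
		return map->phys_map[cpuid].vcpu;

	return NULL;
}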

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..3ba16ef1fe69 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions of physical cpuid on different hardware.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   boolenabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   spinlock_tphyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 36106922b5d7..a1a1dc4a3cf2 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int 
id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+   map = vcpu->kvm->arch.phyid_map;
+   spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+   /* Discard duplicated cpuid set */
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation */
+   

[PATCH v6 1/7] LoongArch/smp: Refine some ipi functions on LoongArch platform

2024-03-02 Thread Bibo Mao
This is a code refinement of ipi handling on the LoongArch platform; there
are three modifications:
1. Add a generic function get_percpu_irq(), replacing the percpu irq
functions get_ipi_irq()/get_pmc_irq()/get_timer_irq() with
get_percpu_irq().

2. Change the definition of the action parameter taken by
loongson_send_ipi_single() and loongson_send_ipi_mask(): it is now defined
in decimal encoding format at the ipi sender side. Plain decimal encoding
is used rather than binary bitmap encoding for the ipi action; the ipi hw
sender takes the decimal code, while the ipi receiver gets binary bitmap
encoding, since the ipi hw converts the code into a bitmap in the ipi
message buffer.

3. Add structure smp_ops on the LoongArch platform so that pv ipi can be
hooked in later.
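
To make the two encodings concrete (a worked sketch; the ACTION_ and SMP_
macros are from the asm/smp.h hunk below, and example_send_reschedule() is
a hypothetical caller):

void example_send_reschedule(int cpu)
{
	/* Sender side: plain decimal code, ACTION_RESCHEDULE == 1 */
	smp_ops.send_ipi_single(cpu, ACTION_RESCHEDULE);

	/*
	 * Receiver side: the ipi hw presents the same event as a bit in
	 * the message buffer, SMP_RESCHEDULE == BIT(ACTION_RESCHEDULE)
	 * == 0x2, matching the old bitmap values.
	 */
}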

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..75d30529748c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*init_ipi)(void);
+   void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+   void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTION_BOOT_CPU0
+#define ACTION_RESCHEDULE  1
+#define ACTION_CALL_FUNCTION   2
+#define SMP_BOOT_CPU   BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..ce36897d1e5a 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void

[PATCH v6 3/7] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-03-02 Thread Bibo Mao
Instruction cpucfg can be used to get processor features, and a trap
exception is raised when it is executed in VM mode, so it can also be used
to provide cpu features to the VM. On real hardware, cpucfg areas 0 - 20
are used. Here the dedicated area 0x4000 -- 0x40ff is used by the KVM
hypervisor to provide PV features, and the area can be extended for other
hypervisors in future. This area will never be used by real HW; it is only
used by software.
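
On the guest side, detecting the hypervisor through this window is a single
cpucfg read (a sketch; the guest patches in this series implement the same
check in kvm_para_available(), and guest_on_kvm_sketch() is a hypothetical
name):

static bool guest_on_kvm_sketch(void)
{
	u32 config = read_cpucfg(CPUCFG_KVM_SIG);

	/* The magic "KVM\0" is read back as a 32-bit value */
	return !memcmp(&config, KVM_SIGNATURE, 4);
}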

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 +
 arch/loongarch/kvm/exit.c  | 59 +++---
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x4000 -- 0x40ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE0x4000UL
+#define CPUCFG_KVM_SIZE0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 923bbca9bd22..a8d3b652d3ea 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,50 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+   unsigned long plv;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*
+* Disable preemption since hw gcsr is accessed
+*/
+   preempt_disable();
+   plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT;
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   /*
+* Cpucfg emulation between 0x4000 -- 0x40ff
+* Return value is 0 if executed in user mode
+*/
+   if ((plv & CSR_CRMD_PLV) == PLV_KERN)
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   else
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   preempt_enable();
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +264,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v6 0/7] LoongArch: Add pv ipi support on LoongArch VM

2024-03-02 Thread Bibo Mao
On a physical machine, IPI hardware uses IOCSR registers, but there is a
trap into the hypervisor when a vcpu accesses IOCSR registers while the
system is in VM mode. SWI is an interrupt mechanism like SGI on ARM:
software can send an interrupt to a CPU, except that on LoongArch SWI can
currently only be sent to the local CPU. So SWI cannot be used for IPI on a
real HW system, but it can be used in a VM when combined with the hypercall
method: an IPI is sent via hypercall, an SWI interrupt is injected into the
destination vcpu, and the vcpu treats the SWI interrupt as an IPI.

With PV IPI supported, there is one trap on IPI sending and no trap on IPI
receiving. With the IOCSR HW IPI method, there is one trap on IPI sending
and two traps on IPI receiving.

IPI multicast support is also added for VMs; the idea comes from x86 PV
IPI. An IPI can be sent to up to 128 vcpus at one time. With IPI multicast
support, the number of traps is reduced greatly.

Here is the microbenchmark data for the "perf bench futex wake" testcase on
a 3C5000 single-way machine; it has 16 cpus, and the VM has 16 vcpus as
well. The benchmark reports the time in ms to wake up 16 threads; the
performance is better if the number is smaller.

physical machine 0.0176 ms
VM original  0.1140 ms
VM with pv ipi patch 0.0481 ms

The VM boots successfully with 128/256 vcpus, and the runltp command from
package ltp-20230516 runs successfully with 16 cores.

---
Change in V6:
  1. Add privilege checking when emulating cpucfg at index 0x400 --
0x40FF; return 0 if not executed in kernel mode.
  2. Add documentation about LoongArch pv ipi in the newly created
directory Documentation/virt/kvm/loongarch/
  3. Fix pv ipi handling in the kvm backend function kvm_pv_send_ipi():
min should be increased by BITS_PER_LONG for the second bitmap, otherwise
a VM with more than 64 vcpus fails to boot.
  4. Adjust patch order and refine code according to review comments.

Change in V5:
  1. Refresh function/macro names according to review comments.

Change in V4:
  1. Rename the pv ipi hook functions call_func_ipi() and
call_func_single_ipi() to send_ipi_mask()/send_ipi_single(), since pv
ipi is used for both remote function calls and reschedule notification.
  2. Refresh changelog.

Change in V3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change cpucfg base address from 0x1000 to 0x4000, in order
to avoid conflict with future hw usage
  3. Adjust patch order in this patchset, move patch
Refine-ipi-ops-on-LoongArch-platform to the first one.

Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if there is
already a pending ipi message.
---
Bibo Mao (7):
  LoongArch/smp: Refine some ipi functions on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: KVM: Add vcpu search support from physical cpuid
  LoongArch: KVM: Add pv ipi support on kvm side
  LoongArch: Add pv ipi support on guest kernel side
  Documentation: KVM: Add hypercall for LoongArch

 Documentation/virt/kvm/index.rst  |   1 +
 .../virt/kvm/loongarch/hypercalls.rst |  79 +
 Documentation/virt/kvm/loongarch/index.rst|  10 ++
 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 156 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   1 +
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 151 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/smp.c   |  62 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 141 ++--
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 25 files changed, 780 insertions(+), 102 deletions(-)
 create mode 100644 Documentation/virt/kvm/loongarch/hypercalls.rst
 create mode 100644 Documentation/virt/kvm/loongarch/index.rst
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/

[PATCH v6 2/7] LoongArch: KVM: Add hypercall instruction emulation support

2024-03-02 Thread Bibo Mao
On LoongArch systems there is a hypercall instruction dedicated to
virtualization. When the system executes this instruction on the host side,
an illegal instruction exception is reported; however, it traps into the
host when executed in VM mode.

When the hypercall is emulated, the A0 register is set to the value
KVM_HCALL_INVALID_CODE rather than injecting an EXCCODE_INE invalid
instruction exception, so the VM can continue executing the following code.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..d48f993ae206
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_STATUS_SUCCESS   0
+#define KVM_HCALL_INVALID_CODE -1UL
+#define KVM_HCALL_INVALID_PARAMETER-2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..923bbca9bd22 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a nop instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = 
{
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypercall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v5 6/6] LoongArch: Add pv ipi support on LoongArch system

2024-02-21 Thread Bibo Mao
On LoongArch systems, ipi hw uses iocsr registers: there is one iocsr
register access on ipi sending, and two iocsr accesses on ipi receiving,
in the ipi interrupt handler. In VM mode, every iocsr access causes the
VM to trap into the hypervisor, so one ipi hw notification costs three
traps.

PV ipi is added for the VM: the hypercall instruction is used on the ipi
sender side, and the hypervisor injects an SWI into the destination vcpu.
In the SWI interrupt handler only the estat CSR register is written to
clear the irq, and estat CSR accesses do not trap into the hypervisor. So
with pv ipi supported there is one trap on the sender side and no trap on
the receiver side; one ipi notification costs a single trap.

This patch also adds ipi multicast support; the method is similar to x86.
With ipi multicast support, an ipi notification can be sent to at most
128 vcpus at one time, which reduces traps into the hypervisor greatly.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h   |   1 +
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 123 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kernel/irq.c|   2 +-
 arch/loongarch/kernel/paravirt.c   | 112 ++
 arch/loongarch/kernel/setup.c  |   1 +
 arch/loongarch/kernel/smp.c|   2 +-
 arch/loongarch/kvm/exit.c  |  73 ++-
 arch/loongarch/kvm/vcpu.c  |   1 +
 10 files changed, 313 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..b26d596a73aa 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t message ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 3ba16ef1fe69..0b96c6303cf7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hypercall_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index af5d677a9052..a82bffbbf8a1 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -8,6 +8,9 @@
 #define HYPERVISOR_KVM 1
 #define HYPERVISOR_VENDOR_SHIFT8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HCALL_CODE_PV_SERVICE  0
+#define KVM_HCALL_PV_SERVICE   HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI 1
 
 /*
  * LoongArch hypercall return code
@@ -16,6 +19,126 @@
 #define KVM_HCALL_INVALID_CODE -1UL
 #define KVM_HCALL_INVALID_PARAMETER-2UL
 
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   

[PATCH v5 5/6] LoongArch: KVM: Add vcpu search support from physical cpuid

2024-02-21 Thread Bibo Mao
Physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in the
CSR register LOONGARCH_CSR_CPUID; it cannot be changed once the vcpu is
created, and the physical cpuids of two vcpus cannot be the same.

Different irqchips declare different physical cpuid sizes: the max cpuid
value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max cpuid supported
by the IPI hardware is 1024, it is 256 for the extioi irqchip, and 65536
for the MSI irqchip.

The smallest value among all interrupt controllers is selected for now, so
the max cpuid size is defined as 256 by KVM, which comes from the extioi
irqchip.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..3ba16ef1fe69 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions of physical cpuid on different hardware.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   boolenabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   spinlock_tphyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 27701991886d..40296d8ef297 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int 
id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+   map = vcpu->kvm->arch.phyid_map;
+   spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+   /* Discard duplicated cpuid set */
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation */
+   

[PATCH v5 4/6] LoongArch: Add paravirt interface for guest kernel

2024-02-21 Thread Bibo Mao
The paravirt interface pv_ipi_init() is added here for the guest kernel. It
first checks whether the system runs in VM mode; if so, it calls
kvm_para_available() to detect the current VMM type. Only the KVM VMM type
is detected for now, so the paravirt functions can work only if the current
VMM is the KVM hypervisor, since KVM is the only hypervisor supported on
LoongArch at present.

pv_ipi_init() has no effect yet; it is a dummy function.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  9 
 arch/loongarch/include/asm/kvm_para.h |  7 
 arch/loongarch/include/asm/paravirt.h | 27 
 .../include/asm/paravirt_api_clock.h  |  1 +
 arch/loongarch/kernel/Makefile|  1 +
 arch/loongarch/kernel/paravirt.c  | 41 +++
 arch/loongarch/kernel/setup.c |  1 +
 7 files changed, 87 insertions(+)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 929f68926b34..fdaae9a0435c 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -587,6 +587,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+  This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.  However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index d48f993ae206..af5d677a9052 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
 /*
  * LoongArch hypercall return code
  */
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..58f7b7b89f2c
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_ipi_init(void);
+#else
+static inline int pv_ipi_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index ..5cf794e8490f
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+   static int hypervisor_type;
+   int config;
+
+   if (!hypervisor_type) {
+   config = read_cpucfg(CPUCFG_KVM_SIG);
+   if (!memcmp(&config, KVM_SIGNATURE, 4))
+   hypervisor_type = HYPERVISOR_KVM;
+   }
+
+   return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_ipi_init(void)
+{
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   return 0;
+
+   return 1;
+}
diff --git 

[PATCH v5 2/6] LoongArch: KVM: Add hypercall instruction emulation support

2024-02-21 Thread Bibo Mao
On LoongArch systems there is a hypercall instruction dedicated to
virtualization. When the system executes this instruction on the host side,
an illegal instruction exception is reported; however, it traps into the
host when executed in VM mode.

When the hypercall is emulated, the A0 register is set to the value
KVM_HCALL_INVALID_CODE rather than injecting an EXCCODE_INE invalid
instruction exception, so the VM can continue executing the following code.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..d48f993ae206
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_STATUS_SUCCESS   0
+#define KVM_HCALL_INVALID_CODE -1UL
+#define KVM_HCALL_INVALID_PARAMETER-2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..923bbca9bd22 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a nop instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = 
{
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypercall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v5 3/6] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-02-21 Thread Bibo Mao
Instruction cpucfg can be used to get processor features, and a trap
exception is raised when it is executed in VM mode, so it can also be used
to provide cpu features to the VM. On real hardware, cpucfg areas 0 - 20
are used. Here the dedicated area 0x4000 -- 0x40ff is used by the KVM
hypervisor to provide PV features, and the area can be extended for other
hypervisors in future. This area will never be used by real HW; it is only
used by software.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 ++
 arch/loongarch/kvm/exit.c  | 46 +-
 3 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x4000 -- 0x40ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE0x4000UL
+#define CPUCFG_KVM_SIZE0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 923bbca9bd22..6a38fd59d86d 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*/
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v5 0/6] LoongArch: Add pv ipi support on LoongArch VM

2024-02-21 Thread Bibo Mao
On a physical machine, IPI hardware uses IOCSR registers, but there is a
trap into the hypervisor when a vcpu accesses IOCSR registers while the
system is in VM mode. SWI is an interrupt mechanism like SGI on ARM:
software can send an interrupt to a CPU, except that on LoongArch SWI can
currently only be sent to the local CPU. So SWI cannot be used for IPI on a
real HW system, but it can be used in a VM when combined with the hypercall
method: an IPI is sent via hypercall, an SWI interrupt is injected into the
destination vcpu, and the vcpu treats the SWI interrupt as an IPI.

With PV IPI supported, there is one trap on IPI sending and no trap on IPI
receiving. With the IOCSR HW IPI method, there is one trap on IPI sending
and two traps on IPI receiving.

IPI multicast support is also added for VMs; the idea comes from x86 PV
IPI. An IPI can be sent to up to 128 vcpus at one time. With IPI multicast
support, the number of traps is reduced greatly.

Here is the microbenchmark data for the "perf bench futex wake" testcase on
a 3C5000 single-way machine; it has 16 cpus, and the VM has 16 vcpus as
well. The benchmark reports the time in ms to wake up 16 threads; the
performance is better if the number is smaller.

physical machine 0.0176 ms
VM original  0.1140 ms
VM with pv ipi patch 0.0481 ms

---
Change in V5:
  1. Refresh function/macro names per review comments.

Change in V4:
  1. Rename the pv ipi hook functions call_func_ipi() and
call_func_single_ipi() to send_ipi_mask()/send_ipi_single(), since pv
ipi is used for both remote function calls and reschedule notification.
  2. Refresh changelog.

Change in V3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change cpucfg base address from 0x1000 to 0x4000, to avoid
conflicts with future hw usage
  3. Adjust patch order in this patchset, move patch
Refine-ipi-ops-on-LoongArch-platform to the first one.

Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if one is
already pending.
---
Bibo Mao (6):
  LoongArch/smp: Refine some ipi functions on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: Add paravirt interface for guest kernel
  LoongArch: KVM: Add vcpu search support from physical cpuid
  LoongArch: Add pv ipi support on LoongArch system

 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 156 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   1 +
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 153 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/setup.c |   2 +
 arch/loongarch/kernel/smp.c   |  60 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 125 --
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 23 files changed, 676 insertions(+), 102 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
 create mode 100644 arch/loongarch/kernel/paravirt.c


base-commit: 39133352cbed6626956d38ed72012f49b0421e7b
-- 
2.39.3




[PATCH v5 1/6] LoongArch/smp: Refine some ipi functions on LoongArch platform

2024-02-21 Thread Bibo Mao
It is a code refinement of ipi handling on the LoongArch platform;
there are three modifications.
1. Add the generic function get_percpu_irq(), replacing percpu irq
functions such as get_ipi_irq()/get_pmc_irq()/get_timer_irq().

2. Change the definition of the action parameter of
loongson_send_ipi_single() and loongson_send_ipi_mask(): plain decimal
action codes are now used at the ipi sender side rather than binary
bitmap encoding. The ipi hw takes the decimal code from the sender and
converts it, so the receiver still reads a binary bitmap from the ipi
message buffer.

3. Add structure smp_ops on LoongArch platform so that pv ipi can be
used later; see the sketch after this list.
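
For illustration, the native IOCSR backend would then be registered
through this structure; a sketch, where loongson_init_ipi() is an
assumed name for the native init hook:

struct smp_ops smp_ops = {
	.init_ipi		= loongson_init_ipi,
	.send_ipi_single	= loongson_send_ipi_single,
	.send_ipi_mask		= loongson_send_ipi_mask,
};

A pv implementation later only has to swap these three hooks.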

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..75d30529748c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*init_ipi)(void);
+   void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+   void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTION_BOOT_CPU0
+#define ACTION_RESCHEDULE  1
+#define ACTION_CALL_FUNCTION   2
+#define SMP_BOOT_CPU   BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..ce36897d1e5a 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void

[PATCH v4 6/6] LoongArch: Add pv ipi support on LoongArch system

2024-01-31 Thread Bibo Mao
On a LoongArch system, ipi hw uses iocsr registers: there is one iocsr
register access on ipi sending, and two iocsr accesses in the ipi
interrupt handler on the receiving side. In VM mode every iocsr access
causes the VM to trap into the hypervisor, so one ipi hw notification
costs three traps.

This patch adds pv ipi support for the VM: the hypercall instruction
is used on the ipi sender side, and the hypervisor injects SWI into the
VM. In the SWI interrupt handler only the estat CSR register is written
to clear the irq, and estat CSR access does not trap into the
hypervisor. So with pv ipi the sender traps into the hypervisor once
and the receiver does not trap at all, i.e. one trap in total; a sketch
of the receiver follows.
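
A minimal sketch of such a receiver, assuming the per-cpu messages
buffer and the handler naming of this series (illustrative, not the
literal patch code):

static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
{
	unsigned int action;
	irq_cpustat_t *info = this_cpu_ptr(&irq_stat);

	/* one plain CSR write clears SWI, with no vm exit */
	clear_csr_estat(1 << INT_SWI0);

	/* drain every action posted since the last interrupt */
	action = atomic_xchg(&info->messages, 0);
	if (action & SMP_CALL_FUNCTION) {
		generic_smp_call_function_interrupt();
		info->ipi_irqs[IPI_CALL_FUNCTION]++;
	}
	if (action & SMP_RESCHEDULE) {
		scheduler_ipi();
		info->ipi_irqs[IPI_RESCHEDULE]++;
	}

	return IRQ_HANDLED;
}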

Also this patch adds ipi multicast support; the method is similar to
x86. With it, an ipi notification can be sent to at most 128 vcpus at a
time, which greatly reduces the number of traps into the hypervisor.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h   |   1 +
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 124 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kernel/irq.c|   2 +-
 arch/loongarch/kernel/paravirt.c   | 113 ++
 arch/loongarch/kernel/smp.c|   2 +-
 arch/loongarch/kvm/exit.c  |  73 ++-
 arch/loongarch/kvm/vcpu.c  |   1 +
 9 files changed, 314 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..8a611843c1f0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t messages cacheline_aligned_in_smp;
 } cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 57399d7cf8b7..1bf927e2bfac 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hvcl_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
 #define HYPERVISOR_VENDOR_SHIFT    8
 #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
 
+#define KVM_HC_CODE_SERVICE    0
+#define KVM_HC_SERVICE         HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
+#define  KVM_HC_FUNC_IPI   1
+
 /*
  * LoongArch hypcall return code
  */
@@ -16,6 +20,126 @@
 #define KVM_HC_INVALID_CODE    -1UL
 #define KVM_HC_INVALID_PARAMETER   -2UL
 
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __al

[PATCH v4 5/6] LoongArch: KVM: Add vcpu search support from physical cpuid

2024-01-31 Thread Bibo Mao
Physical cpuid is used for interrupt routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in
CSR register LOONGARCH_CSR_CPUID; it can not be changed once the vcpu
is created, and two vcpus can not share the same physical cpuid. Since
different irqchips declare different physical cpuid sizes, KVM uses the
smallest one, from the extioi irqchip, and the max cpuid size is
defined as 256. A sketch of the lookup this map enables follows.
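
The diff below declares kvm_get_vcpu_by_cpuid(); conceptually the
lookup reduces to the following sketch (illustrative, not the literal
patch body):

struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
{
	struct kvm_phyid_map *map = kvm->arch.phyid_map;

	if (cpuid < 0 || cpuid >= KVM_MAX_PHYID)
		return NULL;

	/* only a slot claimed via kvm_set_cpuid() maps to a vcpu */
	if (map->phys_map[cpuid].enabled)
		return map->phys_map[cpuid].vcpu;

	return NULL;
}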

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..57399d7cf8b7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size is 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   bool    enabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   struct mutex  phyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..9f53950959da 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 27701991886d..97ca9c7160e6 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+   map = vcpu->kvm->arch.phyid_map;
+   mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+   /* Discard duplicated cpuid set */
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation */
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+
+   kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+   map->phys_map[val].enabled  = true;
+   m

[PATCH v4 4/6] LoongArch: Add paravirt interface for guest kernel

2024-01-31 Thread Bibo Mao
The patch adds a paravirt interface for the guest kernel. Function
pv_guest_init() first checks whether the system runs in VM mode; if the
kernel runs in VM mode, it calls kvm_para_available() to detect whether
the current VMM is the KVM hypervisor. The paravirt functions can only
work when the VMM is KVM, since KVM is the only hypervisor supported on
LoongArch now.

This patch only adds the paravirt interface for the guest kernel; no
effective pv functions are added here yet.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  9 
 arch/loongarch/include/asm/kvm_para.h |  7 
 arch/loongarch/include/asm/paravirt.h | 27 
 .../include/asm/paravirt_api_clock.h  |  1 +
 arch/loongarch/kernel/Makefile|  1 +
 arch/loongarch/kernel/paravirt.c  | 41 +++
 arch/loongarch/kernel/setup.c |  2 +
 7 files changed, 88 insertions(+)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 10959e6c3583..817a56dff80f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -585,6 +585,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+     This changes the kernel so it can modify itself when it is run
+     under a hypervisor, potentially improving performance significantly
+     over full virtualization.  However, when run without a hypervisor
+     the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM             1
+#define HYPERVISOR_VENDOR_SHIFT    8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
 /*
  * LoongArch hypcall return code
  */
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index ..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+   static int hypervisor_type;
+   int config;
+
+   if (!hypervisor_type) {
+   config = read_cpucfg(CPUCFG_KVM_SIG);
+   if (!memcmp(&config, KVM_SIGNATURE, 4))
+   hypervisor_type = HYPERVISOR_KVM;
+   }
+
+   return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   

[PATCH v4 0/6] LoongArch: Add pv ipi support on LoongArch VM

2024-01-31 Thread Bibo Mao
This patchset adds pv ipi support for the VM. On a physical machine,
ipi HW uses IOCSR registers, however in VM mode every vcpu access to an
IOCSR register traps into the hypervisor. SWI is an interrupt mechanism
like SGI on ARM: software can send an interrupt to a CPU, except that
on LoongArch SWI can currently only be sent to the local CPU. So SWI
can not be used for IPI on a real HW system, however it can be used on
a VM when combined with the hypercall method. This patchset uses the
SWI interrupt as the IPI mechanism, with SWI injection done via
hypercall. Sending an IPI then takes one trap and receiving takes none,
while with the IOCSR HW ipi method there are two traps into the
hypervisor on ipi receiving alone. A host-side sketch follows.
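
On the host side, the ipi hypercall service walks the destination
bitmap and injects SWI per vcpu. A hedged sketch; kvm_pv_send_ipi() and
the kvm_queue_irq() injection helper are assumed names, and the lookup
uses patch 5/6's kvm_get_vcpu_by_cpuid():

static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu, u64 low, u64 high, u64 min)
{
	int bit;
	struct kvm_vcpu *dest;
	u64 ipi_bitmap[2] = { low, high };

	for_each_set_bit(bit, (unsigned long *)ipi_bitmap, 128) {
		dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, min + bit);
		if (!dest)
			continue;

		/* inject SWI; the guest handler then runs trap-free */
		kvm_queue_irq(dest, INT_SWI0);
	}

	return KVM_HC_STATUS_SUCCESS;
}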

Also this patchset adds IPI multicast support for the VM; the idea
comes from x86 pv ipi. An IPI can be sent to up to 128 vcpus at a time.

Here is the microbenchmark data with the perf bench futex wake case on
a 3C5000 single-way machine, which has 16 cpus; the VM also has 16
vcpus. The benchmark reports the time in ms to wake up 16 threads, so a
smaller value means better performance.

perf bench futex wake, Wokeup 16 of 16 threads in ms
--physical machine--   --VM original--   --VM with pv ipi patch--
  0.0176 ms   0.1140 ms0.0481 ms

---
Change in V4:
  1. Rename the pv ipi hook functions call_func_ipi() and
call_func_single_ipi() to send_ipi_mask()/send_ipi_single(), since pv
ipi is used for both remote function calls and reschedule notification.
  2. Refresh changelog.

Change in V3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change cpucfg base address from 0x1000 to 0x4000, to avoid
conflicts with future hw usage
  3. Adjust patch order in this patchset, move patch
Refine-ipi-ops-on-LoongArch-platform to the first one.

Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if one is
already pending.
---

Bibo Mao (6):
  LoongArch/smp: Refine ipi ops on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: Add paravirt interface for guest kernel
  LoongArch: KVM: Add vcpu search support from physical cpuid
  LoongArch: Add pv ipi support on LoongArch system

 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 157 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   1 +
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 154 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/setup.c |   2 +
 arch/loongarch/kernel/smp.c   |  60 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 125 --
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 23 files changed, 678 insertions(+), 102 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
 create mode 100644 arch/loongarch/kernel/paravirt.c


base-commit: 1bbb19b6eb1b8685ab1c268a401ea64380b8bbcb
-- 
2.39.3




[PATCH v4 2/6] LoongArch: KVM: Add hypercall instruction emulation support

2024-01-31 Thread Bibo Mao
On a LoongArch system, the hypercall instruction is supported when the
system runs in VM mode. This patch adds a dummy handler that emulates
the hypercall instruction, rather than injecting an EXCCODE_INE invalid
instruction exception; a hypothetical guest-side probe is sketched
below.
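
With only this dummy handler in place, a guest issuing hvcl gets a
well-defined status instead of a fault. A hypothetical probe, using the
kvm_hypercall() wrapper that a later patch in this series adds:

	/* any code works: no service is bound yet, the host just answers */
	long ret = kvm_hypercall(0);

	if (ret == (long)KVM_HC_INVALID_CODE)
		pr_info("hvcl traps cleanly, no hypercall service yet\n");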

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..9425d3b7e486
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypcall return code
+ */
+#define KVM_HC_STATUS_SUCCESS  0
+#define KVM_HC_INVALID_CODE    -1UL
+#define KVM_HC_INVALID_PARAMETER   -2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..d15c71320a11 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a no-op instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypcall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v4 3/6] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-01-31 Thread Bibo Mao
The VM traps into the hypervisor when executing the cpucfg
instruction. Hardware only uses index area 0 - 20 for actual usage now,
so the dedicated area 0x4000 -- 0x40ff is reserved here for the KVM
hypervisor, and the scheme can be extended to other hypervisors in
future. A guest-side detection sketch follows.
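
A guest can then probe the reserved window to detect KVM; a minimal
sketch, matching the kvm_para_available() check that patch 4/6 adds:

static bool kvm_signature_found(void)
{
	unsigned int sig = read_cpucfg(CPUCFG_KVM_SIG);

	/* CPUCFG_KVM_SIG returns the four bytes "KVM\0" under KVM */
	return !memcmp(&sig, KVM_SIGNATURE, 4);
}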

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 ++
 arch/loongarch/kvm/exit.c  | 46 +-
 3 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x4000 -- 0x40ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE    0x4000UL
+#define CPUCFG_KVM_SIZE    0x100
+#define CPUCFG_KVM_SIG     CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE     "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index d15c71320a11..f4e4df05f578 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*/
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v4 1/6] LoongArch/smp: Refine ipi ops on LoongArch platform

2024-01-31 Thread Bibo Mao
This patch refines ipi handling on the LoongArch platform; there are
three changes.
1. Add the generic get_percpu_irq() api, replacing percpu irq functions
such as get_ipi_irq()/get_pmc_irq()/get_timer_irq().

2. Change the action parameter definition of loongson_send_ipi_single()
and loongson_send_ipi_mask(). Plain decimal encoding is used rather
than binary bitmap encoding for the ipi action: the ipi hw sender uses
a decimal action code, and the receiver reads a binary bitmap, since
the ipi hw converts the code into a bitmap in the ipi message buffer.

3. Add structure smp_ops on LoongArch platform so that pv ipi can be used
later.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..8a42632b038a 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*init_ipi)(void);
+   void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+   void (*send_ipi_single)(int cpu, unsigned int action);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTTION_BOOT_CPU   0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION  2
+#define SMP_BOOT_CPU   BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..ce36897d1e5a 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
-static int __init

[PATCH v3 1/6] LoongArch/smp: Refine ipi ops on LoongArch platform

2024-01-22 Thread Bibo Mao
This patch refines ipi handling on the LoongArch platform; there are
three changes.
1. Add the generic get_percpu_irq api, replacing percpu irq functions
such as get_ipi_irq/get_pmc_irq/get_timer_irq.

2. Change the action parameter definition of loongson_send_ipi_single
and loongson_send_ipi_mask. Action codes are used here rather than
bitmap encoding for the ipi action: the ipi hw sender uses the action
code, and the receiver gets the action bitmap, since the ipi hw
converts the code into a bitmap in the ipi message buffer.

3. Add smp_ops on LoongArch platform so that pv ipi can be used later.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..330f1cb3741c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
+   void (*call_func_single_ipi)(int cpu, unsigned int action);
+   void (*ipi_init)(void);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTTION_BOOT_CPU   0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION  2
+#define SMP_BOOT_CPU   BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..1b58f7c3eed9 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
-static int __init get_ipi_irq(void)
-{
-   struct irq_domain *d

[PATCH v3 5/6] LoongArch: KVM: Add physical cpuid map support

2024-01-22 Thread Bibo Mao
Physical cpuid is used for irq routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in
CSR register LOONGARCH_CSR_CPUID and can not be changed once the vcpu
is created. Since different irqchips define different physical cpuid
sizes, KVM uses the smallest one, from extioi, and the max cpuid size
is defined as 256.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 93 ++-
 arch/loongarch/kvm/vm.c   | 11 
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..57399d7cf8b7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size is 1024
+ *  For extioi interrupt controller, max dest CPUID size is 256
+ *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   bool    enabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   struct mutex  phyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index e71ceb88f29e..2402129ee955 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 27701991886d..97ca9c7160e6 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+   map = vcpu->kvm->arch.phyid_map;
+   mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->phys_map[cpuid].enabled) {
+   /*
+* Cpuid is already set before
+* Forbid changing different cpuid at runtime
+*/
+   if (cpuid != val) {
+   /*
+* Cpuid 0 is initial value for vcpu, maybe invalid
+* unset value for vcpu
+*/
+   if (cpuid) {
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+   } else {
+   /* Discard duplicated cpuid set */
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+   }
+
+   if (map->phys_map[val].enabled) {
+   /*
+* New cpuid is already set with other vcpu
+* Forbid sharing the same cpuid between different vcpus
+*/
+   if (map->phys_map[val].vcpu != vcpu) {
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return -EINVAL;
+   }
+
+   /* Discard duplicated cpuid set operation */
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   return 0;
+   }
+
+   kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+   map->phys_map[val].enabled  = true;
+   map->phys_map[val].vcpu = vcpu;
+   if (m

[PATCH v3 2/6] LoongArch: KVM: Add hypercall instruction emulation support

2024-01-22 Thread Bibo Mao
On a LoongArch system, the hypercall instruction is supported when the
system runs in VM mode. This patch adds a dummy handler that emulates
the hypercall instruction, rather than injecting an EXCCODE_INE invalid
instruction exception.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..9425d3b7e486
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypcall return code
+ */
+#define KVM_HC_STATUS_SUCCESS  0
+#define KVM_HC_INVALID_CODE    -1UL
+#define KVM_HC_INVALID_PARAMETER   -2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..d15c71320a11 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a no-op instruction, only set return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_LSXDIS]= kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS]   = kvm_handle_lasx_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypcall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH v3 6/6] LoongArch: Add pv ipi support on LoongArch system

2024-01-22 Thread Bibo Mao
On a LoongArch system, ipi hw uses iocsr registers: there is one iocsr
register access on the ipi sender side and two iocsr accesses on the
ipi receiver side, in the ipi interrupt handler. In VM mode every iocsr
access traps into the hypervisor, so one ipi hw notification costs
three traps.

This patch adds pv ipi support for the VM: the hypercall instruction
is used on the ipi sender side, and the hypervisor injects SWI into the
VM. In the SWI interrupt handler only the estat CSR register is written
to clear the irq, and estat CSR access does not trap into the
hypervisor. So with pv ipi the sender traps into the hypervisor once
and the receiver does not trap at all, i.e. one trap in total.

Also this patch adds ipi multicast support; the method is similar to
x86. With it, an ipi notification can be sent to at most 128 vcpus at a
time, which greatly reduces the number of traps into the hypervisor.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h   |   1 +
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 124 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kernel/irq.c|   2 +-
 arch/loongarch/kernel/paravirt.c   | 113 ++
 arch/loongarch/kernel/smp.c|   2 +-
 arch/loongarch/kvm/exit.c  |  73 ++-
 arch/loongarch/kvm/vcpu.c  |   1 +
 9 files changed, 314 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..8a611843c1f0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t messages cacheline_aligned_in_smp;
 } cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 57399d7cf8b7..1bf927e2bfac 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hvcl_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
 #define HYPERVISOR_VENDOR_SHIFT    8
 #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
 
+#define KVM_HC_CODE_SERVICE    0
+#define KVM_HC_SERVICE         HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
+#define  KVM_HC_FUNC_IPI   1
+
 /*
  * LoongArch hypcall return code
  */
@@ -16,6 +20,126 @@
 #define KVM_HC_INVALID_CODE    -1UL
 #define KVM_HC_INVALID_PARAMETER   -2UL
 
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(

[PATCH v3 4/6] LoongArch: Add paravirt interface for guest kernel

2024-01-22 Thread Bibo Mao
The patch adds a paravirt interface for the guest kernel. Function
pv_guest_init first checks whether the system runs in VM mode; if the
kernel runs in VM mode, it calls kvm_para_available to detect whether
the current VMM is the KVM hypervisor. The paravirt functions can only
work when the VMM is KVM, since KVM is the only hypervisor supported on
LoongArch now.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  9 
 arch/loongarch/include/asm/kvm_para.h |  7 
 arch/loongarch/include/asm/paravirt.h | 27 
 .../include/asm/paravirt_api_clock.h  |  1 +
 arch/loongarch/kernel/Makefile|  1 +
 arch/loongarch/kernel/paravirt.c  | 41 +++
 arch/loongarch/kernel/setup.c |  2 +
 7 files changed, 88 insertions(+)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 10959e6c3583..817a56dff80f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -585,6 +585,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+     This changes the kernel so it can modify itself when it is run
+     under a hypervisor, potentially improving performance significantly
+     over full virtualization.  However, when run without a hypervisor
+     the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM             1
+#define HYPERVISOR_VENDOR_SHIFT    8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
 /*
  * LoongArch hypcall return code
  */
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index ..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+   static int hypervisor_type;
+   int config;
+
+   if (!hypervisor_type) {
+   config = read_cpucfg(CPUCFG_KVM_SIG);
+   if (!memcmp(&config, KVM_SIGNATURE, 4))
+   hypervisor_type = HYPERVISOR_KVM;
+   }
+
+   return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   return 0;
+
+   return 1;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
in

[PATCH v3 3/6] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-01-22 Thread Bibo Mao
The system traps into the hypervisor when a guest executes the cpucfg
instruction. Hardware currently only uses the area 0 - 20 for actual
usage, so one dedicated area 0x4000 -- 0x40ff is reserved for the KVM
hypervisor here, and the area can be extended for other hypervisors in
the future.
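
For illustration only (not part of the patch), here is a minimal
guest-side sketch of how the reserved cpucfg area can be probed. The
pv_feature_supported() helper name is hypothetical; read_cpucfg(),
CPUCFG_KVM_SIG, KVM_SIGNATURE and CPUCFG_KVM_FEATURE come from this
patch, and treating CPUCFG_KVM_FEATURE as a feature bitmap is an
assumption:

static bool pv_feature_supported(int feature)
{
	unsigned int sig;

	/* The signature area returns "KVM\0" on the KVM hypervisor */
	sig = read_cpucfg(CPUCFG_KVM_SIG);
	if (memcmp(&sig, KVM_SIGNATURE, 4))
		return false;

	/* Assumption: CPUCFG_KVM_FEATURE is a bitmap of pv features */
	return read_cpucfg(CPUCFG_KVM_FEATURE) & BIT(feature);
}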

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h | 10 ++
 arch/loongarch/kvm/exit.c  | 46 +-
 3 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a1d22e8b6f94 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,16 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x4000 -- 0x40ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE0x4000UL
+#define CPUCFG_KVM_SIZE0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index d15c71320a11..f4e4df05f578 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*/
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v3 0/6] LoongArch: Add pv ipi support on LoongArch VM

2024-01-22 Thread Bibo Mao
This patchset adds pv ipi support for VMs. On a physical machine, the
ipi HW uses IOCSR registers, but a vcpu's IOCSR register accesses trap
into the hypervisor when the system is in VM mode. SWI is an interrupt
mechanism like SGI on ARM: software can send an interrupt to a CPU,
except that on LoongArch SWI can currently only be sent to the local
CPU. So SWI cannot be used for IPI on a real HW system, but it can be
used in a VM when combined with the hypercall method. This patchset
uses the SWI interrupt for the IPI mechanism and injects SWI via
hypercall. There is still one trap when sending an IPI, but the SWI
interrupt handler incurs no trap.
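
As an editorial sketch of the flow described above, a guest-side pv
ipi send might look like the following; it relies on the
kvm_hypercall3() wrapper and KVM_HC_FUNC_IPI from later patches in
this series, and the exact hypercall argument layout shown here is an
assumption:

static void pv_send_ipi_single(int cpu, unsigned int action)
{
	irq_cpustat_t *info = &per_cpu(irq_stat, cpu);

	/* Publish the action bit in the receiver's message buffer first */
	if (atomic_fetch_or(BIT(action), &info->messages))
		return;	/* an ipi is already pending for this cpu */

	/* One trap: ask the hypervisor to inject SWI into the target vcpu */
	kvm_hypercall3(KVM_HC_FUNC_IPI, 1, 0, cpu_logical_map(cpu));
}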

Here is the microbenchmark data for the perf bench futex wake case on
a 3C5000 single-way machine; there are 16 cpus on the machine and the
VM has 16 vcpus as well. The benchmark reports the time in ms to wake
up 16 threads; smaller is better.

perf bench futex wake, Wokeup 16 of 16 threads in ms
--physical machine--   --VM original--   --VM with pv ipi patch--
  0.0176 ms   0.1140 ms0.0481 ms 

---
Change in V3:
  1. Add 128 vcpu ipi multicast support like x86
  2. Change the cpucfg base address from 0x1000 to 0x4000, which is
used to detect the hypervisor type, in order to avoid conflicts with
future hw usage
  3. Adjust the patch order in this patchset: move patch
Refine-ipi-ops-on-LoongArch-platform to the first position.

Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if there is
already a pending ipi message.
---

Bibo Mao (6):
  LoongArch/smp: Refine ipi ops on LoongArch platform
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch: Add paravirt interface for guest kernel
  LoongArch: KVM: Add physical cpuid map support
  LoongArch: Add pv ipi support on LoongArch system

 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 157 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   1 +
 arch/loongarch/include/asm/loongarch.h|  11 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 154 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/setup.c |   2 +
 arch/loongarch/kernel/smp.c   |  60 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 125 --
 arch/loongarch/kvm/vcpu.c |  94 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 23 files changed, 678 insertions(+), 102 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
 create mode 100644 arch/loongarch/kernel/paravirt.c


base-commit: 7a396820222d6d4c02057f41658b162bdcdadd0e
-- 
2.39.3




[PATCH v2 5/6] LoongArch: KVM: Add physical cpuid map support

2024-01-07 Thread Bibo Mao
Physical cpuid is used for irq routing by irqchips such as the
ipi/msi/extioi interrupt controllers. The physical cpuid is stored in
the CSR register LOONGARCH_CSR_CPUID and cannot be changed once the
vcpu is created. Since different irqchips define different physical
cpuid sizes, KVM uses the smallest one, from extioi, and the max cpuid
size is defined as 256.
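
For context, a minimal sketch (not part of this patch) of how an
emulated irqchip might use this map to route an interrupt; the
kvm_queue_irq() helper is a hypothetical stand-in for the actual
injection path:

static int route_irq_to_phys_cpu(struct kvm *kvm, int cpuid, int irq)
{
	struct kvm_vcpu *vcpu;

	/* Translate the hw cpuid carried by the irqchip into a vcpu */
	vcpu = kvm_get_vcpu_by_cpuid(kvm, cpuid);
	if (!vcpu)
		return -EINVAL;

	kvm_queue_irq(vcpu, irq);	/* hypothetical injection helper */
	return 0;
}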

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/kvm_host.h | 26 
 arch/loongarch/include/asm/kvm_vcpu.h |  1 +
 arch/loongarch/kvm/vcpu.c | 61 ++-
 arch/loongarch/kvm/vm.c   | 11 +
 4 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 0e89db020481..93acba84f87e 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
 
 #define MAX_PGTABLE_LEVELS 4
 
+/*
+ * Physical cpu id is used for interrupt routing; there are different
+ * definitions of physical cpuid on different hardware.
+ *  For the LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ *  For IPI HW, max dest CPUID size is 1024
+ *  For the extioi interrupt controller, max dest CPUID size is 256
+ *  For the MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for the KVM hypervisor; in the
+ * future it will be expanded to 4096, covering at most 16 packages
+ * with at most 256 vcpus each.
+ */
+#define KVM_MAX_PHYID  256
+
+struct kvm_phyid_info {
+   struct kvm_vcpu *vcpu;
+   boolenabled;
+};
+
+struct kvm_phyid_map {
+   int max_phyid;
+   struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
 struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int  root_level;
+   struct mutex  phyid_map_lock;
+   struct kvm_phyid_map  *phyid_map;
 
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h 
b/arch/loongarch/include/asm/kvm_vcpu.h
index 0e87652f780a..3019e260a3ae 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -61,6 +61,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
 void kvm_restore_timer(struct kvm_vcpu *vcpu);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
 
 /*
  * Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index cf1c4d64c1b7..9dc40a80ab5a 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,63 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int 
id, u64 *val)
return 0;
 }
 
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   if (val >= KVM_MAX_PHYID)
+   return -EINVAL;
+
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+   if (cpuid == 0) {
+   kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+   map = vcpu->kvm->arch.phyid_map;
+   map->phys_map[val].enabled  = true;
+   map->phys_map[val].vcpu = vcpu;
+
+   mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
+   if (map->max_phyid < val)
+   map->max_phyid = val;
+   mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+   } else if (cpuid != val)
+   return -EINVAL;
+
+   return 0;
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+   struct kvm_phyid_map  *map;
+
+   if (cpuid >= KVM_MAX_PHYID)
+   return NULL;
+
+   map = kvm->arch.phyid_map;
+   if (map->phys_map[cpuid].enabled)
+   return map->phys_map[cpuid].vcpu;
+
+   return NULL;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+   int cpuid;
+   struct loongarch_csrs *csr = vcpu->arch.csr;
+   struct kvm_phyid_map  *map;
+
+   map = vcpu->kvm->arch.phyid_map;
+   cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+   if (cpuid >= KVM_MAX_PHYID)
+   return;
+
+   if (map->phys_map[cpuid].enabled) {
+   map->phys_map[cpuid].vcpu = NULL;
+   map->phys_map[cpuid].enabled = false;
+   }
+}
+
 static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
 {
int ret = 0, gintc;
@@ -291,7 +348,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int 
id, u64 val)
kvm_se

[PATCH v2 6/6] LoongArch: Add pv ipi support on LoongArch system

2024-01-07 Thread Bibo Mao
On LoongArch systems, the ipi hw uses iocsr registers; there is one
iocsr register access on the ipi sender side and two iocsr accesses in
the ipi interrupt handler on the receiver side. In VM mode, all iocsr
register accesses trap into the hypervisor, so one ipi hw notification
causes three traps.

This patch adds pv ipi support for VMs: a hypercall instruction is
used on the ipi sender side, and the hypervisor injects an SWI into
the VM. In the SWI interrupt handler, only the estat CSR register is
written to clear the irq, and estat CSR access does not trap into the
hypervisor. So with pv ipi, the sender traps into the hypervisor once
and the receiver does not trap at all; there is only one trap in
total.

This patch also adds ipi multicast support, using a method similar to
x86. With multicast support, an ipi notification can be sent to at
most 64 vcpus at one time, which greatly reduces traps into the
hypervisor.
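
A sketch of the multicast encoding described above (the hypercall
argument layout is an assumption for illustration; the real patch also
skips cpus with a pending message, which is omitted here):

static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
	int cpu, base = INT_MAX;
	u64 bitmap = 0;

	/* Relative encoding: find the lowest hw cpuid in the mask */
	for_each_cpu(cpu, mask)
		base = min(base, (int)cpu_logical_map(cpu));

	/* Each target vcpu becomes one bit, at most 64 per hypercall */
	for_each_cpu(cpu, mask)
		bitmap |= BIT_ULL(cpu_logical_map(cpu) - base);

	/* One trap for the whole mask instead of one trap per cpu */
	kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, base);
}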

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h   |   1 +
 arch/loongarch/include/asm/kvm_host.h  |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 124 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kernel/irq.c|   2 +-
 arch/loongarch/kernel/paravirt.c   | 110 ++
 arch/loongarch/kernel/smp.c|   2 +-
 arch/loongarch/kvm/exit.c  |  70 +-
 arch/loongarch/kvm/vcpu.c  |   1 +
 9 files changed, 308 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..8a611843c1f0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t messages ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h 
b/arch/loongarch/include/asm/kvm_host.h
index 93acba84f87e..f21c60ce58be 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+   u64 hvcl_exits;
 };
 
 #define KVM_MEM_HUGEPAGE_CAPABLE   (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
 #define HYPERVISOR_VENDOR_SHIFT8
 #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + 
code)
 
+#define KVM_HC_CODE_SERVICE0
+#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, 
KVM_HC_CODE_SERVICE)
+#define  KVM_HC_FUNC_IPI   1
+
 /*
  * LoongArch hypcall return code
  */
@@ -16,6 +20,126 @@
 #define KVM_HC_INVALID_CODE-1UL
 #define KVM_HC_INVALID_PARAMETER   -2UL
 
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64

[PATCH v2 4/6] LoongArch: Add paravirt interface for guest kernel

2024-01-07 Thread Bibo Mao
This patch adds a paravirt interface for the guest kernel. The
function pv_guest_init() first checks whether the system runs in VM
mode; if so, it calls kvm_para_available() to detect whether the
current VMM is the KVM hypervisor. The paravirt functions only work
when the VMM is KVM, and KVM is the only hypervisor supported on
LoongArch for now.
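
The setup.c hunk is truncated further below in this archive; the call
site presumably looks like the following sketch (its exact placement
inside platform_init() is an assumption):

void __init platform_init(void)
{
	/* ... existing firmware and acpi initialization ... */

	/* Switch to pv primitives when running on the KVM hypervisor */
	pv_guest_init();
}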

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  9 
 arch/loongarch/include/asm/kvm_para.h |  7 
 arch/loongarch/include/asm/paravirt.h | 27 
 .../include/asm/paravirt_api_clock.h  |  1 +
 arch/loongarch/kernel/Makefile|  1 +
 arch/loongarch/kernel/paravirt.c  | 41 +++
 arch/loongarch/kernel/setup.c |  2 +
 7 files changed, 88 insertions(+)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..d8ccaf46a50d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -564,6 +564,15 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   depends on AS_HAS_LVZ_EXTENSION
+   help
+  This changes the kernel so it can modify itself when it is run
+  under a hypervisor, potentially improving performance significantly
+  over full virtualization.  However, when run without a hypervisor
+  the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + 
code)
+
 /*
  * LoongArch hypcall return code
  */
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index ..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+   static int hypervisor_type;
+   int config;
+
+   if (!hypervisor_type) {
+   config = read_cpucfg(CPUCFG_KVM_SIG);
+   if (!memcmp(&config, KVM_SIGNATURE, 4))
+   hypervisor_type = HYPERVISOR_KVM;
+   }
+
+   return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   return 0;
+
+   return 1;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb

[PATCH v2 3/6] LoongArch: SMP: Refine ipi ops on LoongArch platform

2024-01-07 Thread Bibo Mao
This patch refines ipi handling on the LoongArch platform; there are
three changes:
1. Add a generic get_percpu_irq() api and replace the percpu irq
functions get_ipi_irq()/get_pmc_irq()/get_timer_irq() with it.

2. Change the meaning of the action parameter of
loongson_send_ipi_single() and loongson_send_ipi_mask(). Code encoding
is used here rather than bitmap encoding for the ipi action: the ipi
hw sender uses an action code, while the ipi receiver gets a bitmap
encoding, since the ipi hw converts the code into a bitmap in the ipi
message buffer.

3. Add smp_ops on the LoongArch platform so that pv ipi can be used
later.
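
As an illustration of change 3, the native smp_ops instance can later
be overridden by pv implementations; a sketch (the pv function names
follow the later patches in this series, and the native ipi_init name
is an assumption):

struct smp_ops smp_ops = {
	.call_func_ipi		= loongson_send_ipi_mask,
	.call_func_single_ipi	= loongson_send_ipi_single,
	.ipi_init		= loongson_ipi_init,
};

/* The pv code then only needs to switch the function pointers: */
static void __init pv_ipi_init(void)
{
	smp_ops.call_func_ipi		= pv_send_ipi_mask;
	smp_ops.call_func_single_ipi	= pv_send_ipi_single;
}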

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 58 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..330f1cb3741c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
+   void (*call_func_single_ipi)(int cpu, unsigned int action);
+   void (*ipi_init)(void);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTTION_BOOT_CPU   0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION  2
+#define SMP_BOOT_CPU   BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..1b58f7c3eed9 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
-static int __init get_ipi_irq(void)
-{
-   struct irq_domain *d

[PATCH v2 0/6] LoongArch: Add pv ipi support on LoongArch VM

2024-01-07 Thread Bibo Mao
This patchset adds pv ipi support for VMs. On a physical machine, the
ipi HW uses IOCSR registers, but a vcpu's IOCSR register accesses trap
into the hypervisor when the system is in VM mode. SWI is an interrupt
mechanism like SGI on ARM: software can send an interrupt to a CPU,
except that on LoongArch SWI can currently only be sent to the local
CPU. So SWI cannot be used for IPI on a real HW system, but it can be
used in a VM when combined with the hypercall method. This patchset
uses the SWI interrupt for the IPI mechanism and injects SWI via
hypercall. There is still one trap when sending an IPI, but the SWI
interrupt handler incurs no trap.

This patchset passes the runltp test cases, and the unixbench score is
99% of that on the physical machine. Here is the unixbench score with
16 cores on a 3C5000 single-way machine.

UnixBench score on 3C5000 machine with 16 cores 
Dhrystone 2 using register variables 116700.0  339749961.8  29113.1
Double-Precision Whetstone   55.0  57716.9  10494.0
Execl Throughput 43.0  33563.4   7805.4
File Copy 1024 bufsize 2000 maxblocks  3960.01017912.5   2570.5
File Copy 256 bufsize 500 maxblocks1655.0 260061.4   1571.4
File Copy 4096 bufsize 8000 maxblocks  5800.03216109.4   5545.0
Pipe Throughput   12440.0   18404312.0  14794.5
Pipe-based Context Switching   4000.03395856.2   8489.6
Process Creation126.0  55684.8   4419.4
Shell Scripts (1 concurrent) 42.4  55901.8  13184.4
Shell Scripts (8 concurrent)  6.0   7396.5  12327.5
System Call Overhead  15000.06997351.4   4664.9
System Benchmarks Index Score7288.6

UnixBench score on VM with 16 cores -
Dhrystone 2 using register variables 116700.0  341649555.5  29275.9
Double-Precision Whetstone   55.0  57490.9  10452.9
Execl Throughput 43.0  33663.8   7828.8
File Copy 1024 bufsize 2000 maxblocks  3960.01047631.2   2645.5
File Copy 256 bufsize 500 maxblocks1655.0 286671.0   1732.2
File Copy 4096 bufsize 8000 maxblocks  5800.03243588.2   5592.4
Pipe Throughput   12440.0   16353087.8  13145.6
Pipe-based Context Switching   4000.03100690.0   7751.7
Process Creation126.0  51502.1   4087.5
Shell Scripts (1 concurrent) 42.4  56665.3  13364.4
Shell Scripts (8 concurrent)  6.0   7412.1  12353.4
System Call Overhead  15000.06962239.6   4641.5
System Benchmarks Index Score7205.8

---
Change in V2:
  1. Add hw cpuid map support since ipi routing uses hw cpuid
  2. Refine changelog description
  3. Add hypercall statistic support for vcpu
  4. Set percpu pv ipi message buffer aligned with cacheline
  5. Refine pv ipi send logic: do not send an ipi message if there is
already a pending ipi message.
---

Bibo Mao (6):
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch/smp: Refine ipi ops on LoongArch platform
  LoongArch: Add paravirt interface for guest kernel
  LoongArch: KVM: Add physical cpuid map support
  LoongArch: Add pv ipi support on LoongArch system

 arch/loongarch/Kconfig|   9 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_host.h |  27 +++
 arch/loongarch/include/asm/kvm_para.h | 157 ++
 arch/loongarch/include/asm/kvm_vcpu.h |   1 +
 arch/loongarch/include/asm/loongarch.h|  10 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 151 +
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/setup.c |   2 +
 arch/loongarch/kernel/smp.c   |  60 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 122 --
 arch/loongarch/kvm/vcpu.c |  62 ++-
 arch/loongarch/kvm/vm.c   |  11 ++
 23 files changed, 639 insertions(+), 102 deletions(-)
 create

[PATCH v2 2/6] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-01-07 Thread Bibo Mao
The system traps into the hypervisor when a guest executes the cpucfg
instruction. Hardware currently only uses the area 0 - 20 for actual
usage, so one dedicated area 0x1000 -- 0x10ff is reserved for the KVM
hypervisor here, and the area can be extended for other hypervisors in
the future.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h |  9 +
 arch/loongarch/kvm/exit.c  | 46 +-
 3 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a03b466555a1 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,15 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x1000 -- 0x10ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE0x1000UL
+#define CPUCFG_KVM_SIZE0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 59e5fe221982..e233d7b3b76d 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*/
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH v2 1/6] LoongArch: KVM: Add hypercall instruction emulation support

2024-01-07 Thread Bibo Mao
On LoongArch systems, the hypercall instruction is supported when the
system runs in VM mode. This patch adds a dummy function for hypercall
instruction emulation, instead of injecting an EXCCODE_INE invalid
instruction exception.
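
For reference, a sketch of what a guest could execute to reach
kvm_handle_hypcall(); the register usage matches the kvm_hypercall*()
wrappers added later in this series, and the hvcl immediate value 0 is
chosen arbitrarily here:

static long try_hypercall(unsigned long fid)
{
	register long ret asm("v0");
	register unsigned long fun asm("a0") = fid;

	/* Traps with EXCCODE_HVC; this dummy handler returns
	 * KVM_HC_INVALID_CODE in a0.
	 */
	__asm__ __volatile__(
		"hvcl 0"
		: "=r" (ret)
		: "r" (fun)
		: "memory"
	);

	return ret;
}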

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..9425d3b7e486
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypcall return code
+ */
+#define KVM_HC_STATUS_SUCCESS  0
+#define KVM_HC_INVALID_CODE-1UL
+#define KVM_HC_INVALID_PARAMETER   -2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index e708a1786d6b..59e5fe221982 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -650,6 +650,15 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a no-op instruction and only set the return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -679,6 +688,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = 
{
[EXCCODE_TLBM]  = kvm_handle_write_fault,
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypcall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH 3/5] LoongArch/smp: Refine ipi ops on LoongArch platform

2024-01-02 Thread Bibo Mao
This patch refines ipi handling on the LoongArch platform; there are
three changes:
1. Add a generic get_percpu_irq() api and replace the percpu irq
functions get_ipi_irq()/get_pmc_irq()/get_timer_irq() with it.

2. Change the meaning of the action parameter of
loongson_send_ipi_single() and loongson_send_ipi_mask(). Code encoding
is used here rather than bitmap encoding for the ipi action: the ipi
hw sender uses an action code, while the ipi receiver gets a bitmap
encoding, since the ipi hw converts the code into a bitmap in the ipi
message buffer.

3. Add smp_ops on the LoongArch platform so that pv ipi can be used
later.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h |  4 ++
 arch/loongarch/include/asm/irq.h | 10 -
 arch/loongarch/include/asm/smp.h | 31 +++
 arch/loongarch/kernel/irq.c  | 22 +--
 arch/loongarch/kernel/perf_event.c   | 14 +--
 arch/loongarch/kernel/smp.c  | 59 +++-
 arch/loongarch/kernel/time.c | 12 +-
 7 files changed, 72 insertions(+), 80 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
 extern void ack_bad_irq(unsigned int irq);
 #define ack_bad_irq ack_bad_irq
 
+enum ipi_msg_type {
+   IPI_RESCHEDULE,
+   IPI_CALL_FUNCTION,
+};
 #define NR_IPI 2
 
 typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
 extern struct fwnode_handle *pch_lpc_handle;
 extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
 
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+   struct irq_domain *d;
+
+   d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+   if (d)
+   return irq_create_mapping(d, vector);
 
+   return -EINVAL;
+}
 #include 
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..330f1cb3741c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
 #include 
 #include 
 
+struct smp_ops {
+   void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
+   void (*call_func_single_ipi)(int cpu, unsigned int action);
+   void (*ipi_init)(void);
+};
+
+extern struct smp_ops smp_ops;
 extern int smp_num_siblings;
 extern int num_processors;
 extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
 void loongson_boot_secondary(int cpu, struct task_struct *idle);
 void loongson_init_secondary(void);
 void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
 #ifdef CONFIG_HOTPLUG_CPU
 int loongson_cpu_disable(void);
 void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
 
 #define cpu_physical_id(cpu)   cpu_logical_map(cpu)
 
-#define SMP_BOOT_CPU   0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION  0x4
+#define ACTTION_BOOT_CPU   0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION  2
+#define SMP_BOOT_CPU   BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION  BIT(ACTTION_CALL_FUNCTION)
 
 struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
 
 extern asmlinkage void smpboot_entry(void);
 extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void calculate_cpu_foreign_map(void);
 
 /*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
  */
 extern void show_ipi_list(struct seq_file *p, int prec);
 
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
-   loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
-   loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline int __cpu_disable(void)
 {
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..1b58f7c3eed9 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
-static int __init get_ipi_irq(void)
-{
-   struct irq_domain *d

[PATCH 5/5] LoongArch: Add pv ipi support on LoongArch system

2024-01-02 Thread Bibo Mao
On LoongArch systems, the ipi hw uses iocsr registers; there is one
iocsr register access on the ipi sender side and two iocsr accesses in
the ipi interrupt handler. In VM mode, all iocsr register accesses
trap into the hypervisor.

This patch adds pv ipi support for VMs: a hypercall instruction is
used on the ipi sender side, and the hypervisor injects an SWI into
the VM. In the SWI interrupt handler, only the estat CSR register is
read and written, and estat CSR access does not trap into the
hypervisor. So with pv ipi, the sender traps into the hypervisor while
the pv ipi interrupt handler does not trap.

This patch also adds ipi multicast support, using a method similar to
x86. With multicast support, an ipi notification can be sent to at
most 64 vcpus at a time. The hw cpuid equals the logical cpuid in the
LoongArch kvm hypervisor for now; hw cpuid search logic will be added
to the kvm hypervisor in the next patch.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/hardirq.h   |   1 +
 arch/loongarch/include/asm/kvm_para.h  | 124 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/kernel/irq.c|   2 +-
 arch/loongarch/kernel/paravirt.c   | 103 
 arch/loongarch/kernel/smp.c|   2 +-
 arch/loongarch/kvm/exit.c  |  66 -
 7 files changed, 295 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h 
b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..998011f162d0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
 typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+   atomic_t messages;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
 #define HYPERVISOR_VENDOR_SHIFT8
 #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + 
code)
 
+#define KVM_HC_CODE_SERVICE0
+#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, 
KVM_HC_CODE_SERVICE)
+#define  KVM_HC_FUNC_IPI   1
+
 /*
  * LoongArch hypcall return code
  */
@@ -16,6 +20,126 @@
 #define KVM_HC_INVALID_CODE-1UL
 #define KVM_HC_INVALID_PARAMETER   -2UL
 
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+   unsigned long arg0, unsigned long arg1)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r" (a2)
+   : "memory"
+   );
+
+   return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+   unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+   register long ret asm("v0");
+   register unsigned long fun asm("a0") = fid;
+   register unsigned long a1  asm("a1") = arg0;
+   register unsigned long a2  asm("a2") = arg1;
+   register unsigned long a3  asm("a3") = arg2;
+
+   __asm__ __volatile__(
+   "hvcl "__stringify(KVM_HC_SERVICE)
+   : "=r" (ret)
+   : "r" (fun), "r" (a1), "r&quo

[PATCH 2/5] LoongArch: KVM: Add cpucfg area for kvm hypervisor

2024-01-02 Thread Bibo Mao
The system traps into the hypervisor when a guest executes the cpucfg
instruction. Hardware currently only uses the area 0 - 20 for actual
usage, so one dedicated area 0x1000 -- 0x10ff is reserved for the KVM
hypervisor here, and the area can be extended for other hypervisors in
the future.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/inst.h  |  1 +
 arch/loongarch/include/asm/loongarch.h |  9 +
 arch/loongarch/kvm/exit.c  | 46 +-
 3 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h 
b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op= 0x11,
extwh_op= 0x16,
extwb_op= 0x17,
+   cpucfg_op   = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h 
b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a03b466555a1 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,15 @@
 #define  CPUCFG48_VFPU_CG  BIT(2)
 #define  CPUCFG48_RAM_CG   BIT(3)
 
+/*
+ * cpucfg index area: 0x1000 -- 0x10ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE0x1000UL
+#define CPUCFG_KVM_SIZE0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define  KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
 #ifndef __ASSEMBLY__
 
 /* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 59e5fe221982..e233d7b3b76d 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
 }
 
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
int rd, rj;
unsigned int index;
+
+   rd = inst.reg2_format.rd;
+   rj = inst.reg2_format.rj;
+   ++vcpu->stat.cpucfg_exits;
+   index = vcpu->arch.gprs[rj];
+
+   /*
+* By LoongArch Reference Manual 2.2.10.5
+* Return value is 0 for undefined cpucfg index
+*/
+   switch (index) {
+   case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+   break;
+   case CPUCFG_KVM_SIG:
+   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+   break;
+   default:
+   vcpu->arch.gprs[rd] = 0;
+   break;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
-   if (inst.reg2_format.opcode == 0x1B) {
-   rd = inst.reg2_format.rd;
-   rj = inst.reg2_format.rj;
-   ++vcpu->stat.cpucfg_exits;
-   index = vcpu->arch.gprs[rj];
-   er = EMULATE_DONE;
-   /*
-* By LoongArch Reference Manual 2.2.10.5
-* return value is 0 for undefined cpucfg index
-*/
-   if (index < KVM_MAX_CPUCFG_REGS)
-   vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-   else
-   vcpu->arch.gprs[rd] = 0;
-   }
+   if (inst.reg2_format.opcode == cpucfg_op)
+   er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
-- 
2.39.3




[PATCH 4/5] LoongArch: Add paravirt interface for guest kernel

2024-01-02 Thread Bibo Mao
This patch adds a paravirt interface for the guest kernel. It checks
whether the system runs in VM mode and, if so, detects the hypervisor
type, returning true if it is the KVM hypervisor and false otherwise.
Currently only the KVM hypervisor is supported, so there is only
hypervisor detection for the KVM type.

Signed-off-by: Bibo Mao 
---
 arch/loongarch/Kconfig|  8 
 arch/loongarch/include/asm/kvm_para.h |  7 
 arch/loongarch/include/asm/paravirt.h | 27 
 .../include/asm/paravirt_api_clock.h  |  1 +
 arch/loongarch/kernel/Makefile|  1 +
 arch/loongarch/kernel/paravirt.c  | 41 +++
 arch/loongarch/kernel/setup.c |  2 +
 7 files changed, 87 insertions(+)
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 create mode 100644 arch/loongarch/kernel/paravirt.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..940e5960d297 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -564,6 +564,14 @@ config CPU_HAS_PREFETCH
bool
default y
 
+config PARAVIRT
+   bool "Enable paravirtualization code"
+   help
+  This changes the kernel so it can modify itself when it is run
+  under a hypervisor, potentially improving performance significantly
+  over full virtualization.  However, when run without a hypervisor
+  the kernel is theoretically slower and slightly larger.
+
 config ARCH_SUPPORTS_KEXEC
def_bool y
 
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
 #ifndef _ASM_LOONGARCH_KVM_PARA_H
 #define _ASM_LOONGARCH_KVM_PARA_H
 
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT8
+#define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + 
code)
+
 /*
  * LoongArch hypcall return code
  */
diff --git a/arch/loongarch/include/asm/paravirt.h 
b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index ..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include 
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+   return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+   return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h 
b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index ..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)  += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 
 obj-$(CONFIG_SMP)  += smp.o
 
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index ..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+   static int hypervisor_type;
+   int config;
+
+   if (!hypervisor_type) {
+   config = read_cpucfg(CPUCFG_KVM_SIG);
+   if (!memcmp(&config, KVM_SIGNATURE, 4))
+   hypervisor_type = HYPERVISOR_KVM;
+   }
+
+   return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+   if (!cpu_has_hypervisor)
+   return 0;
+   if (!kvm_para_available())
+   return 0;
+
+   return 1;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb85..fa680bdd0bd1 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -43,6 +43,7 @@
 #in

[PATCH 0/5] LoongArch: Add pv ipi support on LoongArch VM

2024-01-02 Thread Bibo Mao
This patchset adds pv ipi support for VMs. On a physical machine, the
ipi HW uses IOCSR registers, but IOCSR register accesses trap into the
hypervisor when the system is in VM mode. This patchset uses the SWI
interrupt for IPI notification. With the hypercall method there is
still one trap when sending an ipi, but the SWI interrupt handler
incurs no trap.

This patchset passes the runltp test cases, and the unixbench score is
99% of that on the physical machine. Here is the unixbench score with
16 cores on a 3C5000 single-way machine.

UnixBench score on 3C5000 machine with 16 cores 
Dhrystone 2 using register variables 116700.0  339749961.8  29113.1
Double-Precision Whetstone   55.0  57716.9  10494.0
Execl Throughput 43.0  33563.4   7805.4
File Copy 1024 bufsize 2000 maxblocks  3960.01017912.5   2570.5
File Copy 256 bufsize 500 maxblocks1655.0 260061.4   1571.4
File Copy 4096 bufsize 8000 maxblocks  5800.03216109.4   5545.0
Pipe Throughput   12440.0   18404312.0  14794.5
Pipe-based Context Switching   4000.03395856.2   8489.6
Process Creation126.0  55684.8   4419.4
Shell Scripts (1 concurrent) 42.4  55901.8  13184.4
Shell Scripts (8 concurrent)  6.0   7396.5  12327.5
System Call Overhead  15000.06997351.4   4664.9
System Benchmarks Index Score7288.6

UnixBench score on VM with 16 cores -
Dhrystone 2 using register variables 116700.0  341649555.5  29275.9
Double-Precision Whetstone   55.0  57490.9  10452.9
Execl Throughput 43.0  33663.8   7828.8
File Copy 1024 bufsize 2000 maxblocks  3960.01047631.2   2645.5
File Copy 256 bufsize 500 maxblocks1655.0 286671.0   1732.2
File Copy 4096 bufsize 8000 maxblocks  5800.03243588.2   5592.4
Pipe Throughput   12440.0   16353087.8  13145.6
Pipe-based Context Switching   4000.03100690.0   7751.7
Process Creation126.0  51502.1   4087.5
Shell Scripts (1 concurrent) 42.4  56665.3  13364.4
Shell Scripts (8 concurrent)  6.0   7412.1  12353.4
System Call Overhead  15000.06962239.6   4641.5
System Benchmarks Index Score7205.8

Bibo Mao (5):
  LoongArch: KVM: Add hypercall instruction emulation support
  LoongArch: KVM: Add cpucfg area for kvm hypervisor
  LoongArch/smp: Refine ipi ops on LoongArch platform
  LoongArch: Add paravirt interface for guest kernel
  LoongArch: Add pv ipi support on LoongArch system

 arch/loongarch/Kconfig|   7 +
 arch/loongarch/include/asm/Kbuild |   1 -
 arch/loongarch/include/asm/hardirq.h  |   5 +
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/include/asm/irq.h  |  10 +-
 arch/loongarch/include/asm/kvm_para.h | 157 ++
 arch/loongarch/include/asm/loongarch.h|  10 ++
 arch/loongarch/include/asm/paravirt.h |  27 +++
 .../include/asm/paravirt_api_clock.h  |   1 +
 arch/loongarch/include/asm/smp.h  |  31 ++--
 arch/loongarch/include/uapi/asm/Kbuild|   2 -
 arch/loongarch/kernel/Makefile|   1 +
 arch/loongarch/kernel/irq.c   |  24 +--
 arch/loongarch/kernel/paravirt.c  | 144 
 arch/loongarch/kernel/perf_event.c|  14 +-
 arch/loongarch/kernel/setup.c |   2 +
 arch/loongarch/kernel/smp.c   |  61 ---
 arch/loongarch/kernel/time.c  |  12 +-
 arch/loongarch/kvm/exit.c | 118 +++--
 19 files changed, 527 insertions(+), 101 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
 create mode 100644 arch/loongarch/kernel/paravirt.c


base-commit: 610a9b8f49fbcf1100716370d3b5f6f884a2835a
-- 
2.39.3




[PATCH 1/5] LoongArch: KVM: Add hypercall instruction emulation support

2024-01-02 Thread Bibo Mao
On LoongArch systems, the hypercall instruction is supported when the
system runs in VM mode. This patch adds a dummy function for hypercall
instruction emulation, instead of injecting an EXCCODE_INE invalid
instruction exception.

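Since the dummy handler below only sets register A0, a guest could
observe the behaviour with a probe like the following sketch. The
"hvcl" mnemonic, its immediate, and the a0 register convention are
assumptions for illustration rather than a confirmed ABI:

	/* Issue a hypercall with an arbitrary code; with the dummy
	 * emulation below the return value is KVM_HC_INVALID_CODE. */
	static unsigned long probe_hypcall(unsigned long code)
	{
		register unsigned long a0 __asm__("a0") = code;

		__asm__ __volatile__("hvcl 0" : "+r"(a0) : : "memory");

		return a0;	/* expected: KVM_HC_INVALID_CODE (-1UL) */
	}
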
Signed-off-by: Bibo Mao 
---
 arch/loongarch/include/asm/Kbuild  |  1 -
 arch/loongarch/include/asm/kvm_para.h  | 26 ++
 arch/loongarch/include/uapi/asm/Kbuild |  2 --
 arch/loongarch/kvm/exit.c  | 10 ++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild

diff --git a/arch/loongarch/include/asm/Kbuild 
b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
 generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h 
b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index ..9425d3b7e486
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HC_STATUS_SUCCESS  0
+#define KVM_HC_INVALID_CODE    -1UL
+#define KVM_HC_INVALID_PARAMETER   -2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+   return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild 
b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index e708a1786d6b..59e5fe221982 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -650,6 +650,15 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+   update_pc(&vcpu->arch);
+
+   /* Treat it as a noop instruction; only set the return value */
+   vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+   return RESUME_GUEST;
+}
+
 /*
  * LoongArch KVM callback handling for unimplemented guest exiting
  */
@@ -679,6 +688,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = 
{
[EXCCODE_TLBM]  = kvm_handle_write_fault,
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
[EXCCODE_GSPR]  = kvm_handle_gspr,
+   [EXCCODE_HVC]   = kvm_handle_hypcall,
 };
 
 int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
-- 
2.39.3




[PATCH 1/2] hugetlb: clear huge pte during flush function on mips platform

2020-06-29 Thread bibo mao
From: Bibo Mao 

If multiple threads access the same huge page at the same time and one
thread writes to the COW huge page, hugetlb_cow will be called. The
function huge_ptep_clear_flush is then called to notify the other
threads to clear their huge pte TLB entries. The other threads clear
the entry and reload it from the page table, but the reloaded huge pte
entry may be stale.

This patch fixes the issue on the MIPS platform: it clears the huge pte
entry before notifying the other threads to flush the current huge page
entry, similar to what other architectures do.

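The race described above, as a schematic timeline (comments only, not
kernel code):

	/*
	 *   CPU0 (hugetlb_cow)           CPU1 (other thread)
	 *   ------------------           -------------------
	 *   huge_ptep_clear_flush()
	 *     flush_tlb_page()   ----->  huge pte TLB entry dropped
	 *                                touches page, TLB refill
	 *                                reloads pte from page table
	 *   set_huge_pte_at(new pte)
	 *
	 * If the pte is not cleared before flush_tlb_page(), CPU1's
	 * refill in the window above reloads the stale pte. Clearing
	 * the pte first makes the refill see pte_none and take a page
	 * fault, which serializes against the new pte being set.
	 */
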
Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/hugetlb.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 10e3be87..c214440 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -46,7 +46,13 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct 
*mm,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep)
 {
-   flush_tlb_page(vma, addr & huge_page_mask(hstate_vma(vma)));
+   /*
+* clear the huge pte entry first, so that other smp threads will
+* not see the old pte entry after they finish flush_tlb_page and
+* before the new huge pte entry is set
+*/
+   huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+   flush_tlb_page(vma, addr);
 }
 
 #define __HAVE_ARCH_HUGE_PTE_NONE
-- 
1.8.3.1



[PATCH 2/2] hugetlb: use lightweight tlb flush when update huge tlb on mips

2020-06-29 Thread bibo mao
From: Bibo Mao 

On the MIPS platform, a huge pte points to invalid_pte_table when
huge_pte_none returns true, and a TLB entry with normal page size is
added while the huge pte entry is none. When the huge pte entry is
updated, the older TLB entry with the normal page size needs to be
invalidated.

This patch uses the lightweight TLB flush function local_flush_tlb_page
rather than flush_tlb_range, which would flush all TLB entries instead.
It also adds a new huge TLB update function named update_mmu_cache_huge,
to which the faulting address is passed rather than the huge page start
address; see the example below.

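As an example of the address convention, assuming 2 MB huge pages (the
actual huge page size depends on the configuration):

	unsigned long address = 0x120034a678UL;			/* faulting address */
	unsigned long haddr   = address & ~(HPAGE_SIZE - 1);	/* 0x1200200000 */

	/* local_flush_tlb_page(vma, address) drops the stale normal-size
	 * entry indexed by the faulting address; the huge TLB update then
	 * covers the huge page starting at haddr. */
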
Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/hugetlb.h | 17 -
 include/linux/hugetlb.h |  9 +
 mm/hugetlb.c| 12 +++-
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index c214440..fce09b4 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -72,15 +72,22 @@ static inline int huge_ptep_set_access_flags(struct 
vm_area_struct *vma,
 
if (changed) {
set_pte_at(vma->vm_mm, addr, ptep, pte);
-   /*
-* There could be some standard sized pages in there,
-* get them all.
-*/
-   flush_tlb_range(vma, addr, addr + HPAGE_SIZE);
}
return changed;
 }
 
+#define update_mmu_cache_huge  update_mmu_cache_huge
+static inline void update_mmu_cache_huge(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)
+{
+   /*
+* There could be some standard sized pages in there, so the
+* parameter address must be the page faulting address rather than
+* the start address of the huge page
+*/
+   local_flush_tlb_page(vma, address);
+   update_mmu_cache(vma, address & huge_page_mask(hstate_vma(vma)), ptep);
+}
 #include 
 
 #endif /* __ASM_HUGETLB_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 858522e..2f3f9eb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -746,6 +746,15 @@ static inline void huge_ptep_modify_prot_commit(struct 
vm_area_struct *vma,
 }
 #endif
 
+#ifndef update_mmu_cache_huge
+#define update_mmu_cache_huge  update_mmu_cache_huge
+static inline void update_mmu_cache_huge(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)
+{
+   update_mmu_cache(vma, address & huge_page_mask(hstate_vma(vma)), ptep);
+}
+#endif
+
 #else  /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1410e62..96faad7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3757,10 +3757,12 @@ static void set_huge_ptep_writable(struct 
vm_area_struct *vma,
   unsigned long address, pte_t *ptep)
 {
pte_t entry;
+   struct hstate *h = hstate_vma(vma);
+   unsigned long haddr = address & huge_page_mask(h);
 
entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(ptep)));
-   if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1))
-   update_mmu_cache(vma, address, ptep);
+   if (huge_ptep_set_access_flags(vma, haddr, ptep, entry, 1))
+   update_mmu_cache_huge(vma, address, ptep);
 }
 
 bool is_hugetlb_entry_migration(pte_t pte)
@@ -4128,7 +4130,7 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, 
struct vm_area_struct *vma,
 * and just make the page writable */
if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
page_move_anon_rmap(old_page, vma);
-   set_huge_ptep_writable(vma, haddr, ptep);
+   set_huge_ptep_writable(vma, address, ptep);
return 0;
}
 
@@ -4630,7 +4632,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct 
vm_area_struct *vma,
entry = pte_mkyoung(entry);
if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
flags & FAULT_FLAG_WRITE))
-   update_mmu_cache(vma, haddr, ptep);
+   update_mmu_cache_huge(vma, address, ptep);
 out_put_page:
if (page != pagecache_page)
unlock_page(page);
@@ -4770,7 +4772,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
hugetlb_count_add(pages_per_huge_page(h), dst_mm);
 
/* No need to invalidate - it was non-present before */
-   update_mmu_cache(dst_vma, dst_addr, dst_pte);
+   update_mmu_cache_huge(dst_vma, dst_addr, dst_pte);
 
spin_unlock(ptl);
set_page_huge_active(page);
-- 
1.8.3.1



[PATCH 3/3] MIPS: Do not call flush_tlb_all when setting pmd entry

2020-06-24 Thread Bibo Mao
The set_pmd_at function only sets the pmd entry; whenever the TLB
needs to be flushed, a function such as pmdp_huge_clear_flush is called
before set_pmd_at. So it is not necessary to call flush_tlb_all in this
function; see the sketch after the list below.

In these scenarios, the TLB for the pmd range needs to be flushed:
1. privilege is reduced, e.g. wrprotect is set on the pmd entry
2. the pmd entry is cleared
3. one exception is set_pmd_at issued from dup_mmap: since flush_tlb_mm
is called for the parent process, it is not necessary to flush the TLB
in copy_huge_pmd.

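A schematic of a caller sequence that already provides the flush,
which is why set_pmd_at itself no longer needs flush_tlb_all
(illustrative only):

	static void change_pmd_sketch(struct vm_area_struct *vma,
				      struct mm_struct *mm, unsigned long haddr,
				      unsigned long address, pmd_t *pmdp,
				      pmd_t newpmd)
	{
		pmdp_huge_clear_flush(vma, haddr, pmdp);  /* clear + flush */
		set_pmd_at(mm, haddr, pmdp, newpmd);      /* store only    */
		update_mmu_cache_pmd(vma, address, pmdp); /* refill TLB    */
	}
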
Signed-off-by: Bibo Mao 
---
 arch/mips/mm/pgtable-32.c | 1 -
 arch/mips/mm/pgtable-64.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index bd4b065..61891af 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -45,7 +45,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
-   flush_tlb_all();
 }
 #endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
 
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 183ff9f..7536f78 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -100,7 +100,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
-   flush_tlb_all();
 }
 
 void __init pagetable_init(void)
-- 
1.8.3.1



[PATCH 2/3] mm/huge_memory.c: update tlb entry if pmd is changed

2020-06-24 Thread Bibo Mao
When set_pmd_at is called in do_huge_pmd_anonymous_page, a new TLB
entry can be added by software on the MIPS platform.

Here update_mmu_cache_pmd is added where the pmd entry is set;
update_mmu_cache_pmd is defined as empty except on the arc/mips
platforms, so this patch has no negative effect on other platforms.

Signed-off-by: Bibo Mao 
---
 mm/huge_memory.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0f9187b..8b4ccf7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -643,6 +643,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct 
vm_fault *vmf,
lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+   update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(vmf->ptl);
@@ -756,6 +757,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
} else {
set_huge_zero_page(pgtable, vma->vm_mm, vma,
   haddr, vmf->pmd, zero_page);
+   update_mmu_cache_pmd(vma, vmf->address, 
vmf->pmd);
spin_unlock(vmf->ptl);
set = true;
}
-- 
1.8.3.1



[PATCH 1/3] mm: set page fault address for update_mmu_cache_pmd

2020-06-24 Thread Bibo Mao
update_mmu_cache_pmd is used by software to update the TLB for a pmd
entry. On MIPS systems, a TLB entry indexed by the page fault address
may already exist; that entry may cover either a small page or a huge
page. Before the pmd entry is updated with a huge page size, the older
TLB entry needs to be invalidated.

Here the page fault address is passed to update_mmu_cache_pmd rather
than the start address of the pmd huge page, since the fault address
can be used to invalidate the older TLB entry; see the example below.

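A small example of the two addresses involved, assuming 2 MB pmd huge
pages:

	unsigned long address = 0x2012345UL;		/* fault address */
	unsigned long haddr   = address & PMD_MASK;	/* 0x2000000     */

	/* A stale normal-size TLB entry, if any, is indexed by `address`,
	 * so local_flush_tlb_page() must receive the fault address; the
	 * huge entry then covers the pmd range starting at `haddr`. */
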
Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 9 +
 mm/huge_memory.c| 7 ---
 mm/memory.c | 2 +-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index dd7a0f5..bd81661 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct 
*vma,
 #define__HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb update_mmu_cache
 
+extern void local_flush_tlb_page(struct vm_area_struct *vma,
+   unsigned long page);
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
 {
pte_t pte = *(pte_t *)pmdp;
 
+   /*
+* If pmd_none was true, the older tlb entry covers a normal page;
+* invalidate the older tlb entry indexed by address here. The
+* parameter address must be the page fault address rather than
+* the start address of the pmd huge page
+*/
+   local_flush_tlb_page(vma, address);
__update_tlb(vma, address, pte);
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78c84be..0f9187b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -780,6 +780,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
pgtable_t pgtable)
 {
struct mm_struct *mm = vma->vm_mm;
+   unsigned long start = addr & PMD_MASK;
pmd_t entry;
spinlock_t *ptl;
 
@@ -792,7 +793,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
}
entry = pmd_mkyoung(*pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-   if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+   if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
update_mmu_cache_pmd(vma, addr, pmd);
}
 
@@ -813,7 +814,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
pgtable = NULL;
}
 
-   set_pmd_at(mm, addr, pmd, entry);
+   set_pmd_at(mm, start, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
 
 out_unlock:
@@ -864,7 +865,7 @@ vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, 
pfn_t pfn,
 
	track_pfn_insert(vma, &pgprot, pfn);
 
-   insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+   insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, 
pgtable);
return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
diff --git a/mm/memory.c b/mm/memory.c
index dc7f354..c703458 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3592,7 +3592,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct 
page *page)
 
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 
-   update_mmu_cache_pmd(vma, haddr, vmf->pmd);
+   update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 
/* fault is handled */
ret = 0;
-- 
1.8.3.1



[PATCH v2] MIPS: Do not flush tlb when setting pmd entry

2020-06-18 Thread Bibo Mao
The set_pmd_at function only sets the pmd entry; whenever the TLB
needs to be flushed, a function such as pmdp_huge_clear_flush is called
before set_pmd_at. So it is not necessary to call flush_tlb_all in this
function.

In these scenarios, the TLB for the pmd range needs to be flushed:
1. privilege is reduced, e.g. wrprotect is set on the pmd entry
2. the pmd entry is cleared
3. one exception is set_pmd_at issued from dup_mmap: since flush_tlb_mm
is called for the parent process, it is not necessary to flush the TLB
in copy_huge_pmd.

Signed-off-by: Bibo Mao 
---
v2:
- add the same operation on mips32 system
- update changelog description
---
 arch/mips/mm/pgtable-32.c | 1 -
 arch/mips/mm/pgtable-64.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index bd4b065..61891af 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -45,7 +45,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
-   flush_tlb_all();
 }
 #endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
 
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 183ff9f..7536f78 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -100,7 +100,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
-   flush_tlb_all();
 }
 
 void __init pagetable_init(void)
-- 
1.8.3.1



[PATCH] MIPS: Use arch specific syscall name match function

2020-06-08 Thread Bibo Mao
On MIPS systems, most syscall function names begin with the prefix
sys_. Some syscalls such as clone/fork are special: their function
names begin with __sys_, since scratch registers need to be saved on
the stack when these system calls happen.

With the ftrace syscall method, system call functions are declared with
SYSCALL_DEFINEx, and the metadata symbol name of a system call begins
with sys_. Here a MIPS-specific arch_syscall_match_sym_name function is
used to compare the function names in sys_call_table[] with the syscall
symbol metadata; see the illustration below.

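An illustration of the comparison performed by the new helper:

	/*
	 *   sym  = "__sys_clone"   (symbol behind sys_call_table[])
	 *   name = "sys_clone"     (SYSCALL_DEFINEx metadata name)
	 *
	 * strncmp(sym, "__sys_", 6) == 0, and strcmp(sym + 6, name + 4)
	 * compares "clone" with "clone", so the pair matches even though
	 * the symbol does not start with plain "sys_".
	 */
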
Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/ftrace.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index b463f2a..9b42115 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -87,4 +87,20 @@ struct dyn_arch_ftrace {
 #endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_FTRACE_SYSCALLS
+/*
+ * Some syscall entry functions on mips start with "__sys_" (fork and clone,
+ * for instance). We should also match the sys_ variant with those.
+ */
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym,
+  const char *name)
+{
+   return !strcmp(sym, name) ||
+   (!strncmp(sym, "__sys_", 6) && !strcmp(sym + 6, name + 4));
+}
+#endif /* CONFIG_FTRACE_SYSCALLS */
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_FTRACE_H */
-- 
1.8.3.1



[PATCH v2 1/2] MIPS: Set page access bit with pgprot on platforms with RIXI

2020-06-05 Thread Bibo Mao
On MIPS systems that have the hardware RIXI bit, the page access bit
is not set in pgprot. For a memory read there is one page fault to
allocate the physical page; however, because the valid bit is not set,
a second fast tlb-miss fault is taken just to set the valid/access
bits.

This patch sets the page access/valid bits in pgprot whenever read
access is permitted, saving one tlb-miss handling step for memory
reads; see the example below.

The valid/access bits are cleared in order to track memory access
activity. If the page is accessed, the fast tlb-miss handler sets the
valid/access bits, so pte_sw_mkyoung is not necessary in the slow page
fault path. This patch removes the pte_sw_mkyoung function, which is
defined as an empty function except on MIPS systems.

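For example, with the shorthand macros introduced below, a readable
mapping is created already valid:

	pgprot_t prot = __pgprot(__PC | __PP | ___R);
	/* expands to: _page_cachable_default | _PAGE_PRESENT |
	 *             _PAGE_SILENT_READ | _PAGE_ACCESSED
	 * _PAGE_SILENT_READ acts as the hardware valid bit on RIXI CPUs,
	 * so the first read no longer takes a tlb-miss fault just to set
	 * the valid/access bits. */
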
Signed-off-by: Bibo Mao 
Acked-by: Andrew Morton 
---
v2:
- refine commit log title
---
 arch/mips/include/asm/pgtable.h | 10 --
 arch/mips/mm/cache.c| 34 +-
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 ---
 4 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 85b39c9..d066469 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -25,6 +25,14 @@
 struct mm_struct;
 struct vm_area_struct;
 
+#define __PP   _PAGE_PRESENT
+#define __NX   _PAGE_NO_EXEC
+#define __NR   _PAGE_NO_READ
+#define ___W   _PAGE_WRITE
+#define ___A   _PAGE_ACCESSED
+#define ___R   (_PAGE_SILENT_READ | _PAGE_ACCESSED)
+#define __PC   _page_cachable_default
+
 #define PAGE_NONE  __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
 _page_cachable_default)
 #define PAGE_SHARED__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
@@ -414,8 +422,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define pte_sw_mkyoung pte_mkyoung
-
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index ad6df1c..f814e43 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -158,23 +158,23 @@ void __update_cache(unsigned long address, pte_t pte)
 static inline void setup_protection_map(void)
 {
if (cpu_has_rixi) {
-   protection_map[0]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[1]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[2]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[3]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[4]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[5]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[6]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[7]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-
-   protection_map[8]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[9]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[10] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ);
-   protection_map[11] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-   protection_map[12] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[13] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[14] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_WRITE);
-   protection_map[15] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_WRITE);
+   protection_map[0]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[1]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[2]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[3]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[4]  = __pgprot(__PC | __PP | ___R);
+   protection_map[5]  = __pgprot(__PC | __PP | ___R);
+   protection_map[6]  = __pgprot(__PC | __PP | ___R);
+   protection_map[7]  = __pgprot(__PC | __PP | ___R);
+
+   protection_map[8]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[9]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | __NR);
+   protection_map[11] = __pgprot(__PC | __PP | __NX | ___W | ___R);
+   protection_map[12] = __pgprot(__PC | __PP | ___R);
+   protection_map[13] = __pgprot(__PC | __PP | 

[PATCH v2 2/2] MIPS: Add writable-applies-readable policy with pgrot

2020-06-05 Thread Bibo Mao
On Linux systems, write permission implies read permission on most
architectures. This patch adds the same policy on the MIPS platform
where hardware RIXI is supported.

Signed-off-by: Bibo Mao 
---
 arch/mips/mm/cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index f814e43..dae0617 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -160,7 +160,7 @@ static inline void setup_protection_map(void)
if (cpu_has_rixi) {
protection_map[0]  = __pgprot(__PC | __PP | __NX | __NR);
protection_map[1]  = __pgprot(__PC | __PP | __NX | ___R);
-   protection_map[2]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[2]  = __pgprot(__PC | __PP | __NX | ___R);
protection_map[3]  = __pgprot(__PC | __PP | __NX | ___R);
protection_map[4]  = __pgprot(__PC | __PP | ___R);
protection_map[5]  = __pgprot(__PC | __PP | ___R);
@@ -169,7 +169,7 @@ static inline void setup_protection_map(void)
 
protection_map[8]  = __pgprot(__PC | __PP | __NX | __NR);
protection_map[9]  = __pgprot(__PC | __PP | __NX | ___R);
-   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | __NR);
+   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | ___R);
protection_map[11] = __pgprot(__PC | __PP | __NX | ___W | ___R);
protection_map[12] = __pgprot(__PC | __PP | ___R);
protection_map[13] = __pgprot(__PC | __PP | ___R);
-- 
1.8.3.1



[PATCH 2/2] MIPS: Add writable-applies-readable policy with pgrot

2020-06-05 Thread Bibo Mao
On Linux systems, write permission implies read permission on most
architectures. This patch adds the same policy on the MIPS platform
where hardware RIXI is supported.

Signed-off-by: Bibo Mao 
---
 arch/mips/mm/cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index f814e43..dae0617 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -160,7 +160,7 @@ static inline void setup_protection_map(void)
if (cpu_has_rixi) {
protection_map[0]  = __pgprot(__PC | __PP | __NX | __NR);
protection_map[1]  = __pgprot(__PC | __PP | __NX | ___R);
-   protection_map[2]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[2]  = __pgprot(__PC | __PP | __NX | ___R);
protection_map[3]  = __pgprot(__PC | __PP | __NX | ___R);
protection_map[4]  = __pgprot(__PC | __PP | ___R);
protection_map[5]  = __pgprot(__PC | __PP | ___R);
@@ -169,7 +169,7 @@ static inline void setup_protection_map(void)
 
protection_map[8]  = __pgprot(__PC | __PP | __NX | __NR);
protection_map[9]  = __pgprot(__PC | __PP | __NX | ___R);
-   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | __NR);
+   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | ___R);
protection_map[11] = __pgprot(__PC | __PP | __NX | ___W | ___R);
protection_map[12] = __pgprot(__PC | __PP | ___R);
protection_map[13] = __pgprot(__PC | __PP | ___R);
-- 
1.8.3.1



[PATCH 1/2] MIPS: set page access bit with pgprot on some MIPS platform

2020-06-05 Thread Bibo Mao
On MIPS systems that have the hardware RIXI bit, the page access bit
is not set in pgprot. For a memory read there is one page fault to
allocate the physical page; however, because the valid bit is not set,
a second fast tlb-miss fault is taken just to set the valid/access
bits.

This patch sets the page access/valid bits in pgprot whenever read
access is permitted, saving one tlb-miss handling step for memory
reads.

The valid/access bits are cleared in order to track memory access
activity. If the page is accessed, the fast tlb-miss handler sets the
valid/access bits, so pte_sw_mkyoung is not necessary in the slow page
fault path. This patch removes the pte_sw_mkyoung function, which is
defined as an empty function except on MIPS systems.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 11 +--
 arch/mips/mm/cache.c| 34 +-
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 ---
 4 files changed, 26 insertions(+), 38 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 85b39c9..e2452ab 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -25,6 +25,15 @@
 struct mm_struct;
 struct vm_area_struct;
 
+#define __PP   _PAGE_PRESENT
+#define __NX   _PAGE_NO_EXEC
+#define __NR   _PAGE_NO_READ
+#define ___W   _PAGE_WRITE
+#define ___A   _PAGE_ACCESSED
+#define ___R   (_PAGE_SILENT_READ | _PAGE_ACCESSED)
+#define __PC   _page_cachable_default
+
+
 #define PAGE_NONE  __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
 _page_cachable_default)
 #define PAGE_SHARED__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
@@ -414,8 +423,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define pte_sw_mkyoung pte_mkyoung
-
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index ad6df1c..f814e43 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -158,23 +158,23 @@ void __update_cache(unsigned long address, pte_t pte)
 static inline void setup_protection_map(void)
 {
if (cpu_has_rixi) {
-   protection_map[0]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[1]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[2]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[3]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[4]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[5]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[6]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[7]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-
-   protection_map[8]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-   protection_map[9]  = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC);
-   protection_map[10] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ);
-   protection_map[11] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-   protection_map[12] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[13] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT);
-   protection_map[14] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_WRITE);
-   protection_map[15] = __pgprot(_page_cachable_default | 
_PAGE_PRESENT | _PAGE_WRITE);
+   protection_map[0]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[1]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[2]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[3]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[4]  = __pgprot(__PC | __PP | ___R);
+   protection_map[5]  = __pgprot(__PC | __PP | ___R);
+   protection_map[6]  = __pgprot(__PC | __PP | ___R);
+   protection_map[7]  = __pgprot(__PC | __PP | ___R);
+
+   protection_map[8]  = __pgprot(__PC | __PP | __NX | __NR);
+   protection_map[9]  = __pgprot(__PC | __PP | __NX | ___R);
+   protection_map[10] = __pgprot(__PC | __PP | __NX | ___W | __NR);
+   protection_map[11] = __pgprot(__PC | __PP | __NX | ___W | ___R);
+   protection_map[12] = __pgprot(__PC | __PP | ___R);
+   protection_map[13] = __pgprot(__PC | __PP | ___R);
+   protection_map[14] = __pgprot(__PC | __PP | ___W | 

[PATCH] MIPS: Do not flush tlb when setting pmd entry

2020-06-03 Thread Bibo Mao
The set_pmd_at function only sets the pmd entry; whenever the TLB
needs to be flushed, a function such as pmdp_huge_clear_flush is called
before set_pmd_at. So it is not necessary to call flush_tlb_all in this
function.

Signed-off-by: Bibo Mao 
---
 arch/mips/mm/pgtable-64.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 6fd6e96..a236752 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -101,7 +101,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
-   flush_tlb_all();
 }
 
 void __init pagetable_init(void)
-- 
1.8.3.1



[PATCH v7 2/4] mm/memory.c: Update local TLB if PTE entry exists

2020-05-26 Thread Bibo Mao
If two threads concurrently fault at the same page, the thread that
won the race updates the PTE and its local TLB. For now, the other
thread gives up, simply does nothing, and continues.

It could happen that this second thread triggers another fault, whereby
it only updates its local TLB while handling the fault. Instead of
triggering another fault, let's directly update the local TLB of the
second thread. Function update_mmu_tlb is used here to update local
TLB on the second thread, and it is defined as empty on other arches.

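The scenario handled by update_mmu_tlb, as a schematic timeline:

	/*
	 *   thread A                    thread B
	 *   --------                    --------
	 *   faults at addr              faults at addr
	 *   takes pte lock, installs
	 *   PTE, updates local TLB
	 *                               takes pte lock, sees
	 *                               !pte_same(*pte, orig_pte),
	 *                               calls update_mmu_tlb()
	 *                               -> local TLB primed, no second
	 *                                  fault on this address
	 */
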
Signed-off-by: Bibo Mao 
Acked-by: Andrew Morton 
---
 arch/mips/include/asm/pgtable.h | 23 +++
 include/asm-generic/pgtable.h   | 17 +
 mm/memory.c | 27 +++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index f8f48fc..6f40612 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -483,6 +483,26 @@ static inline void flush_tlb_fix_spurious_fault(struct 
vm_area_struct *vma,
 {
 }
 
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+   return pte_val(pte_a) == pte_val(pte_b);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep,
+   pte_t entry, int dirty)
+{
+   if (!pte_same(*ptep, entry))
+   set_pte_at(vma->vm_mm, address, ptep, entry);
+   /*
+* update_mmu_cache will unconditionally execute, handling both
+* the case that the PTE changed and the spurious fault case.
+*/
+   return true;
+}
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -526,6 +546,9 @@ static inline void update_mmu_cache(struct vm_area_struct 
*vma,
__update_tlb(vma, address, pte);
 }
 
+#define__HAVE_ARCH_UPDATE_MMU_TLB
+#define update_mmu_tlb update_mmu_cache
+
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
 {
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8..fa5c73f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -188,6 +188,23 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
 }
 #endif
 
+
+/*
+ * If two threads concurrently fault at the same page, the thread that
+ * won the race updates the PTE and its local TLB/Cache. The other thread
+ * gives up, simply does nothing, and continues; on architectures where
+ * software can update the TLB, the local TLB can be updated here to avoid
+ * the next page fault. This function updates the TLB only and does nothing
+ * with the cache or others; that is the difference from update_mmu_cache.
+ */
+#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
+static inline void update_mmu_tlb(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)
+{
+}
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#endif
+
 /*
  * Some architectures may be able to avoid expensive synchronization
  * primitives when modifications are made to PTE's which are already
diff --git a/mm/memory.c b/mm/memory.c
index f703fe8..8bb31c4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2436,10 +2436,9 @@ static inline bool cow_user_page(struct page *dst, 
struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
 * Other thread has already handled the fault
-* and we don't need to do anything. If it's
-* not the case, the fault will be triggered
-* again on the same address.
+* and update local tlb only
 */
+   update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2463,7 +2462,8 @@ static inline bool cow_user_page(struct page *dst, struct 
page *src,
	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
-   /* The PTE changed under us. Retry page fault. */
+   /* The PTE changed under us; update the local tlb */
+   update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2752,6 +2752,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+   update_mmu_tlb(vma, vmf->address, vmf->pte);
 

[PATCH v7 1/4] MIPS: Do not flush tlb page when updating PTE entry

2020-05-26 Thread Bibo Mao
It is not necessary to flush a TLB page on all CPUs if a suitable PTE
entry already exists during page fault handling; just updating the
local TLB is enough.

Here flush_tlb_fix_spurious_fault is redefined as empty on MIPS
systems.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9b01d2d..f8f48fc 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -478,6 +478,11 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
 }
 
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+   unsigned long address)
+{
+}
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
1.8.3.1



[PATCH v7 3/4] mm/memory.c: Add memory read privilege on page fault handling

2020-05-26 Thread Bibo Mao
Add a pte_sw_mkyoung function to make a page readable on the MIPS
platform during page fault handling. This patch improves page fault
latency by about 10% on my MIPS machine with the lmbench lat_pagefault
case; see the usage sketch below.

It is a noop function on other architectures, so there is no negative
influence on them.

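A schematic of the fault-path usage, matching the hunks below (the
variable names are illustrative):

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_sw_mkyoung(entry);	/* MIPS: set young/valid bits;
					 * other arches: no-op        */
	set_pte_at(vma->vm_mm, address, ptep, entry);
	update_mmu_cache(vma, address, ptep);
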
Signed-off-by: Bibo Mao 
Acked-by: Andrew Morton 
---
 arch/mips/include/asm/pgtable.h |  2 ++
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6f40612..d9772aff 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
+#define pte_sw_mkyoung pte_mkyoung
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index fa5c73f..b5278ec 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -244,6 +244,22 @@ static inline void ptep_set_wrprotect(struct mm_struct 
*mm, unsigned long addres
 }
 #endif
 
+/*
+ * On some architectures the hardware does not set the page access bit
+ * when a memory page is accessed; it is the responsibility of software
+ * to set this bit, which brings an extra page fault penalty to track
+ * page access. As an optimization, the page access bit can be set during
+ * the whole page fault flow on these architectures. To differentiate it
+ * from the pte_mkyoung macro, this macro is used on platforms where
+ * software maintains the page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+   return pte;
+}
+#define pte_sw_mkyoung pte_sw_mkyoung
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 8bb31c4..c7c8960 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
 
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct 
mem_cgroup *memcg,
 
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
-- 
1.8.3.1



[PATCH v7 4/4] MIPS: mm: add page valid judgement in function pte_modify

2020-05-26 Thread Bibo Mao
If the original PTE has the _PAGE_ACCESSED bit set and the new pte
does not have the _PAGE_NO_READ bit set, we can add the
_PAGE_SILENT_READ bit to enable the page valid bit; see the summary
table below.

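The effect of the new check in pte_modify can be summarized as:

	/*
	 *   old _PAGE_ACCESSED | new _PAGE_NO_READ | _PAGE_SILENT_READ set?
	 *   -------------------+-------------------+-----------------------
	 *          set         |       clear       |  yes (valid at once)
	 *          set         |       set         |  no
	 *          clear       |       any         |  no (keep access-bit
	 *                                                tracking intact)
	 */
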
Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index d9772aff..85b39c9 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -532,8 +532,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-   return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
-(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+   pte_val(pte) &= _PAGE_CHG_MASK;
+   pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+   if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+   pte_val(pte) |= _PAGE_SILENT_READ;
+   return pte;
 }
 #endif
 
-- 
1.8.3.1



[PATCH v7 0/4] MIPS: page fault handling optimization

2020-05-26 Thread Bibo Mao
V7:
- define function flush_tlb_fix_spurious_fault as inline rather
  than macro
V6:
- Add update_mmu_tlb function as empty on all platform except mips
  system, we use this function to update local tlb for page fault
  smp-race handling
V5:
- define update_mmu_cache function specified on MIPS platform, and
  add page fault smp-race stats info
V4:
- add pte_sw_mkyoung function to implement readable privilege; this
  function is only in effect on MIPS systems.
- add page valid bit judgement in function pte_modify
V3:
- add detailed changelog, modify typo issue in patch V2
v2:
- split flush_tlb_fix_spurious_fault and tlb update into two patches
- comments typo modification
- separate tlb update and add pte readable privilege into two patches

Bibo Mao (4):
  MIPS: Do not flush tlb page when updating PTE entry
  mm/memory.c: Update local TLB if PTE entry exists
  mm/memory.c: Add memory read privilege on page fault handling
  MIPS: mm: add page valid judgement in function pte_modify

 arch/mips/include/asm/pgtable.h | 37 +++--
 include/asm-generic/pgtable.h   | 33 +
 mm/memory.c | 30 ++
 3 files changed, 90 insertions(+), 10 deletions(-)

-- 
1.8.3.1



[PATCH v6 3/4] mm/memory.c: Add memory read privilege on page fault handling

2020-05-24 Thread Bibo Mao
Add a pte_sw_mkyoung function to make a page readable on the MIPS
platform during page fault handling. This patch improves page fault
latency by about 10% on my MIPS machine with the lmbench lat_pagefault
case.

It is a noop function on other architectures, so there is no negative
influence on them.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h |  2 ++
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index d2004b5..0743087 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
+#define pte_sw_mkyoung pte_mkyoung
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index fa5c73f..b5278ec 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -244,6 +244,22 @@ static inline void ptep_set_wrprotect(struct mm_struct 
*mm, unsigned long addres
 }
 #endif
 
+/*
+ * On some architectures the hardware does not set the page access bit
+ * when a memory page is accessed; it is the responsibility of software
+ * to set this bit, which brings an extra page fault penalty to track
+ * page access. As an optimization, the page access bit can be set during
+ * the whole page fault flow on these architectures. To differentiate it
+ * from the pte_mkyoung macro, this macro is used on platforms where
+ * software maintains the page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+   return pte;
+}
+#define pte_sw_mkyoung pte_sw_mkyoung
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 8bb31c4..c7c8960 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
 
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct 
mem_cgroup *memcg,
 
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
-- 
1.8.3.1



[PATCH v6 4/4] MIPS: mm: add page valid judgement in function pte_modify

2020-05-24 Thread Bibo Mao
If the original PTE has the _PAGE_ACCESSED bit set and the new pte
does not have the _PAGE_NO_READ bit set, we can add the
_PAGE_SILENT_READ bit to enable the page valid bit.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 0743087..dfe79f4 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -529,8 +529,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-   return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
-(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+   pte_val(pte) &= _PAGE_CHG_MASK;
+   pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+   if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+   pte_val(pte) |= _PAGE_SILENT_READ;
+   return pte;
 }
 #endif
 
-- 
1.8.3.1



[PATCH v6 1/4] MIPS: Do not flush tlb page when updating PTE entry

2020-05-24 Thread Bibo Mao
It is not necessary to flush a TLB page on all CPUs if a suitable PTE
entry already exists during page fault handling; just updating the
local TLB is enough.

Here flush_tlb_fix_spurious_fault is redefined as empty on MIPS
systems.
V6:
- Add update_mmu_tlb function as empty on all platform except mips
  system, we use this function to update local tlb for page fault
  smp-race handling
V5:
- define update_mmu_cache function specified on MIPS platform, and
  add page fault smp-race stats info
V4:
- add pte_sw_mkyoung function to implement readable privilege; this
  function is only in effect on MIPS systems.
- add page valid bit judgement in function pte_modify
V3:
- add detailed changelog, modify typo issue in patch V2
v2:
- split flush_tlb_fix_spurious_fault and tlb update into two patches
- comments typo modification
- separate tlb update and add pte readable privilege into two patches

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9b01d2d..0d625c2 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -478,6 +478,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
1.8.3.1



[PATCH v6 2/4] mm/memory.c: Update local TLB if PTE entry exists

2020-05-24 Thread Bibo Mao
If two threads concurrently fault at the same page, the thread that
won the race updates the PTE and its local TLB. For now, the other
thread gives up, simply does nothing, and continues.

It could happen that this second thread triggers another fault, whereby
it only updates its local TLB while handling the fault. Instead of
triggering another fault, let's directly update the local TLB of the
second thread. Function update_mmu_tlb is used here to update local
TLB on the second thread, and it is defined as empty on other arches.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 23 +++
 include/asm-generic/pgtable.h   | 17 +
 mm/memory.c | 27 +++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 0d625c2..d2004b5 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -480,6 +480,26 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+   return pte_val(pte_a) == pte_val(pte_b);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep,
+   pte_t entry, int dirty)
+{
+   if (!pte_same(*ptep, entry))
+   set_pte_at(vma->vm_mm, address, ptep, entry);
+   /*
+* update_mmu_cache will unconditionally execute, handling both
+* the case that the PTE changed and the spurious fault case.
+*/
+   return true;
+}
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -523,6 +543,9 @@ static inline void update_mmu_cache(struct vm_area_struct 
*vma,
__update_tlb(vma, address, pte);
 }
 
+#define__HAVE_ARCH_UPDATE_MMU_TLB
+#define update_mmu_tlb update_mmu_cache
+
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
 {
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8..fa5c73f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -188,6 +188,23 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
 }
 #endif
 
+
+/*
+ * If two threads concurrently fault at the same page, the thread that
+ * won the race updates the PTE and its local TLB/Cache. The other thread
+ * gives up, simply does nothing, and continues; on architectures where
+ * software can update the TLB, the local TLB can be updated here to avoid
+ * the next page fault. This function updates the TLB only and does nothing
+ * with the cache or others; that is the difference from update_mmu_cache.
+ */
+#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
+static inline void update_mmu_tlb(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)
+{
+}
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#endif
+
 /*
  * Some architectures may be able to avoid expensive synchronization
  * primitives when modifications are made to PTE's which are already
diff --git a/mm/memory.c b/mm/memory.c
index f703fe8..8bb31c4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2436,10 +2436,9 @@ static inline bool cow_user_page(struct page *dst, 
struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
 * Other thread has already handled the fault
-* and we don't need to do anything. If it's
-* not the case, the fault will be triggered
-* again on the same address.
+* and we only need to update the local tlb
 */
+   update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2463,7 +2462,8 @@ static inline bool cow_user_page(struct page *dst, struct 
page *src,
	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
-   /* The PTE changed under us. Retry page fault. */
+   /* The PTE changed under us; update the local tlb */
+   update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2752,6 +2752,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+   update_mmu_t

[PATCH v5 3/4] mm/memory.c: Add memory read privilege on page fault handling

2020-05-20 Thread Bibo Mao
Add a pte_sw_mkyoung function to make a page readable on the MIPS
platform during page fault handling. This patch improves page fault
latency by about 10% on my MIPS machine with the lmbench lat_pagefault
case.

It is a noop function on other architectures, so there is no negative
influence on them.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h |  2 ++
 include/asm-generic/pgtable.h   | 16 
 mm/memory.c |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 5f610ec..9cd811e 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
+#define pte_sw_mkyoung pte_mkyoung
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8..7dcfa30 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -227,6 +227,22 @@ static inline void ptep_set_wrprotect(struct mm_struct 
*mm, unsigned long addres
 }
 #endif
 
+/*
+ * On some architectures the hardware does not set the page access bit
+ * when a memory page is accessed; it is the responsibility of software
+ * to set this bit, which brings an extra page fault penalty to track
+ * page access. As an optimization, the page access bit can be set during
+ * the whole page fault flow on these architectures. To differentiate it
+ * from the pte_mkyoung macro, this macro is used on platforms where
+ * software maintains the page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+   return pte;
+}
+#define pte_sw_mkyoung pte_sw_mkyoung
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 9e2be4a..33d3b4c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
 
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct 
mem_cgroup *memcg,
 
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
-- 
1.8.3.1



[PATCH v5 1/4] MIPS: Do not flush tlb page when updating PTE entry

2020-05-20 Thread Bibo Mao
It is not necessary to flush a TLB page on all CPUs if a suitable PTE
entry already exists during page fault handling; just updating the
local TLB is enough.

Here flush_tlb_fix_spurious_fault is redefined as empty on MIPS
systems.
V5:
- Define update_mmu_cache function specified on MIPS platform, and
  add page fault smp-race stats info
V4:
- add pte_sw_mkyoung function to implement readable privilege; this
  function is only in effect on MIPS systems.
- add page valid bit judgement in function pte_modify
V3:
- add detailed changelog, modify typo issue in patch V2
v2:
- split flush_tlb_fix_spurious_fault and tlb update into two patches
- comments typo modification
- separate tlb update and add pte readable privilege into two patches

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9b01d2d..0d625c2 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -478,6 +478,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
1.8.3.1



[PATCH v5 2/4] mm/memory.c: Update local TLB if PTE entry exists

2020-05-20 Thread Bibo Mao
If two threads concurrently fault at the same address, the thread that
won the race updates the PTE and its local TLB. For now, the other
thread gives up, simply does nothing, and continues.

It could happen that this second thread triggers another fault, whereby
it only updates its local TLB while handling the fault. Instead of
triggering another fault, let's directly update the local TLB of the
second thread.

This is only useful on architectures where software can update the
TLB; it may have some negative effect if update_mmu_cache is also used
for other purposes. However, it seldom happens that multiple threads
access the same page at the same time, so the negative effect is
limited on other architectures.

With the specjvm2008 workload, smp-race pgfault counts are about 3% to
4% of the total pgfault counts, as observed via /proc/vmstat.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 20 
 mm/memory.c | 27 +++
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 0d625c2..5f610ec 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -480,6 +480,26 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+   return pte_val(pte_a) == pte_val(pte_b);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep,
+   pte_t entry, int dirty)
+{
+   if (!pte_same(*ptep, entry))
+   set_pte_at(vma->vm_mm, address, ptep, entry);
+   /*
+* update_mmu_cache() in the caller will run unconditionally, covering
+* both the changed-PTE case and the spurious fault case.
+*/
+   return true;
+}
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/mm/memory.c b/mm/memory.c
index f703fe8..9e2be4a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2436,10 +2436,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
 * Other thread has already handled the fault
-* and we don't need to do anything. If it's
-* not the case, the fault will be triggered
-* again on the same address.
+* and we just need to update the local TLB.
 */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2463,7 +2462,8 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, >ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
-   /* The PTE changed under us. Retry page fault. */
+   /* The PTE changed under us; just update the local TLB. */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2752,6 +2752,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
mem_cgroup_cancel_charge(new_page, memcg, false);
}
 
@@ -2812,6 +2813,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 * pte_offset_map_lock.
 */
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return VM_FAULT_NOPAGE;
}
@@ -2936,6 +2938,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, >ptl);
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
put_page(vmf->page);
@@ -3341,8 +3344,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)

[PATCH v5 4/4] MIPS: mm: add page valid judgement in function pte_modify

2020-05-20 Thread Bibo Mao
If the original PTE has the _PAGE_ACCESSED bit set and the new PTE does
not have the _PAGE_NO_READ bit set, we can set the _PAGE_SILENT_READ bit
to enable the page valid bit.
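
A minimal sketch of the intended effect, assuming a protection change
such as mprotect() (the constants are MIPS PTE bits; _PAGE_SILENT_READ
doubles as the hardware valid bit, as noted above):

        pte_t old, new;

        /* A PTE that has already been referenced and is readable. */
        old = __pte(_PAGE_PRESENT | _PAGE_ACCESSED);

        new = pte_modify(old, PAGE_READONLY);

        /* With this patch, 'new' also carries _PAGE_SILENT_READ, so the
         * hardware sees a valid entry and the next read does not take an
         * extra fault just to mark the page accessed again. */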

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9cd811e..ef26552 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -529,8 +529,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-   return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
-(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+   pte_val(pte) &= _PAGE_CHG_MASK;
+   pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+   if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+   pte_val(pte) |= _PAGE_SILENT_READ;
+   return pte;
 }
 #endif
 
-- 
1.8.3.1



[PATCH v4 3/4] mm/memory.c: Add memory read privilege on page fault handling

2020-05-19 Thread Bibo Mao
Add a pte_sw_mkyoung function to make the page readable on the MIPS
platform during page fault handling. This patch improves page fault
latency by about 10% on my MIPS machine with the lmbench lat_pagefault
case.

It is a no-op on other architectures, so there is no negative impact
on them.
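
The readability comes from pte_mkyoung() on MIPS, which also sets the
software-maintained valid bit for readable pages. A simplified sketch,
condensed from arch/mips/include/asm/pgtable.h (config-dependent
branches omitted):

        static inline pte_t pte_mkyoung(pte_t pte)
        {
                pte_val(pte) |= _PAGE_ACCESSED;
                /* Readable page: also set the software-managed valid bit
                 * so the first read access does not fault again. */
                if (!(pte_val(pte) & _PAGE_NO_READ))
                        pte_val(pte) |= _PAGE_SILENT_READ;
                return pte;
        }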

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h |  2 ++
 include/asm-generic/pgtable.h   | 15 +++
 mm/memory.c |  3 +++
 3 files changed, 20 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 0d625c2..755d534 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
+#define pte_sw_mkyoung pte_mkyoung
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)  { return pte_val(pte) & _PAGE_HUGE; }
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8..2542ef1 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -227,6 +227,21 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address)
 }
 #endif
 
+/*
+ * On some architectures, hardware does not set the page access bit when a
+ * memory page is accessed; software is responsible for setting it, which
+ * costs an extra page fault to track the access bit. As an optimization,
+ * the access bit can be set during the whole page fault flow on these
+ * architectures. To differentiate it from the pte_mkyoung macro, this
+ * macro is used on platforms where software maintains the page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+   return pte;
+}
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 2eb59a9..d9700b1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
 
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+   entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
-- 
1.8.3.1



[PATCH v4 2/4] mm/memory.c: Update local TLB if PTE entry exists

2020-05-19 Thread Bibo Mao
If two threads concurrently fault at the same address, the thread that
wins the race updates the PTE and its local TLB. Currently, the other
thread simply gives up, does nothing, and continues.

This second thread may then trigger another fault, during which it
merely updates its local TLB. Instead of triggering another fault,
let's directly update the local TLB of the second thread.

This is only useful on architectures where software can update the TLB;
it may have some negative effect if update_mmu_cache is also used for
other purposes. Since multiple threads seldom access the same page at
the same time, the negative effect on other architectures is limited.
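
Here "software can update TLB" means update_mmu_cache() really writes
the TLB, as it does on MIPS. A simplified sketch, condensed from
arch/mips/include/asm/pgtable.h of that era:

        static inline void update_mmu_cache(struct vm_area_struct *vma,
                        unsigned long address, pte_t *ptep)
        {
                pte_t pte = *ptep;

                /* __update_tlb() refills the local TLB entry in software,
                 * which is what saves the second fault in this patch. */
                __update_tlb(vma, address, pte);
        }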

Signed-off-by: Bibo Mao 
---
 mm/memory.c | 44 +++-
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index f703fe8..2eb59a9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1770,8 +1770,8 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
}
entry = pte_mkyoung(*pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-   if (ptep_set_access_flags(vma, addr, pte, entry, 1))
-   update_mmu_cache(vma, addr, pte);
+   ptep_set_access_flags(vma, addr, pte, entry, 1);
+   update_mmu_cache(vma, addr, pte);
}
goto out_unlock;
}
@@ -2436,17 +2436,16 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
 * Other thread has already handled the fault
-* and we don't need to do anything. If it's
-* not the case, the fault will be triggered
-* again on the same address.
+* and we just need to update the local TLB.
 */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
 
entry = pte_mkyoung(vmf->orig_pte);
-   if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
-   update_mmu_cache(vma, addr, vmf->pte);
+   ptep_set_access_flags(vma, addr, vmf->pte, entry, 0);
+   update_mmu_cache(vma, addr, vmf->pte);
}
 
/*
@@ -2463,7 +2462,8 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, >ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
-   /* The PTE changed under us. Retry page fault. */
+   /* The PTE changed under us; just update the local TLB. */
+   update_mmu_cache(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2618,8 +2618,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = pte_mkyoung(vmf->orig_pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-   if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
-   update_mmu_cache(vma, vmf->address, vmf->pte);
+   ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1);
+   update_mmu_cache(vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
 }
 
@@ -2752,6 +2752,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
mem_cgroup_cancel_charge(new_page, memcg, false);
}
 
@@ -2812,6 +2813,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 * pte_offset_map_lock.
 */
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return VM_FAULT_NOPAGE;
}
@@ -2936,6 +2938,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, >ptl);
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+   update_mmu_cache(vma, vmf->address, vmf->pte);
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);

[PATCH v4 1/4] MIPS: Do not flush tlb page when updating PTE entry

2020-05-19 Thread Bibo Mao
It is not necessary to flush a TLB page on all CPUs if a suitable PTE
entry already exists during page fault handling; updating the local
TLB is enough.

Redefine flush_tlb_fix_spurious_fault as a no-op on MIPS systems.
V4:
- Add a pte_sw_mkyoung function to implement the readable privilege;
  this function only takes effect on MIPS systems.
- Add a page valid bit check in the pte_modify function
V3:
- Add a detailed changelog and fix typos from patch V2
V2:
- Split flush_tlb_fix_spurious_fault and the TLB update into two patches
- Fix typos in comments
- Separate the TLB update and adding the PTE readable privilege into two
  patches

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9b01d2d..0d625c2 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -478,6 +478,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
1.8.3.1



[PATCH v4 4/4] MIPS: mm: add page valid judgement in function pte_modify

2020-05-19 Thread Bibo Mao
If the original PTE has the _PAGE_ACCESSED bit set and the new PTE does
not have the _PAGE_NO_READ bit set, we can set the _PAGE_SILENT_READ bit
to enable the page valid bit.

Signed-off-by: Bibo Mao 
---
 arch/mips/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 755d534..4f6eb56 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -509,8 +509,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-   return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
-(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+   pte_val(pte) &= _PAGE_CHG_MASK;
+   pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+   if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+   pte_val(pte) |= _PAGE_SILENT_READ;
+   return pte;
 }
 #endif
 
-- 
1.8.3.1


