Introduce a new capability and ioctl to expose CPU compatibility modes
supported by the host processor for nested guests.

On IBM POWER systems, newer processor generations (N) can operate in
compatibility modes corresponding to earlier generations, like (N-1) and
(N-2). This is particularly relevant for nested virtualization, where
nested KVM guests may need to run with a specific processor compatibility
level.

Introduce the KVM_CAP_PPC_COMPAT_CAPS capability and the corresponding
KVM_PPC_GET_COMPAT_CAPS vm ioctl. The ioctl returns a bitmap in which each
set bit identifies a compatibility mode supported by the host, allowing
userspace (e.g., QEMU) to select an appropriate compatibility level when
configuring nested KVM guests.

The ioctl handling is added in kvm_arch_vm_ioctl() and retrieves host
CPU compatibility capabilities via a PowerPC-specific backend
implementation when available.

The struct kvm_ppc_compat_caps places the 'size' field first so it can
be read alone via get_user() before copy_struct_from_user() is called,
avoiding pointer arithmetic to locate the size field.

The ioctl is defined using _IO so the ioctl number remains stable even if
the struct grows in future versions. It uses copy_struct_from_user() and
copy_struct_to_user() to provide forward- and backward-compatible
extensibility: older userspace passing a smaller struct to a newer kernel
gets zero-padded trailing fields, while newer userspace passing a larger
struct to an older kernel (usize > ksize) gets sizeof(struct
kvm_ppc_compat_caps) written back to host_caps.size and the call fails
with -E2BIG, so it can retry with the size the older kernel supports.

KVM_PPC_COMPAT_CAPS_SIZE_VER0 is defined as a frozen integer constant
(24) marking the size of the initial struct version, used as the
minimum floor for size field validation, similar to other versioned
struct interfaces in the kernel.

The 'flags' field is reserved for future use. The kernel rejects any
call where flags is non-zero with -EINVAL, preventing garbage values
from being baked into the ABI permanently.

The ioctl returns appropriate error codes: EINVAL for an invalid size
or non-zero reserved fields, E2BIG if new userspace provides a larger
struct than the kernel knows about (with ksize written back into
host_caps.size for the retry), EFAULT for failed copy operations, and
ENOTTY if the backend doesn't implement get_compat_caps.

Suggested-by: Vaibhav Jain <[email protected]>
Signed-off-by: Amit Machhiwal <[email protected]>
---
Changes in this version:
  - Moved size as the first member of the struct
  - Replaced strict size equality check with copy_struct_from_user() and
    copy_struct_to_user() for proper forward and backward ABI compatibility
  - Added KVM_PPC_COMPAT_CAPS_SIZE_VER0 (24) as a frozen version floor
    constant, following the convention used by similar interfaces in the kernel
  - Added flags == 0 enforcement to prevent uninitialized stack values from
    being baked into ABI permanently

 arch/powerpc/include/asm/kvm_ppc.h  |  1 +
 arch/powerpc/include/uapi/asm/kvm.h |  8 ++++
 arch/powerpc/kvm/powerpc.c          | 71 +++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h            |  4 ++
 4 files changed, 84 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0953f2daa466..169ea6a7fbad 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -319,6 +319,7 @@ struct kvmppc_ops {
        bool (*hash_v3_possible)(void);
        int (*create_vm_debugfs)(struct kvm *kvm);
        int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
+       int (*get_compat_caps)(struct kvm_ppc_compat_caps *host_caps);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 077c5437f521..19e53d5ae540 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -437,6 +437,14 @@ struct kvm_ppc_cpu_char {
        __u64   behaviour_mask;         /* valid bits in behaviour */
 };
 
+/* For KVM_PPC_GET_COMPAT_CAPS */
+struct kvm_ppc_compat_caps {
+       __u64   size;                   /* In: caller's struct size; out: kernel's */
+       __u64   flags;                  /* Reserved for future use; must be zero */
+       __u64   compat_capabilities;    /* Bitmap of compat modes the host supports */
+};
+#define KVM_PPC_COMPAT_CAPS_SIZE_VER0  24 /* sizeof first published struct; min 'size' */
+
 /*
  * Values for character and character_mask.
  * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 98de68379b18..a2919b8b31c0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -701,6 +701,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                        }
                }
                break;
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+       case KVM_CAP_PPC_COMPAT_CAPS:
+               r = 0;
+               if (kvmhv_on_pseries()) /* capability reported only in this case */
+                       r = 1;
+               break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
        default:
                r = 0;
                break;
@@ -2467,6 +2474,70 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
                r = kvm->arch.kvm_ops->svm_off(kvm);
                break;
        }
+       case KVM_PPC_GET_COMPAT_CAPS: {
+               struct kvm_ppc_compat_caps host_caps = {};
+               u64 usize;
+
+               /*
+                * Fetch only the leading 'size' member: it tells us how many
+                * bytes userspace supplied. Relies on 'size' being first.
+                */
+               r = -EFAULT;
+               if (get_user(usize, (__u64 __user *)argp))
+                       goto out;
+
+               /*
+                * Enforce a minimum: reject buffers smaller than the initial
+                * struct version (VER0). Userspace built against the original
+                * layout keeps working on a newer kernel that has appended
+                * fields to the struct.
+                */
+               r = -EINVAL;
+               if (usize < KVM_PPC_COMPAT_CAPS_SIZE_VER0)
+                       goto out;
+
+               /*
+                * Userspace passed a larger struct than this kernel knows.
+                * Report our size through the 'size' member so the caller can
+                * retry with it, then fail with -E2BIG.
+                */
+               if (usize > sizeof(host_caps)) {
+                       host_caps.size = sizeof(host_caps);
+                       r = -EFAULT;
+                       if (put_user(host_caps.size, (__u64 __user *)argp))
+                               goto out;
+                       r = -E2BIG;
+                       goto out;
+               }
+
+               /*
+                * usize is within [VER0, ksize] here; copy_struct_from_user():
+                *   usize == ksize: verbatim copy
+                *   usize <  ksize: zero-pad trailing (old userspace, new kernel)
+                */
+               r = copy_struct_from_user(&host_caps, sizeof(host_caps),
+                                         argp, usize);
+               if (r)
+                       goto out;
+
+               /* 'flags' is reserved: reject any non-zero value */
+               r = -EINVAL;
+               if (host_caps.flags)
+                       goto out;
+
+               r = -ENOTTY;
+               if (!kvm->arch.kvm_ops->get_compat_caps)
+                       goto out;
+
+               r = kvm->arch.kvm_ops->get_compat_caps(&host_caps);
+               if (r)
+                       goto out;
+
+               host_caps.size = sizeof(host_caps);
+               r = copy_struct_to_user(argp, usize, &host_caps,
+                                       sizeof(host_caps), NULL);
+               break;
+       }
        default: {
                struct kvm *kvm = filp->private_data;
                r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 419011097fa8..1cf9a959669e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -997,6 +997,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_S390_KEYOP 247
 #define KVM_CAP_S390_VSIE_ESAMODE 248
 #define KVM_CAP_S390_HPAGE_2G 249
+#define KVM_CAP_PPC_COMPAT_CAPS 250
 
 struct kvm_irq_routing_irqchip {
        __u32 irqchip;
@@ -1350,6 +1351,9 @@ struct kvm_s390_keyop {
 #define KVM_GET_DEVICE_ATTR      _IOW(KVMIO,  0xe2, struct kvm_device_attr)
 #define KVM_HAS_DEVICE_ATTR      _IOW(KVMIO,  0xe3, struct kvm_device_attr)
 
+/* Available with KVM_CAP_PPC_COMPAT_CAPS */
+#define KVM_PPC_GET_COMPAT_CAPS        _IO(KVMIO,  0xe4)
+
 /*
  * ioctls for vcpu fds
  */
-- 
2.50.1 (Apple Git-155)


Reply via email to