RE: [PATCH v3 -next] memregion: Add cpu_cache_invalidate_memregion() interface

2022-10-22 Thread Dan Williams
[ add the rest of the X86 MM maintainers, Dave and Andy ]

Davidlohr Bueso wrote:
> With CXL security features, the nvdimm requirement for global CPU cache
> flushing is no longer specific to that subsystem, and extends beyond
> the scope of security_ops. CXL will need the same semantics for features
> not necessarily limited to persistent memory.
> 
> The functionality this enables is instantaneous secure erase of
> potentially terabytes of memory at once, where the kernel must be sure
> that none of the data from before the erase is still present in the
> cache. It is also used when unlocking a memory device, where
> speculative reads and firmware accesses could have cached poison from
> before the device was unlocked.
> 
> This capability is typically used only once per boot (for unlock), or
> once per bare-metal provisioning event (secure erase), such as when
> handing off the system to another tenant or decommissioning a device.
> It may also be used for dynamic CXL region provisioning.
> 
> Users must first call cpu_cache_has_invalidate_memregion() to know
> whether this functionality is available on the architecture. It is only
> enabled on x86-64, via the wbinvd() hammer. Hypervisor guests are not
> supported, as TDX guests may trigger a virtualization exception and
> would need proper handling to recover. See:
> 
>    e2efb6359e62 ("ACPICA: Avoid cache flush inside virtual machines")
> 
> Signed-off-by: Davidlohr Bueso 
> ---
> Changes from v2 (https://lore.kernel.org/all/20220829212918.4039240-1-d...@stgolabs.net/):
> - Change the names and params (Dan).
> - GPL symbols (Boris).
> - Mentioned VMM check in the changelog (Boris).

Any remaining concerns from x86 maintainers?

Peter had asked whether this requirement to use wbinvd could be
addressed from the CXL device side in the future. I did raise that
question, and will point out that "back-invalidate" (device-initiated
requests to invalidate CPU caches) is a prominent capability defined in
the CXL 3.0 specification. So there is at least a line of sight for that
to be used for these flows going forward.

There will also be motivation to implement that capability on platforms
that lack a wbinvd equivalent, because the Linux CXL stack will decline
to support dynamic CXL memory region provisioning and secure erase in
the absence of either a functional cpu_cache_invalidate_memregion() or
a device-side back-invalidate capability.
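
To make that gating concrete, here is a minimal sketch of a secure-erase
path honoring the policy (hypothetical names throughout; only the two
memregion helpers come from this patch):

#include <linux/errno.h>
#include <linux/ioport.h>	/* IORES_DESC_PERSISTENT_MEMORY */
#include <linux/memregion.h>

/* Hypothetical stand-in for a device-specific secure-erase command */
static int example_dev_secure_erase(void)
{
	return 0;
}

static int example_secure_erase(void)
{
	/*
	 * Decline the operation outright when CPU caches cannot be
	 * invalidated, rather than risk serving stale pre-erase data.
	 */
	if (!cpu_cache_has_invalidate_memregion())
		return -EOPNOTSUPP;

	/* Write back and drop all cachelines before the media is erased */
	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);

	return example_dev_secure_erase();
}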


[PATCH v3 -next] memregion: Add cpu_cache_invalidate_memregion() interface

2022-09-19 Thread Davidlohr Bueso
With CXL security features, the nvdimm requirement for global CPU cache
flushing is no longer specific to that subsystem, and extends beyond
the scope of security_ops. CXL will need the same semantics for features
not necessarily limited to persistent memory.

The functionality this enables is instantaneous secure erase of
potentially terabytes of memory at once, where the kernel must be sure
that none of the data from before the erase is still present in the
cache. It is also used when unlocking a memory device, where speculative
reads and firmware accesses could have cached poison from before the
device was unlocked.

This capability is typically used only once per boot (for unlock), or
once per bare-metal provisioning event (secure erase), such as when
handing off the system to another tenant or decommissioning a device.
It may also be used for dynamic CXL region provisioning.

Users must first call cpu_cache_has_invalidate_memregion() to know
whether this functionality is available on the architecture. It is only
enabled on x86-64, via the wbinvd() hammer. Hypervisor guests are not
supported, as TDX guests may trigger a virtualization exception and
would need proper handling to recover. See:

   e2efb6359e62 ("ACPICA: Avoid cache flush inside virtual machines")
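
As a usage illustration only (a sketch, not part of this patch; the
unlock helper below is a hypothetical stand-in), the intended calling
pattern for the two interfaces is:

#include <linux/errno.h>
#include <linux/ioport.h>	/* IORES_DESC_PERSISTENT_MEMORY */
#include <linux/memregion.h>

/* Hypothetical stand-in for a device-specific unlock command */
static int example_dev_unlock(void)
{
	return 0;
}

static int example_unlock_and_flush(void)
{
	int rc;

	/* Check support first; with this patch that means bare-metal x86-64 */
	if (!cpu_cache_has_invalidate_memregion())
		return -EINVAL;

	rc = example_dev_unlock();
	if (rc)
		return rc;

	/*
	 * Speculative reads may have cached pre-unlock contents;
	 * invalidate all CPU caches before the memory is read.
	 */
	return cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}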

Signed-off-by: Davidlohr Bueso 
---
Changes from v2 (https://lore.kernel.org/all/20220829212918.4039240-1-d...@stgolabs.net/):
- Change the names and params (Dan).
- GPL symbols (Boris).
- Mentioned VMM check in the changelog (Boris).


 arch/x86/Kconfig             |  1 +
 arch/x86/mm/pat/set_memory.c | 15 +++++++++++++++
 drivers/acpi/nfit/intel.c    | 41 ++++++++++++++++++-----------------------
 include/linux/memregion.h    | 35 +++++++++++++++++++++++++++++++++++
 lib/Kconfig                  |  3 +++
 5 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2e8f6fd28e59..fa5cc581315a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,6 +69,7 @@ config X86
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE  if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
+   select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION  if X86_64
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLEif !X86_PAE
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 0656db33574d..7d940ae2fede 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -330,6 +330,21 @@ void arch_invalidate_pmem(void *addr, size_t size)
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
 
+#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+bool cpu_cache_has_invalidate_memregion(void)
+{
+   return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR);
+}
+EXPORT_SYMBOL_GPL(cpu_cache_has_invalidate_memregion);
+
+int cpu_cache_invalidate_memregion(int res_desc)
+{
+   wbinvd_on_all_cpus();
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cpu_cache_invalidate_memregion);
+#endif
+
 static void __cpa_flush_all(void *arg)
 {
unsigned long cache = (unsigned long)arg;
diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
index 8dd792a55730..b2bfbf5797da 100644
--- a/drivers/acpi/nfit/intel.c
+++ b/drivers/acpi/nfit/intel.c
@@ -3,6 +3,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/acpi.h>
+#include <linux/memregion.h>
 #include <asm/smp.h>
 #include "intel.h"
 #include "nfit.h"
@@ -190,8 +191,6 @@ static int intel_security_change_key(struct nvdimm *nvdimm,
}
 }
 
-static void nvdimm_invalidate_cache(void);
-
 static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
 {
@@ -213,6 +212,9 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
	if (!test_bit(NVDIMM_INTEL_UNLOCK_UNIT, &nfit_mem->dsm_mask))
return -ENOTTY;
 
+   if (!cpu_cache_has_invalidate_memregion())
+   return -EINVAL;
+
memcpy(nd_cmd.cmd.passphrase, key_data->data,
sizeof(nd_cmd.cmd.passphrase));
	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -228,7 +230,7 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
}
 
/* DIMM unlocked, invalidate all CPU caches before we read it */
-   nvdimm_invalidate_cache();
+   cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
 
return 0;
 }
@@ -297,8 +299,11 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
	if (!test_bit(cmd, &nfit_mem->dsm_mask))
return -ENOTTY;
 
+   if (!cpu_cache_has_invalidate_memregion())
+   return -EINVAL;
+
/* flush all cache before we erase DIMM */
-   nvdimm_invalidate_cache();
+   cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
memcpy(nd_cmd.cmd.passphrase, key->data,
sizeof(nd_cmd.cmd.passphrase));