+
+ value |= (proxy << 20);
+ native_wrmsrl(reg, value);
+ }
} else {
- wrmsrl(reg, value);
+ native_wrmsrl(reg, value);
}
}
EXPORT_SYMBOL_GPL(hv_set_non_nested_msr);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fb8cd8469328..ef540b72f6ea 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -443,20 +443,23 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
return ret;
}
- /*
- * Set the "decrypted" flag to true for the set_memory_decrypted()
- * success case. In the failure case, the encryption state of the
- * memory is unknown. Leave "decrypted" as true to ensure the
- * memory will be leaked instead of going back on the free list.
- */
- gpadl->decrypted = true;
- ret = set_memory_decrypted((unsigned long)kbuffer,
- PFN_UP(size));
- if (ret) {
- dev_warn(&channel->device_obj->device,
- "Failed to set host visibility for new GPADL %d.\n",
- ret);
- return ret;
+ if ((!channel->confidential_external_memory && type == HV_GPADL_BUFFER) ||
+ (!channel->confidential_ring_buffer && type == HV_GPADL_RING)) {
+ /*
+ * Set the "decrypted" flag to true for the set_memory_decrypted()
+ * success case. In the failure case, the encryption state of the
+ * memory is unknown. Leave "decrypted" as true to ensure the
+ * memory will be leaked instead of going back on the free list.
+ */
+ gpadl->decrypted = true;
+ ret = set_memory_decrypted((unsigned long)kbuffer,
+ PFN_UP(size));
+ if (ret) {
+ dev_warn(&channel->device_obj->device,
+ "Failed to set host visibility for new GPADL %d.\n",
+ ret);
+ return ret;
+ }
}
init_completion(&msginfo->waitevent);
@@ -676,12 +679,13 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
goto error_clean_ring;
err = hv_ringbuffer_init(&newchannel->outbound,
- page, send_pages, 0);
+ page, send_pages, 0, newchannel->confidential_ring_buffer);
if (err)
goto error_free_gpadl;
err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
- recv_pages, newchannel->max_pkt_size);
+ recv_pages, newchannel->max_pkt_size,
+ newchannel->confidential_ring_buffer);
if (err)
goto error_free_gpadl;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6e084c207414..39c8b80d967f 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -843,14 +843,14 @@ static void vmbus_wait_for_unload(void)
= per_cpu_ptr(hv_context.cpu_context, cpu);
/*
- * In a CoCo VM the synic_message_page is not allocated
+ * In a CoCo VM the hv_synic_message_page is not allocated
* in hv_synic_alloc(). Instead it is set/cleared in
* hv_synic_enable_regs() and hv_synic_disable_regs()
* such that it is set only when the CPU is online. If
* not all present CPUs are online, the message page
* might be NULL, so skip such CPUs.
*/
- page_addr = hv_cpu->synic_message_page;
+ page_addr = hv_cpu->hv_synic_message_page;
if (!page_addr)
continue;
@@ -891,7 +891,7 @@ static void vmbus_wait_for_unload(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- page_addr = hv_cpu->synic_message_page;
+ page_addr = hv_cpu->hv_synic_message_page;
if (!page_addr)
continue;
@@ -1021,6 +1021,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
struct vmbus_channel_offer_channel *offer;
struct vmbus_channel *oldchannel, *newchannel;
size_t offer_sz;
+ bool confidential_ring_buffer, confidential_external_memory;
offer = (struct vmbus_channel_offer_channel *)hdr;
@@ -1033,6 +1034,18 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
return;
}
+ confidential_ring_buffer = is_confidential_ring_buffer(offer);
+ if (confidential_ring_buffer) {
+ if (vmbus_proto_version < VERSION_WIN_COPPER || !vmbus_is_confidential())
+ return;
+ }
+
+ confidential_external_memory = is_confidential_external_memory(offer);
+ if (is_confidential_external_memory(offer)) {
+ if (vmbus_proto_version < VERSION_WIN_COPPER || !vmbus_is_confidential())
+ return;
+ }
+
oldchannel = find_primary_channel_by_offer(offer);
if (oldchannel != NULL) {
@@ -1069,6 +1082,14 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
atomic_dec(&vmbus_connection.offer_in_progress);
+ if ((oldchannel->confidential_ring_buffer && !confidential_ring_buffer) ||
+ (oldchannel->confidential_external_memory &&
+ !confidential_external_memory)) {
+ pr_err_ratelimited("Offer %d changes the confidential state\n",
+ offer->child_relid);
+ return;
+ }
+
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
/* Fix up the relid. */
oldchannel->offermsg.child_relid = offer->child_relid;
@@ -1111,6 +1132,8 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
pr_err("Unable to allocate channel object\n");
return;
}
+ newchannel->confidential_ring_buffer = confidential_ring_buffer;
+ newchannel->confidential_external_memory = confidential_external_memory;
vmbus_setup_channel_state(newchannel, offer);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 8351360bba16..268b7d58b45b 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -51,7 +51,8 @@ EXPORT_SYMBOL_GPL(vmbus_proto_version);
* Linux guests and are not listed.
*/
static __u32 vmbus_versions[] = {
- VERSION_WIN10_V5_3,
+ VERSION_WIN_COPPER,
+ VERSION_WIN_IRON,
VERSION_WIN10_V5_2,
VERSION_WIN10_V5_1,
VERSION_WIN10_V5,
@@ -65,7 +66,7 @@ static __u32 vmbus_versions[] = {
* Maximal VMBus protocol version guests can negotiate. Useful to cap the
* VMBus version for testing and debugging purpose.
*/
-static uint max_version = VERSION_WIN10_V5_3;
+static uint max_version = VERSION_WIN_COPPER;
module_param(max_version, uint, S_IRUGO);
MODULE_PARM_DESC(max_version,
@@ -105,6 +106,11 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
}
+ if (vmbus_is_confidential() && version >= VERSION_WIN_COPPER)
+ msg->feature_flags = VMBUS_FEATURE_FLAG_CONFIDENTIAL_CHANNELS;
+ else
+ msg->feature_flags = 0;
+
/*
* shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always
* bitwise OR it
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 308c8f279df8..94be5b3f9e70 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -74,7 +74,7 @@ int hv_post_message(union hv_connection_id connection_id,
aligned_msg->payload_size = payload_size;
memcpy((void *)aligned_msg->payload, payload, payload_size);
- if (ms_hyperv.paravisor_present) {
+ if (ms_hyperv.paravisor_present && !vmbus_is_confidential()) {
if (hv_isolation_type_tdx())
status = hv_tdx_hypercall(HVCALL_POST_MESSAGE,
virt_to_phys(aligned_msg), 0);
@@ -94,11 +94,135 @@ int hv_post_message(union hv_connection_id connection_id,
return hv_result(status);
}
+enum hv_page_encryption_action {
+ HV_PAGE_ENC_DEAFULT,
+ HV_PAGE_ENC_ENCRYPT,
+ HV_PAGE_ENC_DECRYPT
Typo in the hv_page_encryption_action enum: HV_PAGE_ENC_DEAFULT is misspelled and should be HV_PAGE_ENC_DEFAULT, here and at every use below. A corrected sketch follows the enum.
+};
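For reference, a minimal sketch of the corrected declaration (the spelling is the only change):

enum hv_page_encryption_action {
	HV_PAGE_ENC_DEFAULT,
	HV_PAGE_ENC_ENCRYPT,
	HV_PAGE_ENC_DECRYPT
};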
+
+static int hv_alloc_page(unsigned int cpu, void **page, enum hv_page_encryption_action enc_action,
+ const char *note)
+{
+ int ret = 0;
+
+ pr_debug("allocating %s\n", note);
+
+ /*
+ * After the page changes its encryption status, its contents will
+ * appear scrambled. Thus `get_zeroed_page` would zero the page out
+ * in vain, we do that ourselves exactly one time.
+ *
+ * The function might be called from contexts where sleeping is very
+ * bad (like hotplug callbacks) or not possible (interrupt handling),
+ * Thus requesting `GFP_ATOMIC`.
+ *
+ * The page order is 0 as we need 1 page and log_2 (1) = 0.
+ */
+ *page = (void *)__get_free_pages(GFP_ATOMIC, 0);
+ if (!*page)
+ return -ENOMEM;
+
+ pr_debug("allocated %s\n", note);
+
+ switch (enc_action) {
+ case HV_PAGE_ENC_ENCRYPT:
+ ret = set_memory_encrypted((unsigned long)*page, 1);
+ break;
+ case HV_PAGE_ENC_DECRYPT:
+ ret = set_memory_decrypted((unsigned long)*page, 1);
+ break;
+ case HV_PAGE_ENC_DEAFULT:
Same typo: HV_PAGE_ENC_DEFAULT.
+ pr_err("page encryption action %d failed for %s, error %d when
allocating the page\n",
"Encryption action %d failed for %s: allocation error %d\n"
+ enc_action, note, ret);
+ free_page((unsigned long)*page);
+ *page = NULL;
Nit: add a blank line before the return (see the consolidated sketch after the function).
+ return ret;
+}
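Putting the three nits above together, the failure path might read roughly as follows (a sketch only; I'm assuming the lines elided from the quote keep this block under an if (ret) guard):

	if (ret) {
		pr_err("Encryption action %d failed for %s: allocation error %d\n",
		       enc_action, note, ret);
		free_page((unsigned long)*page);
		*page = NULL;
	}

	return ret;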
+
+static int hv_free_page(void **page, enum hv_page_encryption_action enc_action,
+ const char *note)
+{
+ int ret = 0;
+
+ pr_debug("freeing %s\n", note);
+
+ if (!page)
+ return 0;
+ if (!*page)
+ return 0;
+
+ switch (enc_action) {
+ case HV_PAGE_ENC_ENCRYPT:
+ ret = set_memory_encrypted((unsigned long)*page, 1);
+ break;
+ case HV_PAGE_ENC_DECRYPT:
+ ret = set_memory_decrypted((unsigned long)*page, 1);
+ break;
+ case HV_PAGE_ENC_DEAFULT:
Same typo: HV_PAGE_ENC_DEFAULT.
+ break;
+ default:
+ pr_warn("unknown page encryption action %d for %s page\n",
+ enc_action, note);
+ break;
+ }
+
+ /*
+ * In the case of the action failure, the page is leaked.
+ * Something is wrong, prefer to lose the page and stay afloat.
+ */
+ if (ret) {
+ pr_err("page encryption action %d failed for %s, error %d
when freeing\n",
+ enc_action, note, ret);
+ } else {
+ pr_debug("freed %s\n", note);
+ free_page((unsigned long)*page);
+ }
+
+ *page = NULL;
+
+ return ret;
+}
+
+static bool hv_should_allocate_post_msg_page(void)
+{
+ return ms_hyperv.paravisor_present && hv_isolation_type_tdx();
+}
+
+static bool hv_should_allocate_synic_pages(void)
+{
+ return !ms_hyperv.paravisor_present && !hv_root_partition();
+}
+
+static bool hv_should_allocate_pv_synic_pages(void)
+{
+ return vmbus_is_confidential();
+}
+
int hv_synic_alloc(void)
{
int cpu, ret = -ENOMEM;
struct hv_per_cpu_context *hv_cpu;
+ const bool allocate_post_msg_page = hv_should_allocate_post_msg_page();
+ const bool allocate_synic_pages = hv_should_allocate_synic_pages();
+ const bool allocate_pv_synic_pages = hv_should_allocate_pv_synic_pages();
+ const enum hv_page_encryption_action enc_action =
+ (!vmbus_is_confidential()) ? HV_PAGE_ENC_DECRYPT : HV_PAGE_ENC_DEAFULT;
+
/*
* First, zero all per-cpu memory areas so hv_synic_free() can
* detect what memory has been allocated and cleanup properly
@@ -122,74 +246,38 @@ int hv_synic_alloc(void)
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long)hv_cpu);
- if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
- hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
- if (!hv_cpu->post_msg_page) {
- pr_err("Unable to allocate post msg page\n");
+ if (allocate_post_msg_page) {
+ ret = hv_alloc_page(cpu, &hv_cpu->post_msg_page,
+ enc_action, "post msg page");
+ if (ret)
goto err;
- }
-
- ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1);
- if (ret) {
- pr_err("Failed to decrypt post msg page: %d\n", ret);
- /* Just leak the page, as it's unsafe to free the page. */
- hv_cpu->post_msg_page = NULL;
- goto err;
- }
-
- memset(hv_cpu->post_msg_page, 0, PAGE_SIZE);
}
/*
- * Synic message and event pages are allocated by paravisor.
- * Skip these pages allocation here.
+ * If these SynIC pages are not allocated, SIEF and SIM pages
+ * are configured using what the root partition or the paravisor
+ * provides upon reading the SIEFP and SIMP registers.
*/
- if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
- hv_cpu->synic_message_page =
- (void *)get_zeroed_page(GFP_ATOMIC);
- if (!hv_cpu->synic_message_page) {
- pr_err("Unable to allocate SYNIC message page\n");
+ if (allocate_synic_pages) {
+ ret = hv_alloc_page(cpu, &hv_cpu->hv_synic_message_page,
+ enc_action, "SynIC msg page");
+ if (ret)
goto err;
- }
-
- hv_cpu->synic_event_page =
- (void *)get_zeroed_page(GFP_ATOMIC);
- if (!hv_cpu->synic_event_page) {
- pr_err("Unable to allocate SYNIC event page\n");
-
- free_page((unsigned long)hv_cpu->synic_message_page);
- hv_cpu->synic_message_page = NULL;
+ ret = hv_alloc_page(cpu, &hv_cpu->hv_synic_event_page,
+ enc_action, "SynIC event page");
+ if (ret)
goto err;
- }
}
- if (!ms_hyperv.paravisor_present &&
- (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
- ret = set_memory_decrypted((unsigned long)
- hv_cpu->synic_message_page, 1);
- if (ret) {
- pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
- hv_cpu->synic_message_page = NULL;
-
- /*
- * Free the event page here so that hv_synic_free()
- * won't later try to re-encrypt it.
- */
- free_page((unsigned long)hv_cpu->synic_event_page);
- hv_cpu->synic_event_page = NULL;
+ if (allocate_pv_synic_pages) {
+ ret = hv_alloc_page(cpu, &hv_cpu->pv_synic_message_page,
+ HV_PAGE_ENC_DEAFULT, "pv SynIC msg page");
+ if (ret)
goto err;
- }
-
- ret = set_memory_decrypted((unsigned long)
- hv_cpu->synic_event_page, 1);
- if (ret) {
- pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
- hv_cpu->synic_event_page = NULL;
+ ret = hv_alloc_page(cpu, &hv_cpu->pv_synic_event_page,
+ HV_PAGE_ENC_DEAFULT, "pv SynIC event page");
+ if (ret)
goto err;
- }
-
- memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
- memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
}
}
@@ -205,55 +293,38 @@ int hv_synic_alloc(void)
void hv_synic_free(void)
{
- int cpu, ret;
+ int cpu;
+
+ const bool free_post_msg_page = hv_should_allocate_post_msg_page();
+ const bool free_synic_pages = hv_should_allocate_synic_pages();
+ const bool free_pv_synic_pages = hv_should_allocate_pv_synic_pages();
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu =
per_cpu_ptr(hv_context.cpu_context, cpu);
- /* It's better to leak the page if the encryption fails. */
- if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
- if (hv_cpu->post_msg_page) {
- ret = set_memory_encrypted((unsigned long)
- hv_cpu->post_msg_page, 1);
- if (ret) {
- pr_err("Failed to encrypt post msg page: %d\n",
ret);
- hv_cpu->post_msg_page = NULL;
- }
- }
+ if (free_post_msg_page)
+ hv_free_page(&hv_cpu->post_msg_page,
+ HV_PAGE_ENC_ENCRYPT, "post msg page");
+ if (free_synic_pages) {
+ hv_free_page(&hv_cpu->hv_synic_event_page,
+ HV_PAGE_ENC_ENCRYPT, "SynIC event page");
+ hv_free_page(&hv_cpu->hv_synic_message_page,
+ HV_PAGE_ENC_ENCRYPT, "SynIC msg page");
}
-
- if (!ms_hyperv.paravisor_present &&
- (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
- if (hv_cpu->synic_message_page) {
- ret = set_memory_encrypted((unsigned long)
- hv_cpu->synic_message_page, 1);
- if (ret) {
- pr_err("Failed to encrypt SYNIC msg page: %d\n",
ret);
- hv_cpu->synic_message_page = NULL;
- }
- }
-
- if (hv_cpu->synic_event_page) {
- ret = set_memory_encrypted((unsigned long)
- hv_cpu->synic_event_page, 1);
- if (ret) {
- pr_err("Failed to encrypt SYNIC event page:
%d\n", ret);
- hv_cpu->synic_event_page = NULL;
- }
- }
+ if (free_pv_synic_pages) {
+ hv_free_page(&hv_cpu->pv_synic_event_page,
+ HV_PAGE_ENC_DEAFULT, "pv SynIC event page");
+ hv_free_page(&hv_cpu->pv_synic_message_page,
+ HV_PAGE_ENC_DEAFULT, "pv SynIC msg page");
}
-
- free_page((unsigned long)hv_cpu->post_msg_page);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
}
kfree(hv_context.hv_numa_map);
}
/*
- * hv_synic_init - Initialize the Synthetic Interrupt Controller.
+ * hv_synic_enable_regs - Initialize the Synthetic Interrupt Controller.
*
* If it is already initialized by another entity (ie x2v shim), we need to
* retrieve the initialized message and event pages. Otherwise, we create and
@@ -266,7 +337,6 @@ void hv_synic_enable_regs(unsigned int cpu)
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_sint shared_sint;
- union hv_synic_scontrol sctrl;
/* Setup the Synic's message page */
simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
@@ -276,18 +346,18 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
- hv_cpu->synic_message_page =
- (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
- if (!hv_cpu->synic_message_page)
+ hv_cpu->hv_synic_message_page
+ = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ if (!hv_cpu->hv_synic_message_page)
pr_err("Fail to map synic message page.\n");
} else {
- simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
+ simp.base_simp_gpa = virt_to_phys(hv_cpu->hv_synic_message_page)
>> HV_HYP_PAGE_SHIFT;
}
hv_set_msr(HV_MSR_SIMP, simp.as_uint64);
- /* Setup the Synic's event page */
+ /* Setup the Synic's event page with the hypervisor. */
siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
siefp.siefp_enabled = 1;
@@ -295,12 +365,12 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
- hv_cpu->synic_event_page =
- (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
- if (!hv_cpu->synic_event_page)
+ hv_cpu->hv_synic_event_page
+ = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ if (!hv_cpu->hv_synic_event_page)
pr_err("Fail to map synic event page.\n");
} else {
- siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
+ siefp.base_siefp_gpa = virt_to_phys(hv_cpu->hv_synic_event_page)
>> HV_HYP_PAGE_SHIFT;
}
@@ -313,8 +383,24 @@ void hv_synic_enable_regs(unsigned int cpu)
shared_sint.vector = vmbus_interrupt;
shared_sint.masked = false;
- shared_sint.auto_eoi = hv_recommend_using_aeoi();
- hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ /*
+ * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
+ * it doesn't provide a recommendation flag and AEOI must be disabled.
+ */
+#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
+ shared_sint.auto_eoi =
+ !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
+#else
+ shared_sint.auto_eoi = 0;
+#endif
+ hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+}
+
+static void hv_synic_enable_interrupts(void)
+{
+ union hv_synic_scontrol sctrl;
/* Enable the global synic bit */
sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
@@ -323,13 +409,78 @@ void hv_synic_enable_regs(unsigned int cpu)
hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
}
+/*
+ * The paravisor might not support proxying SynIC, and this
+ * function may fail.
+ */
+static int hv_pv_synic_enable_regs(unsigned int cpu)
+{
+ union hv_synic_simp simp;
+ union hv_synic_siefp siefp;
+
+ int err;
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
+ /* Setup the Synic's message page with the paravisor. */
+ simp.as_uint64 = hv_pv_get_synic_register(HV_MSR_SIMP, &err);
+ if (err)
+ return err;
+ simp.simp_enabled = 1;
+ simp.base_simp_gpa = virt_to_phys(hv_cpu->pv_synic_message_page)
+ >> HV_HYP_PAGE_SHIFT;
+ err = hv_pv_set_synic_register(HV_MSR_SIMP, simp.as_uint64);
+ if (err)
+ return err;
+
+ /* Setup the Synic's event page with the paravisor. */
+ siefp.as_uint64 = hv_pv_get_synic_register(HV_MSR_SIEFP, &err);
+ if (err)
+ return err;
+ siefp.siefp_enabled = 1;
+ siefp.base_siefp_gpa = virt_to_phys(hv_cpu->pv_synic_event_page)
+ >> HV_HYP_PAGE_SHIFT;
Nit: add a blank line before the return.
+ return hv_pv_set_synic_register(HV_MSR_SIEFP, siefp.as_uint64);
+}
+
+static int hv_pv_synic_enable_interrupts(void)
+{
+ union hv_synic_scontrol sctrl;
+ int err;
+
+ /* Enable the global synic bit */
+ sctrl.as_uint64 = hv_pv_get_synic_register(HV_MSR_SCONTROL, &err);
+ if (err)
+ return err;
+ sctrl.enable = 1;
+
+ return hv_pv_set_synic_register(HV_MSR_SCONTROL, sctrl.as_uint64);
+}
+
int hv_synic_init(unsigned int cpu)
{
+ int err = 0;
+
+ /*
+ * The paravisor may not support the confidential VMBus,
+ * check on that first.
+ */
+ if (vmbus_is_confidential())
+ err = hv_pv_synic_enable_regs(cpu);
+ if (err)
+ return err;
+
hv_synic_enable_regs(cpu);
+ if (!vmbus_is_confidential())
+ hv_synic_enable_interrupts();
+ else
+ err = hv_pv_synic_enable_interrupts();
+ if (err)
+ return err;
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
- return 0;
+ return err;
}
void hv_synic_disable_regs(unsigned int cpu)
@@ -339,7 +490,6 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
- union hv_synic_scontrol sctrl;
shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);
@@ -358,8 +508,8 @@ void hv_synic_disable_regs(unsigned int cpu)
*/
simp.simp_enabled = 0;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
- iounmap(hv_cpu->synic_message_page);
- hv_cpu->synic_message_page = NULL;
+ memunmap(hv_cpu->hv_synic_message_page);
+ hv_cpu->hv_synic_message_page = NULL;
} else {
simp.base_simp_gpa = 0;
}
@@ -370,43 +520,97 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.siefp_enabled = 0;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
- iounmap(hv_cpu->synic_event_page);
- hv_cpu->synic_event_page = NULL;
+ memunmap(hv_cpu->hv_synic_event_page);
+ hv_cpu->hv_synic_event_page = NULL;
} else {
siefp.base_siefp_gpa = 0;
}
hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);
+}
+
+static void hv_synic_disable_interrupts(void)
+{
+ union hv_synic_scontrol sctrl;
/* Disable the global synic bit */
sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
sctrl.enable = 0;
hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
+}
+static void hv_vmbus_disable_percpu_interrupts(void)
+{
if (vmbus_irq != -1)
disable_percpu_irq(vmbus_irq);
}
+static void hv_pv_synic_disable_regs(unsigned int cpu)
+{
+ /*
+ * The get/set register errors are deliberatley ignored in
+ * the cleanup path as they are non-consequential here.
Typo: deliberatley -> deliberately.
+ */
+ int err;
+ union hv_synic_simp simp;
+ union hv_synic_siefp siefp;
+
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
+ /* Disable SynIC's message page in the paravisor. */
+ simp.as_uint64 = hv_pv_get_synic_register(HV_MSR_SIMP, &err);
+ if (err)
+ return;
+ simp.simp_enabled = 0;
+
+ memunmap(hv_cpu->pv_synic_message_page);
+ hv_cpu->pv_synic_message_page = NULL;
+
+ err = hv_pv_set_synic_register(HV_MSR_SIMP, simp.as_uint64);
+ if (err)
+ return;
+
+ /* Disable SynIC's event page in the paravisor. */
+ siefp.as_uint64 = hv_pv_get_synic_register(HV_MSR_SIEFP, &err);
+ if (err)
+ return;
+ siefp.siefp_enabled = 0;
+
+ memunmap(hv_cpu->pv_synic_event_page);
+ hv_cpu->pv_synic_event_page = NULL;
+
+ hv_pv_set_synic_register(HV_MSR_SIEFP, siefp.as_uint64);
+}
+
+static void hv_pv_synic_disable_interrupts(void)
+{
+ union hv_synic_scontrol sctrl;
+ int err;
+
+ /* Disable the global synic bit */
+ sctrl.as_uint64 = hv_pv_get_synic_register(HV_MSR_SCONTROL, &err);
+ if (err)
+ return;
+ sctrl.enable = 0;
+ hv_pv_set_synic_register(HV_MSR_SCONTROL, sctrl.as_uint64);
+}
+
#define HV_MAX_TRIES 3
-/*
- * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
- * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
- * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
- *
- * If a bit is set, that means there is a pending channel interrupt. The expectation is
- * that the normal interrupt handling mechanism will find and process the channel interrupt
- * "very soon", and in the process clear the bit.
- */
-static bool hv_synic_event_pending(void)
+
+static bool hv_synic_event_pending_for(union hv_synic_event_flags *event, int sint)
{
- struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
- union hv_synic_event_flags *event =
- (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
- unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
+ unsigned long *recv_int_page;
bool pending;
u32 relid;
- int tries = 0;
+ int tries;
+
+ if (!event)
+ return false;
+ tries = 0;
+ event += sint;
+ recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
retry:
pending = false;
for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
@@ -460,6 +664,26 @@ static int hv_pick_new_cpu(struct vmbus_channel *channel)
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
+/*
+ * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
+ * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
+ * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
+ *
+ * If a bit is set, that means there is a pending channel interrupt. The expectation is
+ * that the normal interrupt handling mechanism will find and process the channel interrupt
+ * "very soon", and in the process clear the bit.
+ */
+static bool hv_synic_event_pending(void)
+{
+ struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+ union hv_synic_event_flags *hv_synic_event_page = hv_cpu->hv_synic_event_page;
+ union hv_synic_event_flags *pv_synic_event_page = hv_cpu->pv_synic_event_page;
+
+ return
+ hv_synic_event_pending_for(hv_synic_event_page, VMBUS_MESSAGE_SINT) ||
+ hv_synic_event_pending_for(pv_synic_event_page, VMBUS_MESSAGE_SINT);
+}
+
int hv_synic_cleanup(unsigned int cpu)
{
struct vmbus_channel *channel, *sc;
@@ -516,6 +740,13 @@ int hv_synic_cleanup(unsigned int cpu)
hv_stimer_legacy_cleanup(cpu);
hv_synic_disable_regs(cpu);
+ if (vmbus_is_confidential())
+ hv_pv_synic_disable_regs(cpu);
+ if (!vmbus_is_confidential())
+ hv_synic_disable_interrupts();
+ else
+ hv_pv_synic_disable_interrupts();
Please reconsider the *_disable_interrupts() names: they imply that specific interrupts are being disabled, whereas these functions actually clear the global SynIC control bit. A hypothetical rename is sketched below.
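For illustration only, using a made-up name (the body is unchanged from the patch, so this is purely a naming suggestion, not new logic):

static void hv_synic_ctrl_disable(void)
{
	union hv_synic_scontrol sctrl;

	/* Clear the global SynIC enable bit */
	sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
	sctrl.enable = 0;
	hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
}

and similarly for the paravisor variant.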
+ hv_vmbus_disable_percpu_interrupts();
return ret;
}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 29780f3a7478..9337e0afa3ce 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -120,8 +120,10 @@ enum {
* Per cpu state for channel handling
*/
struct hv_per_cpu_context {
- void *synic_message_page;
- void *synic_event_page;
+ void *hv_synic_message_page;
+ void *hv_synic_event_page;
+ void *pv_synic_message_page;
+ void *pv_synic_event_page;
/*
* The page is only used in hv_post_message() for a TDX VM (with the
@@ -182,7 +184,8 @@ extern int hv_synic_cleanup(unsigned int cpu);
void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- struct page *pages, u32 pagecnt, u32 max_pkt_size);
+ struct page *pages, u32 pagecnt, u32 max_pkt_size,
+ bool confidential);
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 3c9b02471760..05c2cd42fc75 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -183,7 +183,8 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- struct page *pages, u32 page_cnt, u32 max_pkt_size)
+ struct page *pages, u32 page_cnt, u32 max_pkt_size,
+ bool confidential)
{
struct page **pages_wraparound;
int i;
@@ -207,7 +208,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
- pgprot_decrypted(PAGE_KERNEL));
+ confidential ? PAGE_KERNEL : pgprot_decrypted(PAGE_KERNEL));
kfree(pages_wraparound);
if (!ring_info->ring_buffer)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e431978fa408..375b4e45c762 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1034,12 +1034,9 @@ static void vmbus_onmessage_work(struct work_struct *work)
kfree(ctx);
}
-void vmbus_on_msg_dpc(unsigned long data)
+static void vmbus_on_msg_dpc_for(void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu = (void *)data;
- void *page_addr = hv_cpu->synic_message_page;
- struct hv_message msg_copy, *msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ struct hv_message msg_copy, *msg;
struct vmbus_channel_message_header *hdr;
enum vmbus_channel_message_type msgtype;
const struct vmbus_channel_message_table_entry *entry;
@@ -1047,6 +1044,10 @@ void vmbus_on_msg_dpc(unsigned long data)
__u8 payload_size;
u32 message_type;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
+
/*
* 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
* it is being used in 'struct vmbus_channel_message_header' definition
@@ -1172,6 +1173,14 @@ void vmbus_on_msg_dpc(unsigned long data)
vmbus_signal_eom(msg, message_type);
}
+void vmbus_on_msg_dpc(unsigned long data)
+{
+ struct hv_per_cpu_context *hv_cpu = (void *)data;
+
+ vmbus_on_msg_dpc_for(hv_cpu->hv_synic_message_page);
+ vmbus_on_msg_dpc_for(hv_cpu->pv_synic_message_page);
+}
+
#ifdef CONFIG_PM_SLEEP
/*
* Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
@@ -1210,21 +1219,19 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
#endif /* CONFIG_PM_SLEEP */
/*
- * Schedule all channels with events pending
+ * Schedule all channels with events pending.
+ * The event page can be directly checked to get the id of
+ * the channel that has the interrupt pending.
*/
-static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
+static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu, void *event_page_addr)
{
unsigned long *recv_int_page;
u32 maxbits, relid;
+ union hv_synic_event_flags *event;
- /*
- * The event page can be directly checked to get the id of
- * the channel that has the interrupt pending.
- */
- void *page_addr = hv_cpu->synic_event_page;
- union hv_synic_event_flags *event
- = (union hv_synic_event_flags *)page_addr +
- VMBUS_MESSAGE_SINT;
+ if (!event_page_addr)
+ return;
+ event = (union hv_synic_event_flags *)event_page_addr + VMBUS_MESSAGE_SINT;
maxbits = HV_EVENT_FLAGS_COUNT;
recv_int_page = event->flags;
@@ -1295,26 +1302,35 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
}
}
-static void vmbus_isr(void)
+static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
- void *page_addr;
struct hv_message *msg;
- vmbus_chan_sched(hv_cpu);
-
- page_addr = hv_cpu->synic_message_page;
- msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
hv_stimer0_isr();
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
- } else
+ } else {
tasklet_schedule(&hv_cpu->msg_dpc);
+ }
}
+}
+
+static void vmbus_isr(void)
+{
+ struct hv_per_cpu_context *hv_cpu
+ = this_cpu_ptr(hv_context.cpu_context);
+
+ vmbus_chan_sched(hv_cpu, hv_cpu->hv_synic_event_page);
+ vmbus_chan_sched(hv_cpu, hv_cpu->pv_synic_event_page);
+
+ vmbus_message_sched(hv_cpu, hv_cpu->hv_synic_message_page);
+ vmbus_message_sched(hv_cpu, hv_cpu->pv_synic_message_page);
add_interrupt_randomness(vmbus_interrupt);
}
@@ -1325,11 +1341,35 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void vmbus_percpu_work(struct work_struct *work)
+static int vmbus_setup_control_plane(void)
{
- unsigned int cpu = smp_processor_id();
+ int ret;
+ int hyperv_cpuhp_online;
+
+ ret = hv_synic_alloc();
+ if (ret < 0)
+ goto err_alloc;
- hv_synic_init(cpu);
+ /*
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
+ hv_synic_init, hv_synic_cleanup);
+ if (ret < 0)
+ goto err_alloc;
+ hyperv_cpuhp_online = ret;
+ ret = vmbus_connect();
+ if (ret)
+ goto err_connect;
+ return 0;
+
+err_connect:
+ cpuhp_remove_state(hyperv_cpuhp_online);
+ return -ENODEV;
+err_alloc:
+ hv_synic_free();
+ return -ENOMEM;
}
/*
@@ -1342,8 +1382,7 @@ static void vmbus_percpu_work(struct work_struct *work)
*/
static int vmbus_bus_init(void)
{
- int ret, cpu;
- struct work_struct __percpu *works;
+ int ret;
ret = hv_init();
if (ret != 0) {
@@ -1378,41 +1417,21 @@ static int vmbus_bus_init(void)
}
}
- ret = hv_synic_alloc();
- if (ret)
- goto err_alloc;
-
- works = alloc_percpu(struct work_struct);
- if (!works) {
- ret = -ENOMEM;
- goto err_alloc;
- }
-
/*
- * Initialize the per-cpu interrupt state and stimer state.
- * Then connect to the host.
+ * Attempt to establish the confidential control plane first if this VM is
+ .* a hardware confidential VM, and the paravisor is present.
Remove the stray '.' (dot).
*/
- cpus_read_lock();
- for_each_online_cpu(cpu) {
- struct work_struct *work = per_cpu_ptr(works, cpu);
+ ret = -ENODEV;
+ if (ms_hyperv.paravisor_present && (hv_isolation_type_tdx() || hv_isolation_type_snp())) {
+ is_confidential = true;
+ ret = vmbus_setup_control_plane();
+ is_confidential = ret == 0;
The is_confidential handling is somewhat ambiguous: the flag is set to true before the control plane setup and then conditionally reset based on the return value of vmbus_setup_control_plane(). A short comment, plus writing the assignment as is_confidential = (ret == 0); (or the more generic is_confidential = !ret;), would make this easier to read and maintain; see the sketch below.
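A sketch of one possible shape. The early assignment is kept because hv_synic_alloc()/hv_synic_init(), called from vmbus_setup_control_plane(), check vmbus_is_confidential():

	is_confidential = true;
	ret = vmbus_setup_control_plane();
	/* Fall back to the non-confidential control plane on failure. */
	is_confidential = (ret == 0);
	pr_info("VMBus control plane is confidential: %d\n", is_confidential);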
- INIT_WORK(work, vmbus_percpu_work);
- schedule_work_on(cpu, work);
+ pr_info("VMBus control plane is confidential: %d\n",
is_confidential);
}
- for_each_online_cpu(cpu)
- flush_work(per_cpu_ptr(works, cpu));
-
- /* Register the callbacks for possible CPU online/offline'ing */
- ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
- hv_synic_init, hv_synic_cleanup);
- cpus_read_unlock();
- free_percpu(works);
- if (ret < 0)
- goto err_alloc;
- hyperv_cpuhp_online = ret;
-
- ret = vmbus_connect();
+ if (!is_confidential)
+ ret = vmbus_setup_control_plane();
if (ret)
goto err_connect;
@@ -1428,9 +1447,6 @@ static int vmbus_bus_init(void)
return 0;
err_connect:
- cpuhp_remove_state(hyperv_cpuhp_online);
-err_alloc:
- hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
Reviewed-by: Alok Tiwari <alok.a.tiw...@oracle.com>
Please consider this Reviewed-by for the whole series.
Thanks,
Alok