From: Cong Wang <[email protected]>

Introduce a structured messaging system built on top of the existing
multikernel IPI infrastructure to enable reliable communication between
kernel instances running on different CPUs.

The messaging layer provides:

* Simple message format with type/subtype hierarchy for extensibility
* Support for I/O interrupt forwarding between kernel instances
* Resource management messages for CPU and memory hotplug operations
* Type-safe payload structures with validation
* Handler registration system for message processing
* Convenient inline functions for common operations

Message types include:
- MK_MSG_IO: I/O interrupt forwarding and load balancing
- MK_MSG_RESOURCE: CPU/memory add/remove operations
- MK_MSG_SYSTEM: System-level coordination messages
- MK_MSG_USER: User-defined message types

The implementation relies on intra-machine IPIs being reliable, which
keeps the layer simple and fast. Each message (header plus payload) must
fit in the existing 256-byte IPI buffer; larger data transfers are
handled via the existing PFN-based shared memory mechanism.

This messaging foundation enables sophisticated multikernel coordination
scenarios including dynamic resource allocation, interrupt load
balancing, and system-wide state management.

Signed-off-by: Cong Wang <[email protected]>
---
 include/linux/multikernel.h    | 200 ++++++++++++++++++++++++
 kernel/multikernel/Makefile    |   2 +-
 kernel/multikernel/core.c      |   7 +
 kernel/multikernel/messaging.c | 278 +++++++++++++++++++++++++++++++++
 4 files changed, 486 insertions(+), 1 deletion(-)
 create mode 100644 kernel/multikernel/messaging.c

diff --git a/include/linux/multikernel.h b/include/linux/multikernel.h
index ee96bd2332b6..3bc07361145b 100644
--- a/include/linux/multikernel.h
+++ b/include/linux/multikernel.h
@@ -80,6 +80,206 @@ void *mk_receive_map_page(struct mk_ipi_data *data);
 
 #define mk_receive_unmap_page(p) memunmap(p)
 
+/*
+ * Multikernel Messaging System
+ */
+
+/*
+ * Message type definitions - organized by category
+ *
+ * Every subtype value is its category base plus a small index, so a subtype
+ * constant also identifies the category it belongs to.
+ */
+
+/* Top-level message categories (passed as msg_type by the helpers below) */
+#define MK_MSG_IO           0x1000
+#define MK_MSG_RESOURCE     0x2000
+#define MK_MSG_SYSTEM       0x3000
+#define MK_MSG_USER         0x4000
+
+/* I/O interrupt forwarding subtypes (category MK_MSG_IO) */
+#define MK_IO_IRQ_FORWARD   (MK_MSG_IO + 1)
+#define MK_IO_IRQ_BALANCE   (MK_MSG_IO + 2)
+#define MK_IO_IRQ_MASK      (MK_MSG_IO + 3)
+#define MK_IO_IRQ_UNMASK    (MK_MSG_IO + 4)
+
+/* Resource management subtypes (category MK_MSG_RESOURCE) */
+#define MK_RES_CPU_ADD      (MK_MSG_RESOURCE + 1)
+#define MK_RES_CPU_REMOVE   (MK_MSG_RESOURCE + 2)
+#define MK_RES_MEM_ADD      (MK_MSG_RESOURCE + 3)
+#define MK_RES_MEM_REMOVE   (MK_MSG_RESOURCE + 4)
+#define MK_RES_QUERY        (MK_MSG_RESOURCE + 5)
+
+/* System management subtypes (category MK_MSG_SYSTEM) */
+#define MK_SYS_HEARTBEAT    (MK_MSG_SYSTEM + 1)
+#define MK_SYS_SHUTDOWN     (MK_MSG_SYSTEM + 2)
+
+/*
+ * Core message structure
+ *
+ * This header sits at the start of the IPI data buffer and is followed
+ * directly by payload_len bytes of payload.
+ *
+ * NOTE(review): where u64 is 8-byte aligned, sizeof(struct mk_message)
+ * includes padding after payload_len while payload[] starts right after
+ * payload_len. Sender and receiver both size messages with sizeof(), so they
+ * agree with each other - confirm the extra bytes are intended.
+ */
+struct mk_message {
+       u32 msg_type;           /* Message type identifier */
+       u32 msg_subtype;        /* Subtype for specific operations */
+       u64 msg_id;             /* Optional message ID for correlation */
+       u32 payload_len;        /* Length of payload data */
+       u8 payload[];           /* Variable payload (up to remaining IPI buffer) */
+};
+
+/*
+ * Payload structures for specific message types
+ */
+
+/* I/O interrupt forwarding; this is the payload built by mk_send_irq_forward() */
+struct mk_io_irq_payload {
+       u32 irq_number;         /* Hardware IRQ number */
+       u32 vector;             /* Interrupt vector */
+       u32 device_id;          /* Device identifier (optional) */
+       u32 flags;              /* Control flags (priority, etc.) */
+};
+
+/* IRQ control flags (presumably OR-ed into mk_io_irq_payload.flags - confirm) */
+#define MK_IRQ_HIGH_PRIORITY    0x01
+#define MK_IRQ_LOW_LATENCY      0x02
+#define MK_IRQ_EDGE_TRIGGERED   0x04
+#define MK_IRQ_LEVEL_TRIGGERED  0x08
+
+/*
+ * CPU resource operations
+ *
+ * Payload of the MK_RES_CPU_ADD and MK_RES_CPU_REMOVE messages sent by
+ * mk_send_cpu_add() and mk_send_cpu_remove(); the remove helper leaves
+ * numa_node and flags at zero.
+ */
+struct mk_cpu_resource_payload {
+       u32 cpu_id;             /* Physical CPU ID */
+       u32 numa_node;          /* NUMA node (optional) */
+       u32 flags;              /* CPU capabilities/attributes */
+};
+
+/* CPU capability flags (presumably for mk_cpu_resource_payload.flags - confirm) */
+#define MK_CPU_HAS_AVX512       0x01
+#define MK_CPU_HAS_TSX          0x02
+#define MK_CPU_HYPERTHREAD      0x04
+
+/*
+ * Memory resource operations
+ *
+ * Payload of the MK_RES_MEM_ADD and MK_RES_MEM_REMOVE messages sent by
+ * mk_send_mem_add() and mk_send_mem_remove(); the remove helper leaves
+ * numa_node and mem_type at zero.
+ */
+struct mk_mem_resource_payload {
+       u64 start_pfn;          /* Starting page frame number */
+       u64 nr_pages;           /* Number of pages */
+       u32 numa_node;          /* NUMA node */
+       u32 mem_type;           /* Memory type (normal/DMA/etc.) */
+};
+
+/* Memory types (presumably for mk_mem_resource_payload.mem_type - confirm) */
+#define MK_MEM_NORMAL           0x01
+#define MK_MEM_DMA              0x02
+#define MK_MEM_DMA32            0x04
+#define MK_MEM_HIGHMEM          0x08
+
+/**
+ * typedef mk_msg_handler_t - Message handler callback type
+ * @msg_type: msg_type field taken from the received message header
+ * @subtype: msg_subtype field taken from the received message header
+ * @payload: Payload bytes, or NULL when @payload_len is 0
+ * @payload_len: Length of @payload in bytes
+ * @ctx: Context pointer that was passed to mk_register_msg_handler()
+ */
+typedef void (*mk_msg_handler_t)(u32 msg_type, u32 subtype,
+                                void *payload, u32 payload_len, void *ctx);
+
+/*
+ * Message API functions
+ */
+
+/**
+ * mk_send_message - Send a message to another multikernel instance
+ * @instance_id: Target multikernel instance ID
+ * @msg_type: Message type identifier
+ * @subtype: Message subtype
+ * @payload: Pointer to payload data (can be NULL)
+ * @payload_len: Length of payload data
+ *
+ * The message header plus @payload_len bytes must fit in MK_MAX_DATA_SIZE;
+ * larger messages are rejected with -EMSGSIZE.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int mk_send_message(int instance_id, u32 msg_type, u32 subtype,
+                   void *payload, u32 payload_len);
+
+/**
+ * mk_register_msg_handler - Register handler for specific message type
+ * @msg_type: Message type to handle
+ * @handler: Handler function
+ * @ctx: Context pointer passed to handler
+ *
+ * Only one handler can be registered per @msg_type; registering a type that
+ * already has a handler fails with -EEXIST. A NULL @handler is rejected with
+ * -EINVAL.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int mk_register_msg_handler(u32 msg_type, mk_msg_handler_t handler, void *ctx);
+
+/**
+ * mk_unregister_msg_handler - Unregister message handler
+ * @msg_type: Message type to unregister
+ * @handler: Handler function to remove
+ *
+ * The registration is removed only when both @msg_type and @handler match it;
+ * otherwise -ENOENT is returned. A NULL @handler is rejected with -EINVAL.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int mk_unregister_msg_handler(u32 msg_type, mk_msg_handler_t handler);
+
+/*
+ * Convenience functions for common message types
+ */
+
+/**
+ * mk_send_irq_forward - Send an MK_IO_IRQ_FORWARD message
+ * @instance_id: Target multikernel instance ID
+ * @irq_number: Hardware IRQ number
+ * @vector: Interrupt vector
+ * @device_id: Device identifier (optional)
+ * @flags: Control flags (priority, etc.)
+ *
+ * Packs the arguments into a struct mk_io_irq_payload and sends it as an
+ * MK_MSG_IO message.
+ *
+ * Returns the result of mk_send_message().
+ */
+static inline int mk_send_irq_forward(int instance_id, u32 irq_number,
+                                     u32 vector, u32 device_id, u32 flags)
+{
+       struct mk_io_irq_payload irq;
+
+       irq.irq_number = irq_number;
+       irq.vector = vector;
+       irq.device_id = device_id;
+       irq.flags = flags;
+
+       return mk_send_message(instance_id, MK_MSG_IO, MK_IO_IRQ_FORWARD,
+                              &irq, sizeof(irq));
+}
+
+/**
+ * mk_send_cpu_add - Send an MK_RES_CPU_ADD message
+ * @instance_id: Target multikernel instance ID
+ * @cpu_id: Physical CPU ID
+ * @numa_node: NUMA node (optional)
+ * @flags: CPU capabilities/attributes
+ *
+ * Packs the arguments into a struct mk_cpu_resource_payload and sends it as
+ * an MK_MSG_RESOURCE message.
+ *
+ * Returns the result of mk_send_message().
+ */
+static inline int mk_send_cpu_add(int instance_id, u32 cpu_id,
+                                 u32 numa_node, u32 flags)
+{
+       struct mk_cpu_resource_payload cpu;
+
+       cpu.cpu_id = cpu_id;
+       cpu.numa_node = numa_node;
+       cpu.flags = flags;
+
+       return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_CPU_ADD,
+                              &cpu, sizeof(cpu));
+}
+
+/**
+ * mk_send_cpu_remove - Send an MK_RES_CPU_REMOVE message
+ * @instance_id: Target multikernel instance ID
+ * @cpu_id: Physical CPU ID
+ *
+ * Only the CPU ID is filled in; the NUMA node and flags of the payload are
+ * sent as zero.
+ *
+ * Returns the result of mk_send_message().
+ */
+static inline int mk_send_cpu_remove(int instance_id, u32 cpu_id)
+{
+       /* Members not named here are zero-initialised */
+       struct mk_cpu_resource_payload cpu = { .cpu_id = cpu_id };
+
+       return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_CPU_REMOVE,
+                              &cpu, sizeof(cpu));
+}
+
+/**
+ * mk_send_mem_add - Send an MK_RES_MEM_ADD message
+ * @instance_id: Target multikernel instance ID
+ * @start_pfn: Starting page frame number
+ * @nr_pages: Number of pages
+ * @numa_node: NUMA node
+ * @mem_type: Memory type (normal/DMA/etc.)
+ *
+ * Packs the arguments into a struct mk_mem_resource_payload and sends it as
+ * an MK_MSG_RESOURCE message.
+ *
+ * Returns the result of mk_send_message().
+ */
+static inline int mk_send_mem_add(int instance_id, u64 start_pfn, u64 nr_pages,
+                                 u32 numa_node, u32 mem_type)
+{
+       struct mk_mem_resource_payload mem;
+
+       mem.start_pfn = start_pfn;
+       mem.nr_pages = nr_pages;
+       mem.numa_node = numa_node;
+       mem.mem_type = mem_type;
+
+       return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_MEM_ADD,
+                              &mem, sizeof(mem));
+}
+
+/**
+ * mk_send_mem_remove - Send an MK_RES_MEM_REMOVE message
+ * @instance_id: Target multikernel instance ID
+ * @start_pfn: Starting page frame number
+ * @nr_pages: Number of pages
+ *
+ * Only the page range is filled in; the NUMA node and memory type of the
+ * payload are sent as zero.
+ *
+ * Returns the result of mk_send_message().
+ */
+static inline int mk_send_mem_remove(int instance_id, u64 start_pfn,
+                                    u64 nr_pages)
+{
+       /* Members not named here are zero-initialised */
+       struct mk_mem_resource_payload mem = {
+               .start_pfn = start_pfn,
+               .nr_pages = nr_pages,
+       };
+
+       return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_MEM_REMOVE,
+                              &mem, sizeof(mem));
+}
+
+/*
+ * Messaging system functions
+ *
+ * mk_messaging_init() is called from multikernel_init() before the kernfs
+ * interface is set up. mk_messaging_cleanup() unregisters and frees every
+ * message handler that is still registered.
+ */
+int __init mk_messaging_init(void);
+void mk_messaging_cleanup(void);
+
 struct resource;
 
 extern phys_addr_t multikernel_alloc(size_t size);
diff --git a/kernel/multikernel/Makefile b/kernel/multikernel/Makefile
index b539acc656c6..f133e1eaf534 100644
--- a/kernel/multikernel/Makefile
+++ b/kernel/multikernel/Makefile
@@ -3,7 +3,7 @@
 # Makefile for multikernel support
 #
 
-obj-y += core.o mem.o kernfs.o dts.o ipi.o
+obj-y += core.o mem.o kernfs.o dts.o ipi.o messaging.o
 
 # Add libfdt include path for device tree parsing
 CFLAGS_dts.o = -I $(srctree)/scripts/dtc/libfdt
diff --git a/kernel/multikernel/core.c b/kernel/multikernel/core.c
index ee7a21327ea5..37dbf0cf4be6 100644
--- a/kernel/multikernel/core.c
+++ b/kernel/multikernel/core.c
@@ -505,9 +505,16 @@ static int __init multikernel_init(void)
 {
        int ret;
 
+       ret = mk_messaging_init();
+       if (ret < 0) {
+               pr_err("Failed to initialize multikernel messaging: %d\n", ret);
+               return ret;
+       }
+
        ret = mk_kernfs_init();
        if (ret < 0) {
                pr_err("Failed to initialize multikernel sysfs interface: 
%d\n", ret);
+               mk_messaging_cleanup();
                return ret;
        }
 
diff --git a/kernel/multikernel/messaging.c b/kernel/multikernel/messaging.c
new file mode 100644
index 000000000000..be1fba8778ec
--- /dev/null
+++ b/kernel/multikernel/messaging.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Multikernel Messaging System
+ * Copyright (C) 2025 Multikernel Technologies, Inc. All rights reserved
+ *
+ * Simple messaging layer on top of multikernel IPI infrastructure
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/multikernel.h>
+
+/**
+ * struct mk_msg_type_handler - Per-type message handler registry entry
+ * @msg_type: Message type this entry handles; also used as the IPI type
+ * @ipi_handler: Handle returned by multikernel_register_handler()
+ * @msg_handler: Callback invoked for each valid message of @msg_type
+ * @context: Caller-supplied pointer passed back to @msg_handler
+ * @next: Next entry in the singly linked mk_msg_type_handlers list
+ */
+struct mk_msg_type_handler {
+       u32 msg_type;
+       struct mk_ipi_handler *ipi_handler;
+       mk_msg_handler_t msg_handler;
+       void *context;
+       struct mk_msg_type_handler *next;
+};
+
+/* Head of the singly linked list of registered per-type handlers */
+static struct mk_msg_type_handler *mk_msg_type_handlers;
+/* Held around every walk and modification of mk_msg_type_handlers */
+static DEFINE_RAW_SPINLOCK(mk_msg_type_handlers_lock);
+
+/**
+ * mk_message_type_ipi_callback - IPI callback to handle incoming messages for a specific type
+ * @data: IPI data containing the message
+ * @ctx: Context containing the message handler info (struct mk_msg_type_handler)
+ *
+ * Checks the IPI type, the minimum size and the declared payload length of
+ * the incoming buffer and, when all of them are consistent, passes the
+ * message to the registered handler. Inconsistent messages are dropped with a
+ * warning. Warnings that depend on the received data are rate limited so that
+ * a stream of bad messages cannot flood the log.
+ */
+static void mk_message_type_ipi_callback(struct mk_ipi_data *data, void *ctx)
+{
+       struct mk_msg_type_handler *type_handler = ctx;
+       struct mk_message *msg;
+       u32 msg_type, msg_subtype;
+       void *payload;
+       u32 payload_len;
+
+       if (!type_handler || !type_handler->msg_handler) {
+               pr_warn("Multikernel message received but no handler registered\n");
+               return;
+       }
+
+       /* Verify this matches our expected message type */
+       if (data->type != type_handler->msg_type) {
+               pr_warn_ratelimited("Multikernel message type mismatch: expected 0x%x, got 0x%x\n",
+                                   type_handler->msg_type, data->type);
+               return;
+       }
+
+       /* Ensure we have at least a message header */
+       if (data->data_size < sizeof(struct mk_message)) {
+               pr_warn_ratelimited("Multikernel message too small: %zu bytes\n",
+                                   data->data_size);
+               return;
+       }
+
+       msg = (struct mk_message *)data->buffer;
+
+       /*
+        * Read payload_len exactly once, so that the value validated below is
+        * the value handed to the handler even if the buffer contents change
+        * underneath us.
+        */
+       payload_len = READ_ONCE(msg->payload_len);
+
+       /* Validate message structure */
+       if (payload_len > (data->data_size - sizeof(struct mk_message))) {
+               pr_warn_ratelimited("Multikernel message payload length invalid: %u > %zu\n",
+                                   payload_len,
+                                   data->data_size - sizeof(struct mk_message));
+               return;
+       }
+
+       msg_type = msg->msg_type;
+       msg_subtype = msg->msg_subtype;
+       /* Handlers see a NULL payload pointer for empty messages */
+       payload = payload_len > 0 ? msg->payload : NULL;
+
+       pr_debug("Multikernel message received: type=0x%x, subtype=0x%x, len=%u from CPU %d\n",
+                msg_type, msg_subtype, payload_len, data->sender_cpu);
+
+       /* Call the registered handler for this message type */
+       type_handler->msg_handler(msg_type, msg_subtype, payload, payload_len,
+                                 type_handler->context);
+}
+
+/**
+ * mk_send_message - Send a message to another multikernel instance
+ * @instance_id: Target multikernel instance ID
+ * @msg_type: Message type identifier
+ * @subtype: Message subtype
+ * @payload: Pointer to payload data (can be NULL when @payload_len is 0)
+ * @payload_len: Length of payload data
+ *
+ * Builds a struct mk_message header followed by a copy of @payload in a
+ * temporary buffer and sends it with multikernel_send_ipi_data(), using
+ * @msg_type as the IPI type. The temporary buffer is freed before returning.
+ *
+ * Returns 0 on success, -EINVAL if @payload is NULL while @payload_len is
+ * non-zero, -EMSGSIZE if header plus payload exceed MK_MAX_DATA_SIZE, -ENOMEM
+ * if the temporary buffer cannot be allocated, or the negative error code
+ * returned by multikernel_send_ipi_data().
+ */
+int mk_send_message(int instance_id, u32 msg_type, u32 subtype,
+                   void *payload, u32 payload_len)
+{
+       struct mk_message *msg;
+       size_t total_size;
+       int ret;
+
+       /* A length without data would advertise bytes the caller never supplied */
+       if (!payload && payload_len > 0)
+               return -EINVAL;
+
+       /* Calculate total message size */
+       total_size = sizeof(struct mk_message) + payload_len;
+
+       /*
+        * Check if message fits in IPI buffer. payload_len is tested on its
+        * own first: where size_t is 32 bits wide the sum above can wrap for a
+        * huge payload_len, and a wrapped total_size must not pass the check.
+        */
+       if (payload_len > MK_MAX_DATA_SIZE || total_size > MK_MAX_DATA_SIZE) {
+               pr_err("Multikernel message too large: %zu > %d bytes\n",
+                      total_size, MK_MAX_DATA_SIZE);
+               return -EMSGSIZE;
+       }
+
+       /* Allocate temporary buffer for message */
+       msg = kzalloc(total_size, GFP_ATOMIC);
+       if (!msg)
+               return -ENOMEM;
+
+       /* Fill in message header */
+       msg->msg_type = msg_type;
+       msg->msg_subtype = subtype;
+       msg->msg_id = 0; /* Could be enhanced with unique IDs later */
+       msg->payload_len = payload_len;
+
+       /* Copy payload if provided (payload is known to be non-NULL here) */
+       if (payload_len > 0)
+               memcpy(msg->payload, payload, payload_len);
+
+       /* Send via IPI using the message type as IPI type */
+       ret = multikernel_send_ipi_data(instance_id, msg, total_size, msg_type);
+
+       /* Clean up temporary buffer */
+       kfree(msg);
+
+       if (ret < 0) {
+               pr_err("Failed to send multikernel message: %d\n", ret);
+               return ret;
+       }
+
+       pr_debug("Multikernel message sent: type=0x%x, subtype=0x%x, len=%u to instance %d\n",
+                msg_type, subtype, payload_len, instance_id);
+
+       return 0;
+}
+EXPORT_SYMBOL(mk_send_message);
+
+/*
+ * Return the registry entry for @msg_type, or NULL if there is none.
+ * The caller must hold mk_msg_type_handlers_lock.
+ */
+static struct mk_msg_type_handler *mk_find_msg_type_handler(u32 msg_type)
+{
+       struct mk_msg_type_handler *cur;
+
+       for (cur = mk_msg_type_handlers; cur; cur = cur->next) {
+               if (cur->msg_type == msg_type)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/**
+ * mk_register_msg_handler - Register handler for specific message type
+ * @msg_type: Message type to handle
+ * @handler: Handler function
+ * @ctx: Context pointer passed to handler
+ *
+ * Only one handler may be registered per @msg_type. The registry is checked
+ * twice: once up front, to fail early without allocating, and once more in
+ * the same critical section that links the new entry in. The second check
+ * closes the window in which two concurrent callers could both pass the first
+ * check and both end up registered for the same type.
+ *
+ * Returns 0 on success, -EINVAL if @handler is NULL, -EEXIST if @msg_type
+ * already has a handler, or -ENOMEM if the entry cannot be allocated or the
+ * IPI handler cannot be registered.
+ */
+int mk_register_msg_handler(u32 msg_type, mk_msg_handler_t handler, void *ctx)
+{
+       struct mk_msg_type_handler *type_handler, *existing;
+       unsigned long flags;
+
+       if (!handler)
+               return -EINVAL;
+
+       /* Early check so an obvious duplicate costs no allocation */
+       raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+       existing = mk_find_msg_type_handler(msg_type);
+       raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+       if (existing)
+               goto err_exists;
+
+       /* Allocate new type handler entry */
+       type_handler = kzalloc(sizeof(*type_handler), GFP_KERNEL);
+       if (!type_handler)
+               return -ENOMEM;
+
+       type_handler->msg_type = msg_type;
+       type_handler->msg_handler = handler;
+       type_handler->context = ctx;
+
+       /* Register IPI handler for this message type */
+       type_handler->ipi_handler =
+               multikernel_register_handler(mk_message_type_ipi_callback,
+                                            type_handler, msg_type);
+       if (!type_handler->ipi_handler) {
+               pr_err("Failed to register IPI handler for message type 0x%x\n", msg_type);
+               kfree(type_handler);
+               return -ENOMEM;
+       }
+
+       /*
+        * Add to type handler list. The lock was dropped above, so look again:
+        * another caller may have registered this type in the meantime.
+        */
+       raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+       existing = mk_find_msg_type_handler(msg_type);
+       if (!existing) {
+               type_handler->next = mk_msg_type_handlers;
+               mk_msg_type_handlers = type_handler;
+       }
+       raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+       if (existing) {
+               /* Lost the race: undo our IPI registration, same as unregistering */
+               multikernel_unregister_handler(type_handler->ipi_handler);
+               kfree(type_handler);
+               goto err_exists;
+       }
+
+       pr_debug("Registered multikernel message handler for type 0x%x\n", msg_type);
+       return 0;
+
+err_exists:
+       pr_warn("Handler for message type 0x%x already registered\n", msg_type);
+       return -EEXIST;
+}
+EXPORT_SYMBOL(mk_register_msg_handler);
+
+/**
+ * mk_unregister_msg_handler - Unregister message handler
+ * @msg_type: Message type to unregister
+ * @handler: Handler function to remove
+ *
+ * Unlinks the first registry entry whose type and handler function both
+ * match, then unregisters its IPI handler and frees the entry.
+ *
+ * Returns 0 on success, -EINVAL if @handler is NULL, -ENOENT if no matching
+ * entry is registered.
+ */
+int mk_unregister_msg_handler(u32 msg_type, mk_msg_handler_t handler)
+{
+       struct mk_msg_type_handler **link, *entry = NULL;
+       unsigned long flags;
+
+       if (!handler)
+               return -EINVAL;
+
+       /* Unlink under the lock; the teardown below runs without it */
+       raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+       for (link = &mk_msg_type_handlers; *link; link = &(*link)->next) {
+               if ((*link)->msg_type != msg_type ||
+                   (*link)->msg_handler != handler)
+                       continue;
+
+               entry = *link;
+               *link = entry->next;
+               break;
+       }
+       raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+       if (!entry)
+               return -ENOENT;
+
+       /* Unregister the IPI handler */
+       if (entry->ipi_handler)
+               multikernel_unregister_handler(entry->ipi_handler);
+
+       kfree(entry);
+       pr_debug("Unregistered multikernel message handler for type 0x%x\n", msg_type);
+       return 0;
+}
+EXPORT_SYMBOL(mk_unregister_msg_handler);
+
+/**
+ * mk_messaging_init - Initialize the messaging system
+ *
+ * Called during multikernel initialization. Nothing has to be set up here:
+ * IPI handlers are registered per message type when a message handler is
+ * registered, so this function only logs that the layer is available.
+ *
+ * Returns 0 (this function cannot currently fail).
+ */
+int __init mk_messaging_init(void)
+{
+       pr_info("Multikernel messaging system initialized\n");
+
+       return 0;
+}
+
+/**
+ * mk_messaging_cleanup - Cleanup the messaging system
+ *
+ * Called during multikernel cleanup. Detaches the whole handler list while
+ * holding the lock, then unregisters the IPI handler of each entry and frees
+ * the entry with the lock released.
+ */
+void mk_messaging_cleanup(void)
+{
+       struct mk_msg_type_handler *pending;
+       unsigned long flags;
+
+       /* Take ownership of every registered entry in one step */
+       raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+       pending = mk_msg_type_handlers;
+       mk_msg_type_handlers = NULL;
+       raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+       while (pending) {
+               struct mk_msg_type_handler *entry = pending;
+
+               /* Advance first: entry is freed below */
+               pending = entry->next;
+
+               if (entry->ipi_handler)
+                       multikernel_unregister_handler(entry->ipi_handler);
+
+               kfree(entry);
+       }
+
+       pr_info("Multikernel messaging system cleaned up\n");
+}
-- 
2.34.1


Reply via email to