Move the radix tree tracker implementation from the core KHO code into its own dedicated file (kho_radix.c).
This is a pure code movement patch; no logic or functional changes are introduced. Signed-off-by: Pasha Tatashin <[email protected]> --- Documentation/core-api/kho/index.rst | 3 + kernel/liveupdate/Makefile | 6 +- kernel/liveupdate/kexec_handover.c | 273 ------------------------- kernel/liveupdate/kho_radix.c | 290 +++++++++++++++++++++++++++ 4 files changed, 298 insertions(+), 274 deletions(-) create mode 100644 kernel/liveupdate/kho_radix.c diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst index 320914a42178..a9892c671ec3 100644 --- a/Documentation/core-api/kho/index.rst +++ b/Documentation/core-api/kho/index.rst @@ -83,6 +83,9 @@ Public API .. kernel-doc:: kernel/liveupdate/kexec_handover.c :export: +.. kernel-doc:: kernel/liveupdate/kho_radix.c + :export: + KHO Serialization Blocks API ============================ diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile index eec9d3ae07eb..a3ee8a5c27a2 100644 --- a/kernel/liveupdate/Makefile +++ b/kernel/liveupdate/Makefile @@ -7,7 +7,11 @@ luo-y := \ luo_flb.o \ luo_session.o -obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o +kho-y := \ + kexec_handover.o \ + kho_radix.o + +obj-$(CONFIG_KEXEC_HANDOVER) += kho.o obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o obj-$(CONFIG_KEXEC_HANDOVER_DEBUGFS) += kexec_handover_debugfs.o diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 4834a809985a..041efff7ca11 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -5,7 +5,6 @@ * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <[email protected]> * Copyright (C) 2025 Google LLC, Changyuan Lyu <[email protected]> * Copyright (C) 2025 Pasha Tatashin <[email protected]> - * Copyright (C) 2026 Google LLC, Jason Miu <[email protected]> */ #define pr_fmt(fmt) "KHO: " fmt @@ -84,278 +83,6 @@ static struct kho_out kho_out = { }, }; -/** - * kho_radix_encode_key - Encodes a physical address and order into a radix key. - * @phys: The physical address of the page. - * @order: The order of the page. - * - * This function combines a page's physical address and its order into a - * single unsigned long, which is used as a key for all radix tree - * operations. - * - * Return: The encoded unsigned long radix key. - */ -static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order) -{ - /* Order bits part */ - unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order); - /* Shifted physical address part */ - unsigned long l = phys >> (PAGE_SHIFT + order); - - return h | l; -} - -/** - * kho_radix_decode_key - Decodes a radix key back into a physical address and order. - * @key: The unsigned long key to decode. - * @order: An output parameter, a pointer to an unsigned int where the decoded - * page order will be stored. - * - * This function reverses the encoding performed by kho_radix_encode_key(), - * extracting the original physical address and page order from a given key. - * - * Return: The decoded physical address. - */ -static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order) -{ - unsigned int order_bit = fls64(key); - phys_addr_t phys; - - /* order_bit is numbered starting at 1 from fls64 */ - *order = KHO_ORDER_0_LOG2 - order_bit + 1; - /* The order is discarded by the shift */ - phys = key << (PAGE_SHIFT + *order); - - return phys; -} - -static unsigned long kho_radix_get_bitmap_index(unsigned long key) -{ - return key % (1 << KHO_BITMAP_SIZE_LOG2); -} - -static unsigned long kho_radix_get_table_index(unsigned long key, - unsigned int level) -{ - int s; - - s = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2; - return (key >> s) % (1 << KHO_TABLE_SIZE_LOG2); -} - -/** - * kho_radix_add_page - Marks a page as preserved in the radix tree. - * @tree: The KHO radix tree. - * @pfn: The page frame number of the page to preserve. - * @order: The order of the page. - * - * This function traverses the radix tree based on the key derived from @pfn - * and @order. It sets the corresponding bit in the leaf bitmap to mark the - * page for preservation. If intermediate nodes do not exist along the path, - * they are allocated and added to the tree. - * - * Return: 0 on success, or a negative error code on failure. - */ -int kho_radix_add_page(struct kho_radix_tree *tree, - unsigned long pfn, unsigned int order) -{ - /* Newly allocated nodes for error cleanup */ - struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 }; - unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); - struct kho_radix_node *anchor_node = NULL; - struct kho_radix_node *node = tree->root; - struct kho_radix_node *new_node; - unsigned int i, idx, anchor_idx; - struct kho_radix_leaf *leaf; - int err = 0; - - if (WARN_ON_ONCE(!tree->root)) - return -EINVAL; - - might_sleep(); - - guard(mutex)(&tree->lock); - - /* Go from high levels to low levels */ - for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { - idx = kho_radix_get_table_index(key, i); - - if (node->table[idx]) { - node = phys_to_virt(node->table[idx]); - continue; - } - - /* Next node is empty, create a new node for it */ - new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL); - if (!new_node) { - err = -ENOMEM; - goto err_free_nodes; - } - - node->table[idx] = virt_to_phys(new_node); - - /* - * Capture the node where the new branch starts for cleanup - * if allocation fails. - */ - if (!anchor_node) { - anchor_node = node; - anchor_idx = idx; - } - intermediate_nodes[i] = new_node; - - node = new_node; - } - - /* Handle the leaf level bitmap (level 0) */ - idx = kho_radix_get_bitmap_index(key); - leaf = (struct kho_radix_leaf *)node; - __set_bit(idx, leaf->bitmap); - - return 0; - -err_free_nodes: - for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { - if (intermediate_nodes[i]) - free_page((unsigned long)intermediate_nodes[i]); - } - if (anchor_node) - anchor_node->table[anchor_idx] = 0; - - return err; -} -EXPORT_SYMBOL_GPL(kho_radix_add_page); - -/** - * kho_radix_del_page - Removes a page's preservation status from the radix tree. - * @tree: The KHO radix tree. - * @pfn: The page frame number of the page to unpreserve. - * @order: The order of the page. - * - * This function traverses the radix tree and clears the bit corresponding to - * the page, effectively removing its "preserved" status. It does not free - * the tree's intermediate nodes, even if they become empty. - */ -void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn, - unsigned int order) -{ - unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); - struct kho_radix_node *node = tree->root; - struct kho_radix_leaf *leaf; - unsigned int i, idx; - - if (WARN_ON_ONCE(!tree->root)) - return; - - might_sleep(); - - guard(mutex)(&tree->lock); - - /* Go from high levels to low levels */ - for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { - idx = kho_radix_get_table_index(key, i); - - /* - * Attempting to delete a page that has not been preserved, - * return with a warning. - */ - if (WARN_ON(!node->table[idx])) - return; - - node = phys_to_virt(node->table[idx]); - } - - /* Handle the leaf level bitmap (level 0) */ - leaf = (struct kho_radix_leaf *)node; - idx = kho_radix_get_bitmap_index(key); - __clear_bit(idx, leaf->bitmap); -} -EXPORT_SYMBOL_GPL(kho_radix_del_page); - -static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf, - unsigned long key, - kho_radix_tree_walk_callback_t cb) -{ - unsigned long *bitmap = (unsigned long *)leaf; - unsigned int order; - phys_addr_t phys; - unsigned int i; - int err; - - for_each_set_bit(i, bitmap, PAGE_SIZE * BITS_PER_BYTE) { - phys = kho_radix_decode_key(key | i, &order); - err = cb(phys, order); - if (err) - return err; - } - - return 0; -} - -static int __kho_radix_walk_tree(struct kho_radix_node *root, - unsigned int level, unsigned long start, - kho_radix_tree_walk_callback_t cb) -{ - struct kho_radix_node *node; - struct kho_radix_leaf *leaf; - unsigned long key, i; - unsigned int shift; - int err; - - for (i = 0; i < PAGE_SIZE / sizeof(phys_addr_t); i++) { - if (!root->table[i]) - continue; - - shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) + - KHO_BITMAP_SIZE_LOG2; - key = start | (i << shift); - - node = phys_to_virt(root->table[i]); - - if (level == 1) { - /* - * we are at level 1, - * node is pointing to the level 0 bitmap. - */ - leaf = (struct kho_radix_leaf *)node; - err = kho_radix_walk_leaf(leaf, key, cb); - } else { - err = __kho_radix_walk_tree(node, level - 1, - key, cb); - } - - if (err) - return err; - } - - return 0; -} - -/** - * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page. - * @tree: A pointer to the KHO radix tree to walk. - * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be - * invoked for each preserved page found in the tree. The callback receives - * the physical address and order of the preserved page. - * - * This function walks the radix tree, searching from the specified top level - * down to the lowest level (level 0). For each preserved page found, it invokes - * the provided callback, passing the page's physical address and order. - * - * Return: 0 if the walk completed the specified tree, or the non-zero return - * value from the callback that stopped the walk. - */ -int kho_radix_walk_tree(struct kho_radix_tree *tree, - kho_radix_tree_walk_callback_t cb) -{ - if (WARN_ON_ONCE(!tree->root)) - return -EINVAL; - - guard(mutex)(&tree->lock); - - return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb); -} -EXPORT_SYMBOL_GPL(kho_radix_walk_tree); /* For physically contiguous 0-order pages. */ static void kho_init_pages(struct page *page, unsigned long nr_pages) diff --git a/kernel/liveupdate/kho_radix.c b/kernel/liveupdate/kho_radix.c new file mode 100644 index 000000000000..c836783a1376 --- /dev/null +++ b/kernel/liveupdate/kho_radix.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kho_radix.c - KHO radix tree tracker for preserved memory pages + * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <[email protected]> + * Copyright (C) 2025 Pasha Tatashin <[email protected]> + * Copyright (C) 2026 Google LLC, Jason Miu <[email protected]> + */ + +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kho/abi/kexec_handover.h> +#include <linux/kho_radix_tree.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/types.h> + +/** + * kho_radix_encode_key - Encodes a physical address and order into a radix key. + * @phys: The physical address of the page. + * @order: The order of the page. + * + * This function combines a page's physical address and its order into a + * single unsigned long, which is used as a key for all radix tree + * operations. + * + * Return: The encoded unsigned long radix key. + */ +static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order) +{ + /* Order bits part */ + unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order); + /* Shifted physical address part */ + unsigned long l = phys >> (PAGE_SHIFT + order); + + return h | l; +} + +/** + * kho_radix_decode_key - Decodes a radix key back into a physical address and order. + * @key: The unsigned long key to decode. + * @order: An output parameter, a pointer to an unsigned int where the decoded + * page order will be stored. + * + * This function reverses the encoding performed by kho_radix_encode_key(), + * extracting the original physical address and page order from a given key. + * + * Return: The decoded physical address. + */ +static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order) +{ + unsigned int order_bit = fls64(key); + phys_addr_t phys; + + /* order_bit is numbered starting at 1 from fls64 */ + *order = KHO_ORDER_0_LOG2 - order_bit + 1; + /* The order is discarded by the shift */ + phys = key << (PAGE_SHIFT + *order); + + return phys; +} + +static unsigned long kho_radix_get_bitmap_index(unsigned long key) +{ + return key % (1 << KHO_BITMAP_SIZE_LOG2); +} + +static unsigned long kho_radix_get_table_index(unsigned long key, + unsigned int level) +{ + int s; + + s = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2; + return (key >> s) % (1 << KHO_TABLE_SIZE_LOG2); +} + +/** + * kho_radix_add_page - Marks a page as preserved in the radix tree. + * @tree: The KHO radix tree. + * @pfn: The page frame number of the page to preserve. + * @order: The order of the page. + * + * This function traverses the radix tree based on the key derived from @pfn + * and @order. It sets the corresponding bit in the leaf bitmap to mark the + * page for preservation. If intermediate nodes do not exist along the path, + * they are allocated and added to the tree. + * + * Return: 0 on success, or a negative error code on failure. + */ +int kho_radix_add_page(struct kho_radix_tree *tree, + unsigned long pfn, unsigned int order) +{ + /* Newly allocated nodes for error cleanup */ + struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 }; + unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); + struct kho_radix_node *anchor_node = NULL; + struct kho_radix_node *node = tree->root; + struct kho_radix_node *new_node; + unsigned int i, idx, anchor_idx; + struct kho_radix_leaf *leaf; + int err = 0; + + if (WARN_ON_ONCE(!tree->root)) + return -EINVAL; + + might_sleep(); + + guard(mutex)(&tree->lock); + + /* Go from high levels to low levels */ + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + idx = kho_radix_get_table_index(key, i); + + if (node->table[idx]) { + node = phys_to_virt(node->table[idx]); + continue; + } + + /* Next node is empty, create a new node for it */ + new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL); + if (!new_node) { + err = -ENOMEM; + goto err_free_nodes; + } + + node->table[idx] = virt_to_phys(new_node); + + /* + * Capture the node where the new branch starts for cleanup + * if allocation fails. + */ + if (!anchor_node) { + anchor_node = node; + anchor_idx = idx; + } + intermediate_nodes[i] = new_node; + + node = new_node; + } + + /* Handle the leaf level bitmap (level 0) */ + idx = kho_radix_get_bitmap_index(key); + leaf = (struct kho_radix_leaf *)node; + __set_bit(idx, leaf->bitmap); + + return 0; + +err_free_nodes: + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + if (intermediate_nodes[i]) + free_page((unsigned long)intermediate_nodes[i]); + } + if (anchor_node) + anchor_node->table[anchor_idx] = 0; + + return err; +} +EXPORT_SYMBOL_GPL(kho_radix_add_page); + +/** + * kho_radix_del_page - Removes a page's preservation status from the radix tree. + * @tree: The KHO radix tree. + * @pfn: The page frame number of the page to unpreserve. + * @order: The order of the page. + * + * This function traverses the radix tree and clears the bit corresponding to + * the page, effectively removing its "preserved" status. It does not free + * the tree's intermediate nodes, even if they become empty. + */ +void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn, + unsigned int order) +{ + unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); + struct kho_radix_node *node = tree->root; + struct kho_radix_leaf *leaf; + unsigned int i, idx; + + if (WARN_ON_ONCE(!tree->root)) + return; + + might_sleep(); + + guard(mutex)(&tree->lock); + + /* Go from high levels to low levels */ + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + idx = kho_radix_get_table_index(key, i); + + /* + * Attempting to delete a page that has not been preserved, + * return with a warning. + */ + if (WARN_ON(!node->table[idx])) + return; + + node = phys_to_virt(node->table[idx]); + } + + /* Handle the leaf level bitmap (level 0) */ + leaf = (struct kho_radix_leaf *)node; + idx = kho_radix_get_bitmap_index(key); + __clear_bit(idx, leaf->bitmap); +} +EXPORT_SYMBOL_GPL(kho_radix_del_page); + +static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf, + unsigned long key, + kho_radix_tree_walk_callback_t cb) +{ + unsigned long *bitmap = (unsigned long *)leaf; + unsigned int order; + phys_addr_t phys; + unsigned int i; + int err; + + for_each_set_bit(i, bitmap, PAGE_SIZE * BITS_PER_BYTE) { + phys = kho_radix_decode_key(key | i, &order); + err = cb(phys, order); + if (err) + return err; + } + + return 0; +} + +static int __kho_radix_walk_tree(struct kho_radix_node *root, + unsigned int level, unsigned long start, + kho_radix_tree_walk_callback_t cb) +{ + struct kho_radix_node *node; + struct kho_radix_leaf *leaf; + unsigned long key, i; + unsigned int shift; + int err; + + for (i = 0; i < PAGE_SIZE / sizeof(phys_addr_t); i++) { + if (!root->table[i]) + continue; + + shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) + + KHO_BITMAP_SIZE_LOG2; + key = start | (i << shift); + + node = phys_to_virt(root->table[i]); + + if (level == 1) { + /* + * we are at level 1, + * node is pointing to the level 0 bitmap. + */ + leaf = (struct kho_radix_leaf *)node; + err = kho_radix_walk_leaf(leaf, key, cb); + } else { + err = __kho_radix_walk_tree(node, level - 1, + key, cb); + } + + if (err) + return err; + } + + return 0; +} + +/** + * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page. + * @tree: A pointer to the KHO radix tree to walk. + * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be + * invoked for each preserved page found in the tree. The callback receives + * the physical address and order of the preserved page. + * + * This function walks the radix tree, searching from the specified top level + * down to the lowest level (level 0). For each preserved page found, it invokes + * the provided callback, passing the page's physical address and order. + * + * Return: 0 if the walk completed the specified tree, or the non-zero return + * value from the callback that stopped the walk. + */ +int kho_radix_walk_tree(struct kho_radix_tree *tree, + kho_radix_tree_walk_callback_t cb) +{ + if (WARN_ON_ONCE(!tree->root)) + return -EINVAL; + + guard(mutex)(&tree->lock); + + return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb); +} +EXPORT_SYMBOL_GPL(kho_radix_walk_tree); -- 2.53.0

