Here is an example of how you might use a protected memory node. We hack in an mt_compressed_nodelist to memory-tiers.c as a stand-in for a proper compressed-ram component, and use that nodelist to determine if compressed ram is available in the zswap_compress function.
If there is compressed ram available, we skip the entire software compression process and shunt memcpy directly to a compressed memory folio, and store the newly allocated compressed memory page as the zswap entry->handle. On decompress we do the opposite: copy directly from the stored compressed page to the new destination, and free the compressed memory page. Note: We do not integrate any compressed memory device checks at this point because this is a stand-in to demonstrate how the protected node allocation mechanism works. See the "TODO" comment in `zswap_compress_direct()` for more details on how that would work. In reality, we would want to move this mechanism out of zswap into its own component (cram.c?), and enable a more direct migrate_page() call that actually re-maps the page read-only into any mappings, and then provides a write-fault handler which promotes the page on write. This prevents any run-away compression ratio failures, since the compression ratio would be checked on allocation, rather than allowed to silently decrease on writes until the device becomes unstable. Signed-off-by: Gregory Price <[email protected]> --- include/linux/memory-tiers.h | 1 + mm/memory-tiers.c | 3 ++ mm/memory_hotplug.c | 2 ++ mm/zswap.c | 65 +++++++++++++++++++++++++++++++++++- 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 3d3f3687d134..ff2ab7990e8f 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -42,6 +42,7 @@ extern nodemask_t default_dram_nodes; extern nodemask_t default_sysram_nodelist; #define default_sysram_nodes (nodes_empty(default_sysram_nodelist) ? 
NULL : \ &default_sysram_nodelist) +extern nodemask_t mt_compressed_nodelist; struct memory_dev_type *alloc_memory_type(int adistance); void put_memory_type(struct memory_dev_type *memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index b2ee4f73ad54..907635611f17 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -51,6 +51,9 @@ nodemask_t default_dram_nodes = NODE_MASK_NONE; /* default_sysram_nodelist is the list of nodes with RAM at __init time */ nodemask_t default_sysram_nodelist = NODE_MASK_NONE; +/* compressed memory nodes */ +nodemask_t mt_compressed_nodelist = NODE_MASK_NONE; + static const struct bus_type memory_tier_subsys = { .name = "memory_tiering", .dev_name = "memory_tier", diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ceab56b7231d..8fcd894de93c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1592,6 +1592,8 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) /* At this point if not protected, we can add node to sysram nodes */ if (!(mhp_flags & MHP_PROTECTED_MEMORY)) node_set(nid, *default_sysram_nodes); + else /* HACK: We would create a proper interface for something like this */ + node_set(nid, mt_compressed_nodelist); /* create new memmap entry */ if (!strcmp(res->name, "System RAM")) diff --git a/mm/zswap.c b/mm/zswap.c index c1af782e54ec..09010ba2440c 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -25,6 +25,7 @@ #include <linux/scatterlist.h> #include <linux/mempolicy.h> #include <linux/mempool.h> +#include <linux/memory-tiers.h> #include <crypto/acompress.h> #include <linux/zswap.h> #include <linux/mm_types.h> @@ -191,6 +192,7 @@ struct zswap_entry { swp_entry_t swpentry; unsigned int length; bool referenced; + bool direct; struct zswap_pool *pool; unsigned long handle; struct obj_cgroup *objcg; @@ -717,7 +719,8 @@ static void zswap_entry_cache_free(struct zswap_entry *entry) static void 
zswap_entry_free(struct zswap_entry *entry) { zswap_lru_del(&zswap_list_lru, entry); - zs_free(entry->pool->zs_pool, entry->handle); + if (!entry->direct) + zs_free(entry->pool->zs_pool, entry->handle); zswap_pool_put(entry->pool); if (entry->objcg) { obj_cgroup_uncharge_zswap(entry->objcg, entry->length); @@ -851,6 +854,43 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx) mutex_unlock(&acomp_ctx->mutex); } +static struct page *zswap_compress_direct(struct page *src, + struct zswap_entry *entry) +{ + int nid = first_node(mt_compressed_nodelist); + struct page *dst; + gfp_t gfp; + + if (nid == NUMA_NO_NODE) + return NULL; + + gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE | + __GFP_PROTECTED; + dst = __alloc_pages(gfp, 0, nid, &mt_compressed_nodelist); + if (!dst) + return NULL; + + /* + * TODO: check that the page is safe to use + * + * In a real implementation, we would not be using ZSWAP to demonstrate this + * and instead would implement a new component (compressed_ram, cram.c?) + * + * At this point we would check via some callback that the device's memory + * is actually safe to use - and if not, free the page (without writing to + * it), and kick off kswapd for that node to make room. + * + * Alternatively, if the compressed memory device(s) report a watermark + * crossing via interrupt, a flag can be set that is checked here rather + * than calling back into a device driver. + * + * In this case, we're testing with normal memory, so the memory is always + * safe to use (i.e. no compression ratio to worry about). 
+ */ + copy_mc_highpage(dst, src); + return dst; +} + static bool zswap_compress(struct page *page, struct zswap_entry *entry, struct zswap_pool *pool) { @@ -862,6 +902,19 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, gfp_t gfp; u8 *dst; bool mapped = false; + struct page *zpage; + + /* Try to shunt directly to compressed ram */ + if (!nodes_empty(mt_compressed_nodelist)) { + zpage = zswap_compress_direct(page, entry); + if (zpage) { + entry->handle = (unsigned long)zpage; + entry->length = PAGE_SIZE; + entry->direct = true; + return true; + } + /* otherwise fallback to normal zswap */ + } acomp_ctx = acomp_ctx_get_cpu_lock(pool); dst = acomp_ctx->buffer; @@ -939,6 +992,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) int decomp_ret = 0, dlen = PAGE_SIZE; u8 *src, *obj; + /* compressed ram page */ + if (entry->direct) { + struct page *src = (struct page*)entry->handle; + struct folio *zfolio = page_folio(src); + memcpy_folio(folio, 0, zfolio, 0, PAGE_SIZE); + __free_page(src); + goto direct_done; + } + acomp_ctx = acomp_ctx_get_cpu_lock(pool); obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer); @@ -972,6 +1034,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) zs_obj_read_end(pool->zs_pool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); +direct_done: if (!decomp_ret && dlen == PAGE_SIZE) return true; -- 2.51.1

