Here is an example of how you might use an SPM memory node. If compressed RAM
is available (indicated here by a bit set in mt_spm_nodelist), we skip the
entire software compression process, memcpy directly to a compressed memory
folio, and store the newly allocated compressed memory page as the zswap
entry->handle.
On decompress we do the opposite: copy directly from the stored page to the
destination, and free the compressed memory page.

Note: We do not integrate any compressed memory device checks at this point
because this is a stand-in to demonstrate how the SPM node allocation
mechanism works. See the "TODO" comment in `zswap_compress_direct()` for
more details.

In reality, we would want to move this mechanism out of zswap into its own
component (cram.c?), and enable a more direct migrate_page() call that
actually re-maps the page read-only into any mappings, and then provides a
write-fault handler which promotes the page on write. (Similar to a NUMA
hint fault, but only on write access; a rough sketch follows the diff.)
This prevents runaway compression-ratio failures, since the compression
ratio would be checked on allocation, rather than allowed to silently
decrease on writes until the device becomes unstable.

Signed-off-by: Gregory Price <[email protected]>
---
 mm/zswap.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index c1af782e54ec..e6f48a4e90f1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -25,6 +25,7 @@
 #include <linux/scatterlist.h>
 #include <linux/mempolicy.h>
 #include <linux/mempool.h>
+#include <linux/memory-tiers.h>
 #include <crypto/acompress.h>
 #include <linux/zswap.h>
 #include <linux/mm_types.h>
@@ -191,6 +192,7 @@ struct zswap_entry {
 	swp_entry_t swpentry;
 	unsigned int length;
 	bool referenced;
+	bool direct;
 	struct zswap_pool *pool;
 	unsigned long handle;
 	struct obj_cgroup *objcg;
@@ -717,7 +719,8 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
 static void zswap_entry_free(struct zswap_entry *entry)
 {
 	zswap_lru_del(&zswap_list_lru, entry);
-	zs_free(entry->pool->zs_pool, entry->handle);
+	if (!entry->direct)
+		zs_free(entry->pool->zs_pool, entry->handle);
 	zswap_pool_put(entry->pool);
 	if (entry->objcg) {
 		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -851,6 +854,43 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
 	mutex_unlock(&acomp_ctx->mutex);
 }
 
+static struct page *zswap_compress_direct(struct page *src,
+					  struct zswap_entry *entry)
+{
+	int nid = first_node(mt_spm_nodelist);
+	struct page *dst;
+	gfp_t gfp;
+
+	if (nid >= MAX_NUMNODES)
+		return NULL;
+
+	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE |
+	      __GFP_SPM_NODE;
+	dst = __alloc_pages(gfp, 0, nid, &mt_spm_nodelist);
+	if (!dst)
+		return NULL;
+
+	/*
+	 * TODO: check that the page is safe to use
+	 *
+	 * In a real implementation, we would not be using ZSWAP to demonstrate this
+	 * and instead would implement a new component (compressed_ram, cram.c?)
+	 *
+	 * At this point we would check via some callback that the device's memory
+	 * is actually safe to use - and if not, free the page (without writing to
+	 * it), and kick off kswapd for that node to make room.
+	 *
+	 * Alternatively, if the compressed memory device(s) report a watermark
+	 * crossing via interrupt, a flag can be set that is checked here rather
+	 * than calling back into a device driver.
+	 *
+	 * In this case, we're testing with normal memory, so the memory is always
+	 * safe to use (i.e. no compression ratio to worry about).
+	 */
+	copy_mc_highpage(dst, src);
+	return dst;
+}
+
 static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 			   struct zswap_pool *pool)
 {
@@ -862,6 +902,19 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 	gfp_t gfp;
 	u8 *dst;
 	bool mapped = false;
+	struct page *zpage;
+
+	/* Try to shunt directly to compressed ram */
+	if (!nodes_empty(mt_spm_nodelist)) {
+		zpage = zswap_compress_direct(page, entry);
+		if (zpage) {
+			entry->handle = (unsigned long)zpage;
+			entry->length = PAGE_SIZE;
+			entry->direct = true;
+			return true;
+		}
+		/* otherwise fall back to normal zswap */
+	}
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
 	dst = acomp_ctx->buffer;
@@ -939,6 +992,16 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	int decomp_ret = 0, dlen = PAGE_SIZE;
 	u8 *src, *obj;
 
+	/* compressed ram page */
+	if (entry->direct) {
+		struct page *zpage = (struct page *)entry->handle;
+		struct folio *zfolio = page_folio(zpage);
+
+		memcpy_folio(folio, 0, zfolio, 0, PAGE_SIZE);
+		__free_page(zpage);
+		goto direct_done;
+	}
+
 	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
 	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
@@ -972,6 +1035,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	zs_obj_read_end(pool->zs_pool, entry->handle, obj);
 	acomp_ctx_put_unlock(acomp_ctx);
 
+direct_done:
 	if (!decomp_ret && dlen == PAGE_SIZE)
 		return true;
-- 
2.51.1
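
For illustration, the promote-on-write path described above might look
roughly like the sketch below. It is not part of this patch: the hook point
is assumed to be the write-fault path, spm_promote_on_write() and
spm_migrate_folio_to_dram() are hypothetical names, and a real version would
also need to handle THP, shared mappings, and migration failure.

static vm_fault_t spm_promote_on_write(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);

	/*
	 * The store that triggered this fault would change the page
	 * contents and therefore the device-side compression ratio, so
	 * promote the data to normal RAM before the mapping is made
	 * writable. Something like the NUMA hint fault migration
	 * machinery (migrate_misplaced_folio()) could be reused here.
	 */
	if (spm_migrate_folio_to_dram(folio))	/* hypothetical helper */
		return VM_FAULT_RETRY;		/* promotion failed; refault */

	/* The PTE now points at normal RAM and can be made writable. */
	return 0;
}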

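Similarly, the watermark-interrupt alternative mentioned in the TODO comment
would reduce the hot-path safety check to a flag read. A minimal sketch, with
spm_node_full, struct spm_device, and spm_device_watermark_irq() all
hypothetical:

/* Hypothetical per-node "device is near its compression limit" flag. */
static atomic_t spm_node_full[MAX_NUMNODES];

/* Driver IRQ handler: the device reported a watermark crossing. */
static irqreturn_t spm_device_watermark_irq(int irq, void *data)
{
	struct spm_device *spm = data;	/* hypothetical driver state */

	atomic_set(&spm_node_full[spm->nid], spm->above_watermark);
	return IRQ_HANDLED;
}

/* Cheap check for zswap_compress_direct() before touching the page. */
static bool spm_node_safe_to_use(int nid)
{
	return !atomic_read(&spm_node_full[nid]);
}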
