Here is an example of how you might use a protected memory node.

We hack an mt_compressed_nodelist into memory-tiers.c as a stand-in
for a proper compressed-ram component, and use that nodelist to
determine whether compressed ram is available in the zswap_compress
function.

If there is compressed ram available, we skip the entire software
compression process and memcpy the page directly into a compressed
memory folio, and store the newly allocated compressed memory page as
the zswap entry->handle.

On decompress we do the opposite: copy directly from the stored
compressed page to the new destination, and free the compressed
memory page.

Note: We do not integrate any compressed memory device checks at
this point because this is a stand-in to demonstrate how the protected
node allocation mechanism works.  See the "TODO" comment in
`zswap_compress_direct()` for more details on how that would work.

In reality, we would want to move this mechanism out of zswap into
its own component (cram.c?), and enable a more direct migrate_page()
call that actually re-maps the page read-only into any mappings, and
then provides a write-fault handler which promotes the page on write.

This prevents any run-away compression ratio failures, since the
compression ratio would be checked on allocation, rather than allowed
to silently decrease on writes until the device becomes unstable.

Signed-off-by: Gregory Price <[email protected]>
---
 include/linux/memory-tiers.h |  1 +
 mm/memory-tiers.c            |  3 ++
 mm/memory_hotplug.c          |  2 ++
 mm/zswap.c                   | 65 +++++++++++++++++++++++++++++++++++-
 4 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 3d3f3687d134..ff2ab7990e8f 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -42,6 +42,7 @@ extern nodemask_t default_dram_nodes;
 extern nodemask_t default_sysram_nodelist;
 #define default_sysram_nodes (nodes_empty(default_sysram_nodelist) ? NULL : \
                              &default_sysram_nodelist)
+extern nodemask_t mt_compressed_nodelist;
 struct memory_dev_type *alloc_memory_type(int adistance);
 void put_memory_type(struct memory_dev_type *memtype);
 void init_node_memory_type(int node, struct memory_dev_type *default_type);
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index b2ee4f73ad54..907635611f17 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -51,6 +51,9 @@ nodemask_t default_dram_nodes = NODE_MASK_NONE;
 /* default_sysram_nodelist is the list of nodes with RAM at __init time */
 nodemask_t default_sysram_nodelist = NODE_MASK_NONE;
 
+/* compressed memory nodes */
+nodemask_t mt_compressed_nodelist = NODE_MASK_NONE;
+
 static const struct bus_type memory_tier_subsys = {
        .name = "memory_tiering",
        .dev_name = "memory_tier",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ceab56b7231d..8fcd894de93c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1592,6 +1592,8 @@ int add_memory_resource(int nid, struct resource *res, 
mhp_t mhp_flags)
        /* At this point if not protected, we can add node to sysram nodes */
        if (!(mhp_flags & MHP_PROTECTED_MEMORY))
                node_set(nid, *default_sysram_nodes);
+       else /* HACK: We would create a proper interface for something like 
this */
+               node_set(nid, mt_compressed_nodelist);
 
        /* create new memmap entry */
        if (!strcmp(res->name, "System RAM"))
diff --git a/mm/zswap.c b/mm/zswap.c
index c1af782e54ec..09010ba2440c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -25,6 +25,7 @@
 #include <linux/scatterlist.h>
 #include <linux/mempolicy.h>
 #include <linux/mempool.h>
+#include <linux/memory-tiers.h>
 #include <crypto/acompress.h>
 #include <linux/zswap.h>
 #include <linux/mm_types.h>
@@ -191,6 +192,7 @@ struct zswap_entry {
        swp_entry_t swpentry;
        unsigned int length;
        bool referenced;
+       bool direct;
        struct zswap_pool *pool;
        unsigned long handle;
        struct obj_cgroup *objcg;
@@ -717,7 +719,8 @@ static void zswap_entry_cache_free(struct zswap_entry 
*entry)
 static void zswap_entry_free(struct zswap_entry *entry)
 {
        zswap_lru_del(&zswap_list_lru, entry);
-       zs_free(entry->pool->zs_pool, entry->handle);
+       if (!entry->direct)
+               zs_free(entry->pool->zs_pool, entry->handle);
        zswap_pool_put(entry->pool);
        if (entry->objcg) {
                obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -851,6 +854,43 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx 
*acomp_ctx)
        mutex_unlock(&acomp_ctx->mutex);
 }
 
+/*
+ * Copy @src into a page allocated from mt_compressed_nodelist.  Returns the
+ * new page, or NULL if no node or page is available or the copy fails.
+ * @entry is currently unused.
+ */
+static struct page *zswap_compress_direct(struct page *src,
+                                         struct zswap_entry *entry)
+{
+       int nid = first_node(mt_compressed_nodelist);
+       struct page *dst;
+       gfp_t gfp;
+
+       /* first_node() returns MAX_NUMNODES, not NUMA_NO_NODE, if empty */
+       if (nid >= MAX_NUMNODES)
+               return NULL;
+
+       gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE |
+             __GFP_PROTECTED;
+       dst = __alloc_pages(gfp, 0, nid, &mt_compressed_nodelist);
+       if (!dst)
+               return NULL;
+
+       /*
+        * TODO: check that the page is safe to use.  A real compressed-ram
+        * component (cram.c?) would ask the device, via a callback or a
+        * flag set on a watermark interrupt, whether its memory is safe to
+        * use; if not, free the page without writing to it and kick off
+        * kswapd for that node.  Normal memory, as here, is always safe.
+        */
+       if (copy_mc_highpage(dst, src)) {
+               /* nonzero means the copy did not complete; drop the page */
+               __free_page(dst);
+               return NULL;
+       }
+       return dst;
+}
+
 static bool zswap_compress(struct page *page, struct zswap_entry *entry,
                           struct zswap_pool *pool)
 {
@@ -862,6 +902,19 @@ static bool zswap_compress(struct page *page, struct 
zswap_entry *entry,
        gfp_t gfp;
        u8 *dst;
        bool mapped = false;
+       struct page *zpage;
+
+       /* Try to shunt directly to compressed ram */
+       if (!nodes_empty(mt_compressed_nodelist)) {
+               zpage = zswap_compress_direct(page, entry);
+               if (zpage) {
+                       entry->handle = (unsigned long)zpage;
+                       entry->length = PAGE_SIZE;
+                       entry->direct = true;
+                       return true;
+               }
+               /* otherwise fallback to normal zswap */
+       }
 
        acomp_ctx = acomp_ctx_get_cpu_lock(pool);
        dst = acomp_ctx->buffer;
@@ -939,6 +992,15 @@ static bool zswap_decompress(struct zswap_entry *entry, 
struct folio *folio)
        int decomp_ret = 0, dlen = PAGE_SIZE;
        u8 *src, *obj;
 
+       /* compressed ram page */
+       if (entry->direct) {
+               struct page *src = (struct page*)entry->handle;
+               struct folio *zfolio = page_folio(src);
+               memcpy_folio(folio, 0, zfolio, 0, PAGE_SIZE);
+               __free_page(src);
+               goto direct_done;
+       }
+
        acomp_ctx = acomp_ctx_get_cpu_lock(pool);
        obj = zs_obj_read_begin(pool->zs_pool, entry->handle, 
acomp_ctx->buffer);
 
@@ -972,6 +1034,7 @@ static bool zswap_decompress(struct zswap_entry *entry, 
struct folio *folio)
        zs_obj_read_end(pool->zs_pool, entry->handle, obj);
        acomp_ctx_put_unlock(acomp_ctx);
 
+direct_done:
        if (!decomp_ret && dlen == PAGE_SIZE)
                return true;
 
-- 
2.51.1


Reply via email to