This is intended to replace the previous zram driver.
zram users can enable this feature; a pseudo device will then be created
automatically after kernel boot.
Just run "mkswap /dev/zram0; swapon /dev/zram0" to use it as a swap disk.

The size of this pseudo device is controlled by the zswap boot parameter
zswap.max_pool_percent:
disksize = (totalram_pages * zswap.max_pool_percent / 100) * PAGE_SIZE.

Signed-off-by: Bob Liu <bob....@oracle.com>
---
 mm/Kconfig |   12 ++++
 mm/zswap.c |  196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index d80a575..3778026 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -525,6 +525,18 @@ choice
          be refused unless frontswap_get happened and freed some space.
 endchoice
 
+config ZSWAP_PSEUDO_BLKDEV
+       bool "Emulate a pseudo blk-dev based on zswap(previous zram)"
+       depends on ZSWAP && ZSMALLOC
+       default n
+
+       help
+         Enabling this option emulates a pseudo block swap device, /dev/zram0,
+         whose size is zswap.max_pool_percent of total RAM. All writes to this
+         block device are compressed and cached by zswap, so no real disk I/O
+         happens.
+         This feature can be used to replace drivers/staging/zram.
+
 config MEM_SOFT_DIRTY
        bool "Track memory changes"
        depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
diff --git a/mm/zswap.c b/mm/zswap.c
index 8e8dc99..ae73c9d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -38,6 +38,11 @@
 #include <linux/zbud.h>
 #else
 #include <linux/zsmalloc.h>
+#ifdef CONFIG_ZSWAP_PSEUDO_BLKDEV
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#endif
 #endif
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -968,6 +973,189 @@ static int __init zswap_debugfs_init(void)
 static void __exit zswap_debugfs_exit(void) { }
 #endif
 
+#ifdef CONFIG_ZSWAP_PSEUDO_BLKDEV
+/* Sector geometry: 512-byte sectors, and how many of them make up one page. */
+#define SECTOR_SHIFT           9
+#define SECTOR_SIZE            (1 << SECTOR_SHIFT)
+#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define SECTORS_PER_PAGE       (1 << SECTORS_PER_PAGE_SHIFT)
+
+/* State of the single pseudo block device that is exposed on top of zswap. */
+struct zram {
+       struct rw_semaphore lock; /* serializes reads and writes of metapage */
+       struct request_queue *queue;
+       struct gendisk *disk;
+
+       /*
+        * This is the disk size for userland. The size is controlled by
+        * boot parameter zswap.max_pool_percent.
+        * disksize = (totalram_pages * zswap.max_pool_percent/100)*PAGE_SIZE
+        */
+       u64 disksize;   /* bytes */
+
+       /*
+        * Backing store for page 0 of /dev/zramX.
+        * The only meaningful operations on the device are mkswap and
+        * swapon/swapoff, so a single page is enough to hold the real data
+        * (written by mkswap). I/O to any other page is not stored here.
+        */
+       struct page *metapage;
+};
+
+/*
+ * Only /dev/zram0 is created; this can be extended in the future if real
+ * use cases need multiple zram devices.
+ */
+static struct zram zram_device;
+/* No block-device methods are implemented; only the owning module is set. */
+static const struct block_device_operations zram_devops = {
+       .owner = THIS_MODULE
+};
+
+/*
+ * Advance the (page index, byte offset) cursor past one bio segment.
+ * The index moves on by one when the segment reaches or crosses a page
+ * boundary; the offset is always reduced modulo PAGE_SIZE.
+ */
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+       unsigned int end = *offset + bvec->bv_len;
+
+       *index += (end >= PAGE_SIZE);
+       *offset = end % PAGE_SIZE;
+}
+
+/*
+ * Bio handler for the pseudo device.
+ *
+ * Only page 0 is really stored (in zram->metapage), because the only
+ * operations expected on /dev/zramX are mkswap and swapon/swapoff:
+ *  - reads/writes of page 0 are copied from/to the metapage under zram->lock;
+ *  - reads of any other page return zeroes, so that stale memory is never
+ *    handed back as valid data;
+ *  - writes to any other page are discarded.
+ *
+ * NOTE(review): discarding writes presumably relies on zswap's frontswap
+ * store having already captured the swapped-out page - confirm what happens
+ * to the data when zswap rejects a store.
+ *
+ * Segments that are not exactly one page-aligned page are not supported and
+ * fail the bio with -EIO rather than crashing the kernel.
+ */
+static void zram_make_request(struct request_queue *queue, struct bio *bio)
+{
+       u32 index;
+       struct bio_vec *bvec;
+       unsigned char *data, *meta;
+       int offset, i, rw = bio_data_dir(bio);
+       struct zram *zram = queue->queuedata;
+
+       index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+       offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+
+       bio_for_each_segment(bvec, bio, i) {
+               /* The bio geometry is not under our control: fail, don't BUG. */
+               if (bvec->bv_len != PAGE_SIZE || offset)
+                       goto out_error;
+
+               if (!index && rw == READ) {
+                       down_read(&zram->lock);
+                       data = kmap_atomic(bvec->bv_page);
+                       meta = kmap_atomic(zram->metapage);
+                       memcpy(data, meta, bvec->bv_len);
+                       /* kmap_atomic() is stack-like: unmap in reverse order */
+                       kunmap_atomic(meta);
+                       kunmap_atomic(data);
+                       flush_dcache_page(bvec->bv_page);
+                       up_read(&zram->lock);
+               } else if (!index) {
+                       down_write(&zram->lock);
+                       data = kmap_atomic(bvec->bv_page);
+                       meta = kmap_atomic(zram->metapage);
+                       memcpy(meta, data, bvec->bv_len);
+                       kunmap_atomic(meta);
+                       kunmap_atomic(data);
+                       up_write(&zram->lock);
+               } else if (rw == READ) {
+                       /* Nothing is stored for this page: return zeroes. */
+                       data = kmap_atomic(bvec->bv_page);
+                       memset(data, 0, bvec->bv_len);
+                       kunmap_atomic(data);
+                       flush_dcache_page(bvec->bv_page);
+               }
+               update_position(&index, &offset, bvec);
+       }
+       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       bio_endio(bio, 0);
+       return;
+
+out_error:
+       bio_endio(bio, -EIO);
+}
+
+/*
+ * Set up the pseudo block device: lock, metapage, request queue and gendisk.
+ *
+ * @zram:      device state to initialize
+ * @major:     block major number the disk is registered under
+ * @device_id: minor number, also used to build the "zram%d" disk name
+ *
+ * Everything the I/O path touches (lock, metapage, queue limits, capacity)
+ * is initialized before add_disk(), because the disk may receive requests as
+ * soon as it has been added.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int create_zram_device(struct zram *zram, int major, int device_id)
+{
+       int ret = -ENOMEM;
+       u64 disksize;
+
+       init_rwsem(&zram->lock);
+
+       /* Zeroed so that a read before the first write exposes no stale data */
+       zram->metapage = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       if (!zram->metapage) {
+               pr_debug("Init zram meta pages fail!\n");
+               goto out;
+       }
+
+       zram->queue = blk_alloc_queue(GFP_KERNEL);
+       if (!zram->queue) {
+               pr_err("Error allocating disk queue for device%d\n", device_id);
+               goto out_free_page;
+       }
+
+       blk_queue_make_request(zram->queue, zram_make_request);
+       zram->queue->queuedata = zram;
+
+       /* gendisk structure */
+       zram->disk = alloc_disk(1);
+       if (!zram->disk) {
+               pr_warn("Error allocating disk structure for device %d\n",
+                       device_id);
+               goto out_free_queue;
+       }
+
+       zram->disk->major = major;
+       zram->disk->first_minor = device_id;
+       zram->disk->fops = &zram_devops;
+       zram->disk->queue = zram->queue;
+       snprintf(zram->disk->disk_name, sizeof(zram->disk->disk_name),
+                "zram%d", device_id);
+
+       /*
+        * To ensure that we always get PAGE_SIZE aligned
+        * and n*PAGE_SIZED sized I/O requests.
+        */
+       blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
+       blk_queue_logical_block_size(zram->disk->queue, 1<<12);
+       blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
+       blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
+
+       /* zram devices sort of resembles non-rotational disks */
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
+
+       /* disksize = (totalram_pages * zswap.max_pool_percent/100)*PAGE_SIZE */
+       disksize = totalram_pages * zswap_max_pool_percent / 100;
+       disksize *= PAGE_SIZE;
+       disksize = PAGE_ALIGN(disksize);
+       zram->disksize = disksize;
+       set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+
+       /* Last step: from here on the device is visible and may see I/O. */
+       add_disk(zram->disk);
+
+       pr_debug("Initialization done!\n");
+       return 0;
+
+out_free_queue:
+       blk_cleanup_queue(zram->queue);
+out_free_page:
+       __free_page(zram->metapage);
+       zram->metapage = NULL;
+out:
+       return ret;
+}
+
+/*
+ * Register the "zram" block major and create the single pseudo device
+ * (minor 0).
+ *
+ * Returns 0 on success, -EBUSY if no major number could be obtained, or the
+ * error returned by create_zram_device().
+ */
+static int zswap_blkdev_init(void)
+{
+       int err;
+       int major = register_blkdev(0, "zram");
+
+       if (major <= 0) {
+               pr_warn("Unable to get major number\n");
+               return -EBUSY;
+       }
+
+       err = create_zram_device(&zram_device, major, 0);
+       if (err) {
+               /* Give the major number back if the device cannot be built. */
+               unregister_blkdev(major, "zram");
+               return err;
+       }
+
+       pr_info("Created zram device(%d, %d).\n", major, 0);
+       return 0;
+}
+#else
+/* Stub used when CONFIG_ZSWAP_PSEUDO_BLKDEV is disabled: nothing to set up. */
+static int zswap_blkdev_init(void)
+{
+       return 0;
+}
+#endif
+
 /*********************************
 * module init and exit
 **********************************/
@@ -989,9 +1177,17 @@ static int __init init_zswap(void)
                pr_err("per-cpu initialization failed\n");
                goto pcpufail;
        }
+
+       if (IS_ENABLED(CONFIG_ZSWAP_PSEUDO_BLKDEV))
+               if (zswap_blkdev_init()) {
+                       pr_err("emulate blk device failed\n");
+                       goto pcpufail;
+               }
+
        frontswap_register_ops(&zswap_frontswap_ops);
        if (zswap_debugfs_init())
                pr_warn("debugfs initialization failed\n");
+
        return 0;
 pcpufail:
        zswap_comp_exit();
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to