This adds two different drivers inside drivers/char/ directory under two
new kernel config options COHERENT_HOTPLUG_DEMO and COHERENT_MEMORY_DEMO.

1) coherent_hotplug_demo: Detects, hotplugs the coherent device memory
2) coherent_memory_demo:  Exports debugfs interface for VMA migrations

Signed-off-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>
---
 drivers/char/Kconfig                 |  23 +++
 drivers/char/Makefile                |   2 +
 drivers/char/coherent_hotplug_demo.c | 133 ++++++++++++++
 drivers/char/coherent_memory_demo.c  | 337 +++++++++++++++++++++++++++++++++++
 drivers/char/memory_online_sysfs.h   | 148 +++++++++++++++
 mm/migrate.c                         |  14 +-
 6 files changed, 656 insertions(+), 1 deletion(-)
 create mode 100644 drivers/char/coherent_hotplug_demo.c
 create mode 100644 drivers/char/coherent_memory_demo.c
 create mode 100644 drivers/char/memory_online_sysfs.h

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index dcc0973..22c538d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -588,6 +588,29 @@ config TILE_SROM
          device appear much like a simple EEPROM, and knows
          how to partition a single ROM for multiple purposes.
 
+config COHERENT_HOTPLUG_DEMO
+       tristate "Demo driver to test coherent memory node hotplug"
+       depends on PPC64 || COHERENT_DEVICE
+       default n
+       help
+         Say yes when you want to build a test driver to hotplug all
+         the coherent memory nodes present on the system. This driver
+         scans through the device tree, checks on "ibm,memory-device"
+         property device nodes and onlines its memory. When unloaded,
+         it goes through the list of memory ranges it onlined before
+         and offlines them one by one. If not sure, select N.
+
+config COHERENT_MEMORY_DEMO
+       tristate "Demo driver to test coherent memory node functionality"
+       depends on PPC64 || COHERENT_DEVICE
+       default n
+       help
+         Say yes when you want to build a test driver to demonstrate
+         the coherent memory functionalities, capabilities and probable
+         utilization. It also exports a debugfs file to accept inputs for
+         virtual address range migration for any process. If not sure,
+         select N.
+
 source "drivers/char/xillybus/Kconfig"
 
 endmenu
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 6e6c244..92fa338 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -60,3 +60,5 @@ js-rtc-y = rtc.o
 obj-$(CONFIG_TILE_SROM)                += tile-srom.o
 obj-$(CONFIG_XILLYBUS)         += xillybus/
 obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o
+obj-$(CONFIG_COHERENT_HOTPLUG_DEMO)    += coherent_hotplug_demo.o
+obj-$(CONFIG_COHERENT_MEMORY_DEMO)     += coherent_memory_demo.o
diff --git a/drivers/char/coherent_hotplug_demo.c 
b/drivers/char/coherent_hotplug_demo.c
new file mode 100644
index 0000000..3670081
--- /dev/null
+++ b/drivers/char/coherent_hotplug_demo.c
@@ -0,0 +1,133 @@
+/*
+ * Memory hotplug support for coherent memory nodes in runtime.
+ *
+ * Copyright (C) 2016, Reza Arbab, IBM Corporation.
+ * Copyright (C) 2016, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/of.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/memory.h>
+#include <linux/sizes.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include "memory_online_sysfs.h"
+
+#define MAX_HOTADD_NODES 100
+phys_addr_t addr[MAX_HOTADD_NODES][2];
+int nr_addr;
+
+/*
+ * extern int memory_failure(unsigned long pfn, int trapno, int flags);
+ * extern int min_free_kbytes;
+ * extern int user_min_free_kbytes;
+ *
+ * extern unsigned long nr_kernel_pages;
+ * extern unsigned long nr_all_pages;
+ * extern unsigned long dma_reserve;
+ */
+
+/*
+ * Placeholder for dumping core VM tunables. The whole body is commented
+ * out, so calling this function currently does nothing.
+ */
+static void dump_core_vm_tunables(void)
+{
+/*
+ *     printk(":::::::: VM TUNABLES :::::::\n");
+ *     printk("[min_free_kbytes]       %d\n", min_free_kbytes);
+ *     printk("[user_min_free_kbytes]  %d\n", user_min_free_kbytes);
+ *     printk("[nr_kernel_pages]       %ld\n", nr_kernel_pages);
+ *     printk("[nr_all_pages]          %ld\n", nr_all_pages);
+ *     printk("[dma_reserve]           %ld\n", dma_reserve);
+ */
+}
+
+
+
+/*
+ * Probe and online every coherent device memory range described in the
+ * device tree.
+ *
+ * For each "ibm,memory-device" compatible node the "memory-region" phandle
+ * is followed and the first <start, size> pair of its "reg" property is
+ * probed and then onlined as movable memory. Every range handled this way is
+ * recorded in addr[] so that it can be offlined again later. A failed probe
+ * or online request is only logged; the range is still recorded.
+ *
+ * Auto-onlining of memory blocks is switched off while the ranges are added
+ * and switched back on before returning, on success and on failure.
+ *
+ * Returns 0 on success, -ENODEV if a node has no "memory-region" phandle and
+ * -EINVAL if the "reg" property is missing or too short. Ranges onlined
+ * before an error was hit are left online.
+ */
+static int online_coherent_memory(void)
+{
+       struct device_node *memory;
+       int err = 0;
+
+       nr_addr = 0;
+       disable_auto_online();
+       dump_core_vm_tunables();
+       for_each_compatible_node(memory, NULL, "ibm,memory-device") {
+               struct device_node *mem;
+               const __be64 *reg;
+               phys_addr_t start, size;
+               int len, ret;
+
+               /* addr[] has a fixed size; never index past its end. */
+               if (nr_addr >= MAX_HOTADD_NODES) {
+                       pr_info("more than %d coherent memory nodes, ignoring the rest\n",
+                               MAX_HOTADD_NODES);
+                       break;
+               }
+
+               mem = of_parse_phandle(memory, "memory-region", 0);
+               if (!mem) {
+                       pr_info("memory-region property not found\n");
+                       err = -ENODEV;
+                       break;
+               }
+
+               /* Two big-endian 64-bit cells are read: start and size. */
+               reg = of_get_property(mem, "reg", &len);
+               if (!reg || len < (int)(2 * sizeof(*reg))) {
+                       pr_info("reg property missing or too short\n");
+                       of_node_put(mem);
+                       err = -EINVAL;
+                       break;
+               }
+               start = be64_to_cpu(reg[0]);
+               size = be64_to_cpu(reg[1]);
+               /* Drop the reference taken by of_parse_phandle(). */
+               of_node_put(mem);
+
+               pr_info("Coherent memory start %llx size %llx\n",
+                       (unsigned long long)start, (unsigned long long)size);
+               ret = memory_probe_store(start, size);
+               if (ret)
+                       pr_info("probe failed\n");
+
+               ret = store_mem_state(start, size, "online_movable");
+               if (ret)
+                       pr_info("online_movable failed\n");
+
+               addr[nr_addr][0] = start;
+               addr[nr_addr][1] = size;
+               nr_addr++;
+       }
+       /*
+        * Leaving the iterator early keeps a reference on the current node.
+        * After a complete walk "memory" is NULL and this is a no-op.
+        */
+       of_node_put(memory);
+
+       dump_core_vm_tunables();
+       enable_auto_online();
+       return err;
+}
+
+/*
+ * Offline every range recorded in addr[].
+ *
+ * This is best effort: a range that fails to go offline is reported and the
+ * remaining ranges are still attempted. Always returns 0.
+ */
+static int offline_coherent_memory(void)
+{
+       int i;
+
+       for (i = 0; i < nr_addr; i++) {
+               if (store_mem_state(addr[i][0], addr[i][1], "offline"))
+                       pr_info("offline failed for range %d\n", i);
+       }
+       return 0;
+}
+
+/* Module unload: log the call and offline the ranges recorded in addr[]. */
+static void __exit coherent_hotplug_exit(void)
+{
+       pr_info("%s\n", __func__);
+       offline_coherent_memory();
+}
+
+/*
+ * Module load: log the call and online the coherent memory ranges. The
+ * result of online_coherent_memory() becomes the module init status.
+ */
+static int __init coherent_hotplug_init(void)
+{
+       pr_info("%s\n", __func__);
+       return online_coherent_memory();
+}
+module_init(coherent_hotplug_init);
+module_exit(coherent_hotplug_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/coherent_memory_demo.c 
b/drivers/char/coherent_memory_demo.c
new file mode 100644
index 0000000..1dcd9f7
--- /dev/null
+++ b/drivers/char/coherent_memory_demo.c
@@ -0,0 +1,337 @@
+/*
+ * Demonstrating various aspects of the coherent memory.
+ *
+ * Copyright (C) 2016, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/of.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/memory.h>
+#include <linux/sizes.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/memblock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+
+#define COHERENT_DEV_MAJOR 89
+#define COHERENT_DEV_NAME  "coherent_memory"
+
+#define CRNT_NODE_NID1 1
+#define CRNT_NODE_NID2 2
+#define CRNT_NODE_NID3 3
+
+#define RAM_CRNT_MIGRATE 1
+#define CRNT_RAM_MIGRATE 2
+
+/* Per-VMA bookkeeping, stored in vma->vm_private_data. */
+struct vma_map_info {
+       struct list_head list;          /* pages added by the fault handler */
+       unsigned long nr_pages;         /* number of pages on @list */
+       spinlock_t lock;                /* taken around @list/@nr_pages updates */
+};
+
+/*
+ * Allocate an empty page-tracking record and attach it to @vma through
+ * vm_private_data. An allocation failure is treated as fatal (BUG_ON).
+ */
+static void vma_map_info_init(struct vm_area_struct *vma)
+{
+       struct vma_map_info *map;
+
+       map = kmalloc(sizeof(*map), GFP_KERNEL);
+       BUG_ON(!map);
+
+       map->nr_pages = 0;
+       spin_lock_init(&map->lock);
+       INIT_LIST_HEAD(&map->list);
+       vma->vm_private_data = map;
+}
+
+/*
+ * vm_ops->open hook, also called directly from coherent_memory_mmap():
+ * gives @vma a fresh, empty vma_map_info record.
+ *
+ * NOTE(review): vma_map_info_init() overwrites vm_private_data without
+ * looking at its previous value - confirm this is safe when the hook runs
+ * for a VMA that already carries a record.
+ */
+static void coherent_vmops_open(struct vm_area_struct *vma)
+{
+       vma_map_info_init(vma);
+}
+
+/*
+ * vm_ops->close hook: unlink and release every page tracked in the VMA's
+ * vma_map_info list, then free the record and clear vm_private_data.
+ *
+ * Each page is page-locked while it is unlinked. If a page lock cannot be
+ * taken, the spinlock is dropped and the whole walk restarts from "again"
+ * after a cond_resched(), so this loops until every page could be locked.
+ */
+static void coherent_vmops_close(struct vm_area_struct *vma)
+{
+       struct vma_map_info *info = vma->vm_private_data;
+
+       BUG_ON(!info);
+again:
+       /* Scheduling point before (re)taking the spinlock. */
+       cond_resched();
+       spin_lock(&info->lock);
+       while (info->nr_pages) {
+               struct page *page, *page2;
+
+               list_for_each_entry_safe(page, page2, &info->list, lru) {
+                       if (!trylock_page(page)) {
+                               /* Page is locked elsewhere: back off and retry. */
+                               spin_unlock(&info->lock);
+                               goto again;
+                       }
+
+                       list_del_init(&page->lru);
+                       info->nr_pages--;
+                       unlock_page(page);
+                       SetPageReclaim(page);
+                       /* Drop one reference on the page. */
+                       put_page(page);
+               }
+               /* Briefly release the lock to allow rescheduling between passes. */
+               spin_unlock(&info->lock);
+               cond_resched();
+               spin_lock(&info->lock);
+       }
+       spin_unlock(&info->lock);
+       kfree(info);
+       vma->vm_private_data = NULL;
+}
+
+/*
+ * Fault handler: back the faulting offset with a freshly allocated page.
+ *
+ * Allocations alternate between CRNT_NODE_NID1 and CRNT_NODE_NID2 on
+ * successive faults; the current choice lives in a function-local static.
+ * The page is linked into the VMA's vma_map_info list and an extra reference
+ * is taken before it is handed back through vmf->page.
+ *
+ * Returns 0 on success or VM_FAULT_SIGBUS if no page could be allocated.
+ */
+static int coherent_vmops_fault(struct vm_area_struct *vma,
+                                       struct vm_fault *vmf)
+{
+       static int coherent_node = CRNT_NODE_NID1;
+       struct vma_map_info *map;
+       struct page *new_page;
+
+       /* Flip the target node before every allocation. */
+       coherent_node = (coherent_node == CRNT_NODE_NID1) ?
+                               CRNT_NODE_NID2 : CRNT_NODE_NID1;
+
+       new_page = alloc_pages_node(coherent_node,
+                               GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+       if (!new_page)
+               return VM_FAULT_SIGBUS;
+
+       map = vma->vm_private_data;
+       BUG_ON(!map);
+
+       /* Track the page so that it can be released when the VMA closes. */
+       spin_lock(&map->lock);
+       list_add(&new_page->lru, &map->list);
+       map->nr_pages++;
+       spin_unlock(&map->lock);
+
+       new_page->index = vmf->pgoff;
+       get_page(new_page);
+       vmf->page = new_page;
+       return 0;
+}
+
+/* VMA callbacks installed on every mapping of the coherent_memory device. */
+static const struct vm_operations_struct coherent_memory_vmops = {
+       .open = coherent_vmops_open,
+       .close = coherent_vmops_close,
+       .fault = coherent_vmops_fault,
+};
+
+/*
+ * mmap handler: log the call, install coherent_memory_vmops on @vma and
+ * invoke the open hook by hand so that the VMA gets its tracking record.
+ * Always returns 0.
+ */
+static int coherent_memory_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       pr_info("Mmap opened (file: %lx vma: %lx)\n",
+                       (unsigned long) file, (unsigned long) vma);
+       vma->vm_ops = &coherent_memory_vmops;
+       coherent_vmops_open(vma);
+       return 0;
+}
+
+/* open handler: only logs the inode and file pointers. Always returns 0. */
+static int coherent_memory_open(struct inode *inode, struct file *file)
+{
+       pr_info("Device opened (inode: %lx file: %lx)\n",
+                       (unsigned long) inode, (unsigned long) file);
+       return 0;
+}
+
+/* release handler: only logs the inode and file pointers. Always returns 0. */
+static int coherent_memory_close(struct inode *inode, struct file *file)
+{
+       pr_info("Device closed (inode: %lx file: %lx)\n",
+                       (unsigned long) inode, (unsigned long) file);
+       return 0;
+}
+
+/*
+ * Migrate the VMA of the current process that contains @addr to node
+ * CRNT_NODE_NID2.
+ *
+ * The VMA bounds are copied while mmap_sem is held, because the VMA must not
+ * be dereferenced once the semaphore has been dropped. The semaphore is
+ * released before migrate_virtual_range() is called. If no VMA covers @addr
+ * a message is logged and nothing is migrated.
+ */
+static void lru_ram_coherent_migrate(unsigned long addr)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long start, end;
+
+       down_read(&mm->mmap_sem);
+       /* find_vma() returns the first VMA ending above addr; vm_end is exclusive. */
+       vma = find_vma(mm, addr);
+       if (!vma || addr < vma->vm_start) {
+               up_read(&mm->mmap_sem);
+               pr_info("%s: No VMA found\n", __func__);
+               return;
+       }
+       start = vma->vm_start;
+       end = vma->vm_end;
+       up_read(&mm->mmap_sem);
+
+       migrate_virtual_range(current->pid, start, end, CRNT_NODE_NID2);
+}
+
+/*
+ * Migrate the VMA of the current process that contains @addr to node 0
+ * (presumably a system RAM node - confirm against the platform layout).
+ *
+ * The VMA bounds are copied while mmap_sem is held, because the VMA must not
+ * be dereferenced once the semaphore has been dropped. The semaphore is
+ * released before migrate_virtual_range() is called. If no VMA covers @addr
+ * a message is logged and nothing is migrated.
+ */
+static void lru_coherent_ram_migrate(unsigned long addr)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long start, end;
+
+       down_read(&mm->mmap_sem);
+       /* find_vma() returns the first VMA ending above addr; vm_end is exclusive. */
+       vma = find_vma(mm, addr);
+       if (!vma || addr < vma->vm_start) {
+               up_read(&mm->mmap_sem);
+               pr_info("%s: No VMA found\n", __func__);
+               return;
+       }
+       start = vma->vm_start;
+       end = vma->vm_end;
+       up_read(&mm->mmap_sem);
+
+       migrate_virtual_range(current->pid, start, end, 0);
+}
+
+/*
+ * ioctl handler. @arg is an address inside the VMA to migrate:
+ *   RAM_CRNT_MIGRATE - handled by lru_ram_coherent_migrate()
+ *   CRNT_RAM_MIGRATE - handled by lru_coherent_ram_migrate()
+ *
+ * Returns 0 for a known command (the helpers report no status) and -EINVAL
+ * for anything else.
+ */
+static long coherent_memory_ioctl(struct file *file,
+                                       unsigned int cmd, unsigned long arg)
+{
+       if (cmd == RAM_CRNT_MIGRATE) {
+               lru_ram_coherent_migrate(arg);
+               return 0;
+       }
+
+       if (cmd == CRNT_RAM_MIGRATE) {
+               lru_coherent_ram_migrate(arg);
+               return 0;
+       }
+
+       pr_info("%s Invalid ioctl() command: %d\n", __func__, cmd);
+       return -EINVAL;
+}
+
+/* File operations of the coherent_memory character device. */
+static const struct file_operations fops = {
+       /* Pin the module while the device is open so it cannot be unloaded. */
+       .owner = THIS_MODULE,
+       .mmap = coherent_memory_mmap,
+       .open = coherent_memory_open,
+       .release = coherent_memory_close,
+       .unlocked_ioctl = &coherent_memory_ioctl
+};
+
+static char kbuf[100]; /* Will store original user passed buffer */
+static char str[100];  /* Working copy for individual substring */
+
+static u64 args[4];
+static u64 index;
+/*
+ * Parse @buf as an unsigned 64-bit integer (base auto-detected by
+ * kstrtou64()) and append it to args[] at position index.
+ *
+ * A string that fails to parse is reported and stored as 0. Values beyond
+ * the capacity of args[] are reported and dropped instead of being written
+ * past the end of the array.
+ */
+static void convert_substring(const char *buf)
+{
+       u64 val = 0;
+
+       if (index >= ARRAY_SIZE(args)) {
+               pr_info("Too many values, ignoring \"%s\"\n", buf);
+               return;
+       }
+
+       if (kstrtou64(buf, 0, &val))
+               pr_info("String conversion failed\n");
+
+       args[index] = val;
+       index++;
+}
+
+/*
+ * debugfs write handler: parse up to four comma-separated numbers from the
+ * user buffer and pass them, in order, to migrate_virtual_range().
+ *
+ * The input must fit into kbuf including its terminating NUL. Each token is
+ * copied into str with guaranteed termination, so nothing is left over from
+ * a previous write, and parsing stops once args[] is full. @ppos is ignored:
+ * every write is treated as one complete command.
+ *
+ * NOTE(review): kbuf, str, args and index are file-scope and not protected
+ * by a lock here - confirm that concurrent writers are not a concern.
+ *
+ * Returns @count on success, -EINVAL if the input is too long or does not
+ * hold four values, -EFAULT if the user buffer cannot be read.
+ */
+static ssize_t coherent_debug_write(struct file *file,
+                                       const char __user *user_buf,
+                                       size_t count, loff_t *ppos)
+{
+       char *tmp, *tmp1;
+
+       /* Leave room for the terminating NUL. */
+       if (count >= sizeof(kbuf))
+               return -EINVAL;
+
+       memset(args, 0, sizeof(args));
+       index = 0;
+
+       if (copy_from_user(kbuf, user_buf, count))
+               return -EFAULT;
+       kbuf[count] = '\0';
+
+       tmp = kbuf;
+       while (index < ARRAY_SIZE(args)) {
+               tmp1 = strchr(tmp, ',');
+               if (tmp1)
+                       *tmp1 = '\0';
+
+               strscpy(str, tmp, sizeof(str));
+               convert_substring(str);
+
+               if (!tmp1)
+                       break;
+               tmp = tmp1 + 1;
+       }
+
+       if (index != ARRAY_SIZE(args)) {
+               pr_info("Expected %zu values, got %llu\n",
+                       ARRAY_SIZE(args), (unsigned long long)index);
+               return -EINVAL;
+       }
+
+       migrate_virtual_range(args[0], args[1], args[2], args[3]);
+       return count;
+}
+
+/*
+ * seq_file show callback: prints the input format expected by the write
+ * handler.
+ *
+ * NOTE(review): coherent_debug_write() passes the third value unchanged as
+ * the third argument of migrate_virtual_range() - confirm whether that
+ * argument is really a size or an end address.
+ */
+static int coherent_debug_show(struct seq_file *m, void *v)
+{
+       seq_puts(m, "Expected Value: <pid,vaddr,size,nid>\n");
+       return 0;
+}
+
+/* open handler of the debugfs file: binds coherent_debug_show via single_open(). */
+static int coherent_debug_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, coherent_debug_show, NULL);
+}
+
+/* File operations of the "coherent_debug" debugfs file. */
+static const struct file_operations coherent_debug_fops = {
+       /* Pin the module while the file is open so it cannot be unloaded. */
+       .owner          = THIS_MODULE,
+       .open           = coherent_debug_open,
+       .write          = coherent_debug_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct dentry *debugfile;
+
+/*
+ * Create the "coherent_debug" file at the top level of debugfs.
+ *
+ * Failure is not fatal: a warning is logged and debugfile is reset to NULL.
+ */
+static void coherent_memory_debugfs(void)
+{
+       debugfile = debugfs_create_file("coherent_debug", 0644, NULL, NULL,
+                               &coherent_debug_fops);
+       /* Depending on kernel version/config, failure is NULL or an ERR_PTR. */
+       if (IS_ERR_OR_NULL(debugfile)) {
+               pr_warn("Failed to create coherent_debug in debugfs\n");
+               debugfile = NULL;
+       }
+}
+
+/* Module unload: remove the debugfs file and unregister the char device. */
+static void __exit coherent_memory_exit(void)
+{
+       pr_info("%s\n", __func__);
+       debugfs_remove(debugfile);
+       unregister_chrdev(COHERENT_DEV_MAJOR, COHERENT_DEV_NAME);
+}
+
+/*
+ * Module load: register the character device under the fixed major
+ * COHERENT_DEV_MAJOR and create the debugfs file.
+ *
+ * Returns 0 on success or the negative errno from register_chrdev(). A
+ * failure to create the debugfs file does not fail the load.
+ */
+static int __init coherent_memory_init(void)
+{
+       int ret;
+
+       pr_info("%s\n", __func__);
+       ret = register_chrdev(COHERENT_DEV_MAJOR, COHERENT_DEV_NAME, &fops);
+       if (ret < 0) {
+               pr_info("%s register_chrdev() failed\n", __func__);
+               /* Propagate the real reason instead of a bare -1 (-EPERM). */
+               return ret;
+       }
+       coherent_memory_debugfs();
+       return 0;
+}
+
+module_init(coherent_memory_init);
+module_exit(coherent_memory_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/memory_online_sysfs.h 
b/drivers/char/memory_online_sysfs.h
new file mode 100644
index 0000000..a5f022d
--- /dev/null
+++ b/drivers/char/memory_online_sysfs.h
@@ -0,0 +1,148 @@
+/*
+ * Accessing sysfs interface for memory hotplug operation from
+ * inside the kernel.
+ *
+ * Licensed under GPL V2
+ */
+#ifndef __SYSFS_H
+#define __SYSFS_H
+
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+
+#define AUTO_ONLINE_BLOCKS "/sys/devices/system/memory/auto_online_blocks"
+#define BLOCK_SIZE_BYTES   "/sys/devices/system/memory/block_size_bytes"
+#define MEMORY_PROBE       "/sys/devices/system/memory/probe"
+
+/*
+ * Read the beginning of @filename into @buf from inside the kernel.
+ *
+ * @filename: path of the file to read
+ * @buf:      kernel buffer receiving the data
+ * @count:    size of @buf; at most @count - 1 bytes are read so that the
+ *            result can be NUL terminated
+ *
+ * The address limit is switched to KERNEL_DS around the file access and
+ * restored on every path.
+ *
+ * Returns the number of bytes read, 0 if @count is not positive, or a
+ * negative errno from filp_open()/vfs_read(). @buf is only terminated when
+ * the read succeeded.
+ */
+static ssize_t read_buf(char *filename, char *buf, ssize_t count)
+{
+       mm_segment_t old_fs;
+       struct file *filp;
+       loff_t pos = 0;
+
+       if (count <= 0)
+               return 0;
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+
+       filp = filp_open(filename, O_RDONLY, 0);
+       if (IS_ERR(filp)) {
+               count = PTR_ERR(filp);
+               goto err_open;
+       }
+
+       count = vfs_read(filp, buf, count - 1, &pos);
+       /* A negative result is an errno, not an index into buf. */
+       if (count >= 0)
+               buf[count] = '\0';
+
+       filp_close(filp, NULL);
+
+err_open:
+       set_fs(old_fs);
+
+       return count;
+}
+
+/*
+ * Read @filename and parse its content as a hexadecimal number.
+ *
+ * Returns the parsed value, or 0 if the file could not be read, was empty,
+ * or did not contain a valid base-16 number.
+ */
+static unsigned long long read_0x(char *filename)
+{
+       unsigned long long value;
+       char text[32];
+
+       if (read_buf(filename, text, sizeof(text)) <= 0 ||
+           kstrtoull(text, 16, &value))
+               return 0;
+
+       return value;
+}
+
+/*
+ * Write the NUL-terminated string @buf to @filename from inside the kernel.
+ *
+ * The address limit is switched to KERNEL_DS around the file access and
+ * restored before returning.
+ *
+ * Returns the result of vfs_write(), or the negative errno from filp_open()
+ * if the file could not be opened.
+ */
+static ssize_t write_buf(char *filename, char *buf)
+{
+       mm_segment_t saved_fs = get_fs();
+       struct file *out;
+       loff_t offset = 0;
+       int written;
+
+       set_fs(KERNEL_DS);
+
+       out = filp_open(filename, O_WRONLY, 0);
+       if (IS_ERR(out)) {
+               written = PTR_ERR(out);
+       } else {
+               written = vfs_write(out, buf, strlen(buf), &offset);
+               filp_close(out, NULL);
+       }
+
+       set_fs(saved_fs);
+       return written;
+}
+
+/*
+ * Probe the physical range [@addr, @addr + @size) one memory block at a
+ * time by writing each block's start address to the sysfs probe file.
+ *
+ * Only whole blocks are probed; a trailing part smaller than the block size
+ * is skipped.
+ *
+ * Returns 0 on success, -EINVAL if the memory block size cannot be
+ * determined, or the negative errno of the first failed write.
+ */
+int memory_probe_store(phys_addr_t addr, phys_addr_t size)
+{
+       phys_addr_t block_sz = read_0x(BLOCK_SIZE_BYTES);
+       phys_addr_t nr_blocks, i;
+
+       /* read_0x() yields 0 on failure; do not divide by it. */
+       if (!block_sz)
+               return -EINVAL;
+
+       nr_blocks = size / block_sz;
+       for (i = 0; i < nr_blocks; i++, addr += block_sz) {
+               char s[32];
+               ssize_t count;
+
+               snprintf(s, sizeof(s), "0x%llx", (unsigned long long)addr);
+
+               count = write_buf(MEMORY_PROBE, s);
+               if (count < 0)
+                       return count;
+       }
+
+       return 0;
+}
+
+/*
+ * Write @state to the sysfs "state" file of every memory block in the
+ * physical range [@addr, @addr + @size), from the last block to the first.
+ *
+ * Only whole blocks are covered; a trailing part smaller than the block
+ * size is skipped.
+ *
+ * Returns 0 on success, -EINVAL if the memory block size cannot be
+ * determined, or the negative errno of the first failed write.
+ */
+int store_mem_state(phys_addr_t addr, phys_addr_t size, char *state)
+{
+       phys_addr_t block_sz = read_0x(BLOCK_SIZE_BYTES);
+       unsigned long start_block, end_block, i;
+
+       /* read_0x() yields 0 on failure; do not divide by it. */
+       if (!block_sz)
+               return -EINVAL;
+
+       start_block = addr / block_sz;
+       end_block = start_block + size / block_sz;
+
+       /*
+        * Decrementing inside the condition keeps the unsigned counter from
+        * wrapping below zero (and looping forever) when start_block is 0.
+        */
+       for (i = end_block; i-- > start_block; ) {
+               char filename[64];
+               ssize_t count;
+
+               snprintf(filename, sizeof(filename),
+                        "/sys/devices/system/memory/memory%lu/state", i);
+
+               count = write_buf(filename, state);
+               if (count < 0)
+                       return count;
+       }
+
+       return 0;
+}
+
+/*
+ * Write "offline" to the auto_online_blocks sysfs file.
+ *
+ * Returns 0 on success or a negative errno if the write failed.
+ */
+int disable_auto_online(void)
+{
+       ssize_t ret = write_buf(AUTO_ONLINE_BLOCKS, "offline");
+
+       /* write_buf() hands back vfs_write()'s byte count; map success to 0. */
+       return ret < 0 ? ret : 0;
+}
+
+/*
+ * Write "online" to the auto_online_blocks sysfs file.
+ *
+ * Returns 0 on success or a negative errno if the write failed.
+ */
+int enable_auto_online(void)
+{
+       ssize_t ret = write_buf(AUTO_ONLINE_BLOCKS, "online");
+
+       /* write_buf() hands back vfs_write()'s byte count; map success to 0. */
+       return ret < 0 ? ret : 0;
+}
+#endif
diff --git a/mm/migrate.c b/mm/migrate.c
index 4f20415..87861f6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1396,6 +1396,7 @@ int migrate_virtual_range(int pid, unsigned long start,
        struct vm_area_struct *vma;
        nodemask_t nmask;
        int ret = -EINVAL;
+       bool found = false;
 
        LIST_HEAD(mlist);
 
@@ -1405,6 +1406,7 @@ int migrate_virtual_range(int pid, unsigned long start,
        if ((!start) || (!end))
                return -EINVAL;
 
+       pr_info("%s: %d %lx %lx %d: ", __func__, pid, start, end, nid);
        rcu_read_lock();
        mm = find_task_by_vpid(pid)->mm;
        rcu_read_unlock();
@@ -1416,23 +1418,33 @@ int migrate_virtual_range(int pid, unsigned long start,
                if  ((start < vma->vm_start) || (end > vma->vm_end))
                        continue;
 
+               found = true;
                ret = queue_pages_range(mm, start, end, &nmask, MPOL_MF_MOVE_ALL
                                                | MPOL_MF_DISCONTIG_OK, &mlist);
                if (ret) {
+                       pr_info("queue_pages_range_failed\n");
                        putback_movable_pages(&mlist);
                        break;
                }
 
                if (list_empty(&mlist)) {
+                       pr_info("list_empty\n");
                        ret = -ENOMEM;
                        break;
                }
 
                ret = migrate_pages(&mlist, new_node_page, NULL, nid,
                                        MIGRATE_SYNC, MR_COMPACTION);
-               if (ret)
+               if (ret) {
+                       pr_info("migration_failed\n");
                        putback_movable_pages(&mlist);
+               } else {
+                       pr_info("migration_passed\n");
+               }
        }
+       if (!found)
+               pr_info("vma_missing\n");
+
        up_write(&mm->mmap_sem);
        return ret;
 }
-- 
1.8.3.1

Reply via email to