[PATCH v11 09/16] mm/damon: Implement access pattern recording

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit implements the recording feature of DAMON.  If this feature
is enabled, DAMON writes the monitored access patterns in its binary
format into a file which is specified by the user.  Each user could
already implement this using the callbacks.  However, as the
recording is expected to be used widely, this commit implements the
feature in DAMON itself, for more convenience and efficiency.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   7 +++
 mm/damon.c| 131 +-
 2 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 264569b21502..bc46ea00e9a1 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -50,6 +50,11 @@ struct damon_ctx {
struct timespec64 last_aggregation;
struct timespec64 last_regions_update;
 
+   unsigned char *rbuf;
+   unsigned int rbuf_len;
+   unsigned int rbuf_offset;
+   char *rfile_path;
+
struct task_struct *kdamond;
bool kdamond_stop;
struct mutex kdamond_lock;
@@ -65,6 +70,8 @@ int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t 
nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_set_recording(struct damon_ctx *ctx,
+   unsigned int rbuf_len, char *rfile_path);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
 
diff --git a/mm/damon.c b/mm/damon.c
index 6b01aa641503..306640c72b7d 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -44,6 +44,9 @@
 #define damon_for_each_task_safe(ctx, t, next) \
list_for_each_entry_safe(t, next, &(ctx)->tasks_list, list)
 
+#define MAX_RECORD_BUFFER_LEN  (4 * 1024 * 1024)
+#define MAX_RFILE_PATH_LEN 256
+
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
@@ -565,16 +568,80 @@ static bool kdamond_aggregate_interval_passed(struct 
damon_ctx *ctx)
 }
 
 /*
- * Reset the aggregated monitoring results
+ * Flush the content in the result buffer to the result file
+ */
+static void damon_flush_rbuffer(struct damon_ctx *ctx)
+{
+   ssize_t sz;
+   loff_t pos = 0;
+   struct file *rfile;
+
+   rfile = filp_open(ctx->rfile_path, O_CREAT | O_RDWR | O_APPEND, 0644);
+   if (IS_ERR(rfile)) {
+   pr_err("Cannot open the result file %s\n",
+   ctx->rfile_path);
+   return;
+   }
+
+   while (ctx->rbuf_offset) {
+   sz = kernel_write(rfile, ctx->rbuf, ctx->rbuf_offset, );
+   if (sz < 0)
+   break;
+   ctx->rbuf_offset -= sz;
+   }
+   filp_close(rfile, NULL);
+}
+
+/*
+ * Write a data into the result buffer
+ */
+static void damon_write_rbuf(struct damon_ctx *ctx, void *data, ssize_t size)
+{
+   if (!ctx->rbuf_len || !ctx->rbuf)
+   return;
+   if (ctx->rbuf_offset + size > ctx->rbuf_len)
+   damon_flush_rbuffer(ctx);
+
+   memcpy(>rbuf[ctx->rbuf_offset], data, size);
+   ctx->rbuf_offset += size;
+}
+
+/*
+ * Flush the aggregated monitoring results to the result buffer
+ *
+ * Stores current tracking results to the result buffer and reset 'nr_accesses'
+ * of each region.  The format for the result buffer is as below:
+ *
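+ * <aggregation time> <number of tasks> <array of task infos>
+ *
+ *   task info: <pid> <number of regions> <array of region infos>
+ *   region info: <start address> <end address> <nr_accesses>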
  */
 static void kdamond_reset_aggregated(struct damon_ctx *c)
 {
struct damon_task *t;
-   struct damon_region *r;
+   struct timespec64 now;
+   unsigned int nr;
+
+   ktime_get_coarse_ts64();
+
+   damon_write_rbuf(c, , sizeof(struct timespec64));
+   nr = nr_damon_tasks(c);
+   damon_write_rbuf(c, , sizeof(nr));
 
damon_for_each_task(c, t) {
-   damon_for_each_region(r, t)
+   struct damon_region *r;
+
+   damon_write_rbuf(c, >pid, sizeof(t->pid));
+   nr = nr_damon_regions(t);
+   damon_write_rbuf(c, , sizeof(nr));
+   damon_for_each_region(r, t) {
+   damon_write_rbuf(c, >vm_start, sizeof(r->vm_start));
+   damon_write_rbuf(c, >vm_end, sizeof(r->vm_end));
+   damon_write_rbuf(c, >nr_accesses,
+   sizeof(r->nr_accesses));
r->nr_accesses = 0;
+   }
}
 }
 
@@ -837,6 +904,14 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)
return true;
 }
 
+static void kdamond_write_record_header(struct damon_ctx *ctx)
+{
+   int recfmt_ver = 1;
+
+   damon_write_rbuf(ctx, "damon_recfmt_ver", 16);
+   damon_write_r

[PATCH v11 10/16] mm/damon: Add debugfs interface

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds a debugfs interface for DAMON.

DAMON exports four files, ``attrs``, ``pids``, ``record``, and
``monitor_on`` under its debugfs directory, ``<debugfs>/damon/``.

Attributes
----------

Users can read and write the ``sampling interval``, ``aggregation
interval``, ``regions update interval``, and min/max number of
monitoring target regions by reading from and writing to the ``attrs``
file.  For example, below commands set those values to 5 ms, 100 ms,
1,000 ms, 10, and 1000 (the intervals are given in microseconds), and
check them again::

# cd <debugfs>/damon
# echo 5000 100000 1000000 10 1000 > attrs
# cat attrs
5000 100000 1000000 10 1000

Target PIDs
-----------

Users can read and write the pids of current monitoring target processes
by reading from and writing to the ``pids`` file.  For example, below
commands set processes having pids 42 and 4242 as the processes to be
monitored and check it again::

# cd /damon
# echo 42 4242 > pids
# cat pids
42 4242

Note that setting the pids doesn't start the monitoring.

Record
------

DAMON supports a feature for directly recording the monitoring results.
The recorded results are first written to a buffer and flushed to a file
in batch.  Users can get and set the size of the buffer and the path to
the result file by reading from and writing to the ``record`` file.  For
example, below commands set the buffer to be 4 KiB and the result to be
saved in '/damon.data', and check it again::

# cd <debugfs>/damon
# echo 4096 /damon.data > record
# cat record
4096 /damon.data

Turning On/Off
--------------

You can check current status, start and stop the monitoring by reading
from and writing to the ``monitor_on`` file.  Writing ``on`` to the file
starts DAMON to monitor the target processes with the attributes.
Writing ``off`` to the file stops DAMON.  DAMON also stops if every
target process is terminated.  Below example commands turn on, off,
and check status of DAMON::

# cd /damon
# echo on > monitor_on
# echo off > monitor_on
# cat monitor_on
off

Please note that you cannot write to the ``attrs`` and ``pids`` files
while the monitoring is turned on.  If you write to the files while
DAMON is running, ``-EINVAL`` will be returned.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 351 -
 1 file changed, 350 insertions(+), 1 deletion(-)

diff --git a/mm/damon.c b/mm/damon.c
index 306640c72b7d..7a6525b708b6 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -10,6 +10,7 @@
 #define pr_fmt(fmt) "damon: " fmt
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,6 +51,15 @@
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
+/* A monitoring context for debugfs interface users. */
+static struct damon_ctx damon_user_ctx = {
+   .sample_interval = 5 * 1000,
+   .aggr_interval = 100 * 1000,
+   .regions_update_interval = 1000 * 1000,
+   .min_nr_regions = 10,
+   .max_nr_regions = 1000,
+};
+
 /*
  * Construct a damon_region struct
  *
@@ -1134,13 +1144,352 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned 
long sample_int,
return 0;
 }
 
-static int __init damon_init(void)
+static ssize_t debugfs_monitor_on_read(struct file *file,
+   char __user *buf, size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char monitor_on_buf[5];
+   bool monitor_on;
+   int len;
+
+   monitor_on = damon_kdamond_running(ctx);
+   len = snprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n");
+
+   return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len);
+}
+
+static ssize_t debugfs_monitor_on_write(struct file *file,
+   const char __user *buf, size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   ssize_t ret;
+   char cmdbuf[5];
+   int err;
+
+   ret = simple_write_to_buffer(cmdbuf, 5, ppos, buf, count);
+   if (ret < 0)
+   return ret;
+
+   if (sscanf(cmdbuf, "%s", cmdbuf) != 1)
+   return -EINVAL;
+   if (!strncmp(cmdbuf, "on", 5))
+   err = damon_start(ctx);
+   else if (!strncmp(cmdbuf, "off", 5))
+   err = damon_stop(ctx);
+   else
+   return -EINVAL;
+
+   if (err)
+   ret = err;
+   return ret;
+}
+
+static ssize_t damon_sprint_pids(struct damon_ctx *ctx, char *buf, ssize_t len)
+{
+   struct damon_task *t;
+   int written = 0;
+   int rc;
+
+   damon_for_each_task(ctx, t) {
+   rc = snprintf([written], len - written, "%d ", t->pid);
+   if (!rc)
+   return -ENOMEM;
+   written += rc;
+   }
+   if (written)
+   written -= 1;
+   written += snprintf([written], len - written, "\n");
+   return written;
+}
+
+static ssize_t debugfs_pids_re

[PATCH v11 11/16] mm/damon: Add tracepoints

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds a tracepoint for DAMON.  It traces the monitoring
results of each region for each aggregation interval.  Using this, DAMON
can be easily integrated with any tools that support tracepoints, such
as perf.

Signed-off-by: SeongJae Park 
---
 include/trace/events/damon.h | 43 
 mm/damon.c   |  5 +
 2 files changed, 48 insertions(+)
 create mode 100644 include/trace/events/damon.h

diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
new file mode 100644
index ..22236642d366
--- /dev/null
+++ b/include/trace/events/damon.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM damon
+
+#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DAMON_H
+
+#include 
+#include 
+
+TRACE_EVENT(damon_aggregated,
+
+   TP_PROTO(int pid, unsigned int nr_regions,
+   unsigned long vm_start, unsigned long vm_end,
+   unsigned int nr_accesses),
+
+   TP_ARGS(pid, nr_regions, vm_start, vm_end, nr_accesses),
+
+   TP_STRUCT__entry(
+   __field(int, pid)
+   __field(unsigned int, nr_regions)
+   __field(unsigned long, vm_start)
+   __field(unsigned long, vm_end)
+   __field(unsigned int, nr_accesses)
+   ),
+
+   TP_fast_assign(
+   __entry->pid = pid;
+   __entry->nr_regions = nr_regions;
+   __entry->vm_start = vm_start;
+   __entry->vm_end = vm_end;
+   __entry->nr_accesses = nr_accesses;
+   ),
+
+   TP_printk("pid=%d nr_regions=%u %lu-%lu: %u", __entry->pid,
+   __entry->nr_regions, __entry->vm_start,
+   __entry->vm_end, __entry->nr_accesses)
+);
+
+#endif /* _TRACE_DAMON_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/mm/damon.c b/mm/damon.c
index 7a6525b708b6..ee117e920b39 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -9,6 +9,8 @@
 
 #define pr_fmt(fmt) "damon: " fmt
 
+#define CREATE_TRACE_POINTS
+
 #include 
 #include 
 #include 
@@ -20,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Minimal region size.  Every damon_region is aligned by this. */
 #define MIN_REGION PAGE_SIZE
@@ -650,6 +653,8 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
damon_write_rbuf(c, >vm_end, sizeof(r->vm_end));
damon_write_rbuf(c, >nr_accesses,
sizeof(r->nr_accesses));
+   trace_damon_aggregated(t->pid, nr,
+   r->vm_start, r->vm_end, r->nr_accesses);
r->nr_accesses = 0;
}
}
-- 
2.17.1



[PATCH v11 12/16] tools: Add a minimal user-space tool for DAMON

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds a shallow wrapper python script, ``/tools/damon/damo``,
that provides a more convenient interface.  Note that it is only intended
to be used as a minimal reference for DAMON's debugfs interfaces and for
debugging of DAMON itself.

Signed-off-by: SeongJae Park 
---
 tools/damon/.gitignore|   1 +
 tools/damon/_dist.py  |  36 
 tools/damon/_recfile.py   |  23 +++
 tools/damon/bin2txt.py|  67 +++
 tools/damon/damo  |  37 
 tools/damon/heats.py  | 362 ++
 tools/damon/nr_regions.py |  91 ++
 tools/damon/record.py | 212 ++
 tools/damon/report.py |  45 +
 tools/damon/wss.py|  97 ++
 10 files changed, 971 insertions(+)
 create mode 100644 tools/damon/.gitignore
 create mode 100644 tools/damon/_dist.py
 create mode 100644 tools/damon/_recfile.py
 create mode 100644 tools/damon/bin2txt.py
 create mode 100755 tools/damon/damo
 create mode 100644 tools/damon/heats.py
 create mode 100644 tools/damon/nr_regions.py
 create mode 100644 tools/damon/record.py
 create mode 100644 tools/damon/report.py
 create mode 100644 tools/damon/wss.py

diff --git a/tools/damon/.gitignore b/tools/damon/.gitignore
new file mode 100644
index ..96403d36ff93
--- /dev/null
+++ b/tools/damon/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/tools/damon/_dist.py b/tools/damon/_dist.py
new file mode 100644
index ..9851ec964e5c
--- /dev/null
+++ b/tools/damon/_dist.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import struct
+import subprocess
+
+def access_patterns(f):
+nr_regions = struct.unpack('I', f.read(4))[0]
+
+patterns = []
+for r in range(nr_regions):
+saddr = struct.unpack('L', f.read(8))[0]
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+patterns.append([eaddr - saddr, nr_accesses])
+return patterns
+
+def plot_dist(data_file, output_file, xlabel, ylabel):
+terminal = output_file.split('.')[-1]
+if not terminal in ['pdf', 'jpeg', 'png', 'svg']:
+os.remove(data_file)
+print("Unsupported plot output type.")
+exit(-1)
+
+gnuplot_cmd = """
+set term %s;
+set output '%s';
+set key off;
+set xlabel '%s';
+set ylabel '%s';
+plot '%s' with linespoints;""" % (terminal, output_file, xlabel, ylabel,
+data_file)
+subprocess.call(['gnuplot', '-e', gnuplot_cmd])
+os.remove(data_file)
+
diff --git a/tools/damon/_recfile.py b/tools/damon/_recfile.py
new file mode 100644
index ..331b4d8165d8
--- /dev/null
+++ b/tools/damon/_recfile.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import struct
+
+fmt_version = 0
+
+def set_fmt_version(f):
+global fmt_version
+
+mark = f.read(16)
+if mark == b'damon_recfmt_ver':
+fmt_version = struct.unpack('i', f.read(4))[0]
+else:
+fmt_version = 0
+f.seek(0)
+return fmt_version
+
+def pid(f):
+if fmt_version == 0:
+return struct.unpack('L', f.read(8))[0]
+else:
+return struct.unpack('i', f.read(4))[0]
diff --git a/tools/damon/bin2txt.py b/tools/damon/bin2txt.py
new file mode 100644
index ..8b9b57a0d727
--- /dev/null
+++ b/tools/damon/bin2txt.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import os
+import struct
+import sys
+
+import _recfile
+
+def parse_time(bindat):
+"bindat should be 16 bytes"
+sec = struct.unpack('l', bindat[0:8])[0]
+nsec = struct.unpack('l', bindat[8:16])[0]
+return sec * 10 + nsec;
+
+def pr_region(f):
+saddr = struct.unpack('L', f.read(8))[0]
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+print("%012x-%012x(%10d):\t%d" %
+(saddr, eaddr, eaddr - saddr, nr_accesses))
+
+def pr_task_info(f):
+pid = _recfile.pid(f)
+print("pid: ", pid)
+nr_regions = struct.unpack('I', f.read(4))[0]
+print("nr_regions: ", nr_regions)
+for r in range(nr_regions):
+pr_region(f)
+
+def set_argparser(parser):
+parser.add_argument('--input', '-i', type=str, metavar='',
+default='damon.data', help='input file name')
+
+def main(args=None):
+if not args:
+parser = argparse.ArgumentParser()
+set_argparser(parser)
+args = parser.parse_args()
+
+file_path = args.input
+
+if not os.path.isfile(file_path):
+print('input file (%s) is not exist' % file_path)
+exit(1)
+
+with open(file_path, 'rb') as f:
+_recfile.set_fmt_version(f)
+start_time = None
+while True:
+timebin = f.read(16)
+if len(timebin) != 16:
+

[PATCH v11 13/16] Documentation/admin-guide/mm: Add a document for DAMON

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds a simple document for DAMON under
`Documentation/admin-guide/mm`.

Signed-off-by: SeongJae Park 
---
 .../admin-guide/mm/data_access_monitor.rst| 428 ++
 Documentation/admin-guide/mm/index.rst|   1 +
 2 files changed, 429 insertions(+)
 create mode 100644 Documentation/admin-guide/mm/data_access_monitor.rst

diff --git a/Documentation/admin-guide/mm/data_access_monitor.rst 
b/Documentation/admin-guide/mm/data_access_monitor.rst
new file mode 100644
index ..1d5e18d6ab9e
--- /dev/null
+++ b/Documentation/admin-guide/mm/data_access_monitor.rst
@@ -0,0 +1,428 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+DAMON: Data Access MONitor
+==========================
+
+Introduction
+============
+
+Memory management decisions can be improved if finer data access information is
+available.  However, because such finer information usually comes with higher
+overhead, most systems including Linux forgo the potential benefit and rely
+on only coarse information or some light-weight heuristics.
+
+A number of data access pattern aware memory management optimizations
+consistently say the potential benefit is not small (2.55x speedup).  However,
+none of those has been successfully adopted into the Linux kernel, mainly due to
+the absence of a scalable and efficient data access monitoring mechanism.
+
+DAMON is a data access monitoring subsystem for the problem.  It is 1) accurate
+enough to be used for the DRAM level memory management, 2) light-weight enough
+to be applied online, and 3) keeps predefined upper-bound overhead
+regardless of the size of target workloads (thus scalable).
+
+DAMON is implemented as a standalone kernel module and provides several simple
+interfaces.  Owing to that, though it is mainly designed for the kernel's
+memory management mechanisms, it can also be used by a wide range of user
+space programs and people.
+
+
+Frequently Asked Questions
+==========================
+
+Q: Why not integrated with perf?
+A: From the perspective of perf-like profilers, DAMON can be thought of as a
+data source in the kernel, like tracepoints, pressure stall information (psi),
+or idle page tracking.  Thus, it can be easily integrated with those.  However,
+this patchset doesn't provide a fancy perf integration because the current step
+of DAMON development is focused on its core logic only.  That said, DAMON already
+provides two interfaces for user space programs, which are based on debugfs and
+tracepoint, respectively.  Using the tracepoint interface, you can use DAMON
+with perf.  This patchset also provides a user space tool for DAMON that is
+based on the debugfs interface.  It can be used to record, visualize, and
+analyze data access patterns of target processes in a convenient way.
+
+Q: Why a new module, instead of extending perf or other tools?
+A: First, DAMON aims to be used by other programs including the kernel.
+Therefore, having a dependency on specific tools like perf is not desirable.
+Second, because it needs to be as lightweight as possible so that it can be
+used online, any unnecessary overhead such as kernel - user space context
+switching cost should be avoided.  These are the two biggest reasons why
+DAMON is implemented in the kernel space.  The idle page tracking subsystem
+would be the kernel module that seems most similar to DAMON.  However, its own
+interface is not compatible with DAMON.  Also, the internal implementation of
+it has no common part to be reused by DAMON.
+
+Q: Can 'perf mem' provide the data required for DAMON?
+A: On the systems supporting 'perf mem', yes.  DAMON is using the PTE Accessed
+bits in low level.  Other H/W or S/W features that can be used for the purpose
+could be used.  However, as explained in the answer above, DAMON needs to be
+implemented in the kernel space.
+
+
+Expected Use-cases
+==================
+
+A straightforward usecase of DAMON would be the program behavior analysis.
+With the DAMON output, users can confirm whether the program is running as
+intended or not.  This will be useful for debugging and tests of design
+points.
+
+The monitored results can also be useful for counting the dynamic working set
+size of workloads.  For the administration of memory overcommitted systems or
+selection of the environments (e.g., containers providing different amount of
+memory) for your workloads, this will be useful.
+
+If you are a programmer, you can optimize your program by managing the memory
+based on the actual data access pattern.  For example, you can identify the
+dynamic hotness of your data using DAMON and call ``mlock()`` to keep your hot
+data in DRAM, or call ``madvise()`` with ``MADV_PAGEOUT`` to proactively
+reclaim cold data.  Even though your program is guaranteed to not encounter
+memory pressure, you can still improve the performance by applying the DAMON
+outputs for call of ``MADV_HUGEPAGE`` and ``MADV_NOHUGEPAGE``.  More creative

[PATCH v11 14/16] mm/damon: Add kunit tests

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds kunit based unit tests for DAMON.

Signed-off-by: SeongJae Park 
Reviewed-by: Brendan Higgins 
---
 mm/Kconfig  |  11 +
 mm/damon-test.h | 622 
 mm/damon.c  |   6 +
 3 files changed, 639 insertions(+)
 create mode 100644 mm/damon-test.h

diff --git a/mm/Kconfig b/mm/Kconfig
index 9ea49633a6df..81ace52f1c23 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -751,4 +751,15 @@ config DAMON
  and 2) sufficiently light-weight so that it can be applied online.
  If unsure, say N.
 
+config DAMON_KUNIT_TEST
+   bool "Test for damon"
+   depends on DAMON=y && KUNIT
+   help
+ This builds the DAMON Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
 endmenu
diff --git a/mm/damon-test.h b/mm/damon-test.h
new file mode 100644
index ..851c2062a480
--- /dev/null
+++ b/mm/damon-test.h
@@ -0,0 +1,622 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.  All rights reserved.
+ *
+ * Author: SeongJae Park 
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_TEST_H
+#define _DAMON_TEST_H
+
+#include 
+
+static void damon_test_str_to_pids(struct kunit *test)
+{
+   char *question;
+   int *answers;
+   int expected[] = {12, 35, 46};
+   ssize_t nr_integers = 0, i;
+
+   question = "123";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "123abc";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "a123";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, answers, (int *)NULL);
+
+   question = "12 35";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 46";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 abc 46";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < 2; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+
+   question = "\n";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+}
+
+static void damon_test_regions(struct kunit *test)
+{
+   struct damon_region *r;
+   struct damon_task *t;
+
+   r = damon_new_region(_user_ctx, 1, 2);
+   KUNIT_EXPECT_EQ(test, 1ul, r->vm_start);
+   KUNIT_EXPECT_EQ(test, 2ul, r->vm_end);
+   KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_add_region(r, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_regions(t));
+
+   damon_del_region(r);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_free_task(t);
+}
+
+static void damon_test_tasks(struct kunit *test)
+{
+   struct damon_ctx *c = _user_ctx;
+   struct damon_task *t;
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 42, t->pid);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+
+   damon_add_task(_user_ctx, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_tasks(c));
+
+   damon_destroy_task(t);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+}
+
+static void damon_test_set_pids(struct kunit *test)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   int pids[] = {1, 2, 3};
+   char buf[64];
+
+   damon_set_pids(ctx, pids, 3);
+   damon_sprint_pids(ctx, buf, 64);
+   KUNIT_EXP

[PATCH v11 15/16] mm/damon: Add user space selftests

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit adds simple user space tests for DAMON.  The tests use the
kselftest framework.

Signed-off-by: SeongJae Park 
---
 tools/testing/selftests/damon/Makefile|   7 +
 .../selftests/damon/_chk_dependency.sh|  28 
 tools/testing/selftests/damon/_chk_record.py  | 108 ++
 .../testing/selftests/damon/debugfs_attrs.sh  | 139 ++
 .../testing/selftests/damon/debugfs_record.sh |  50 +++
 5 files changed, 332 insertions(+)
 create mode 100644 tools/testing/selftests/damon/Makefile
 create mode 100644 tools/testing/selftests/damon/_chk_dependency.sh
 create mode 100644 tools/testing/selftests/damon/_chk_record.py
 create mode 100755 tools/testing/selftests/damon/debugfs_attrs.sh
 create mode 100755 tools/testing/selftests/damon/debugfs_record.sh

diff --git a/tools/testing/selftests/damon/Makefile 
b/tools/testing/selftests/damon/Makefile
new file mode 100644
index ..cfd5393a4639
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_FILES = _chk_dependency.sh _chk_record_file.py
+TEST_PROGS = debugfs_attrs.sh debugfs_record.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh 
b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index ..814dcadd5e96
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=/sys/kernel/debug/damon
+
+if [ $EUID -ne 0 ];
+then
+   echo "Run as root"
+   exit $ksft_skip
+fi
+
+if [ ! -d $DBGFS ]
+then
+   echo "$DBGFS not found"
+   exit $ksft_skip
+fi
+
+for f in attrs record pids monitor_on
+do
+   if [ ! -f "$DBGFS/$f" ]
+   then
+   echo "$f not found"
+   exit 1
+   fi
+done
diff --git a/tools/testing/selftests/damon/_chk_record.py 
b/tools/testing/selftests/damon/_chk_record.py
new file mode 100644
index ..5cfcf4161404
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_record.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"Check whether the DAMON record file is valid"
+
+import argparse
+import struct
+import sys
+
+fmt_version = 0
+
+def set_fmt_version(f):
+global fmt_version
+
+mark = f.read(16)
+if mark == b'damon_recfmt_ver':
+fmt_version = struct.unpack('i', f.read(4))[0]
+else:
+fmt_version = 0
+f.seek(0)
+return fmt_version
+
+def read_pid(f):
+if fmt_version == 0:
+pid = struct.unpack('L', f.read(8))[0]
+else:
+pid = struct.unpack('i', f.read(4))[0]
+def err_percent(val, expected):
+return abs(val - expected) / expected * 100
+
+def chk_task_info(f):
+pid = read_pid(f)
+nr_regions = struct.unpack('I', f.read(4))[0]
+
+if nr_regions > max_nr_regions:
+print('too many regions: %d > %d' % (nr_regions, max_nr_regions))
+exit(1)
+
+nr_gaps = 0
+eaddr = 0
+for r in range(nr_regions):
+saddr = struct.unpack('L', f.read(8))[0]
+if eaddr and saddr != eaddr:
+nr_gaps += 1
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+
+if saddr >= eaddr:
+print('wrong region [%d,%d)' % (saddr, eaddr))
+exit(1)
+
+max_nr_accesses = aint / sint
+if nr_accesses > max_nr_accesses:
+if err_percent(nr_accesses, max_nr_accesses) > 15:
+print('too high nr_access: expected %d but %d' %
+(max_nr_accesses, nr_accesses))
+exit(1)
+if nr_gaps != 2:
+print('number of gaps are not two but %d' % nr_gaps)
+exit(1)
+
+def parse_time_us(bindat):
+sec = struct.unpack('l', bindat[0:8])[0]
+nsec = struct.unpack('l', bindat[8:16])[0]
+return (sec * 10 + nsec) / 1000
+
+def main():
+global sint
+global aint
+global min_nr
+global max_nr_regions
+
+parser = argparse.ArgumentParser()
+parser.add_argument('file', metavar='',
+help='path to the record file')
+parser.add_argument('--attrs', metavar='',
+default='5000 10 100 10 1000',
+help='content of debugfs attrs file')
+args = parser.parse_args()
+file_path = args.file
+attrs = [int(x) for x in args.attrs.split()]
+sint, aint, rint, min_nr, max_nr_regions = attrs
+
+with open(file_path, 'rb') as f:
+set_fmt_version(f)
+last_aggr_time = None
+while True:
+timebin = f.read(16)
+if len(timebin) != 16:
+break
+
+now = parse_time_us(timebin)
+if not last_aggr_time:
+ 

[PATCH v11 16/16] MAINTAINERS: Update for DAMON

2020-05-11 Thread SeongJae Park
From: SeongJae Park 

This commit updates MAINTAINERS file for DAMON related files.

Signed-off-by: SeongJae Park 
---
 MAINTAINERS | 12 
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5a5332b3591d..cb4a7fa3cdfe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4661,6 +4661,18 @@ F:   net/ax25/ax25_out.c
 F: net/ax25/ax25_timer.c
 F: net/ax25/sysctl_net_ax25.c
 
+DATA ACCESS MONITOR
+M: SeongJae Park 
+L: linux...@kvack.org
+S: Maintained
+F: Documentation/admin-guide/mm/data_access_monitor.rst
+F: include/linux/damon.h
+F: include/trace/events/damon.h
+F: mm/damon-test.h
+F: mm/damon.c
+F: tools/damon/*
+F: tools/testing/selftests/damon/*
+
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
 L: net...@vger.kernel.org
 S: Orphan
-- 
2.17.1



Re: Re: Re: Re: Re: [PATCH net v2 0/2] Revert the 'socket_alloc' life cycle change

2020-05-06 Thread SeongJae Park
TL; DR: It was not kernel's fault, but the benchmark program.

So, the problem is reproducible using the lebench[1] only.  I carefully read
its code again.

Before running the "poll big" sub test in which the problem occurred, lebench
executes the "context switch" sub test.  For that test, it sets its own cpu
affinity[2] and process priority[3] to '0' and '-20', respectively.  However, it
doesn't restore the original values even after the "context switch" test has
finished.  For that reason, the "select big" sub test also runs bound to CPU 0
and has the lowest nice value.  Therefore, it can disturb the RCU callback thread
for CPU 0, which processes the deferred deallocations of the sockets, and as a
result it triggers the OOM.

We confirmed the problem disappears by offloading the RCU callbacks from the
CPU 0 using rcu_nocbs=0 boot parameter or simply restoring the affinity and/or
priority.

Someone _might_ still argue that this is a kernel problem because the problem
didn't occur on the old kernels prior to Al's patches.  However, setting
the affinity and priority was available because the program received the
permission.  Therefore, it would be reasonable to blame the system
administrators rather than the kernel.

So, please ignore this patchset, and my apologies for the confusion.  If you
still have some doubts or need more tests, please let me know.

[1] https://github.com/LinuxPerfStudy/LEBench
[2] 
https://github.com/LinuxPerfStudy/LEBench/blob/master/TEST_DIR/OS_Eval.c#L820
[3] 
https://github.com/LinuxPerfStudy/LEBench/blob/master/TEST_DIR/OS_Eval.c#L822


Thanks,
SeongJae Park


Re: Re: Re: Re: Re: Re: [PATCH net v2 0/2] Revert the 'socket_alloc' life cycle change

2020-05-06 Thread SeongJae Park
On Wed, 6 May 2020 07:41:51 -0700 "Paul E. McKenney"  wrote:

> On Wed, May 06, 2020 at 02:59:26PM +0200, SeongJae Park wrote:
> > TL; DR: It was not kernel's fault, but the benchmark program.
> > 
> > So, the problem is reproducible using the lebench[1] only.  I carefully read
> > it's code again.
> > 
> > Before running the problem occurred "poll big" sub test, lebench executes
> > "context switch" sub test.  For the test, it sets the cpu affinity[2] and
> > process priority[3] of itself to '0' and '-20', respectively.  However, it
> > doesn't restore the values to original value even after the "context 
> > switch" is
> > finished.  For the reason, "select big" sub test also run binded on CPU 0 
> > and
> > has lowest nice value.  Therefore, it can disturb the RCU callback thread 
> > for
> > the CPU 0, which processes the deferred deallocations of the sockets, and 
> > as a
> > result it triggers the OOM.
> > 
> > We confirmed the problem disappears by offloading the RCU callbacks from the
> > CPU 0 using rcu_nocbs=0 boot parameter or simply restoring the affinity 
> > and/or
> > priority.
> > 
> > Someone _might_ still argue that this is kernel problem because the problem
> > didn't occur on the old kernels prior to the Al's patches.  However, setting
> > the affinity and priority was available because the program received the
> > permission.  Therefore, it would be reasonable to blame the system
> > administrators rather than the kernel.
> > 
> > So, please ignore this patchset, apology for making confuse.  If you still 
> > has
> > some doubts or need more tests, please let me know.
> > 
> > [1] https://github.com/LinuxPerfStudy/LEBench
> > [2] 
> > https://github.com/LinuxPerfStudy/LEBench/blob/master/TEST_DIR/OS_Eval.c#L820
> > [3] 
> > https://github.com/LinuxPerfStudy/LEBench/blob/master/TEST_DIR/OS_Eval.c#L822
> 
> Thank you for chasing this down!
> 
> I have had this sort of thing on my list as a potential issue, but given
> that it is now really showing up, it sounds like it is time to bump
> up its priority a bit.  Of course there are limits, so if userspace is
> running at any of the real-time priorities, making sufficient CPU time
> available to RCU's kthreads becomes userspace's responsibility.  But if
> everything is running at SCHED_OTHER (which is this case here, correct?),

Correct.

> then it is reasonable for RCU to do some work to avoid this situation.

That would be also great!

> 
> But still, yes, the immediate job is fixing the benchmark.  ;-)

Totally agreed.

> 
>   Thanx, Paul
> 
> PS.  Why not just attack all potential issues on my list?  Because I
>  usually learn quite a bit from seeing the problem actually happen.
>  And sometimes other changes in RCU eliminate the potential issue
>  before it has a chance to happen.

Sounds interesting, I will try some of those in my spare time ;)


Thanks,
SeongJae Park


Re: [PATCH v10 06/16] mm/damon: Split regions into 4 subregions if possible

2020-05-07 Thread SeongJae Park
On Tue, 5 May 2020 13:08:05 +0200 SeongJae Park  wrote:

> From: SeongJae Park 
> 
> Suppose that there are a very wide and cold region and a hot region, and
> both regions are identified by DAMON.  And then, the middle small region
> inside the very wide and cold region becomes hot.  DAMON will not be
> able to identify this new region because the adaptive regions adjustment
> mechanism splits each region to only two subregions.
> 
> This commit modifies the logic to split each region to 4 subregions if
> possible so that such problematic region can eventually identified.

As you can check with the 'Evaluations' section in the cover letter of this
patchset, the extent of the slowdown that DAMON's recording feature imposes on
the target workload has doubled (0.55% with v9, 1.02% with this patchset).  It's
still just a small number, but the worst case slowdown is 5.29%.  It was only
1.88% before.  I suspect this patch is the reason, as this is the only
major change we made after v9.

For that reason, I would like to make this patch split each region into 3
subregions instead of 4.  It will reduce the overhead a little bit while still
allowing the corner case regions to be eventually detected.  If you have
other concerns or opinions, please let me know.


Thanks,
SeongJae Park

> 
> Suggested-by: James Cameron 
> Signed-off-by: SeongJae Park 
> ---
>  mm/damon.c | 44 +++-
>  1 file changed, 27 insertions(+), 17 deletions(-)
> 
> diff --git a/mm/damon.c b/mm/damon.c
> index cec946197e13..342f905927a0 100644
> --- a/mm/damon.c
> +++ b/mm/damon.c
> @@ -650,26 +650,32 @@ static void damon_split_region_at(struct damon_ctx *ctx,
>   damon_insert_region(new, r, damon_next_region(r));
>  }
>  
> -/* Split every region in the given task into two randomly-sized regions */
> -static void damon_split_regions_of(struct damon_ctx *ctx, struct damon_task 
> *t)
> +/* Split every region in the given task into 'nr_subs' regions */
> +static void damon_split_regions_of(struct damon_ctx *ctx,
> +  struct damon_task *t, int nr_subs)
>  {
>   struct damon_region *r, *next;
> - unsigned long sz_orig_region, sz_left_region;
> + unsigned long sz_region, sz_sub = 0;
> + int i;
>  
>   damon_for_each_region_safe(r, next, t) {
> - sz_orig_region = r->vm_end - r->vm_start;
> -
> - /*
> -  * Randomly select size of left sub-region to be at least
> -  * 10 percent and at most 90% of original region
> -  */
> - sz_left_region = ALIGN_DOWN(damon_rand(1, 10) * sz_orig_region
> - / 10, MIN_REGION);
> - /* Do not allow blank region */
> - if (sz_left_region == 0 || sz_left_region >= sz_orig_region)
> - continue;
> -
> - damon_split_region_at(ctx, r, sz_left_region);
> + sz_region = r->vm_end - r->vm_start;
> +
> + for (i = 0; i < nr_subs - 1 &&
> + sz_region > 2 * MIN_REGION; i++) {
> + /*
> +  * Randomly select size of left sub-region to be at
> +  * least 10 percent and at most 90% of original region
> +  */
> + sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
> + sz_region / 10, MIN_REGION);
> + /* Do not allow blank region */
> + if (sz_sub == 0 || sz_sub >= sz_region)
> + continue;
> +
> + damon_split_region_at(ctx, r, sz_sub);
> + sz_region = sz_sub;
> + }
>   }
>  }
>  
> @@ -687,14 +693,18 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
>  {
>   struct damon_task *t;
>   unsigned int nr_regions = 0;
> + int nr_subregions = 2;
>  
>   damon_for_each_task(ctx, t)
>   nr_regions += nr_damon_regions(t);
>   if (nr_regions > ctx->max_nr_regions / 2)
>   return;
>  
> + if (nr_regions < ctx->max_nr_regions / 4)
> + nr_subregions = 4;
> +
>   damon_for_each_task(ctx, t)
> - damon_split_regions_of(ctx, t);
> + damon_split_regions_of(ctx, t, nr_subregions);
>  }
>  
>  /*
> -- 
> 2.17.1
> 


[RFC v8 0/8] Implement Data Access Monitoring-based Memory Operation Schemes

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

DAMON[1] can be used as a primitive for data access aware memory management
optimizations.  That said, users who want such optimizations should run DAMON,
read the monitoring results, analyze it, plan a new memory management scheme,
and apply the new scheme by themselves.  Such efforts will be inevitable for
some complicated optimizations.

However, in many other cases, the users would simply want the system to apply a
memory management action to a memory region of a specific size having a
specific access frequency for a specific time.  For example, "page out a memory
region larger than 100 MiB keeping only rare accesses more than 2 minutes", or
"Do not use THP for a memory region larger than 2 MiB rarely accessed for more
than 1 second".

This RFC patchset makes DAMON handle such data access monitoring-based
operation schemes.  With this change, users can do the data access aware
optimizations by simply specifying their schemes to DAMON.

[1] https://lore.kernel.org/linux-mm/20200406130938.14066-1-sjp...@amazon.com/


Evaluations
===========

Setup
-----

On my personal QEMU/KVM based virtual machine on an Intel i7 host machine
running Ubuntu 18.04, I measure runtime and consumed system memory while
running various realistic workloads with several configurations.  I use 13 and
12 workloads in PARSEC3[3] and SPLASH-2X[4] benchmark suites, respectively.  I
personally use another wrapper scripts[5] for setup and run of the workloads.
On top of this patchset, we also applied the DAMON-based operation schemes
patchset[6] for this evaluation.

Measurement
~~~~~~~~~~~

For the measurement of the amount of consumed memory in system global scope, I
drop caches before starting each of the workloads and monitor 'MemFree' in the
'/proc/meminfo' file.  To make results more stable, I repeat the runs 5 times
and average results.  You can get stdev, min, and max of the numbers among the
repeated runs in appendix below.

Configurations
~~~~~~~~~~~~~~

The configurations I use are as below.

orig: Linux v5.5 with 'madvise' THP policy
rec: 'orig' plus DAMON running with record feature
thp: same with 'orig', but use 'always' THP policy
ethp: 'orig' plus a DAMON operation scheme[6], 'efficient THP'
prcl: 'orig' plus a DAMON operation scheme, 'proactive reclaim[7]'

I use 'rec' for measurement of DAMON overheads to target workloads and system
memory.  The remaining configs including 'thp', 'ethp', and 'prcl' are for
measurement of DAMON monitoring accuracy.

'ethp' and 'prcl' are simple DAMON-based operation schemes developed as
proofs of concept of DAMON.  'ethp' reduces memory space waste of THP by using
DAMON for decision of promotions and demotion for huge pages, while 'prcl' is
similar to the original work.  Those are implemented as below:

# format: <min/max size> <min/max frequency (0-100)> <min/max age> <action>
# ethp: Use huge pages if a region >2MB shows >5% access rate, use regular
# pages if a region >2MB shows <5% access rate for >1 second
2M null    5 null    null null    hugepage
2M null    null 5    1s null      nohugepage

# prcl: If a region >4KB shows <5% access rate for >5 seconds, page out.
4K null    null 5    500ms null      pageout

Note that both 'ethp' and 'prcl' are designed with only my straightforward
intuition, because those are only for proof of concept and for showing the
monitoring accuracy of DAMON.  In other words, those are not for production.
For production use, those should be tuned more.


[1] "Redis latency problems troubleshooting", https://redis.io/topics/latency
[2] "Disable Transparent Huge Pages (THP)",
https://docs.mongodb.com/manual/tutorial/transparent-huge-pages/
[3] "The PARSEC Benchmark Suite", https://parsec.cs.princeton.edu/index.htm
[4] "SPLASH-2x", https://parsec.cs.princeton.edu/parsec3-doc.htm#splash2x
[5] "parsec3_on_ubuntu", https://github.com/sjp38/parsec3_on_ubuntu
[6] "[RFC v4 0/7] Implement Data Access Monitoring-based Memory Operation
Schemes",
https://lore.kernel.org/linux-mm/20200303121406.20954-1-sjp...@amazon.com/
[7] "Proactively reclaiming idle memory", https://lwn.net/Articles/787611/


Results
-------

The two tables below show the measurement results.  The runtimes are in seconds
while the memory usages are in KiB.  Each configuration except 'orig' shows
its overhead relative to 'orig' in percent within parentheses.

runtime                 orig     rec      (overhead) thp      (overhead) ethp     (overhead) prcl     (overhead)
parsec3/blackscholes    107.065  107.478  (0.39)     106.682  (-0.36)    107.365  (0.28)     111.811  (4.43)
parsec3/bodytrack       79.256   79.450   (0.25)     78.645   (-0.77)    79.314   (0.07)     80.305   (1.32)
parsec3/canneal         139.497  141.181  (1.21)     121.526  (-12.88)   130.074  (-6.75)    154.644  (10.86)
parsec3/dedup           11.879   11.873   (-0.05)    11.693   (-1.56)    11.948   (0.58)     12.694   (6.86)
parsec3/fa

[RFC v8 1/8] mm/madvise: Export do_madvise() to external GPL modules

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

This commit exports 'do_madvise()' to external GPL modules, so that
other modules including DAMON could use the function.

Signed-off-by: SeongJae Park 
---
 mm/madvise.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 80f8a1839f70..151aaf285cdd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1151,6 +1151,7 @@ int do_madvise(struct task_struct *target_task, struct 
mm_struct *mm,
 
return error;
 }
+EXPORT_SYMBOL_GPL(do_madvise);
 
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
-- 
2.17.1



[RFC v8 2/8] mm/damon: Account age of target regions

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

DAMON can be used as a primitive for data access pattern aware memory
management optimizations.  However, users who want such optimizations
should run DAMON, read the monitoring results, analyze it, plan a new
memory management scheme, and apply the new scheme by themselves.  It
would not be too hard, but still requires some level of effort.  For
complicated optimizations, this effort is inevitable.

That said, in many cases, users would simply want to apply an action to
a memory region of a specific size having a specific access frequency
for a specific time.  For example, "page out a memory region larger than
100 MiB but having a low access frequency more than 10 minutes", or "Use
THP for a memory region larger than 2 MiB having a high access frequency
for more than 2 seconds".

For such optimizations, users will need to first account the age of each
region themselves.  To reduce such efforts, this commit implements a
simple age account of each region in DAMON.  For each aggregation step,
DAMON compares the access frequency and start/end address of each region
with those from the last aggregation and resets the age of the region if the
change is significant.  Else, the age is incremented.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   5 ++
 mm/damon.c| 106 --
 2 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index bc46ea00e9a1..7276b2a31c38 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -22,6 +22,11 @@ struct damon_region {
unsigned long sampling_addr;
unsigned int nr_accesses;
struct list_head list;
+
+   unsigned int age;
+   unsigned long last_vm_start;
+   unsigned long last_vm_end;
+   unsigned int last_nr_accesses;
 };
 
 /* Represents a monitoring target task */
diff --git a/mm/damon.c b/mm/damon.c
index 6fb847312a5d..da266e8b2b30 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -86,6 +86,10 @@ static struct damon_region *damon_new_region(struct 
damon_ctx *ctx,
region->nr_accesses = 0;
INIT_LIST_HEAD(&region->list);
 
+   region->age = 0;
+   region->last_vm_start = vm_start;
+   region->last_vm_end = vm_end;
+
return region;
 }
 
@@ -659,11 +663,45 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
sizeof(r->nr_accesses));
trace_damon_aggregated(t->pid, nr,
r->vm_start, r->vm_end, r->nr_accesses);
+   r->last_nr_accesses = r->nr_accesses;
r->nr_accesses = 0;
}
}
 }
 
+#define diff_of(a, b) (a > b ? a - b : b - a)
+
+/*
+ * Increase or reset the age of the given monitoring target region
+ *
+ * If the area or '->nr_accesses' has changed significantly, reset the '->age'.
+ * Else, increase the age.
+ */
+static void damon_do_count_age(struct damon_region *r, unsigned int threshold)
+{
+   unsigned long region_threshold = (r->vm_end - r->vm_start) / 4;
+   unsigned long region_diff = diff_of(r->vm_start, r->last_vm_start) +
+   diff_of(r->vm_end, r->last_vm_end);
+   unsigned int nr_accesses_diff = diff_of(r->nr_accesses,
+   r->last_nr_accesses);
+
+   if (region_diff > region_threshold || nr_accesses_diff > threshold)
+   r->age = 0;
+   else
+   r->age++;
+}
+
+static void kdamond_count_age(struct damon_ctx *c, unsigned int threshold)
+{
+   struct damon_task *t;
+   struct damon_region *r;
+
+   damon_for_each_task(c, t) {
+   damon_for_each_region(r, t)
+   damon_do_count_age(r, threshold);
+   }
+}
+
 #define sz_damon_region(r) (r->vm_end - r->vm_start)
 
 /*
@@ -672,33 +710,86 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 static void damon_merge_two_regions(struct damon_region *l,
struct damon_region *r)
 {
-   l->nr_accesses = (l->nr_accesses * sz_damon_region(l) +
-   r->nr_accesses * sz_damon_region(r)) /
-   (sz_damon_region(l) + sz_damon_region(r));
+   unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
+
+   l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
+   (sz_l + sz_r);
+   l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
l->vm_end = r->vm_end;
damon_destroy_region(r);
 }
 
-#define diff_of(a, b) (a > b ? a - b : b - a)
+static inline void set_last_area(struct damon_region *r, struct region *last)
+{
+   r->last_vm_start = last->start;
+   r->last_vm_end = last->end;
+}
+
+static inline void get_las

[RFC v8 4/8] mm/damon/schemes: Implement a debugfs interface

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

This commit implements a debugfs interface for the data access
monitoring oriented memory management schemes.  It is supposed to be
used by administrators and/or privileged user space programs.  Users can
read and update the rules using ``<debugfs>/damon/schemes`` file.  The
format is::

    <min/max size> <min/max access frequency> <min/max age> <action>

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 174 -
 1 file changed, 172 insertions(+), 2 deletions(-)

diff --git a/mm/damon.c b/mm/damon.c
index 13275c31a6c5..be8a986f08e0 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -174,6 +174,29 @@ static void damon_destroy_task(struct damon_task *t)
damon_free_task(t);
 }
 
+static struct damos *damon_new_scheme(
+   unsigned int min_sz_region, unsigned int max_sz_region,
+   unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+   unsigned int min_age_region, unsigned int max_age_region,
+   enum damos_action action)
+{
+   struct damos *scheme;
+
+   scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+   if (!scheme)
+   return NULL;
+   scheme->min_sz_region = min_sz_region;
+   scheme->max_sz_region = max_sz_region;
+   scheme->min_nr_accesses = min_nr_accesses;
+   scheme->max_nr_accesses = max_nr_accesses;
+   scheme->min_age_region = min_age_region;
+   scheme->max_age_region = max_age_region;
+   scheme->action = action;
+   INIT_LIST_HEAD(&scheme->list);
+
+   return scheme;
+}
+
 static void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 {
list_add_tail(&s->list, &ctx->schemes_list);
@@ -1439,6 +1462,147 @@ static ssize_t debugfs_monitor_on_write(struct file 
*file,
return ret;
 }
 
+static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
+{
+   struct damos *s;
+   int written = 0;
+   int rc;
+
+   damon_for_each_scheme(c, s) {
+   rc = snprintf(&buf[written], len - written,
+   "%u %u %u %u %u %u %d\n",
+   s->min_sz_region, s->max_sz_region,
+   s->min_nr_accesses, s->max_nr_accesses,
+   s->min_age_region, s->max_age_region,
+   s->action);
+   if (!rc)
+   return -ENOMEM;
+
+   written += rc;
+   }
+   return written;
+}
+
+static ssize_t debugfs_schemes_read(struct file *file, char __user *buf,
+   size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = &damon_user_ctx;
+   char *kbuf;
+   ssize_t len;
+
+   kbuf = kmalloc(count, GFP_KERNEL);
+   if (!kbuf)
+   return -ENOMEM;
+
+   len = sprint_schemes(ctx, kbuf, count);
+   if (len < 0)
+   goto out;
+   len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+   kfree(kbuf);
+   return len;
+}
+
+static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes)
+{
+   ssize_t i;
+
+   for (i = 0; i < nr_schemes; i++)
+   kfree(schemes[i]);
+   kfree(schemes);
+}
+
+/*
+ * Converts a string into an array of struct damos pointers
+ *
+ * Returns an array of struct damos pointers that converted if the conversion
+ * success, or NULL otherwise.
+ */
+static struct damos **str_to_schemes(const char *str, ssize_t len,
+   ssize_t *nr_schemes)
+{
+   struct damos *scheme, **schemes;
+   const int max_nr_schemes = 256;
+   int pos = 0, parsed, ret;
+   unsigned int min_sz, max_sz, min_nr_a, max_nr_a, min_age, max_age;
+   unsigned int action;
+
+   schemes = kmalloc_array(max_nr_schemes, sizeof(scheme),
+   GFP_KERNEL);
+   if (!schemes)
+   return NULL;
+
+   *nr_schemes = 0;
+   while (pos < len && *nr_schemes < max_nr_schemes) {
+   ret = sscanf(&str[pos], "%u %u %u %u %u %u %u%n",
+   &min_sz, &max_sz, &min_nr_a, &max_nr_a,
+   &min_age, &max_age, &action, &parsed);
+   if (ret != 7)
+   break;
+   if (action >= DAMOS_ACTION_LEN) {
+   pr_err("wrong action %d\n", action);
+   goto fail;
+   }
+
+   pos += parsed;
+   scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
+   min_age, max_age, action);
+   if (!scheme)
+   goto fail;
+
+   schemes[*nr_schemes] = scheme;
+   *nr_schemes += 1;
+   }
+   if (!*nr_schemes)
+   goto fail;
+   return schemes;
+fail:
+   free_schemes_arr(schemes, *nr_schemes);
+   return NULL;
+}
+
+static ssize_t debugfs_schemes_write(struct file *file, const char __user *buf,
+   

[RFC v8 3/8] mm/damon: Implement data access monitoring-based operation schemes

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

In many cases, users might use DAMON for simple data access aware
memory management optimizations such as applying an operation scheme to
a memory region of a specific size having a specific access frequency
for a specific time.  For example, "page out a memory region larger than
100 MiB but having a low access frequency more than 10 minutes", or "Use
THP for a memory region larger than 2 MiB having a high access frequency
for more than 2 seconds".

To save users from spending their time on implementing such
simple data access monitoring-based operation schemes, this commit makes
DAMON handle such schemes directly.  With this commit, users can
simply specify their desired schemes to DAMON.

Each of the schemes is composed of conditions for filtering of the
target memory regions and desired memory management action for the
target.  Specifically, the format is::

    <min/max size> <min/max access frequency> <min/max age> <action>

The filtering conditions are size of memory region, number of accesses
to the region monitored by DAMON, and the age of the region.  The age of
region is incremented periodically but reset when its addresses or
access frequency has significantly changed or the action of a scheme was
applied.  For the action, current implementation supports only a few of
madvise() hints, ``MADV_WILLNEED``, ``MADV_COLD``, ``MADV_PAGEOUT``,
``MADV_HUGEPAGE``, and ``MADV_NOHUGEPAGE``.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  24 +++
 mm/damon.c| 149 ++
 2 files changed, 173 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 7276b2a31c38..0f26d8aad33c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -36,6 +36,27 @@ struct damon_task {
struct list_head list;
 };
 
+/* Data Access Monitoring-based Operation Scheme */
+enum damos_action {
+   DAMOS_WILLNEED,
+   DAMOS_COLD,
+   DAMOS_PAGEOUT,
+   DAMOS_HUGEPAGE,
+   DAMOS_NOHUGEPAGE,
+   DAMOS_ACTION_LEN,
+};
+
+struct damos {
+   unsigned int min_sz_region;
+   unsigned int max_sz_region;
+   unsigned int min_nr_accesses;
+   unsigned int max_nr_accesses;
+   unsigned int min_age_region;
+   unsigned int max_age_region;
+   enum damos_action action;
+   struct list_head list;
+};
+
 /*
  * For each 'sample_interval', DAMON checks whether each region is accessed or
  * not.  It aggregates and keeps the access information (number of accesses to
@@ -65,6 +86,7 @@ struct damon_ctx {
struct mutex kdamond_lock;
 
struct list_head tasks_list;/* 'damon_task' objects */
+   struct list_head schemes_list;  /* 'damos' objects */
 
/* callbacks */
void (*sample_cb)(struct damon_ctx *context);
@@ -75,6 +97,8 @@ int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t 
nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_set_schemes(struct damon_ctx *ctx,
+   struct damos **schemes, ssize_t nr_schemes);
 int damon_set_recording(struct damon_ctx *ctx,
unsigned int rbuf_len, char *rfile_path);
 int damon_start(struct damon_ctx *ctx);
diff --git a/mm/damon.c b/mm/damon.c
index da266e8b2b30..13275c31a6c5 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -11,6 +11,7 @@
 
 #define CREATE_TRACE_POINTS
 
+#include 
 #include 
 #include 
 #include 
@@ -52,6 +53,12 @@
 #define damon_for_each_task_safe(ctx, t, next) \
list_for_each_entry_safe(t, next, &(ctx)->tasks_list, list)
 
+#define damon_for_each_scheme(ctx, r) \
+   list_for_each_entry(r, &(ctx)->schemes_list, list)
+
+#define damon_for_each_scheme_safe(ctx, s, next) \
+   list_for_each_entry_safe(s, next, &(ctx)->schemes_list, list)
+
 #define MAX_RECORD_BUFFER_LEN  (4 * 1024 * 1024)
 #define MAX_RFILE_PATH_LEN 256
 
@@ -167,6 +174,27 @@ static void damon_destroy_task(struct damon_task *t)
damon_free_task(t);
 }
 
+static void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
+{
+   list_add_tail(&s->list, &ctx->schemes_list);
+}
+
+static void damon_del_scheme(struct damos *s)
+{
+   list_del(&s->list);
+}
+
+static void damon_free_scheme(struct damos *s)
+{
+   kfree(s);
+}
+
+static void damon_destroy_scheme(struct damos *s)
+{
+   damon_del_scheme(s);
+   damon_free_scheme(s);
+}
+
 static unsigned int nr_damon_tasks(struct damon_ctx *ctx)
 {
struct damon_task *t;
@@ -702,6 +730,101 @@ static void kdamond_count_age(struct damon_ctx *c, 
unsigned int threshold)
}
 }
 
+#ifndef CONFIG_ADVISE_SYSCALLS
+static int damos_madvise(struct damon_task *task, struct damon_region *r,
+   int behavior)
+{
+   return -EINVAL;
+}
+#else
+static int damos_madvi

[RFC v8 6/8] mm/damon/selftests: Add 'schemes' debugfs tests

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

This commit adds simple selftests for the 'schemes' debugfs file of DAMON.

Signed-off-by: SeongJae Park 
---
 .../testing/selftests/damon/debugfs_attrs.sh  | 29 +++
 1 file changed, 29 insertions(+)

diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh 
b/tools/testing/selftests/damon/debugfs_attrs.sh
index d5188b0f71b1..4aeb2037a67e 100755
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -97,6 +97,35 @@ fi
 
 echo $ORIG_CONTENT > $file
 
+# Test schemes file
+file="$DBGFS/schemes"
+
+ORIG_CONTENT=$(cat $file)
+echo "1 2 3 4 5 6 3" > $file
+if [ $? -ne 0 ]
+then
+   echo "$file write fail"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
+echo "1 2
+3 4 5 6 3" > $file
+if [ $? -eq 0 ]
+then
+   echo "$file multi line write success (expected fail)"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
+echo > $file
+if [ $? -ne 0 ]
+then
+   echo "$file empty string writing fail"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
 # Test pids file
 file="$DBGFS/pids"
 
-- 
2.17.1



[RFC v8 5/8] mm/damon-test: Add kunit test case for regions age accounting

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

After merges of regions, each region should know its last shape in a
proper way, to measure the changes from the last modification and reset
the age if the changes are significant.  This commit adds kunit test
cases checking whether the regions know their last shape properly
after merges of regions.

Signed-off-by: SeongJae Park 
Reviewed-by: Brendan Higgins 
---
 mm/damon-test.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/mm/damon-test.h b/mm/damon-test.h
index 851c2062a480..c7a7f3b9b57f 100644
--- a/mm/damon-test.h
+++ b/mm/damon-test.h
@@ -551,6 +551,8 @@ static void damon_test_merge_regions_of(struct kunit *test)
 
unsigned long saddrs[] = {0, 114, 130, 156, 170};
unsigned long eaddrs[] = {112, 130, 156, 170, 230};
+   unsigned long lsa[] = {0, 114, 130, 156, 184};
+   unsigned long lea[] = {100, 122, 156, 170, 230};
int i;
 
t = damon_new_task(42);
@@ -567,6 +569,9 @@ static void damon_test_merge_regions_of(struct kunit *test)
r = __nth_region_of(t, i);
KUNIT_EXPECT_EQ(test, r->vm_start, saddrs[i]);
KUNIT_EXPECT_EQ(test, r->vm_end, eaddrs[i]);
+   KUNIT_EXPECT_EQ(test, r->last_vm_start, lsa[i]);
+   KUNIT_EXPECT_EQ(test, r->last_vm_end, lea[i]);
+
}
damon_free_task(t);
 }
-- 
2.17.1



[RFC v8 7/8] damon/tools: Support more human friendly 'schemes' control

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

This commit implements the 'schemes' subcommand of the damon userspace tool.
It can be used to describe and apply the data access monitoring-based
operation schemes in a more human-friendly fashion.

Signed-off-by: SeongJae Park 
---
 tools/damon/_convert_damos.py | 127 ++
 tools/damon/_damon.py | 143 ++
 tools/damon/damo  |   7 ++
 tools/damon/record.py | 135 +++-
 tools/damon/schemes.py| 105 +
 5 files changed, 394 insertions(+), 123 deletions(-)
 create mode 100755 tools/damon/_convert_damos.py
 create mode 100644 tools/damon/_damon.py
 create mode 100644 tools/damon/schemes.py

diff --git a/tools/damon/_convert_damos.py b/tools/damon/_convert_damos.py
new file mode 100755
index 000000000000..709d0991899c
--- /dev/null
+++ b/tools/damon/_convert_damos.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Change human readable data access monitoring-based operation schemes to the low
+level input for the '/damon/schemes' file.  Below is an example of the
+schemes written in the human readable format:
+
+# format is:
+# <min/max size> <min/max frequency (0-100)> <min/max age> <action>
+#
+# B/K/M/G/T for Bytes/KiB/MiB/GiB/TiB
+# us/ms/s/m/h/d for micro-seconds/milli-seconds/seconds/minutes/hours/days
+# 'null' means zero for size and age.
+
+# if a region keeps a high access frequency for more than 100ms, put the
+# region on the head of the LRU list (call madvise() with MADV_WILLNEED).
+null    null    80      null    100ms   0s      willneed
+
+# if a region keeps a low access frequency for more than 200ms and less
+# than one hour, put the region on the tail of the LRU list (call
+# madvise() with MADV_COLD).
+0B  0B  10  20  200ms   1h cold
+
+# if a region keeps a very low access frequency for more than 1 minute,
+# swap out the region immediately (call madvise() with MADV_PAGEOUT).
+0B      null    0       10      60s     0s      pageout
+
+# if a region of a size bigger than 2MiB keeps a very high access frequency
+# for more than 100ms, let the region to use huge pages (call madvise()
+# with MADV_HUGEPAGE).
+2M      null    90      99      100ms   0s      hugepage
+
+# If a regions of a size bigger than 2MiB keeps small access frequency for
+# more than 100ms, avoid the region using huge pages (call madvise() with
+# MADV_NOHUGEPAGE).
+2M      null    0       25      100ms   0s      nohugepage
+"""
+
+import argparse
+
+unit_to_bytes = {'B': 1, 'K': 1024, 'M': 1024 * 1024, 'G': 1024 * 1024 * 1024,
+'T': 1024 * 1024 * 1024 * 1024}
+
+def text_to_bytes(txt):
+if txt == 'null':
+return 0
+unit = txt[-1]
+number = int(txt[:-1])
+return number * unit_to_bytes[unit]
+
+unit_to_usecs = {'us': 1, 'ms': 1000, 's': 1000 * 1000, 'm': 60 * 1000 * 1000,
+'h': 60 * 60 * 1000 * 1000, 'd': 24 * 60 * 60 * 1000 * 1000}
+
+def text_to_us(txt):
+if txt == 'null':
+return 0
+unit = txt[-2:]
+if unit in ['us', 'ms']:
+number = int(txt[:-2])
+else:
+unit = txt[-1]
+number = int(txt[:-1])
+return number * unit_to_usecs[unit]
+
+damos_action_to_int = {'DAMOS_WILLNEED': 0, 'DAMOS_COLD': 1,
+'DAMOS_PAGEOUT': 2, 'DAMOS_HUGEPAGE': 3, 'DAMOS_NOHUGEPAGE': 4}
+
+def text_to_damos_action(txt):
+return damos_action_to_int['DAMOS_' + txt.upper()]
+
+def text_to_nr_accesses(txt, max_nr_accesses):
+if txt == 'null':
+return 0
+return int(int(txt) * max_nr_accesses / 100)
+
+def debugfs_scheme(line, sample_interval, aggr_interval):
+fields = line.split()
+if len(fields) != 7:
+print('wrong input line: %s' % line)
+exit(1)
+
+limit_nr_accesses = aggr_interval / sample_interval
+try:
+min_sz = text_to_bytes(fields[0])
+max_sz = text_to_bytes(fields[1])
+min_nr_accesses = text_to_nr_accesses(fields[2], limit_nr_accesses)
+max_nr_accesses = text_to_nr_accesses(fields[3], limit_nr_accesses)
+min_age = text_to_us(fields[4]) / aggr_interval
+max_age = text_to_us(fields[5]) / aggr_interval
+action = text_to_damos_action(fields[6])
+except:
+print('wrong input field')
+raise
+return '%d\t%d\t%d\t%d\t%d\t%d\t%d' % (min_sz, max_sz, min_nr_accesses,
+max_nr_accesses, min_age, max_age, action)
+
+def convert(schemes_file, sample_interval, aggr_interval):
+lines = []
+with open(schemes_file, 'r') as f:
+for line in f:
+if line.startswith('#'):
+continue
+line = line.strip()
+if line == '':
+continue
+lines.append(debugfs_scheme(line, sample_interval, aggr_interval))
+return '\n'.join(lines)
+
+def main():
+parser = argparse.ArgumentParser()
+

[RFC v8 8/8] Documentation/admin-guide/mm: Document DAMON-based operation schemes

2020-05-12 Thread SeongJae Park
From: SeongJae Park 

This commit documents DAMON-based operation schemes in the DAMON
document.

Signed-off-by: SeongJae Park 
---
 .../admin-guide/mm/data_access_monitor.rst| 100 +-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/mm/data_access_monitor.rst 
b/Documentation/admin-guide/mm/data_access_monitor.rst
index 915956aa1065..d4a48bc63400 100644
--- a/Documentation/admin-guide/mm/data_access_monitor.rst
+++ b/Documentation/admin-guide/mm/data_access_monitor.rst
@@ -182,8 +182,8 @@ only for each of a user-specified time interval (``regions 
update interval``).
 ``debugfs`` Interface
 =====================
 
-DAMON exports four files, ``attrs``, ``pids``, ``record``, and ``monitor_on``
-under its debugfs directory, ``/damon/``.
+DAMON exports five files, ``attrs``, ``pids``, ``record``, ``schemes`` and
+``monitor_on`` under its debugfs directory, ``/damon/``.
 
 Attributes
 ----------
@@ -227,6 +227,46 @@ be 4 KiB and the result to be saved in ``/damon.data``::
 # cat record
 4096 /damon.data
 
+Schemes
+-------
+
+For usual DAMON-based data access aware memory management optimizations, users
+would simply want the system to apply a memory management action to a memory
+region of a specific size having a specific access frequency for a specific
+time.  DAMON receives such formalized operation schemes from user and applies
+those to the target processes.
+
+Users can get and set the schemes by reading from and writing to ``schemes``
+debugfs file.  To the file, each of the schemes should be represented in each line
+in below form:
+
+min-size max-size min-acc max-acc min-age max-age action
+
+Bytes for size of regions (``min-size`` and ``max-size``), number of monitored
+accesses per aggregate interval for access frequency (``min-acc`` and
+``max-acc``), number of aggregate intervals for age of regions (``min-age`` and
+``max-age``), and predefined integer for memory management actions should be
+used.  ``madvise()`` system calls with specific hints are currently available.
+The numbers and their representing memory hint are as below::
+
+0   MADV_WILLNEED
+1   MADV_COLD
+2   MADV_PAGEOUT
+3   MADV_HUGEPAGE
+4   MADV_NOHUGEPAGE
+
+You can disable schemes by simply writing an empty string to the file.  For
+example, below commands apply a scheme saying “If a memory region larger than
+4 KiB (4096 0) is showing less than 5 accesses per aggregate interval (0 5) for
+more than 5 aggregate intervals (5 0), page out the region (2)”, check the
+entered scheme again, and finally remove the scheme.::
+
+# cd /damon
+# echo "4096 0 0 5 5 0 2" > schemes
+# cat schemes
+4096 0 0 5 5 0 2
+# echo > schemes
+
 Turning On/Off
 --------------
 
@@ -426,3 +466,59 @@ made.
 
 Users can specify the resolution of the distribution (``--range``).  It also
 supports 'gnuplot' based simple visualization (``--plot``) of the distribution.
+
+
+DAMON-based Operation Schemes
+-----------------------------
+
+The ``schemes`` subcommand applies given data access pattern based operation
+schemes to the given target processes.  The target processes are described
+using the command to spawn the processes or pid of running processes, as
+similar to that of ``record`` subcommand.  Meanwhile, the operation schemes
+should be saved in a text file using below format and passed to ``schemes``
+subcommand via ``--schemes`` option.
+
+min-size max-size min-acc max-acc min-age max-age action
+
+The format also supports comments, several units for size and age of regions,
+and human readable action names.  Currently supported operation actions are
+WILLNEED, COLD, PAGEOUT, HUGEPAGE, and NOHUGEPAGE.  Each of the actions works
+as same to that of madvise() system call.  Below is an example schemes file.
+Please also note that 0 for max values means infinite.::
+
+# format is:
+# <min/max size> <min/max frequency (0-100)> <min/max age> <action>
+#
+# B/K/M/G/T for Bytes/KiB/MiB/GiB/TiB
+# us/ms/s/m/h/d for micro-seconds/milli-seconds/seconds/minutes/hours/days
+# 'null' means zero for size and age.
+
+# if a region keeps a high access frequency for more than 100ms, put the
+# region on the head of the LRU list (call madvise() with MADV_WILLNEED).
+null    null    80      null    100ms   0s      willneed
+
+# if a region keeps a low access frequency for more than 200ms and less
+# than one hour, put the region on the tail of the LRU list (call
+# madvise() with MADV_COLD).
+0B  0B  10  20  200ms   1h cold
+
+# if a region keeps a very low access frequency for more than 1 minute,
+# swap out the region immediately (call madvise() with MADV_PAGEOUT).
+0B      null    0       10      60s     0s      pageout
+
+# if a region of a size bigger than 2MiB keeps a very high access frequency
+# for more than 100ms, let the region to use huge pages (call madvise()
+# with MADV_HUGEPAGE).
+2M  nu

[PATCH v12 01/16] scripts/spelling: Add a few more typos

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds typos found from DAMON patchset.

Signed-off-by: SeongJae Park 
---
 scripts/spelling.txt | 8 
 1 file changed, 8 insertions(+)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index ffa838f3a2b5..a8214d8e8e4b 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -59,6 +59,7 @@ actualy||actually
 acumulating||accumulating
 acumulative||accumulative
 acumulator||accumulator
+acutally||actually
 adapater||adapter
 addional||additional
 additionaly||additionally
@@ -245,6 +246,7 @@ calescing||coalescing
 calle||called
 callibration||calibration
 callled||called
+callser||caller
 calucate||calculate
 calulate||calculate
 cancelation||cancellation
@@ -664,6 +666,7 @@ hanlde||handle
 hanled||handled
 happend||happened
 harware||hardware
+havind||having
 heirarchically||hierarchically
 helpfull||helpful
 hexdecimal||hexadecimal
@@ -835,6 +838,7 @@ logile||logfile
 loobpack||loopback
 loosing||losing
 losted||lost
+maangement||management
 machinary||machinery
 maibox||mailbox
 maintainance||maintenance
@@ -999,6 +1003,7 @@ partiton||partition
 pased||passed
 passin||passing
 pathes||paths
+pattrns||patterns
 pecularities||peculiarities
 peformance||performance
 peforming||performing
@@ -1238,6 +1243,7 @@ shoule||should
 shrinked||shrunk
 siginificantly||significantly
 signabl||signal
+significanly||significantly
 similary||similarly
 similiar||similar
 simlar||similar
@@ -1352,6 +1358,7 @@ thead||thread
 therfore||therefore
 thier||their
 threds||threads
+threee||three
 threshhold||threshold
 thresold||threshold
 throught||through
@@ -1391,6 +1398,7 @@ tyep||type
 udpate||update
 uesd||used
 uknown||unknown
+usccess||success
 usupported||unsupported
 uncommited||uncommitted
 unconditionaly||unconditionally
-- 
2.17.1



[PATCH v12 00/16] Introduce Data Access MONitor (DAMON)

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

Introduction


DAMON is a data access monitoring framework subsystem for the Linux kernel.
The core mechanisms of DAMON called 'region based sampling' and 'adaptive
regions adjustment' (refer to :doc:`mechanisms` for the detail) make it
accurate, efficient, and scalable.  Using this framework, therefore, the
kernel's core memory management mechanisms including reclamation and THP can be
optimized for better memory management.  The memory management optimization
works that have not merged into the mainline due to their high data access
monitoring overhead will be able to have another try.  In user space,
meanwhile, users who have some special workloads will be able to write
personalized tools or applications for more understanding and specialized
optimizations of their systems using the DAMON as a framework.

More Information


We prepared a showcase web site[1] where you can find more information on DAMON.
There are

- the official documentation of DAMON[2],
- the heatmap format dynamic access pattern of various realistic workloads for
  heap area[3], mmap()-ed area[4], and stack[5] area,
- the dynamic working set size distribution[6] and chronological working set
  size changes[7], and
- the latest performance test results[8].

[1] https://damonitor.github.io
[2] https://damonitor.github.io/doc/html/latest
[3] https://damonitor.github.io/test/result/visual/latest/heatmap.0.html
[4] https://damonitor.github.io/test/result/visual/latest/heatmap.1.html
[5] https://damonitor.github.io/test/result/visual/latest/heatmap.2.html
[6] https://damonitor.github.io/test/result/visual/latest/wss_sz.html
[7] https://damonitor.github.io/test/result/visual/latest/wss_time.html
[8] https://damonitor.github.io/test/result/perf/latest/html/index.html

Evaluations
===========

We evaluated DAMON's overhead, monitoring quality and usefulness using 25
realistic workloads on my QEMU/KVM based virtual machine.

DAMON is lightweight.  It consumes only 0.03% more system memory and up to 1%
CPU time.  It makes target workloads only 0.7% slower.

DAMON is accurate and useful for memory management optimizations.  An
experimental DAMON-based operation scheme for THP removes 63.12% of THP memory
overheads while preserving 49.15% of THP speedup.  Another experimental
DAMON-based 'proactive reclamation' implementation reduces 85.85% of
resident sets and 21.98% of system memory footprint while incurring only
2.42% runtime overhead in best case (parsec3/freqmine).

NOTE that the experimental THP optimization and proactive reclamation are not
for production, but only for proof of concept.

Please refer to 'Appendix C' for detailed evaluation setup and results.

Baseline and Complete Git Trees
===============================

The patches are based on the v5.6.  You can also clone the complete git
tree:

$ git clone git://github.com/sjp38/linux -b damon/patches/v12

The web is also available:
https://github.com/sjp38/linux/releases/tag/damon/patches/v12

There are a couple of trees for entire DAMON patchset series.  The first one[1]
contains the changes for latest release, while the other one[2] contains the
changes for next release.

[1] https://github.com/sjp38/linux/tree/damon/master
[2] https://github.com/sjp38/linux/tree/damon/next

Sequence Of Patches
===================

The patches are organized in the following sequence.  The first two patches are
preparation of DAMON patchset.  The 1st patch adds typos found in previous
versions of DAMON patchset to 'scripts/spelling.txt' so that the typos can be
caught by 'checkpatch.pl'.  The 2nd patch exports 'lookup_page_ext()' to GPL
modules so that it can be used by DAMON even though it is built as a loadable
module.

Next five patches implement the core of DAMON and its programming interface.
The 3rd patch introduces DAMON module, its data structures, and data structure
related common functions.  Following four patches (4th to 7th) implement the
core mechanisms of DAMON, namely regions based sampling (patch 4), adaptive
regions adjustment (patches 5-6), and dynamic memory mapping change adoption
(patch 7).

Following four patches are for low level users of DAMON.  The 8th patch
implements callbacks for each of monitoring steps so that users can do whatever
they want with the access patterns.  The 9th one implements recording of access
patterns in DAMON for better convenience and efficiency.  Each of next two
patches (10th and 11th) respectively adds a debugfs interface for privileged
people and/or programs in user space, and a tracepoint for other tracepoints
supporting tracers such as perf.

Two patches for high level users of DAMON follow.  To provide a minimal
reference to the debugfs interface and for high level use/tests of the DAMON,
the next patch (12th) implements a user space tool.  The 13th patch adds a
document for administrators of DAMON.

Next two patches are for tests.  The 14th and 15th patches provide unit tests

[PATCH v12 02/16] mm/page_ext: Export lookup_page_ext() to GPL modules

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit exports 'lookup_page_ext()' to GPL modules.  This will be
used by DAMON.

Signed-off-by: SeongJae Park 
---
 mm/page_ext.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/page_ext.c b/mm/page_ext.c
index 4ade843ff588..71169b45bba9 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -131,6 +131,7 @@ struct page_ext *lookup_page_ext(const struct page *page)
MAX_ORDER_NR_PAGES);
return get_entry(base, index);
 }
+EXPORT_SYMBOL_GPL(lookup_page_ext);
 
 static int __init alloc_node_page_ext(int nid)
 {
-- 
2.17.1



[PATCH v12 03/16] mm: Introduce Data Access MONitor (DAMON)

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit introduces a kernel module named DAMON.  Note that this
commit is implementing only the stub for the module load/unload, basic
data structures, and simple manipulation functions of the structures to
keep the size of commit small.  The core mechanisms of DAMON will be
implemented one by one by following commits.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  36 +
 mm/Kconfig|  12 +++
 mm/Makefile   |   1 +
 mm/damon.c| 173 ++
 4 files changed, 222 insertions(+)
 create mode 100644 include/linux/damon.h
 create mode 100644 mm/damon.c

diff --git a/include/linux/damon.h b/include/linux/damon.h
new file mode 100644
index ..19f411d36c0d
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Copyright 2019-2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: SeongJae Park 
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include 
+#include 
+
+/* Represents a monitoring target region of [vm_start, vm_end) */
+struct damon_region {
+   unsigned long vm_start;
+   unsigned long vm_end;
+   unsigned long sampling_addr;
+   unsigned int nr_accesses;
+   struct list_head list;
+};
+
+/* Represents a monitoring target task */
+struct damon_task {
+   int pid;
+   struct list_head regions_list;
+   struct list_head list;
+};
+
+struct damon_ctx {
+   struct list_head tasks_list;/* 'damon_task' objects */
+};
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index ab80933be65f..9ea49633a6df 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -739,4 +739,16 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
 bool
 
+config DAMON
+   tristate "Data Access Monitor"
+   depends on MMU
+   help
+ Provides data access monitoring.
+
+ DAMON is a kernel module that allows users to monitor the actual
+ memory access pattern of specific user-space processes.  It aims to
+ be 1) accurate enough to be useful for performance-centric domains,
+ and 2) sufficiently light-weight so that it can be applied online.
+ If unsure, say N.
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 272e66039e70..5346314edee6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -110,3 +110,4 @@ obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
+obj-$(CONFIG_DAMON) += damon.o
diff --git a/mm/damon.c b/mm/damon.c
new file mode 100644
index ..a5a7820ef0ad
--- /dev/null
+++ b/mm/damon.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Copyright 2019-2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: SeongJae Park 
+ */
+
+#define pr_fmt(fmt) "damon: " fmt
+
+#include 
+#include 
+#include 
+#include 
+
+#define damon_get_task_struct(t) \
+   (get_pid_task(find_vpid(t->pid), PIDTYPE_PID))
+
+#define damon_next_region(r) \
+   (container_of(r->list.next, struct damon_region, list))
+
+#define damon_prev_region(r) \
+   (container_of(r->list.prev, struct damon_region, list))
+
+#define damon_for_each_region(r, t) \
+   list_for_each_entry(r, &t->regions_list, list)
+
+#define damon_for_each_region_safe(r, next, t) \
+   list_for_each_entry_safe(r, next, &t->regions_list, list)
+
+#define damon_for_each_task(t, ctx) \
+   list_for_each_entry(t, &(ctx)->tasks_list, list)
+
+#define damon_for_each_task_safe(t, next, ctx) \
+   list_for_each_entry_safe(t, next, &(ctx)->tasks_list, list)
+
+/* Get a random number in [l, r) */
+#define damon_rand(l, r) (l + prandom_u32() % (r - l))
+
+/*
+ * Construct a damon_region struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+static struct damon_region *damon_new_region(struct damon_ctx *ctx,
+   unsigned long vm_start, unsigned long vm_end)
+{
+   struct damon_region *region;
+
+   region = kmalloc(sizeof(*region), GFP_KERNEL);
+   if (!region)
+   return NULL;
+
+   region->vm_start = vm_start;
+   region->vm_end = vm_end;
+   region->nr_accesses = 0;
+   INIT_LIST_HEAD(&region->list);
+
+   return region;
+}
+
+/*
+ * Add a region between two other regions
+ */
+static inline void damon_insert_region(struct damon_region *r,
+   struct damon_region *prev, struct damon_region *next)
+{
+   __list_add(&r->list, &prev->list, &next->list);
+}
+
+static void damon_add_region(struct damon_region *r, struct damon_task *t)
+{
+   list_add_tail(&r->list, &t->regions_list);
+}
+
+static void damon_del_region(struct damon_region *r)
+{
+   list_del(&r->list);
+}
+
+static void damon_free_region(struct damon_region *r)
+{

[PATCH v12 05/16] mm/damon: Adaptively adjust regions

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

At the beginning of the monitoring, DAMON constructs the initial regions
by evenly splitting the memory mapped address space of the process into
the user-specified minimal number of regions.  In this initial state,
the assumption of the regions (pages in same region have similar access
frequencies) is normally not kept and thus the monitoring quality could
be low.  To keep the assumption as much as possible, DAMON adaptively
merges and splits each region.

For each ``aggregation interval``, it compares the access frequencies of
adjacent regions and merges those if the frequency difference is small.
Then, after it reports and clears the aggregated access frequency of
each region, it splits each region into two regions if the total number
of regions is smaller than the half of the user-specified maximum number
of regions.

In this way, DAMON provides its best-effort quality and minimal overhead
while keeping the bounds users set for their trade-off.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   6 +-
 mm/damon.c| 158 +++---
 2 files changed, 152 insertions(+), 12 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 558dd6ae0afa..f1c3f491fc50 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -42,6 +42,7 @@ struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
unsigned long min_nr_regions;
+   unsigned long max_nr_regions;
 
struct timespec64 last_aggregation;
 
@@ -53,8 +54,9 @@ struct damon_ctx {
 };
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
-int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
-   unsigned long aggr_int, unsigned long min_nr_reg);
+int damon_set_attrs(struct damon_ctx *ctx,
+   unsigned long sample_int, unsigned long aggr_int,
+   unsigned long min_nr_reg, unsigned long max_nr_reg);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
 
diff --git a/mm/damon.c b/mm/damon.c
index ca1eeb51c7d0..6c9581104caf 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -332,9 +332,12 @@ static int damon_three_regions_of(struct damon_task *t,
  * regions is wasteful.  That said, because we can deal with small noises,
  * tracking every mapping is not strictly required but could even incur a high
  * overhead if the mapping frequently changes or the number of mappings is
- * high.  Nonetheless, this may seems very weird.  DAMON's dynamic regions
- * adjustment mechanism, which will be implemented with following commit will
- * make this more sense.
+ * high.  The adaptive regions adjustment mechanism will further help to deal
+ * with the noise by simply identifying the unmapped areas as a region that
+ * has no access.  Moreover, applying the real mappings that would have many
+ * unmapped areas inside will make the adaptive mechanism quite complex.  That
+ * said, too huge unmapped areas inside the monitoring target should be removed
+ * to not take the time for the adaptive mechanism.
  *
  * For the reason, we convert the complex mappings to three distinct regions
  * that cover every mapped area of the address space.  Also the two gaps
@@ -508,20 +511,25 @@ static void damon_check_access(struct damon_ctx *ctx,
last_addr = r->sampling_addr;
 }
 
-static void kdamond_check_accesses(struct damon_ctx *ctx)
+static unsigned int kdamond_check_accesses(struct damon_ctx *ctx)
 {
struct damon_task *t;
struct mm_struct *mm;
struct damon_region *r;
+   unsigned int max_nr_accesses = 0;
 
damon_for_each_task(t, ctx) {
mm = damon_get_mm(t);
if (!mm)
continue;
-   damon_for_each_region(r, t)
+   damon_for_each_region(r, t) {
damon_check_access(ctx, mm, r);
+   max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+   }
+
mmput(mm);
}
+   return max_nr_accesses;
 }
 
 /**
@@ -570,6 +578,125 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
}
 }
 
+#define sz_damon_region(r) (r->vm_end - r->vm_start)
+
+/*
+ * Merge two adjacent regions into one region
+ */
+static void damon_merge_two_regions(struct damon_region *l,
+   struct damon_region *r)
+{
+   l->nr_accesses = (l->nr_accesses * sz_damon_region(l) +
+   r->nr_accesses * sz_damon_region(r)) /
+   (sz_damon_region(l) + sz_damon_region(r));
+   l->vm_end = r->vm_end;
+   damon_destroy_region(r);
+}
+
+#define diff_of(a, b) (a > b ? a - b : b - a)
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * t   task affected by merge operation
+ * thres   '->nr_accesses' diff threshold for the merge
+ */
+static v

[PATCH v12 04/16] mm/damon: Implement region based sampling

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit implements DAMON's basic access check and region based
sampling mechanisms.  This change might seem to make no sense, mainly
because it is only a part of the DAMON's logics.  Following two commits
will make more sense.

Basic Access Check
------------------

DAMON basically reports what pages are how frequently accessed.  Note
that the frequency is not an absolute number of accesses, but a relative
frequency among the pages of the target workloads.

Users can control the resolution of the reports by setting two time
intervals, ``sampling interval`` and ``aggregation interval``.  In
detail, DAMON checks access to each page per ``sampling interval``,
aggregates the results (counts the number of the accesses to each page),
and reports the aggregated results per ``aggregation interval``.  For
the access check of each page, DAMON uses the Accessed bits of PTEs.

This is thus similar to common periodic access checks based access
tracking mechanisms, whose overhead increases as the size of the
target process grows.

Region Based Sampling
---------------------

To avoid the unbounded increase of the overhead, DAMON groups a number
of adjacent pages that are assumed to have the same access frequencies into a
region.  As long as the assumption (pages in a region have same access
frequencies) is kept, only one page in the region is required to be
checked.  Thus, for each ``sampling interval``, DAMON randomly picks one
page in each region and clears its Accessed bit.  After one more
``sampling interval``, DAMON reads the Accessed bit of the page and
increases the access frequency of the region if the bit has been set
meanwhile.  Therefore, the monitoring overhead is controllable by
setting the number of regions.

Nonetheless, this scheme cannot preserve the quality of the output if
the assumption is not kept.  Following commit will introduce how we can
make the guarantee with best effort.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  25 ++
 mm/damon.c| 597 ++
 2 files changed, 622 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 19f411d36c0d..558dd6ae0afa 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -11,6 +11,8 @@
 #define _DAMON_H_
 
 #include 
+#include 
+#include 
 #include 
 
 /* Represents a monitoring target region of [vm_start, vm_end) */
@@ -29,8 +31,31 @@ struct damon_task {
struct list_head list;
 };
 
+/*
+ * For each 'sample_interval', DAMON checks whether each region is accessed or
+ * not.  It aggregates and keeps the access information (number of accesses to
+ * each region) for each 'aggr_interval' time.
+ *
+ * All time intervals are in micro-seconds.
+ */
 struct damon_ctx {
+   unsigned long sample_interval;
+   unsigned long aggr_interval;
+   unsigned long min_nr_regions;
+
+   struct timespec64 last_aggregation;
+
+   struct task_struct *kdamond;
+   bool kdamond_stop;
+   struct mutex kdamond_lock;
+
struct list_head tasks_list;/* 'damon_task' objects */
 };
 
+int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+   unsigned long aggr_int, unsigned long min_nr_reg);
+int damon_start(struct damon_ctx *ctx);
+int damon_stop(struct damon_ctx *ctx);
+
 #endif
diff --git a/mm/damon.c b/mm/damon.c
index a5a7820ef0ad..ca1eeb51c7d0 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -10,10 +10,19 @@
 #define pr_fmt(fmt) "damon: " fmt
 
 #include 
+#include 
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include 
 
+/* Minimal region size.  Every damon_region is aligned by this. */
+#define MIN_REGION PAGE_SIZE
+
 #define damon_get_task_struct(t) \
(get_pid_task(find_vpid(t->pid), PIDTYPE_PID))
 
@@ -156,6 +165,594 @@ static unsigned int nr_damon_regions(struct damon_task *t)
return nr_regions;
 }
 
+/*
+ * Get the mm_struct of the given task
+ *
+ * Caller _must_ put the mm_struct after use, unless it is NULL.
+ *
+ * Returns the mm_struct of the task on success, NULL on failure
+ */
+static struct mm_struct *damon_get_mm(struct damon_task *t)
+{
+   struct task_struct *task;
+   struct mm_struct *mm;
+
+   task = damon_get_task_struct(t);
+   if (!task)
+   return NULL;
+
+   mm = get_task_mm(task);
+   put_task_struct(task);
+   return mm;
+}
+
+/*
+ * Size-evenly split a region into 'nr_pieces' small regions
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
+static int damon_split_region_evenly(struct damon_ctx *ctx,
+   struct damon_region *r, unsigned int nr_pieces)
+{
+   unsigned long sz_orig, sz_piece, orig_end;
+   struct damon_region *n = NULL, *next;
+   unsigned long start;
+
+   if (!r || !nr_pieces)
+   return -EINVAL;
+
+   orig_end = r->vm_end;

[PATCH v12 06/16] mm/damon: Split regions into 3 subregions if necessary

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

Suppose that there are a very wide and cold region and a hot region, and
both regions are identified by DAMON.  And then, the middle small region
inside the very wide and cold region becomes hot.  DAMON will not be
able to identify this new region because the adaptive regions adjustment
mechanism splits each region to only two subregions.

This commit modifies the logic to split each region to 3 subregions if
such corner case is detected.

Suggested-by: James Cameron 
Signed-off-by: SeongJae Park 
---
 mm/damon.c | 51 ++-
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/mm/damon.c b/mm/damon.c
index 6c9581104caf..bbd5b1d921da 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -650,26 +650,32 @@ static void damon_split_region_at(struct damon_ctx *ctx,
damon_insert_region(new, r, damon_next_region(r));
 }
 
-/* Split every region in the given task into two randomly-sized regions */
-static void damon_split_regions_of(struct damon_ctx *ctx, struct damon_task *t)
+/* Split every region in the given task into 'nr_subs' regions */
+static void damon_split_regions_of(struct damon_ctx *ctx,
+struct damon_task *t, int nr_subs)
 {
struct damon_region *r, *next;
-   unsigned long sz_orig_region, sz_left_region;
+   unsigned long sz_region, sz_sub = 0;
+   int i;
 
damon_for_each_region_safe(r, next, t) {
-   sz_orig_region = r->vm_end - r->vm_start;
-
-   /*
-* Randomly select size of left sub-region to be at least
-* 10 percent and at most 90% of original region
-*/
-   sz_left_region = ALIGN_DOWN(damon_rand(1, 10) * sz_orig_region
-   / 10, MIN_REGION);
-   /* Do not allow blank region */
-   if (sz_left_region == 0 || sz_left_region >= sz_orig_region)
-   continue;
-
-   damon_split_region_at(ctx, r, sz_left_region);
+   sz_region = r->vm_end - r->vm_start;
+
+   for (i = 0; i < nr_subs - 1 &&
+   sz_region > 2 * MIN_REGION; i++) {
+   /*
+* Randomly select size of left sub-region to be at
+* least 10 percent and at most 90% of original region
+*/
+   sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
+   sz_region / 10, MIN_REGION);
+   /* Do not allow blank region */
+   if (sz_sub == 0 || sz_sub >= sz_region)
+   continue;
+
+   damon_split_region_at(ctx, r, sz_sub);
+   sz_region = sz_sub;
+   }
}
 }
 
@@ -687,14 +693,25 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
 {
struct damon_task *t;
unsigned int nr_regions = 0;
+   static unsigned int last_nr_regions;
+   int nr_subregions = 2;
 
damon_for_each_task(t, ctx)
nr_regions += nr_damon_regions(t);
+
if (nr_regions > ctx->max_nr_regions / 2)
return;
 
+   /* If number of regions is not changed, we are maybe in corner case */
+   if (last_nr_regions == nr_regions &&
+   nr_regions < ctx->max_nr_regions / 3)
+   nr_subregions = 3;
+
damon_for_each_task(t, ctx)
-   damon_split_regions_of(ctx, t);
+   damon_split_regions_of(ctx, t, nr_subregions);
+
+   if (!last_nr_regions)
+   last_nr_regions = nr_regions;
 }
 
 /*
-- 
2.17.1



[PATCH v12 07/16] mm/damon: Apply dynamic memory mapping changes

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

Only some parts of the virtual address space of the processes are
mapped to physical memory and accessed.  Thus, tracking the unmapped
address regions is just wasteful.  However, tracking every memory
mapping change might incur a high overhead.  For this reason, DAMON
applies the dynamic memory mapping changes to the tracking regions only
once per user-specified time interval (``regions update interval``).

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  10 +++--
 mm/damon.c| 101 +-
 2 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index f1c3f491fc50..62b9f90ed87b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -34,17 +34,21 @@ struct damon_task {
 /*
  * For each 'sample_interval', DAMON checks whether each region is accessed or
  * not.  It aggregates and keeps the access information (number of accesses to
- * each region) for each 'aggr_interval' time.
+ * each region) for 'aggr_interval' time.  DAMON also checks whether the memory
+ * mapping of the target tasks has changed (e.g., by mmap() calls from the
+ * application) and applies the changes for each 'regions_update_interval'.
  *
  * All time intervals are in micro-seconds.
  */
 struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
+   unsigned long regions_update_interval;
unsigned long min_nr_regions;
unsigned long max_nr_regions;
 
struct timespec64 last_aggregation;
+   struct timespec64 last_regions_update;
 
struct task_struct *kdamond;
bool kdamond_stop;
@@ -54,8 +58,8 @@ struct damon_ctx {
 };
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
-int damon_set_attrs(struct damon_ctx *ctx,
-   unsigned long sample_int, unsigned long aggr_int,
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+   unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
diff --git a/mm/damon.c b/mm/damon.c
index bbd5b1d921da..38a8a68b7beb 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -714,6 +714,98 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
last_nr_regions = nr_regions;
 }
 
+/*
+ * Check whether it is time to check and apply the dynamic mmap changes
+ *
+ * Returns true if it is.
+ */
+static bool kdamond_need_update_regions(struct damon_ctx *ctx)
+{
+   return damon_check_reset_time_interval(>last_regions_update,
+   ctx->regions_update_interval);
+}
+
+/*
+ * Check whether regions are intersecting
+ *
+ * Note that this function checks 'struct damon_region' and 'struct region'.
+ *
+ * Returns true if it is.
+ */
+static bool damon_intersect(struct damon_region *r, struct region *re)
+{
+   return !(r->vm_end <= re->start || re->end <= r->vm_start);
+}
+
+/*
+ * Update damon regions for the three big regions of the given task
+ *
+ * t   the given task
+ * bregionsthe three big regions of the task
+ */
+static void damon_apply_three_regions(struct damon_ctx *ctx,
+   struct damon_task *t, struct region bregions[3])
+{
+   struct damon_region *r, *next;
+   unsigned int i = 0;
+
+   /* Remove regions which are not in the three big regions now */
+   damon_for_each_region_safe(r, next, t) {
+   for (i = 0; i < 3; i++) {
+   if (damon_intersect(r, [i]))
+   break;
+   }
+   if (i == 3)
+   damon_destroy_region(r);
+   }
+
+   /* Adjust intersecting regions to fit with the three big regions */
+   for (i = 0; i < 3; i++) {
+   struct damon_region *first = NULL, *last;
+   struct damon_region *newr;
+   struct region *br;
+
+   br = [i];
+   /* Get the first and last regions which intersects with br */
+   damon_for_each_region(r, t) {
+   if (damon_intersect(r, br)) {
+   if (!first)
+   first = r;
+   last = r;
+   }
+   if (r->vm_start >= br->end)
+   break;
+   }
+   if (!first) {
+   /* no damon_region intersects with this big region */
+   newr = damon_new_region(ctx,
+   ALIGN_DOWN(br->start, MIN_REGION),
+   ALIGN(br->end, MIN_REGION));
+   if (!newr)
+   continue;
+   damo

[PATCH v12 08/16] mm/damon: Implement callbacks

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit implements callbacks for DAMON.  Using this, DAMON users can
install their callbacks for each step of the access monitoring so that
they can do something interesting with the monitored access patterns
online.  For example, callbacks can report the monitored patterns to
users or do some access pattern based memory management such as
proactive reclamations or access pattern based THP promotions/demotions
decision makings.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h | 4 
 mm/damon.c| 4 
 2 files changed, 8 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 62b9f90ed87b..264569b21502 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -55,6 +55,10 @@ struct damon_ctx {
struct mutex kdamond_lock;
 
struct list_head tasks_list;/* 'damon_task' objects */
+
+   /* callbacks */
+   void (*sample_cb)(struct damon_ctx *context);
+   void (*aggregate_cb)(struct damon_ctx *context);
 };
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
diff --git a/mm/damon.c b/mm/damon.c
index 38a8a68b7beb..498160b5e086 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -851,6 +851,8 @@ static int kdamond_fn(void *data)
kdamond_init_regions(ctx);
while (!kdamond_need_stop(ctx)) {
kdamond_prepare_access_checks(ctx);
+   if (ctx->sample_cb)
+   ctx->sample_cb(ctx);
 
usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
 
@@ -858,6 +860,8 @@ static int kdamond_fn(void *data)
 
if (kdamond_aggregate_interval_passed(ctx)) {
kdamond_merge_regions(ctx, max_nr_accesses / 10);
+   if (ctx->aggregate_cb)
+   ctx->aggregate_cb(ctx);
kdamond_reset_aggregated(ctx);
kdamond_split_regions(ctx);
}
-- 
2.17.1



[PATCH v12 09/16] mm/damon: Implement access pattern recording

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit implements the recording feature of DAMON.  If this feature
is enabled, DAMON writes the monitored access patterns in its binary
format into a file which is specified by the user.  Each user could
already implement this using the callbacks.  However, as the recording
is expected to be widely used, this commit implements the feature in
DAMON itself, for more convenience and efficiency.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   7 +++
 mm/damon.c| 131 +-
 2 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 264569b21502..bc46ea00e9a1 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -50,6 +50,11 @@ struct damon_ctx {
struct timespec64 last_aggregation;
struct timespec64 last_regions_update;
 
+   unsigned char *rbuf;
+   unsigned int rbuf_len;
+   unsigned int rbuf_offset;
+   char *rfile_path;
+
struct task_struct *kdamond;
bool kdamond_stop;
struct mutex kdamond_lock;
@@ -65,6 +70,8 @@ int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t 
nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_set_recording(struct damon_ctx *ctx,
+   unsigned int rbuf_len, char *rfile_path);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
 
diff --git a/mm/damon.c b/mm/damon.c
index 498160b5e086..ddd78843f79a 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -44,6 +44,9 @@
 #define damon_for_each_task_safe(t, next, ctx) \
list_for_each_entry_safe(t, next, &(ctx)->tasks_list, list)
 
+#define MAX_RECORD_BUFFER_LEN  (4 * 1024 * 1024)
+#define MAX_RFILE_PATH_LEN 256
+
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
@@ -565,16 +568,80 @@ static bool kdamond_aggregate_interval_passed(struct 
damon_ctx *ctx)
 }
 
 /*
- * Reset the aggregated monitoring results
+ * Flush the content in the result buffer to the result file
+ */
+static void damon_flush_rbuffer(struct damon_ctx *ctx)
+{
+   ssize_t sz;
+   loff_t pos = 0;
+   struct file *rfile;
+
+   rfile = filp_open(ctx->rfile_path, O_CREAT | O_RDWR | O_APPEND, 0644);
+   if (IS_ERR(rfile)) {
+   pr_err("Cannot open the result file %s\n",
+   ctx->rfile_path);
+   return;
+   }
+
+   while (ctx->rbuf_offset) {
+   sz = kernel_write(rfile, ctx->rbuf, ctx->rbuf_offset, );
+   if (sz < 0)
+   break;
+   ctx->rbuf_offset -= sz;
+   }
+   filp_close(rfile, NULL);
+}
+
+/*
+ * Write a data into the result buffer
+ */
+static void damon_write_rbuf(struct damon_ctx *ctx, void *data, ssize_t size)
+{
+   if (!ctx->rbuf_len || !ctx->rbuf)
+   return;
+   if (ctx->rbuf_offset + size > ctx->rbuf_len)
+   damon_flush_rbuffer(ctx);
+
+   memcpy(>rbuf[ctx->rbuf_offset], data, size);
+   ctx->rbuf_offset += size;
+}
+
+/*
+ * Flush the aggregated monitoring results to the result buffer
+ *
+ * Stores current tracking results to the result buffer and reset 'nr_accesses'
+ * of each region.  The format for the result buffer is as below:
+ *
+ * 
+ *
+ *   task info:   
+ *   region info:   
  */
 static void kdamond_reset_aggregated(struct damon_ctx *c)
 {
struct damon_task *t;
-   struct damon_region *r;
+   struct timespec64 now;
+   unsigned int nr;
+
+   ktime_get_coarse_ts64();
+
+   damon_write_rbuf(c, , sizeof(struct timespec64));
+   nr = nr_damon_tasks(c);
+   damon_write_rbuf(c, , sizeof(nr));
 
damon_for_each_task(t, c) {
-   damon_for_each_region(r, t)
+   struct damon_region *r;
+
+   damon_write_rbuf(c, >pid, sizeof(t->pid));
+   nr = nr_damon_regions(t);
+   damon_write_rbuf(c, , sizeof(nr));
+   damon_for_each_region(r, t) {
+   damon_write_rbuf(c, >vm_start, sizeof(r->vm_start));
+   damon_write_rbuf(c, >vm_end, sizeof(r->vm_end));
+   damon_write_rbuf(c, >nr_accesses,
+   sizeof(r->nr_accesses));
r->nr_accesses = 0;
+   }
}
 }
 
@@ -837,6 +904,14 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)
return true;
 }
 
+static void kdamond_write_record_header(struct damon_ctx *ctx)
+{
+   int recfmt_ver = 1;
+
+   damon_write_rbuf(ctx, "damon_recfmt_ver", 16);
+   damon_write_r

[PATCH v12 10/16] mm/damon: Add debugfs interface

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds a debugfs interface for DAMON.

DAMON exports four files, ``attrs``, ``pids``, ``record``, and
``monitor_on`` under its debugfs directory, ``<debugfs>/damon/``.

Attributes
--

Users can read and write the ``sampling interval``, ``aggregation
interval``, ``regions update interval``, and min/max number of
monitoring target regions by reading from and writing to the ``attrs``
file.  For example, below commands set those values to 5 ms, 100 ms,
1,000 ms, 10, 1000 and check it again::

# cd /damon
# echo 5000 100000 1000000 10 1000 > attrs
# cat attrs
5000 100000 1000000 10 1000

Target PIDs
---

Users can read and write the pids of current monitoring target processes
by reading from and writing to the ``pids`` file.  For example, below
commands set processes having pids 42 and 4242 as the processes to be
monitored and check it again::

# cd /damon
# echo 42 4242 > pids
# cat pids
42 4242

Note that setting the pids doesn't start the monitoring.

Record
--

DAMON supports direct monitoring result record feature.  The recorded
results are first written to a buffer and flushed to a file in batch.
Users can set the size of the buffer and the path to the result file by
reading from and writing to the ``record`` file.  For example, below
commands set the buffer to be 4 KiB and the result to be saved in
'/damon.data'.

# cd /damon
# echo 4096 /damon.data > record
# cat record
4096 /damon.data

Turning On/Off
--

You can check current status, start and stop the monitoring by reading
from and writing to the ``monitor_on`` file.  Writing ``on`` to the file
starts DAMON to monitor the target processes with the attributes.
Writing ``off`` to the file stops DAMON.  DAMON also stops if every
target process is terminated.  Below example commands turn on, off,
and check status of DAMON::

# cd /damon
# echo on > monitor_on
# echo off > monitor_on
# cat monitor_on
off

Please note that you cannot write to the ``attrs`` and ``pids`` files
while the monitoring is turned on.  If you write to the files while
DAMON is running, ``-EINVAL`` will be returned.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 351 -
 1 file changed, 350 insertions(+), 1 deletion(-)

diff --git a/mm/damon.c b/mm/damon.c
index ddd78843f79a..f31310536c79 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -10,6 +10,7 @@
 #define pr_fmt(fmt) "damon: " fmt
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,6 +51,15 @@
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
+/* A monitoring context for debugfs interface users. */
+static struct damon_ctx damon_user_ctx = {
+   .sample_interval = 5 * 1000,
+   .aggr_interval = 100 * 1000,
+   .regions_update_interval = 1000 * 1000,
+   .min_nr_regions = 10,
+   .max_nr_regions = 1000,
+};
+
 /*
  * Construct a damon_region struct
  *
@@ -1134,13 +1144,352 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned 
long sample_int,
return 0;
 }
 
-static int __init damon_init(void)
+static ssize_t debugfs_monitor_on_read(struct file *file,
+   char __user *buf, size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char monitor_on_buf[5];
+   bool monitor_on;
+   int len;
+
+   monitor_on = damon_kdamond_running(ctx);
+   len = snprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n");
+
+   return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len);
+}
+
+static ssize_t debugfs_monitor_on_write(struct file *file,
+   const char __user *buf, size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   ssize_t ret;
+   char cmdbuf[5];
+   int err;
+
+   ret = simple_write_to_buffer(cmdbuf, 5, ppos, buf, count);
+   if (ret < 0)
+   return ret;
+
+   if (sscanf(cmdbuf, "%s", cmdbuf) != 1)
+   return -EINVAL;
+   if (!strncmp(cmdbuf, "on", 5))
+   err = damon_start(ctx);
+   else if (!strncmp(cmdbuf, "off", 5))
+   err = damon_stop(ctx);
+   else
+   return -EINVAL;
+
+   if (err)
+   ret = err;
+   return ret;
+}
+
+static ssize_t damon_sprint_pids(struct damon_ctx *ctx, char *buf, ssize_t len)
+{
+   struct damon_task *t;
+   int written = 0;
+   int rc;
+
+   damon_for_each_task(t, ctx) {
+   rc = snprintf([written], len - written, "%d ", t->pid);
+   if (!rc)
+   return -ENOMEM;
+   written += rc;
+   }
+   if (written)
+   written -= 1;
+   written += snprintf([written], len - written, "\n");
+   return written;
+}
+
+static ssize_t debugfs_pids_re

[PATCH v12 11/16] mm/damon: Add tracepoints

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds a tracepoint for DAMON.  It traces the monitoring
results of each region for each aggregation interval.  Using this, DAMON
can be easily integrated with any tool that supports tracepoints, such
as perf.

Signed-off-by: SeongJae Park 
---
 include/trace/events/damon.h | 43 
 mm/damon.c   |  5 +
 2 files changed, 48 insertions(+)
 create mode 100644 include/trace/events/damon.h

diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
new file mode 100644
index ..22236642d366
--- /dev/null
+++ b/include/trace/events/damon.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM damon
+
+#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DAMON_H
+
+#include 
+#include 
+
+TRACE_EVENT(damon_aggregated,
+
+   TP_PROTO(int pid, unsigned int nr_regions,
+   unsigned long vm_start, unsigned long vm_end,
+   unsigned int nr_accesses),
+
+   TP_ARGS(pid, nr_regions, vm_start, vm_end, nr_accesses),
+
+   TP_STRUCT__entry(
+   __field(int, pid)
+   __field(unsigned int, nr_regions)
+   __field(unsigned long, vm_start)
+   __field(unsigned long, vm_end)
+   __field(unsigned int, nr_accesses)
+   ),
+
+   TP_fast_assign(
+   __entry->pid = pid;
+   __entry->nr_regions = nr_regions;
+   __entry->vm_start = vm_start;
+   __entry->vm_end = vm_end;
+   __entry->nr_accesses = nr_accesses;
+   ),
+
+   TP_printk("pid=%d nr_regions=%u %lu-%lu: %u", __entry->pid,
+   __entry->nr_regions, __entry->vm_start,
+   __entry->vm_end, __entry->nr_accesses)
+);
+
+#endif /* _TRACE_DAMON_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/mm/damon.c b/mm/damon.c
index f31310536c79..6fd55aec5275 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -9,6 +9,8 @@
 
 #define pr_fmt(fmt) "damon: " fmt
 
+#define CREATE_TRACE_POINTS
+
 #include 
 #include 
 #include 
@@ -20,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Minimal region size.  Every damon_region is aligned by this. */
 #define MIN_REGION PAGE_SIZE
@@ -650,6 +653,8 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
damon_write_rbuf(c, >vm_end, sizeof(r->vm_end));
damon_write_rbuf(c, >nr_accesses,
sizeof(r->nr_accesses));
+   trace_damon_aggregated(t->pid, nr,
+   r->vm_start, r->vm_end, r->nr_accesses);
r->nr_accesses = 0;
}
}
-- 
2.17.1



[PATCH v12 12/16] tools: Add a minimal user-space tool for DAMON

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds a shallow wrapper python script, ``/tools/damon/damo``,
that provides a more convenient interface.  Note that it is only intended
to be used as a minimal reference for DAMON's debugfs interfaces and for
debugging DAMON itself.

Signed-off-by: SeongJae Park 
---
 tools/damon/.gitignore|   1 +
 tools/damon/_dist.py  |  36 
 tools/damon/_recfile.py   |  23 +++
 tools/damon/bin2txt.py|  67 +++
 tools/damon/damo  |  37 
 tools/damon/heats.py  | 362 ++
 tools/damon/nr_regions.py |  91 ++
 tools/damon/record.py | 212 ++
 tools/damon/report.py |  45 +
 tools/damon/wss.py|  97 ++
 10 files changed, 971 insertions(+)
 create mode 100644 tools/damon/.gitignore
 create mode 100644 tools/damon/_dist.py
 create mode 100644 tools/damon/_recfile.py
 create mode 100644 tools/damon/bin2txt.py
 create mode 100755 tools/damon/damo
 create mode 100644 tools/damon/heats.py
 create mode 100644 tools/damon/nr_regions.py
 create mode 100644 tools/damon/record.py
 create mode 100644 tools/damon/report.py
 create mode 100644 tools/damon/wss.py

diff --git a/tools/damon/.gitignore b/tools/damon/.gitignore
new file mode 100644
index ..96403d36ff93
--- /dev/null
+++ b/tools/damon/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/tools/damon/_dist.py b/tools/damon/_dist.py
new file mode 100644
index ..9851ec964e5c
--- /dev/null
+++ b/tools/damon/_dist.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import struct
+import subprocess
+
+def access_patterns(f):
+nr_regions = struct.unpack('I', f.read(4))[0]
+
+patterns = []
+for r in range(nr_regions):
+saddr = struct.unpack('L', f.read(8))[0]
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+patterns.append([eaddr - saddr, nr_accesses])
+return patterns
+
+def plot_dist(data_file, output_file, xlabel, ylabel):
+terminal = output_file.split('.')[-1]
+if not terminal in ['pdf', 'jpeg', 'png', 'svg']:
+os.remove(data_file)
+print("Unsupported plot output type.")
+exit(-1)
+
+gnuplot_cmd = """
+set term %s;
+set output '%s';
+set key off;
+set xlabel '%s';
+set ylabel '%s';
+plot '%s' with linespoints;""" % (terminal, output_file, xlabel, ylabel,
+data_file)
+subprocess.call(['gnuplot', '-e', gnuplot_cmd])
+os.remove(data_file)
+
diff --git a/tools/damon/_recfile.py b/tools/damon/_recfile.py
new file mode 100644
index ..331b4d8165d8
--- /dev/null
+++ b/tools/damon/_recfile.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import struct
+
+fmt_version = 0
+
+def set_fmt_version(f):
+global fmt_version
+
+mark = f.read(16)
+if mark == b'damon_recfmt_ver':
+fmt_version = struct.unpack('i', f.read(4))[0]
+else:
+fmt_version = 0
+f.seek(0)
+return fmt_version
+
+def pid(f):
+if fmt_version == 0:
+return struct.unpack('L', f.read(8))[0]
+else:
+return struct.unpack('i', f.read(4))[0]
diff --git a/tools/damon/bin2txt.py b/tools/damon/bin2txt.py
new file mode 100644
index ..8b9b57a0d727
--- /dev/null
+++ b/tools/damon/bin2txt.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import os
+import struct
+import sys
+
+import _recfile
+
+def parse_time(bindat):
+"bindat should be 16 bytes"
+sec = struct.unpack('l', bindat[0:8])[0]
+nsec = struct.unpack('l', bindat[8:16])[0]
+return sec * 10 + nsec;
+
+def pr_region(f):
+saddr = struct.unpack('L', f.read(8))[0]
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+print("%012x-%012x(%10d):\t%d" %
+(saddr, eaddr, eaddr - saddr, nr_accesses))
+
+def pr_task_info(f):
+pid = _recfile.pid(f)
+print("pid: ", pid)
+nr_regions = struct.unpack('I', f.read(4))[0]
+print("nr_regions: ", nr_regions)
+for r in range(nr_regions):
+pr_region(f)
+
+def set_argparser(parser):
+parser.add_argument('--input', '-i', type=str, metavar='',
+default='damon.data', help='input file name')
+
+def main(args=None):
+if not args:
+parser = argparse.ArgumentParser()
+set_argparser(parser)
+args = parser.parse_args()
+
+file_path = args.input
+
+if not os.path.isfile(file_path):
+print('input file (%s) is not exist' % file_path)
+exit(1)
+
+with open(file_path, 'rb') as f:
+_recfile.set_fmt_version(f)
+start_time = None
+while True:
+timebin = f.read(16)
+if len(timebin) != 16:
+

[PATCH v12 14/16] mm/damon: Add kunit tests

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds kunit based unit tests for DAMON.

Signed-off-by: SeongJae Park 
Reviewed-by: Brendan Higgins 
---
 mm/Kconfig  |  11 +
 mm/damon-test.h | 622 
 mm/damon.c  |   6 +
 3 files changed, 639 insertions(+)
 create mode 100644 mm/damon-test.h

diff --git a/mm/Kconfig b/mm/Kconfig
index 9ea49633a6df..81ace52f1c23 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -751,4 +751,15 @@ config DAMON
  and 2) sufficiently light-weight so that it can be applied online.
  If unsure, say N.
 
+config DAMON_KUNIT_TEST
+   bool "Test for damon"
+   depends on DAMON=y && KUNIT
+   help
+ This builds the DAMON Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
 endmenu
diff --git a/mm/damon-test.h b/mm/damon-test.h
new file mode 100644
index ..cf715529ff64
--- /dev/null
+++ b/mm/damon-test.h
@@ -0,0 +1,622 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.  All rights reserved.
+ *
+ * Author: SeongJae Park 
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_TEST_H
+#define _DAMON_TEST_H
+
+#include 
+
+static void damon_test_str_to_pids(struct kunit *test)
+{
+   char *question;
+   int *answers;
+   int expected[] = {12, 35, 46};
+   ssize_t nr_integers = 0, i;
+
+   question = "123";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "123abc";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "a123";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, answers, (int *)NULL);
+
+   question = "12 35";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 46";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 abc 46";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < 2; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+
+   question = "\n";
+   answers = str_to_pids(question, strnlen(question, 128), _integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+}
+
+static void damon_test_regions(struct kunit *test)
+{
+   struct damon_region *r;
+   struct damon_task *t;
+
+   r = damon_new_region(_user_ctx, 1, 2);
+   KUNIT_EXPECT_EQ(test, 1ul, r->vm_start);
+   KUNIT_EXPECT_EQ(test, 2ul, r->vm_end);
+   KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_add_region(r, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_regions(t));
+
+   damon_del_region(r);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_free_task(t);
+}
+
+static void damon_test_tasks(struct kunit *test)
+{
+   struct damon_ctx *c = _user_ctx;
+   struct damon_task *t;
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 42, t->pid);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+
+   damon_add_task(_user_ctx, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_tasks(c));
+
+   damon_destroy_task(t);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+}
+
+static void damon_test_set_pids(struct kunit *test)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   int pids[] = {1, 2, 3};
+   char buf[64];
+
+   damon_set_pids(ctx, pids, 3);
+   damon_sprint_pids(ctx, buf, 64);
+   KUNIT_EXP

[PATCH v12 15/16] mm/damon: Add user space selftests

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit adds simple user space tests for DAMON.  The tests use the
kselftest framework.

Signed-off-by: SeongJae Park 
---
 tools/testing/selftests/damon/Makefile|   7 +
 .../selftests/damon/_chk_dependency.sh|  28 
 tools/testing/selftests/damon/_chk_record.py  | 108 ++
 .../testing/selftests/damon/debugfs_attrs.sh  | 139 ++
 .../testing/selftests/damon/debugfs_record.sh |  50 +++
 5 files changed, 332 insertions(+)
 create mode 100644 tools/testing/selftests/damon/Makefile
 create mode 100644 tools/testing/selftests/damon/_chk_dependency.sh
 create mode 100644 tools/testing/selftests/damon/_chk_record.py
 create mode 100755 tools/testing/selftests/damon/debugfs_attrs.sh
 create mode 100755 tools/testing/selftests/damon/debugfs_record.sh

diff --git a/tools/testing/selftests/damon/Makefile 
b/tools/testing/selftests/damon/Makefile
new file mode 100644
index ..cfd5393a4639
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_FILES = _chk_dependency.sh _chk_record_file.py
+TEST_PROGS = debugfs_attrs.sh debugfs_record.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh 
b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index ..814dcadd5e96
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=/sys/kernel/debug/damon
+
+if [ $EUID -ne 0 ];
+then
+   echo "Run as root"
+   exit $ksft_skip
+fi
+
+if [ ! -d $DBGFS ]
+then
+   echo "$DBGFS not found"
+   exit $ksft_skip
+fi
+
+for f in attrs record pids monitor_on
+do
+   if [ ! -f "$DBGFS/$f" ]
+   then
+   echo "$f not found"
+   exit 1
+   fi
+done
diff --git a/tools/testing/selftests/damon/_chk_record.py 
b/tools/testing/selftests/damon/_chk_record.py
new file mode 100644
index ..5cfcf4161404
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_record.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"Check whether the DAMON record file is valid"
+
+import argparse
+import struct
+import sys
+
+fmt_version = 0
+
+def set_fmt_version(f):
+global fmt_version
+
+mark = f.read(16)
+if mark == b'damon_recfmt_ver':
+fmt_version = struct.unpack('i', f.read(4))[0]
+else:
+fmt_version = 0
+f.seek(0)
+return fmt_version
+
+def read_pid(f):
+if fmt_version == 0:
+pid = struct.unpack('L', f.read(8))[0]
+else:
+pid = struct.unpack('i', f.read(4))[0]
+def err_percent(val, expected):
+return abs(val - expected) / expected * 100
+
+def chk_task_info(f):
+pid = read_pid(f)
+nr_regions = struct.unpack('I', f.read(4))[0]
+
+if nr_regions > max_nr_regions:
+print('too many regions: %d > %d' % (nr_regions, max_nr_regions))
+exit(1)
+
+nr_gaps = 0
+eaddr = 0
+for r in range(nr_regions):
+saddr = struct.unpack('L', f.read(8))[0]
+if eaddr and saddr != eaddr:
+nr_gaps += 1
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+
+if saddr >= eaddr:
+print('wrong region [%d,%d)' % (saddr, eaddr))
+exit(1)
+
+max_nr_accesses = aint / sint
+if nr_accesses > max_nr_accesses:
+if err_percent(nr_accesses, max_nr_accesses) > 15:
+print('too high nr_access: expected %d but %d' %
+(max_nr_accesses, nr_accesses))
+exit(1)
+if nr_gaps != 2:
+print('number of gaps are not two but %d' % nr_gaps)
+exit(1)
+
+def parse_time_us(bindat):
+sec = struct.unpack('l', bindat[0:8])[0]
+nsec = struct.unpack('l', bindat[8:16])[0]
+return (sec * 10 + nsec) / 1000
+
+def main():
+global sint
+global aint
+global min_nr
+global max_nr_regions
+
+parser = argparse.ArgumentParser()
+parser.add_argument('file', metavar='',
+help='path to the record file')
+parser.add_argument('--attrs', metavar='',
+default='5000 10 100 10 1000',
+help='content of debugfs attrs file')
+args = parser.parse_args()
+file_path = args.file
+attrs = [int(x) for x in args.attrs.split()]
+sint, aint, rint, min_nr, max_nr_regions = attrs
+
+with open(file_path, 'rb') as f:
+set_fmt_version(f)
+last_aggr_time = None
+while True:
+timebin = f.read(16)
+if len(timebin) != 16:
+break
+
+now = parse_time_us(timebin)
+if not last_aggr_time:
+ 

[PATCH v12 16/16] MAINTAINERS: Update for DAMON

2020-05-18 Thread SeongJae Park
From: SeongJae Park 

This commit updates MAINTAINERS file for DAMON related files.

Signed-off-by: SeongJae Park 
---
 MAINTAINERS | 12 
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5a5332b3591d..586513e92b65 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4661,6 +4661,18 @@ F:   net/ax25/ax25_out.c
 F: net/ax25/ax25_timer.c
 F: net/ax25/sysctl_net_ax25.c
 
+DATA ACCESS MONITOR
+M: SeongJae Park 
+L: linux...@kvack.org
+S: Maintained
+F: Documentation/admin-guide/mm/damon/*
+F: include/linux/damon.h
+F: include/trace/events/damon.h
+F: mm/damon-test.h
+F: mm/damon.c
+F: tools/damon/*
+F: tools/testing/selftests/damon/*
+
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
 L: net...@vger.kernel.org
 S: Orphan
-- 
2.17.1



Upstream fixes not merged in 5.4.y

2020-06-29 Thread SeongJae Park
Hello,


With my little script, I found that the below commits in the mainline tree are
more than 1 week old and fix commits that were back-ported in v5.4..v5.4.49,
but are not merged in the stable/linux-5.4.y tree.  Do those need to be merged
in but were missed or delayed?

9210c075cef2 ("nvme-pci: avoid race between nvme_reap_pending_cqes() and 
nvme_poll()")
9fecd13202f5 ("btrfs: fix a block group ref counter leak after failure to 
remove block group")
9d964e1b82d8 ("fix a braino in "sparc32: fix register window handling in 
genregs32_[gs]et()"")
8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
6e2f83884c09 ("bnxt_en: Fix AER reset logic on 57500 chips.")
efb94790852a ("drm/panel-simple: fix connector type for LogicPD Type28 Display")
ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
8dbe4c5d5e40 ("net: dsa: bcm_sf2: Fix node reference count")
ca8826095e4d ("selftests/net: report etf errors correctly")
5a8d7f126c97 ("of: of_mdio: Correct loop scanning logic")
d35d3660e065 ("binder: fix null deref of proc->context")

The script found several more commits but I excluded those here, because those
seem not applicable to 5.4.y or fix only trivial problems.  If I'm not
following a proper process for this kind of report, please let me know.


Thanks,
SeongJae Park


Re: Re: Upstream fixes not merged in 5.4.y

2020-06-30 Thread SeongJae Park
On Mon, 29 Jun 2020 18:15:42 +0200 Greg KH  wrote:

> On Mon, Jun 29, 2020 at 04:28:05PM +0200, SeongJae Park wrote:
> > Hello,
> > 
> > 
> > With my little script, I found below commits in the mainline tree are more 
> > than
> > 1 week old and fixing commits that back-ported in v5.4..v5.4.49, but not 
> > merged
> > in the stable/linux-5.4.y tree.  Are those need to be merged in but missed 
> > or
> > dealyed?
> > 
> > 9210c075cef2 ("nvme-pci: avoid race between nvme_reap_pending_cqes() and 
> > nvme_poll()")
> > 9fecd13202f5 ("btrfs: fix a block group ref counter leak after failure to 
> > remove block group")
> > 9d964e1b82d8 ("fix a braino in "sparc32: fix register window handling in 
> > genregs32_[gs]et()"")
> > 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
> > 6e2f83884c09 ("bnxt_en: Fix AER reset logic on 57500 chips.")
> > efb94790852a ("drm/panel-simple: fix connector type for LogicPD Type28 
> > Display")
> > ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
> > ff58bbc7b970 ("ALSA: usb-audio: Fix potential use-after-free of streams")
> > 8dbe4c5d5e40 ("net: dsa: bcm_sf2: Fix node reference count")
> > ca8826095e4d ("selftests/net: report etf errors correctly")
> > 5a8d7f126c97 ("of: of_mdio: Correct loop scanning logic")
> > d35d3660e065 ("binder: fix null deref of proc->context")
> > 
> > The script found several more commits but I exclude those here, because 
> > those
> > seems not applicable on 5.4.y or fixing trivial problems only.  If I'm not
> > following a proper process for this kind of reports, please let me know.
> 
> For commits that only have a "Fixes:" tag, and not a "cc: stable..."
> tag, wait a few weeks, or a month, for us to catch up with them.  We
> usually get to them eventually, but it takes us a while as we have lots
> more to deal with by developers and maintainers that are properly
> tagging patches for this type of thing.
> 
> Some of the above commits are queued up already, but not all of them.
> I'll take a look at the list after this next round of patches go out,
> and will let you know.
> 
> And yes, we do want this type of list, it's greatly appreciated.

Appreciate your kind explanation.  I will keep those in my mind for future
reports.


Thanks,
SeongJae Park

> 
> thanks,
> 
> greg k-h


Re: Re: [RFC v11 3/8] mm/damon: Implement data access monitoring-based operation schemes

2020-06-09 Thread SeongJae Park
On Tue, 9 Jun 2020 10:47:45 +0200 David Hildenbrand  wrote:

> On 09.06.20 08:53, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > In many cases, users might use DAMON for simple data access aware
> > memory management optimizations such as applying an operation scheme to
> > a memory region of a specific size having a specific access frequency
> > for a specific time.  For example, "page out a memory region larger than
> > 100 MiB but having a low access frequency more than 10 minutes", or "Use
> > THP for a memory region larger than 2 MiB having a high access frequency
> > for more than 2 seconds".
> > 
> > To minimize users from spending their time for implementation of such
> > simple data access monitoring-based operation schemes, this commit makes
> > DAMON to handle such schemes directly.  With this commit, users can
> > simply specify their desired schemes to DAMON.
> 
> What would be the alternative? How would a solution where these policies
> are handled by user space (or inside an application?) look like?

The simplest form of the alternative solution would be doing offline data access
pattern profiling using DAMON and modifying the application source code or
system configuration based on the profiling results.

More automated alternative solution would be a daemon constructed with two
modules:

 - monitor: monitors the data access pattern of the workload via the DAMON
   debugfs interface
 - memory manager: based on the monitoring result, make appropriate memory
   management changes via mlock(), madvise(), sysctl, etc.

The daemon would be able to run inside the application process as a thread, or
outside as a standalone process.  If the daemon could not run inside the
application process, the memory management changes it could make would be
further limited, though, as mlock() and madvise() would not be available.  The
madvise_process(), which is already merged in the next tree, would be helpful
in this case.

> > 
> > Each of the schemes is composed with conditions for filtering of the
> > target memory regions and desired memory management action for the
> > target.  Specifically, the format is::
> > 
> >
> > 
> > The filtering conditions are size of memory region, number of accesses
> > to the region monitored by DAMON, and the age of the region.  The age of
> > region is incremented periodically but reset when its addresses or
> > access frequency has significantly changed or the action of a scheme was
> > applied.  For the action, current implementation supports only a few of
> > madvise() hints, ``MADV_WILLNEED``, ``MADV_COLD``, ``MADV_PAGEOUT``,
> > ``MADV_HUGEPAGE``, and ``MADV_NOHUGEPAGE``.
> 
> I am missing some important information. Is this specified for *all*
> user space processes? Or how is this configured? What are examples?
> 
> E.g., messing with ``MADV_HUGEPAGE`` vs. ``MADV_NOHUGEPAGE`` of random
> applications can change the behavior/break these applications. (e.g., if
> userfaultfd is getting used and the applciation explicitly sets
> MADV_NOHUGEPAGE).

The schemes will be applied only to the monitoring target processes.  The
monitoring target processes can be specified by writing the process ids to the
'pids' debugfs file or constructing the 'struct damon_ctx' via the programming
interface.

I will refine the commit message to make the points clearer, in the next spin.

[...]
> 
> 
> -- 
> Thanks,
> 
> David / dhildenb


[PATCH] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit recommends the patches to replace 'blacklist' and
'whitelist' with the 'blocklist' and 'allowlist', because the new
suggestions are incontrovertible, don't hurt people, and are more
self-explanatory.

Signed-off-by: SeongJae Park 

cr https://code.amazon.com/reviews/CR-27247203
---
 scripts/spelling.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index d9cd24cf0d40..ea785568d8b8 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -230,6 +230,7 @@ beter||better
 betweeen||between
 bianries||binaries
 bitmast||bitmask
+blacklist||blocklist
 boardcast||broadcast
 borad||board
 boundry||boundary
@@ -1495,6 +1496,7 @@ whcih||which
 whenver||whenever
 wheter||whether
 whe||when
+whitelist||allowlist
 wierd||weird
 wiil||will
 wirte||write
-- 
2.17.1



Re: [PATCH] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-09 Thread SeongJae Park
On Tue, 9 Jun 2020 14:18:43 +0200 SeongJae Park  wrote:

> From: SeongJae Park 
> 
> This commit recommends the patches to replace 'blacklist' and
> 'whitelist' with the 'blocklist' and 'allowlist', because the new
> suggestions are incontrovertible, doesn't make people hurt, and more
> self-explanatory.
> 
> Signed-off-by: SeongJae Park 
> 
> cr https://code.amazon.com/reviews/CR-27247203

Oops, sorry for leaving this unnecessary text.  Will post the 2nd version soon,
please ignore this patch.


Thanks,
SeongJae Park

> ---
>  scripts/spelling.txt | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/scripts/spelling.txt b/scripts/spelling.txt
> index d9cd24cf0d40..ea785568d8b8 100644
> --- a/scripts/spelling.txt
> +++ b/scripts/spelling.txt
> @@ -230,6 +230,7 @@ beter||better
>  betweeen||between
>  bianries||binaries
>  bitmast||bitmask
> +blacklist||blocklist
>  boardcast||broadcast
>  borad||board
>  boundry||boundary
> @@ -1495,6 +1496,7 @@ whcih||which
>  whenver||whenever
>  wheter||whether
>  whe||when
> +whitelist||allowlist
>  wierd||weird
>  wiil||will
>  wirte||write
> -- 
> 2.17.1
> 


[PATCH v2] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit recommends the patches to replace 'blacklist' and
'whitelist' with the 'blocklist' and 'allowlist', because the new
suggestions are incontrovertible, don't hurt people, and are more
self-explanatory.

Signed-off-by: SeongJae Park 
---
 scripts/spelling.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index d9cd24cf0d40..ea785568d8b8 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -230,6 +230,7 @@ beter||better
 betweeen||between
 bianries||binaries
 bitmast||bitmask
+blacklist||blocklist
 boardcast||broadcast
 borad||board
 boundry||boundary
@@ -1495,6 +1496,7 @@ whcih||which
 whenver||whenever
 wheter||whether
 whe||when
+whitelist||allowlist
 wierd||weird
 wiil||will
 wirte||write
-- 
2.17.1



Re: Re: [PATCH v2] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-09 Thread SeongJae Park
On Tue, 9 Jun 2020 14:56:07 +0200 Greg KH  wrote:

> On Tue, Jun 09, 2020 at 02:25:49PM +0200, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > This commit recommends the patches to replace 'blacklist' and
> > 'whitelist' with the 'blocklist' and 'allowlist', because the new
> > suggestions are incontrovertible, doesn't make people hurt, and more
> > self-explanatory.
> > 
> > Signed-off-by: SeongJae Park 
> > ---
> >  scripts/spelling.txt | 2 ++
> >  1 file changed, 2 insertions(+)
> 
> 
> 
> This is not the correct way to submit patches for inclusion in the
> stable kernel tree.  Please read:
> https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
> for how to do this properly.

Sorry, I only mistakenly Cc-ed sta...@vger.kernel.org.  Please don't pull this
in the stable tree.


Thanks,
SeongJae Park

> 
> 
> 


[RFC v3 01/10] mm/damon: Use vm-independent address range concept

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

DAMON's main idea is not limited to virtual address space.  To prepare
for further expansion of the support for other address spaces including
physical memory, this commit modifies one of its core structs, 'struct
damon_region', to use a virtual memory independent address range concept,
and cleans up the code using it.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h| 20 +---
 include/trace/events/damon.h |  4 +-
 mm/damon-test.h  | 78 +++---
 mm/damon.c   | 94 +---
 4 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e77256cf30dd..b4b06ca905a2 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -16,11 +16,18 @@
 #include 
 
 /**
- * struct damon_region - Represents a monitoring target region of
- * [@vm_start, @vm_end).
- *
- * @vm_start:  Start address of the region (inclusive).
- * @vm_end:End address of the region (exclusive).
+ * struct damon_addr_range - Represents an address region of [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end:   End address of the region (exclusive).
+ */
+struct damon_addr_range {
+   unsigned long start;
+   unsigned long end;
+};
+
+/**
+ * struct damon_region - Represents a monitoring target region.
+ * @ar:The address range of the region.
  * @sampling_addr: Address of the sample for the next access check.
  * @nr_accesses:   Access frequency of this region.
  * @list:  List head for siblings.
@@ -33,8 +40,7 @@
  * region are set as region size-weighted average of those of the two regions.
  */
 struct damon_region {
-   unsigned long vm_start;
-   unsigned long vm_end;
+   struct damon_addr_range ar;
unsigned long sampling_addr;
unsigned int nr_accesses;
struct list_head list;
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index fd260463d5b8..471b6e49960f 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -27,8 +27,8 @@ TRACE_EVENT(damon_aggregated,
TP_fast_assign(
__entry->pid = t->pid;
__entry->nr_regions = nr_regions;
-   __entry->vm_start = r->vm_start;
-   __entry->vm_end = r->vm_end;
+   __entry->vm_start = r->ar.start;
+   __entry->vm_end = r->ar.end;
__entry->nr_accesses = r->nr_accesses;
),
 
diff --git a/mm/damon-test.h b/mm/damon-test.h
index 5b18619efe72..6d01f0e782d5 100644
--- a/mm/damon-test.h
+++ b/mm/damon-test.h
@@ -78,8 +78,8 @@ static void damon_test_regions(struct kunit *test)
struct damon_task *t;
 
r = damon_new_region(_user_ctx, 1, 2);
-   KUNIT_EXPECT_EQ(test, 1ul, r->vm_start);
-   KUNIT_EXPECT_EQ(test, 2ul, r->vm_end);
+   KUNIT_EXPECT_EQ(test, 1ul, r->ar.start);
+   KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
 
t = damon_new_task(42);
@@ -177,7 +177,7 @@ static void damon_test_set_recording(struct kunit *test)
  */
 static void damon_test_three_regions_in_vmas(struct kunit *test)
 {
-   struct region regions[3] = {0,};
+   struct damon_addr_range regions[3] = {0,};
/* 10-20-25, 200-210-220, 300-305, 307-330 */
struct vm_area_struct vmas[] = {
(struct vm_area_struct) {.vm_start = 10, .vm_end = 20},
@@ -267,7 +267,7 @@ static void damon_test_aggregate(struct kunit *test)
KUNIT_EXPECT_EQ(test, 3, it);
 
/* The aggregated information should be written in the buffer */
-   sr = sizeof(r->vm_start) + sizeof(r->vm_end) + sizeof(r->nr_accesses);
+   sr = sizeof(r->ar.start) + sizeof(r->ar.end) + sizeof(r->nr_accesses);
sp = sizeof(t->pid) + sizeof(unsigned int) + 3 * sr;
sz = sizeof(struct timespec64) + sizeof(unsigned int) + 3 * sp;
KUNIT_EXPECT_EQ(test, (unsigned int)sz, ctx->rbuf_offset);
@@ -331,7 +331,7 @@ static struct damon_region *__nth_region_of(struct 
damon_task *t, int idx)
  */
 static void damon_do_test_apply_three_regions(struct kunit *test,
unsigned long *regions, int nr_regions,
-   struct region *three_regions,
+   struct damon_addr_range *three_regions,
unsigned long *expected, int nr_expected)
 {
struct damon_task *t;
@@ -350,8 +350,8 @@ static void damon_do_test_apply_three_regions(struct kunit 
*test,
 
for (i = 0; i < nr_expected / 2; i++) {
r = __nth_region_of(t, i);
-   KUNIT_EXPECT_EQ(test, r->vm_start, expected[i * 2]);
-   KUNIT_EXPECT_EQ(test, r->vm_end, 

[RFC v3 00/10] DAMON: Support Access Monitoring of Any Address Space Including Physical Memory

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

Currently, DAMON[1] supports only virtual memory address spaces because it
utilizes PTE Accessed bits as its low-level access check primitive and ``struct
vma`` as a way to address the monitoring target regions.  However, the core
idea of DAMON, which makes it able to provide the accurate, efficient, and
scalable monitoring, is in a separate higher layer.  Therefore, DAMON can be
extended for other various address spaces by changing the two low primitives to
others for the address spaces.

This patchset makes DAMON's low level primitives configurable and provides
reference implementations of the primitives for the virtual memory address
spaces and the physical memory address space.  Therefore, users can monitor
both of the two address spaces by simply configuring the provided low level
primitives.  Note that only the user memory is supported, the same as the idle
page access tracking feature.

After this patchset, the programming interface users can implement the
primitives by themselves for their special use cases.  Clean/dirty/entire page
cache, NUMA nodes, specific files, or block devices would be examples of such
special use cases.

[1] https://lore.kernel.org/linux-mm/20200608114047.26589-1-sjp...@amazon.com/


Baseline and Complete Git Trees
===

The patches are based on the v5.7 plus DAMON v15 patchset[1] and DAMOS RFC v11
patchset[2].  You can also clone the complete git tree:

$ git clone git://github.com/sjp38/linux -b cdamon/rfc/v3

The web is also available:
https://github.com/sjp38/linux/releases/tag/cdamon/rfc/v3

[1] https://lore.kernel.org/linux-mm/20200608114047.26589-1-sjp...@amazon.com/
[2] https://lore.kernel.org/linux-mm/20200609065320.12941-1-sjp...@amazon.com/


Sequence of Patches
===

The sequence of patches is as follows.  The 1st patch defines the monitoring
region again based on pure address range abstraction so that no assumption of
virtual memory is in there.

The 2nd patch allows users to configure the low level primitives for
initialization and dynamic update of the target address regions, which were
previously coupled with the virtual memory.  Then, the 3rd and 4th patches
allow user space to also be able to set the monitoring target regions via the
debugfs and the user space tool.  The 5th patch documents this feature.

The 6th patch makes the access check primitives, which were coupled with the
virtual memory address, freely configurable.  Now any address space can be
supported.  The 7th patch provides the reference implementations of the
configurable primitives for the physical memory monitoring.  The 8th and 9th
patches make user space able to use the physical memory monitoring via
debugfs and the user space tool, respectively.  Finally, the 10th patch
documents the physical memory monitoring support.


Patch History
=

Changes from RFC v2
(https://lore.kernel.org/linux-mm/20200603141135.10575-1-sjp...@amazon.com/)
 - Support the physical memory monitoring with the user space tool
 - Use 'pfn_to_online_page()' (David Hildenbrand)
 - Document more detail on random 'pfn' and its safeness (David Hildenbrand)

Changes from RFC v1
(https://lore.kernel.org/linux-mm/20200409094232.29680-1-sjp...@amazon.com/)
 - Provide the reference primitive implementations for the physical memory
 - Connect the extensions with the debugfs interface

SeongJae Park (10):
  mm/damon: Use vm-independent address range concept
  mm/damon: Make monitoring target regions init/update configurable
  mm/damon/debugfs: Allow users to set initial monitoring target regions
  tools/damon: Implement init target regions feature
  Docs/damon: Document 'initial_regions' feature
  mm/damon: Make access check primitive configurable
  mm/damon: Implement callbacks for physical memory monitoring
  mm/damon/debugfs: Support physical memory monitoring
  tools/damon/record: Support physical memory address space
  Docs/damon: Document physical memory monitoring support

 Documentation/admin-guide/mm/damon/faq.rst|  16 +-
 Documentation/admin-guide/mm/damon/index.rst  |   1 -
 .../admin-guide/mm/damon/mechanisms.rst   |   4 +-
 Documentation/admin-guide/mm/damon/plans.rst  |  29 -
 Documentation/admin-guide/mm/damon/usage.rst  |  73 ++-
 include/linux/damon.h |  47 +-
 include/trace/events/damon.h  |   4 +-
 mm/damon-test.h   |  78 +--
 mm/damon.c| 518 +++---
 tools/damon/_damon.py |  41 ++
 tools/damon/heats.py  |   2 +-
 tools/damon/record.py |  41 +-
 tools/damon/schemes.py|  12 +-
 13 files changed, 690 insertions(+), 176 deletions(-)
 delete mode 100644 Documentation/admin-guide/mm/damon/plans.rst

-- 
2.17.1



[RFC v3 02/10] mm/damon: Make monitoring target regions init/update configurable

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit allows DAMON users to configure their own monitoring target
regions initializer / updater.  Using this, users can confine the
monitoring address spaces as they want.  For example, users can track
only stack, heap, shared memory area, or specific file-backed area, as
they want.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h | 13 +
 mm/damon.c| 17 ++---
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index b4b06ca905a2..a1b6810ce0eb 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -158,9 +158,16 @@ struct damos {
  * @tasks_list:Head of monitoring target tasks (_task) 
list.
  * @schemes_list:  Head of schemes () list.
  *
+ * @init_target_regions:   Constructs initial monitoring target regions.
+ * @update_target_regions: Updates monitoring target regions.
  * @sample_cb: Called for each sampling interval.
  * @aggregate_cb:  Called for each aggregation interval.
  *
+ * The monitoring thread calls @init_target_regions before starting the
+ * monitoring, @update_target_regions for each @regions_update_interval.  By
+ * setting these callbacks to appropriate functions, therefore, users can
+ * monitor specific range of virtual address space.
+ *
  * @sample_cb and @aggregate_cb are called from @kdamond for each of the
  * sampling intervals and aggregation intervals, respectively.  Therefore,
  * users can safely access to the monitoring results via @tasks_list without
@@ -190,10 +197,16 @@ struct damon_ctx {
struct list_head schemes_list;  /* 'damos' objects */
 
/* callbacks */
+   void (*init_target_regions)(struct damon_ctx *context);
+   void (*update_target_regions)(struct damon_ctx *context);
void (*sample_cb)(struct damon_ctx *context);
void (*aggregate_cb)(struct damon_ctx *context);
 };
 
+/* Reference callback implementations for virtual memory */
+void kdamond_init_vm_regions(struct damon_ctx *ctx);
+void kdamond_update_vm_regions(struct damon_ctx *ctx);
+
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
diff --git a/mm/damon.c b/mm/damon.c
index fcd919e96754..b55d501365c0 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -87,6 +87,9 @@ static struct damon_ctx damon_user_ctx = {
.regions_update_interval = 1000 * 1000,
.min_nr_regions = 10,
.max_nr_regions = 1000,
+
+   .init_target_regions = kdamond_init_vm_regions,
+   .update_target_regions = kdamond_update_vm_regions,
 };
 
 /*
@@ -343,7 +346,7 @@ static void swap_ranges(struct damon_addr_range *r1,
  *
  * This function receives an address space and finds three regions in it which
  * separated by the two biggest unmapped regions in the space.  Please refer to
- * below comments of 'damon_init_regions_of()' function to know why this is
+ * below comments of 'damon_init_vm_regions_of()' function to know why this is
  * necessary.
  *
  * Returns 0 if success, or negative error code otherwise.
@@ -455,7 +458,7 @@ static int damon_three_regions_of(struct damon_task *t,
  *   
  *   
  */
-static void damon_init_regions_of(struct damon_ctx *c, struct damon_task *t)
+static void damon_init_vm_regions_of(struct damon_ctx *c, struct damon_task *t)
 {
struct damon_region *r, *m = NULL;
struct damon_addr_range regions[3];
@@ -484,12 +487,12 @@ static void damon_init_regions_of(struct damon_ctx *c, 
struct damon_task *t)
 }
 
 /* Initialize '->regions_list' of every task */
-static void kdamond_init_regions(struct damon_ctx *ctx)
+void kdamond_init_vm_regions(struct damon_ctx *ctx)
 {
struct damon_task *t;
 
damon_for_each_task(t, ctx)
-   damon_init_regions_of(ctx, t);
+   damon_init_vm_regions_of(ctx, t);
 }
 
 /*
@@ -563,7 +566,7 @@ static void damon_apply_three_regions(struct damon_ctx *ctx,
 /*
  * Update regions for current memory mappings
  */
-static void kdamond_update_regions(struct damon_ctx *ctx)
+void kdamond_update_vm_regions(struct damon_ctx *ctx)
 {
struct damon_addr_range three_regions[3];
struct damon_task *t;
@@ -1131,7 +1134,7 @@ static int kdamond_fn(void *data)
unsigned int max_nr_accesses = 0;
 
pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
-   kdamond_init_regions(ctx);
+   ctx->init_target_regions(ctx);
 
kdamond_write_record_header(ctx);
 
@@ -1154,7 +1157,7 @@ static int kdamond_fn(void *data)
}
 
if (kdamond_need_update_regions(ctx))
-   kdamond_update_regions(ctx);
+   ctx->update_target_regions(ctx);
}
damon_flush_rbuffer(ctx);
damon_for_each_task(t, ctx) {
-- 
2.17.1



[RFC v3 04/10] tools/damon: Implement init target regions feature

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit updates the damon user space tool to support the initial
monitoring target regions feature.

Signed-off-by: SeongJae Park 
---
 tools/damon/_damon.py  | 39 +++
 tools/damon/record.py  | 12 +++-
 tools/damon/schemes.py | 12 +++-
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/tools/damon/_damon.py b/tools/damon/_damon.py
index 3620ef12a5ea..ad476cc61421 100644
--- a/tools/damon/_damon.py
+++ b/tools/damon/_damon.py
@@ -12,12 +12,25 @@ debugfs_attrs = None
 debugfs_record = None
 debugfs_schemes = None
 debugfs_pids = None
+debugfs_init_regions = None
 debugfs_monitor_on = None
 
 def set_target_pid(pid):
 return subprocess.call('echo %s > %s' % (pid, debugfs_pids), shell=True,
 executable='/bin/bash')
 
+def set_target(pid, init_regions=[]):
+rc = set_target_pid(pid)
+if rc:
+return rc
+
+if not os.path.exists(debugfs_init_regions):
+return 0
+
+string = ' '.join(['%s %d %d' % (pid, r[0], r[1]) for r in init_regions])
+return subprocess.call('echo "%s" > %s' % (string, debugfs_init_regions),
+shell=True, executable='/bin/bash')
+
 def turn_damon(on_off):
 return subprocess.call("echo %s > %s" % (on_off, debugfs_monitor_on),
 shell=True, executable="/bin/bash")
@@ -97,6 +110,7 @@ def chk_update_debugfs(debugfs):
 global debugfs_record
 global debugfs_schemes
 global debugfs_pids
+global debugfs_init_regions
 global debugfs_monitor_on
 
 debugfs_damon = os.path.join(debugfs, 'damon')
@@ -104,6 +118,7 @@ def chk_update_debugfs(debugfs):
 debugfs_record = os.path.join(debugfs_damon, 'record')
 debugfs_schemes = os.path.join(debugfs_damon, 'schemes')
 debugfs_pids = os.path.join(debugfs_damon, 'pids')
+debugfs_init_regions = os.path.join(debugfs_damon, 'init_regions')
 debugfs_monitor_on = os.path.join(debugfs_damon, 'monitor_on')
 
 if not os.path.isdir(debugfs_damon):
@@ -131,6 +146,26 @@ def cmd_args_to_attrs(args):
 return Attrs(sample_interval, aggr_interval, regions_update_interval,
 min_nr_regions, max_nr_regions, rbuf_len, rfile_path, schemes)
 
+def cmd_args_to_init_regions(args):
+regions = []
+for arg in args.regions.split():
+addrs = arg.split('-')
+try:
+if len(addrs) != 2:
+raise Exception('two addresses not given')
+start = int(addrs[0])
+end = int(addrs[1])
+if start >= end:
+raise Exception('start >= end')
+if regions and regions[-1][1] > start:
+raise Exception('regions overlap')
+except Exception as e:
+print('Wrong \'--regions\' argument (%s)' % e)
+exit(1)
+
+regions.append([start, end])
+return regions
+
 def set_attrs_argparser(parser):
 parser.add_argument('-d', '--debugfs', metavar='', type=str,
 default='/sys/kernel/debug', help='debugfs mounted path')
@@ -144,3 +179,7 @@ def set_attrs_argparser(parser):
 default=10, help='minimal number of regions')
 parser.add_argument('-m', '--maxr', metavar='<# regions>', type=int,
 default=1000, help='maximum number of regions')
+
+def set_init_regions_argparser(parser):
+parser.add_argument('-r', '--regions', metavar='"- ..."',
+type=str, default='', help='monitoring target address regions')
diff --git a/tools/damon/record.py b/tools/damon/record.py
index 44fa3a12af35..6ce8721d782a 100644
--- a/tools/damon/record.py
+++ b/tools/damon/record.py
@@ -13,7 +13,7 @@ import time
 
 import _damon
 
-def do_record(target, is_target_cmd, attrs, old_attrs):
+def do_record(target, is_target_cmd, init_regions, attrs, old_attrs):
 if os.path.isfile(attrs.rfile_path):
 os.rename(attrs.rfile_path, attrs.rfile_path + '.old')
 
@@ -24,8 +24,8 @@ def do_record(target, is_target_cmd, attrs, old_attrs):
 if is_target_cmd:
 p = subprocess.Popen(target, shell=True, executable='/bin/bash')
 target = p.pid
-if _damon.set_target_pid(target):
-print('pid setting (%s) failed' % target)
+if _damon.set_target(target, init_regions):
+print('target setting (%s, %s) failed' % (target, init_regions))
 cleanup_exit(old_attrs, -2)
 if _damon.turn_damon('on'):
 print('could not turn on damon' % target)
@@ -65,6 +65,7 @@ def chk_permission():
 
 def set_argparser(parser):
 _damon.set_attrs_argparser(parser)
+_damon.set_init_regions_argparser(parser)
 parser.add_argument('target', type=str, metavar='',
 help='the target command or the pid to record')
 parser.add_argument('-l', '--rbuf', metavar='', type=int,
@@ -88,19 +89,20 @@ def main(args=None):
 
 args.schemes = ''
 new_attrs = _damon.cmd_args_to_attrs(args)
+init_regions =

[RFC v3 03/10] mm/damon/debugfs: Allow users to set initial monitoring target regions

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

Some users would want to monitor only a part of the entire virtual
memory address space.  The '->init_target_regions' callback is therefore
provided, but only programming interface users can use it.

For this reason, this commit introduces a new debugfs file,
'init_regions'.  Users can specify which initial monitoring target
address regions they want by writing special input to the file.  The
input should describe each region in each line in the below form:

  <pid> <start address> <end address>

This commit also makes the default '->init_target_regions' callback,
'kdamond_init_vm_regions()', do nothing if the user has set the initial
target regions already.

Note that the regions will be updated to cover entire memory mapped
regions after 'regions update interval'.  If you want the regions to not
be updated after the initial setting, you could set the interval as a
very long time, say, a few decades.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 168 +++--
 1 file changed, 162 insertions(+), 6 deletions(-)

diff --git a/mm/damon.c b/mm/damon.c
index b55d501365c0..2d8322e6558f 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -491,8 +491,10 @@ void kdamond_init_vm_regions(struct damon_ctx *ctx)
 {
struct damon_task *t;
 
-   damon_for_each_task(t, ctx)
-   damon_init_vm_regions_of(ctx, t);
+   damon_for_each_task(t, ctx) {
+   if (!nr_damon_regions(t))
+   damon_init_vm_regions_of(ctx, t);
+   }
 }
 
 /*
@@ -1721,6 +1723,153 @@ static ssize_t debugfs_record_write(struct file *file,
return ret;
 }
 
+static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len)
+{
+   struct damon_task *t;
+   struct damon_region *r;
+   int written = 0;
+   int rc;
+
+   damon_for_each_task(t, c) {
+   damon_for_each_region(r, t) {
+   rc = snprintf([written], len - written,
+   "%d %lu %lu\n",
+   t->pid, r->ar.start, r->ar.end);
+   if (!rc)
+   return -ENOMEM;
+   written += rc;
+   }
+   }
+   return written;
+}
+
+static ssize_t debugfs_init_regions_read(struct file *file, char __user *buf,
+   size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char *kbuf;
+   ssize_t len;
+
+   kbuf = kmalloc(count, GFP_KERNEL);
+   if (!kbuf)
+   return -ENOMEM;
+
+   mutex_lock(>kdamond_lock);
+   if (ctx->kdamond) {
+   mutex_unlock(>kdamond_lock);
+   return -EBUSY;
+   }
+
+   len = sprint_init_regions(ctx, kbuf, count);
+   mutex_unlock(>kdamond_lock);
+   if (len < 0)
+   goto out;
+   len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+   kfree(kbuf);
+   return len;
+}
+
+static int add_init_region(struct damon_ctx *c,
+int pid, struct damon_addr_range *ar)
+{
+   struct damon_task *t;
+   struct damon_region *r, *prev;
+   int rc = -EINVAL;
+
+   if (ar->start >= ar->end)
+   return -EINVAL;
+
+   damon_for_each_task(t, c) {
+   if (t->pid == pid) {
+   r = damon_new_region(c, ar->start, ar->end);
+   if (!r)
+   return -ENOMEM;
+   damon_add_region(r, t);
+   if (nr_damon_regions(t) > 1) {
+   prev = damon_prev_region(r);
+   if (prev->ar.end > r->ar.start) {
+   damon_destroy_region(r);
+   return -EINVAL;
+   }
+   }
+   rc = 0;
+   }
+   }
+   return rc;
+}
+
+static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len)
+{
+   struct damon_task *t;
+   struct damon_region *r, *next;
+   int pos = 0, parsed, ret;
+   int pid;
+   struct damon_addr_range ar;
+   int err;
+
+   damon_for_each_task(t, c) {
+   damon_for_each_region_safe(r, next, t)
+   damon_destroy_region(r);
+   }
+
+   while (pos < len) {
+   ret = sscanf([pos], "%d %lu %lu%n",
+   , , , );
+   if (ret != 3)
+   break;
+   err = add_init_region(c, pid, );
+   if (err)
+   goto fail;
+   pos += parsed;
+   }
+
+   return 0;
+
+fail:
+   damon_for_each_task(t, c) {
+   damon_for_each_region_safe(r, next, t)
+   damon_destroy_region(r);
+   }
+   return

[RFC v3 05/10] Docs/damon: Document 'initial_regions' feature

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit documents the 'initial_regions' feature.

Signed-off-by: SeongJae Park 
---
 Documentation/admin-guide/mm/damon/usage.rst | 34 
 1 file changed, 34 insertions(+)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst 
b/Documentation/admin-guide/mm/damon/usage.rst
index 18a19c35b4f3..f600366cdd4e 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -326,6 +326,40 @@ having pids 42 and 4242 as the processes to be monitored 
and check it again::
 Note that setting the pids doesn't start the monitoring.
 
 
+Initial Monitoring Target Regions
+-
+
+DAMON automatically sets and updates the monitoring target regions so that
+entire memory mappings of target processes can be covered.  However, users
+might want to limit the monitoring region to specific address ranges, such as
+the heap, the stack, or specific file-mapped area.  Or, some users might know
+the initial access pattern of their workloads and therefore want to set optimal
+initial regions for the 'adaptive regions adjustment'.
+
+In such cases, users can explicitly set the initial monitoring target regions
+as they want, by writing proper values to the ``init_regions`` file.  Each line
+of the input should represent one region in below form.::
+
+  <pid> <start address> <end address>
+
+The ``pid`` should be already in ``pids`` file, and the regions should be
+passed in address order.  For example, below commands will set a couple of
+address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
+region of process 42, and another couple of address ranges, ``20-40`` and
+``50-100`` as that of process 4242.::
+
+# cd /damon
+# echo "42   1   100
+42   100 200
+4242 20  40
+4242 50  100" > init_regions
+
+Note that this sets the initial monitoring target regions only.  DAMON will
+automatically update the boundary of the regions after one ``regions update
+interval``.  Therefore, users should set the ``regions update interval`` large
+enough.
+
+
 Record
 --
 
-- 
2.17.1



[RFC v3 06/10] mm/damon: Make access check primitive configurable

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

DAMON assumes the target region is in virtual address space and
therefore uses PTE Accessed bit checking for access checking.  However,
as some CPUs provide H/W based memory access check features that are
usually more accurate and light-weight than PTE Accessed bit checking,
some users would want to use those in special use cases.  Also, some
users might want to use DAMON for different address spaces such as
physical memory space, which needs different ways to check the access.

This commit therefore allows DAMON users to configure the low level
access check primitives as they want.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h | 13 +++--
 mm/damon.c| 20 +++-
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index a1b6810ce0eb..1a788bfd1b4e 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -160,13 +160,18 @@ struct damos {
  *
  * @init_target_regions:   Constructs initial monitoring target regions.
  * @update_target_regions: Updates monitoring target regions.
+ * @prepare_access_checks: Prepares next access check of target regions.
+ * @check_accesses:Checks the access of target regions.
  * @sample_cb: Called for each sampling interval.
  * @aggregate_cb:  Called for each aggregation interval.
  *
  * The monitoring thread calls @init_target_regions before starting the
- * monitoring, @update_target_regions for each @regions_update_interval.  By
+ * monitoring, @update_target_regions for each @regions_update_interval, and
+ * @prepare_access_checks and @check_accesses for each @sample_interval.  By
  * setting these callbacks to appropriate functions, therefore, users can
- * monitor specific range of virtual address space.
+ * monitor any address space with special handling.  If these are not
+ * explicitly configured, the functions for virtual memory address space
+ * monitoring are used.
  *
  * @sample_cb and @aggregate_cb are called from @kdamond for each of the
  * sampling intervals and aggregation intervals, respectively.  Therefore,
@@ -199,6 +204,8 @@ struct damon_ctx {
/* callbacks */
void (*init_target_regions)(struct damon_ctx *context);
void (*update_target_regions)(struct damon_ctx *context);
+   void (*prepare_access_checks)(struct damon_ctx *context);
+   unsigned int (*check_accesses)(struct damon_ctx *context);
void (*sample_cb)(struct damon_ctx *context);
void (*aggregate_cb)(struct damon_ctx *context);
 };
@@ -206,6 +213,8 @@ struct damon_ctx {
 /* Reference callback implementations for virtual memory */
 void kdamond_init_vm_regions(struct damon_ctx *ctx);
 void kdamond_update_vm_regions(struct damon_ctx *ctx);
+void kdamond_prepare_vm_access_checks(struct damon_ctx *ctx);
+unsigned int kdamond_check_vm_accesses(struct damon_ctx *ctx);
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
diff --git a/mm/damon.c b/mm/damon.c
index 2d8322e6558f..95a4a7fedf4d 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -90,6 +90,8 @@ static struct damon_ctx damon_user_ctx = {
 
.init_target_regions = kdamond_init_vm_regions,
.update_target_regions = kdamond_update_vm_regions,
+   .prepare_access_checks = kdamond_prepare_vm_access_checks,
+   .check_accesses = kdamond_check_vm_accesses,
 };
 
 /*
@@ -613,7 +615,7 @@ static void damon_mkold(struct mm_struct *mm, unsigned long 
addr)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 }
 
-static void damon_prepare_access_check(struct damon_ctx *ctx,
+static void damon_prepare_vm_access_check(struct damon_ctx *ctx,
struct mm_struct *mm, struct damon_region *r)
 {
r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
@@ -621,7 +623,7 @@ static void damon_prepare_access_check(struct damon_ctx 
*ctx,
damon_mkold(mm, r->sampling_addr);
 }
 
-static void kdamond_prepare_access_checks(struct damon_ctx *ctx)
+void kdamond_prepare_vm_access_checks(struct damon_ctx *ctx)
 {
struct damon_task *t;
struct mm_struct *mm;
@@ -632,7 +634,7 @@ static void kdamond_prepare_access_checks(struct damon_ctx 
*ctx)
if (!mm)
continue;
damon_for_each_region(r, t)
-   damon_prepare_access_check(ctx, mm, r);
+   damon_prepare_vm_access_check(ctx, mm, r);
mmput(mm);
}
 }
@@ -670,7 +672,7 @@ static bool damon_young(struct mm_struct *mm, unsigned long 
addr,
  * mm  'mm_struct' for the given virtual address space
  * r   the region to be checked
  */
-static void damon_check_access(struct damon_ctx *ctx,
+static void damon_check_vm_access(struct damon_ctx *ctx,
   struct mm_struct *mm, struct damon_region *r)
 {
s

[RFC v3 07/10] mm/damon: Implement callbacks for physical memory monitoring

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit implements the four callbacks (->init_target_regions,
->update_target_regions, ->prepare_access_checks, and ->check_accesses)
for the basic access monitoring of the physical memory address space.
By setting the callback pointers to point to those, users can easily
monitor the accesses to the physical memory.

Internally, it uses the PTE Accessed bit, similarly to the virtual
memory support.  Also, it supports only user memory pages, as idle page
tracking does, for the same reason.  If the monitoring target physical
memory address range contains non-user memory pages, the access check
for those pages does nothing but simply treats the pages as not
accessed.

Users who want to use other access check primitives and/or monitor the
non-user memory regions could implement and use their own callbacks.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   5 ++
 mm/damon.c| 201 ++
 2 files changed, 206 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 1a788bfd1b4e..f96503a532ea 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -216,6 +216,11 @@ void kdamond_update_vm_regions(struct damon_ctx *ctx);
 void kdamond_prepare_vm_access_checks(struct damon_ctx *ctx);
 unsigned int kdamond_check_vm_accesses(struct damon_ctx *ctx);
 
+void kdamond_init_phys_regions(struct damon_ctx *ctx);
+void kdamond_update_phys_regions(struct damon_ctx *ctx);
+void kdamond_prepare_phys_access_checks(struct damon_ctx *ctx);
+unsigned int kdamond_check_phys_accesses(struct damon_ctx *ctx);
+
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
diff --git a/mm/damon.c b/mm/damon.c
index 95a4a7fedf4d..3a5d14720cb7 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -27,10 +27,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -499,6 +502,18 @@ void kdamond_init_vm_regions(struct damon_ctx *ctx)
}
 }
 
+/*
+ * The initial regions construction function for the physical address space.
+ *
+ * This default version actually does nothing.  Users should set the initial
+ * regions by themselves before passing their damon_ctx to 'damon_start()', or
+ * implement their version of this and set '->init_target_regions' of their
+ * damon_ctx to point to it.
+ */
+void kdamond_init_phys_regions(struct damon_ctx *ctx)
+{
+}
+
 /*
  * Functions for the dynamic monitoring target regions update
  */
@@ -582,6 +597,19 @@ void kdamond_update_vm_regions(struct damon_ctx *ctx)
}
 }
 
+/*
+ * The dynamic monitoring target regions update function for the physical
+ * address space.
+ *
+ * This default version actually does nothing.  Users should update the
+ * regions in other callbacks such as '->aggregate_cb', or implement their
+ * version of this and set the '->update_target_regions' of their damon_ctx to
+ * point to it.
+ */
+void kdamond_update_phys_regions(struct damon_ctx *ctx)
+{
+}
+
 /*
  * Functions for the access checking of the regions
  */
@@ -717,6 +745,179 @@ unsigned int kdamond_check_vm_accesses(struct damon_ctx 
*ctx)
return max_nr_accesses;
 }
 
+/* access check functions for physical address based regions */
+
+/*
+ * Get a page by pfn if it is in the LRU list.  Otherwise, returns NULL.
+ *
+ * The body of this function is stolen from the 'page_idle_get_page()'.  We
+ * steal rather than reuse it because the code is quite simple.
+ */
+static struct page *damon_phys_get_page(unsigned long pfn)
+{
+   struct page *page = pfn_to_online_page(pfn);
+   pg_data_t *pgdat;
+
+   if (!page || !PageLRU(page) ||
+   !get_page_unless_zero(page))
+   return NULL;
+
+   pgdat = page_pgdat(page);
+   spin_lock_irq(>lru_lock);
+   if (unlikely(!PageLRU(page))) {
+   put_page(page);
+   page = NULL;
+   }
+   spin_unlock_irq(>lru_lock);
+   return page;
+}
+
+static bool damon_page_mkold(struct page *page, struct vm_area_struct *vma,
+   unsigned long addr, void *arg)
+{
+   damon_mkold(vma->vm_mm, addr);
+   return true;
+}
+
+static void damon_phys_mkold(unsigned long paddr)
+{
+   struct page *page = damon_phys_get_page(PHYS_PFN(paddr));
+   struct rmap_walk_control rwc = {
+   .rmap_one = damon_page_mkold,
+   .anon_lock = page_lock_anon_vma_read,
+   };
+   bool need_lock;
+
+   if (!page)
+   return;
+
+   if (!page_mapped(page) || !page_rmapping(page))
+   return;
+
+   need_lock = !PageAnon(page) || PageKsm(page);
+   if (need_lock && !trylock_page(page))
+   return;
+
+   rmap

[RFC v3 08/10] mm/damon/debugfs: Support physical memory monitoring

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit makes the debugfs interface to support the physical memory
monitoring, in addition to the virtual memory monitoring.

Users can do the physical memory monitoring by writing a special
keyword, 'paddr\n' to the 'pids' debugfs file.  Then, DAMON will check
the special keyword and configure the callbacks of the monitoring
context for the debugfs user for physical memory.  This will internally
add one fake monitoring target process, which has pid as -1.

Unlike the virtual memory monitoring, DAMON debugfs will not
automatically set the monitoring target region.  Therefore, users should
also set the monitoring target address region using the 'init_regions'
debugfs file.  While doing this, the 'pid' in the input should be '-1'.

Finally, the physical memory monitoring will not be automatically
terminated because it has a fake monitoring target process.  The user
should explicitly turn off the monitoring by writing 'off' to the
'monitor_on' debugfs file.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/mm/damon.c b/mm/damon.c
index 3a5d14720cb7..b9cec7766b6c 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -1310,6 +1310,9 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)
return true;
 
damon_for_each_task(t, ctx) {
+   if (t->pid == -1)
+   return false;
+
task = damon_get_task_struct(t);
if (task) {
put_task_struct(task);
@@ -1849,6 +1852,23 @@ static ssize_t debugfs_pids_write(struct file *file,
if (ret < 0)
goto out;
 
+   if (!strncmp(kbuf, "paddr\n", count)) {
+   /* Configure the context for physical memory monitoring */
+   ctx->init_target_regions = kdamond_init_phys_regions;
+   ctx->update_target_regions = kdamond_update_phys_regions;
+   ctx->prepare_access_checks = kdamond_prepare_phys_access_checks;
+   ctx->check_accesses = kdamond_check_phys_accesses;
+
+   /* Set the fake target task pid as -1 */
+   snprintf(kbuf, count, "-1");
+   } else {
+   /* Configure the context for virtual memory monitoring */
+   ctx->init_target_regions = kdamond_init_vm_regions;
+   ctx->update_target_regions = kdamond_update_vm_regions;
+   ctx->prepare_access_checks = kdamond_prepare_vm_access_checks;
+   ctx->check_accesses = kdamond_check_vm_accesses;
+   }
+
targets = str_to_pids(kbuf, ret, _targets);
if (!targets) {
ret = -ENOMEM;
-- 
2.17.1



[RFC v3 10/10] Docs/damon: Document physical memory monitoring support

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit adds description for the physical memory monitoring usage in
the DAMON document.

Signed-off-by: SeongJae Park 
---
 Documentation/admin-guide/mm/damon/faq.rst| 16 +++--
 Documentation/admin-guide/mm/damon/index.rst  |  1 -
 .../admin-guide/mm/damon/mechanisms.rst   |  4 +-
 Documentation/admin-guide/mm/damon/plans.rst  | 29 -
 Documentation/admin-guide/mm/damon/usage.rst  | 59 ++-
 5 files changed, 54 insertions(+), 55 deletions(-)
 delete mode 100644 Documentation/admin-guide/mm/damon/plans.rst

diff --git a/Documentation/admin-guide/mm/damon/faq.rst 
b/Documentation/admin-guide/mm/damon/faq.rst
index 02f7581b05f6..e9b69d1bf975 100644
--- a/Documentation/admin-guide/mm/damon/faq.rst
+++ b/Documentation/admin-guide/mm/damon/faq.rst
@@ -38,9 +38,13 @@ separate document, :doc:`guide`.  Please refer to that.
 Does DAMON support virtual memory only?
 
 
-For now, yes.  But, DAMON will be able to support various address spaces
-including physical memory in near future.  An RFC patchset [1]_ for this
-extension is already available.  Please refer :doc:`plans` for detailed plan
-for this.
-
-.. [1] 
https://lore.kernel.org/linux-mm/20200409094232.29680-1-sjp...@amazon.com/
+No.  DAMON supports the virtual memory address spaces and the physical memory
+address space for now.  However, it can be extended to support any address
+space, because the core logic of the DAMON can be configured with the low level
+primitives for each address space.
+
+In other words, DAMON provides the reference implementations of the low level
+primitives for the virtual memory address spaces and the physical memory
+address spaces.  The programming interface users can further implement their
+own low level primitives for their special use cases and configure those in
+DAMON if they need.
diff --git a/Documentation/admin-guide/mm/damon/index.rst 
b/Documentation/admin-guide/mm/damon/index.rst
index 4d128e4fd9c8..7b2939d50408 100644
--- a/Documentation/admin-guide/mm/damon/index.rst
+++ b/Documentation/admin-guide/mm/damon/index.rst
@@ -33,4 +33,3 @@ optimizations of their systems.
faq
mechanisms
eval
-   plans
diff --git a/Documentation/admin-guide/mm/damon/mechanisms.rst 
b/Documentation/admin-guide/mm/damon/mechanisms.rst
index f0bc52c698a6..8a93a945cacf 100644
--- a/Documentation/admin-guide/mm/damon/mechanisms.rst
+++ b/Documentation/admin-guide/mm/damon/mechanisms.rst
@@ -76,9 +76,7 @@ keeping the bounds users set for their trade-off.
 Handling Virtual Memory Mappings
 
 
-This is for monitoring of virtual memory address space only.  It is the only
-one address space that supported by DAMON as of now, but other address spaces
-will be supported in the future.
+This is for monitoring of virtual memory address space only.
 
 Only small parts in the super-huge virtual address space of the processes are
 mapped to physical memory and accessed.  Thus, tracking the unmapped address
diff --git a/Documentation/admin-guide/mm/damon/plans.rst 
b/Documentation/admin-guide/mm/damon/plans.rst
deleted file mode 100644
index 8eba8a1dcb98..
--- a/Documentation/admin-guide/mm/damon/plans.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-
-Future Plans
-
-
-DAMON is still on its early stage.  Below plans are still under development.
-
-
-Support Various Address Spaces
-==
-
-Currently, DAMON supports only virtual memory address spaces because it
-utilizes PTE Accessed bits as its low-level access check primitive and ``struct
-vma`` as a way to address the monitoring target regions.  However, the core
-idea of DAMON is in a separate higher layer.  Therefore, DAMON can support
-other various address spaces by changing the two low primitives to others for
-the address spaces.
-
-In the future, DAMON will make the lower level primitives configurable so that
-it can support various address spaces including physical memory.  The
-configuration will be highly flexible so that users can even implement the
-primitives by themselves for their special use cases.  Monitoring of
-clean/dirty/entire page cache, NUMA nodes, specific files, or block devices
-would be examples of such use cases.
-
-An RFC patchset for this plan is available [1]_.
-
-.. [1] 
https://lore.kernel.org/linux-mm/20200409094232.29680-1-sjp...@amazon.com/
diff --git a/Documentation/admin-guide/mm/damon/usage.rst 
b/Documentation/admin-guide/mm/damon/usage.rst
index f600366cdd4e..8942d09bfd49 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -58,9 +58,11 @@ Recording Data Access Pattern
 -
 
 The ``record`` subcommand records the data access pattern of target processes
-in a file (``./damon.data`` by default).  You can specify the target as either
-pid of running

[RFC v3 09/10] tools/damon/record: Support physical memory address space

2020-06-09 Thread SeongJae Park
From: SeongJae Park 

This commit allows users to record the data accesses on physical memory
address space by passing 'paddr' as target to 'damo-record'.  If the
init regions are given, the regions will be monitored.  Otherwise, it
will monitor the biggest contiguous 'System RAM' region in
'/proc/iomem'.

Signed-off-by: SeongJae Park 
---
 tools/damon/_damon.py |  2 ++
 tools/damon/heats.py  |  2 +-
 tools/damon/record.py | 29 -
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/tools/damon/_damon.py b/tools/damon/_damon.py
index ad476cc61421..95d23c2ab6ee 100644
--- a/tools/damon/_damon.py
+++ b/tools/damon/_damon.py
@@ -27,6 +27,8 @@ def set_target(pid, init_regions=[]):
 if not os.path.exists(debugfs_init_regions):
 return 0
 
+if pid == 'paddr':
+pid = -1
 string = ' '.join(['%s %d %d' % (pid, r[0], r[1]) for r in init_regions])
 return subprocess.call('echo "%s" > %s' % (string, debugfs_init_regions),
 shell=True, executable='/bin/bash')
diff --git a/tools/damon/heats.py b/tools/damon/heats.py
index 99837083874e..34dbcf1a839d 100644
--- a/tools/damon/heats.py
+++ b/tools/damon/heats.py
@@ -307,7 +307,7 @@ def plot_heatmap(data_file, output_file):
 set xrange [0:];
 set yrange [0:];
 set xlabel 'Time (ns)';
-set ylabel 'Virtual Address (bytes)';
+set ylabel 'Address (bytes)';
 plot '%s' using 1:2:3 with image;""" % (terminal, output_file, data_file)
 subprocess.call(['gnuplot', '-e', gnuplot_cmd])
 os.remove(data_file)
diff --git a/tools/damon/record.py b/tools/damon/record.py
index 6ce8721d782a..416dca940c1d 100644
--- a/tools/damon/record.py
+++ b/tools/damon/record.py
@@ -73,6 +73,29 @@ def set_argparser(parser):
 parser.add_argument('-o', '--out', metavar='', type=str,
 default='damon.data', help='output file path')
 
+def default_paddr_region():
+"Largest System RAM region becomes the default"
+ret = []
+with open('/proc/iomem', 'r') as f:
+# example of the line: '1-42b201fff : System RAM'
+for line in f:
+fields = line.split(':')
+if len(fields) != 2:
+continue
+name = fields[1].strip()
+if name != 'System RAM':
+continue
+addrs = fields[0].split('-')
+if len(addrs) != 2:
+continue
+start = int(addrs[0], 16)
+end = int(addrs[1], 16)
+
+sz_region = end - start
+if not ret or sz_region > (ret[1] - ret[0]):
+ret = [start, end]
+return ret
+
 def main(args=None):
 global orig_attrs
 if not args:
@@ -93,7 +116,11 @@ def main(args=None):
 target = args.target
 
 target_fields = target.split()
-if not subprocess.call('which %s > /dev/null' % target_fields[0],
+if target == 'paddr':   # physical memory address space
+if not init_regions:
+init_regions = [default_paddr_region()]
+do_record(target, False, init_regions, new_attrs, orig_attrs)
+elif not subprocess.call('which %s > /dev/null' % target_fields[0],
 shell=True, executable='/bin/bash'):
 do_record(target, True, init_regions, new_attrs, orig_attrs)
 else:
-- 
2.17.1



Re: Re: [PATCH v2] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-09 Thread SeongJae Park
On Tue, 09 Jun 2020 18:35:46 -0700 Joe Perches  wrote:

> On Tue, 2020-06-09 at 14:25 +0200, SeongJae Park wrote:
> > From: SeongJae Park 
> >
> > This commit recommends the patches to replace 'blacklist' and
> > 'whitelist' with the 'blocklist' and 'allowlist', because the new
> > suggestions are incontrovertible, doesn't make people hurt, and more
> > self-explanatory.
> 
> nack.  Spelling is for typos not for politics.

Agreed, I'm abusing the spell checking.  I personally believe the terms will
eventually be removed from the dictionary and become typos, though.

I will update checkpatch to support deprecated terms checking, and set the
'blacklist' and 'whitelist' as the deprecated terms in the next spin.


Thanks,
SeongJae Park

> 
> > Signed-off-by: SeongJae Park 
> > ---
> >  scripts/spelling.txt | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/scripts/spelling.txt b/scripts/spelling.txt
> > index d9cd24cf0d40..ea785568d8b8 100644
> > --- a/scripts/spelling.txt
> > +++ b/scripts/spelling.txt
> > @@ -230,6 +230,7 @@ beter||better
> >  betweeen||between
> >  bianries||binaries
> >  bitmast||bitmask
> > +blacklist||blocklist
> >  boardcast||broadcast
> >  borad||board
> >  boundry||boundary
> > @@ -1495,6 +1496,7 @@ whcih||which
> >  whenver||whenever
> >  wheter||whether
> >  whe||when
> > +whitelist||allowlist
> >  wierd||weird
> >  wiil||will
> >  wirte||write


[PATCH v3 1/2] checkpatch: support deprecated terms checking

2020-06-10 Thread SeongJae Park
From: SeongJae Park 

Some terms could be deprecated for various reasons, but it is hard to
update all of the old usages.  That said, we could at least encourage
new patches to use the suggested replacements.  This commit adds a check
for deprecated terms to 'checkpatch.pl' for that.  The script will
get the deprecated terms and their suggested replacements from the
'scripts/deprecated_terms.txt' file and warn if the deprecated terms are
used.  The mechanism and the format of the file are almost the same as
that of 'spelling.txt'.

Signed-off-by: SeongJae Park 
---
 scripts/checkpatch.pl| 44 
 scripts/deprecated_terms.txt |  5 
 2 files changed, 49 insertions(+)
 create mode 100644 scripts/deprecated_terms.txt

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 524df88f9364..970e0444dc1f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -57,6 +57,7 @@ my $max_line_length = 100;
 my $ignore_perl_version = 0;
 my $minimum_perl_version = 5.10.0;
 my $min_conf_desc_length = 4;
+my $deprecated_terms_file = "$D/deprecated_terms.txt";
 my $spelling_file = "$D/spelling.txt";
 my $codespell = 0;
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
@@ -692,6 +693,31 @@ our $allowed_asm_includes = qr{(?x:
 )};
 # memory.h: ARM has a custom one
 
+# Load deprecated terms and build regular expression list.
+my $deprecated_terms;
+my %deprecated_terms_fix;
+
+if (open(my $deprecates, '<', $deprecated_terms_file)) {
+   while (<$deprecates>) {
+   my $line = $_;
+
+   $line =~ s/\s*\n?$//g;
+   $line =~ s/^\s*//g;
+
+   next if ($line =~ m/^\s*#/);
+   next if ($line =~ m/^\s*$/);
+
+   my ($suspect, $fix) = split(/\|\|/, $line);
+
+   $deprecated_terms_fix{$suspect} = $fix;
+   }
+   close($deprecates);
+} else {
+   warn "No deprecated term will be found - file '$deprecated_terms_file': 
$!\n";
+}
+
+$deprecated_terms = join("|", sort keys %deprecated_terms_fix) if keys 
%deprecated_terms_fix;
+
 # Load common spelling mistakes and build regular expression list.
 my $misspellings;
 my %spelling_fix;
@@ -2957,6 +2983,24 @@ sub process {
}
}
 
+# Check for deprecated terms
+   if (defined($deprecated_terms) &&
+   ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
+   while ($rawline =~ 
/(?:^|[^a-z@])($deprecated_terms)(?:\b|$|[^a-z@])/gi) {
+   my $deprecated_term = $1;
+   my $suggested = 
$deprecated_terms_fix{lc($deprecated_term)};
+   $suggested = ucfirst($suggested) if 
($deprecated_term=~ /^[A-Z]/);
+   $suggested = uc($suggested) if 
($deprecated_term =~ /^[A-Z]+$/);
+   my $msg_level = \
+   $msg_level = \ if ($file);
+   if (&{$msg_level}("DEPRECATED_TERM",
+ "Use of '$deprecated_term' is 
deprecated, please '$suggested', instead.\n" . $herecurr) &&
+   $fix) {
+   $fixed[$fixlinenr] =~ 
s/(^|[^A-Za-z@])($deprecated_term)($|[^A-Za-z@])/$1$suggested$3/;
+   }
+   }
+   }
+
 # Check for various typo / spelling mistakes
if (defined($misspellings) &&
($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
diff --git a/scripts/deprecated_terms.txt b/scripts/deprecated_terms.txt
new file mode 100644
index ..6faa06451c3d
--- /dev/null
+++ b/scripts/deprecated_terms.txt
@@ -0,0 +1,5 @@
+# License: GPLv2
+#
+# The format of each line is:
+# deprecated||suggested
+#
-- 
2.17.1



[PATCH v3 0/2] Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-10 Thread SeongJae Park
From: SeongJae Park 

This patchset 1) adds support of deprecated terms in the 'checkpatch.pl'
and 2) sets the 'blacklist' and 'whitelist' as deprecated with
replacement suggestions of 'blocklist' and 'allowlist', because the
suggestions are incontrovertible, don't hurt people, and are more
self-explanatory.

The patches are based on latest 'next/master'.  You can get the complete
git tree at:

https://github.com/sjp38/linux/tree/patches/checkpatch/deprecate_blacklist_whitelist_on_next


Patch History
=

Changes from v2
(https://lore.kernel.org/lkml/20200609122549.26304-1-sjp...@amazon.com/)
 - Implement and use deprecated terms check

Changes from v1
(https://lore.kernel.org/lkml/20200609121843.24147-1-sjp...@amazon.com/)
 - Remove unnecessary commit message

SeongJae Park (2):
  checkpatch: support deprecated terms checking
  scripts/deprecated_terms: Recommend blocklist/allowlist instead of
blacklist/whitelist

 scripts/checkpatch.pl| 44 
 scripts/deprecated_terms.txt |  7 ++
 2 files changed, 51 insertions(+)
 create mode 100644 scripts/deprecated_terms.txt

-- 
2.17.1



[PATCH v3 2/2] scripts/deprecated_terms: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-10 Thread SeongJae Park
From: SeongJae Park 

This commit recommends that patches replace 'blacklist' and
'whitelist' with 'blocklist' and 'allowlist', because the new
suggestions are incontrovertible, don't hurt people, and are more
self-explanatory.

Signed-off-by: SeongJae Park 
---
 scripts/deprecated_terms.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/deprecated_terms.txt b/scripts/deprecated_terms.txt
index 6faa06451c3d..102a0c82c6a0 100644
--- a/scripts/deprecated_terms.txt
+++ b/scripts/deprecated_terms.txt
@@ -3,3 +3,5 @@
 # The format of each line is:
 # deprecated||suggested
 #
+blacklist||blocklist
+whitelist||allowlist
-- 
2.17.1



Re: Re: [PATCH v3 1/2] checkpatch: support deprecated terms checking

2020-06-10 Thread SeongJae Park
On Wed, 10 Jun 2020 00:13:42 -0700 Joe Perches  wrote:

> On Wed, 2020-06-10 at 08:52 +0200, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > Some terms could be deprecated for various reasons, but it is hard to
> > update the entire old usages.  That said, we could at least encourage
> > new patches to use the suggested replacements.  This commit adds check
> > of deprecated terms in the 'checkpatch.pl' for that.  The script will
> > get deprecated terms and suggested replacements of those from
> > 'scripts/deprecated_terms.txt' file and warn if the deprecated terms are
> > used.  The mechanism and the format of the file are almost the same as
> > that of 'spelling.txt'.
> []
> > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> []
> > +# Load deprecated terms and build regular expression list.
> > +my $deprecated_terms;
> > +my %deprecated_terms_fix;
> > +
> > +if (open(my $deprecates, '<', $deprecated_terms_file)) {
> > +   while (<$deprecates>) {
> > +   my $line = $_;
> > +
> > +   $line =~ s/\s*\n?$//g;
> > +   $line =~ s/^\s*//g;
> > +
> > +   next if ($line =~ m/^\s*#/);
> > +   next if ($line =~ m/^\s*$/);
> > +
> > +   my ($suspect, $fix) = split(/\|\|/, $line);
> > +
> > +   $deprecated_terms_fix{$suspect} = $fix;
> > +   }
> > +   close($deprecates);
> > +} else {
> > +   warn "No deprecated term will be found - file '$deprecated_terms_file': 
> > $!\n";
> > +}
> > +
> > +$deprecated_terms = join("|", sort keys %deprecated_terms_fix) if keys 
> > %deprecated_terms_fix;
> > +
> 
> This is a direct copy of the spelling dictionary
> loading code, so maybe these could be consolidated.

Agreed, how about below one?

================================ >8 ================================
>From 76987b0f062c981243b49b7bede8b68de30ac3e2 Mon Sep 17 00:00:00 2001
From: SeongJae Park 
Date: Wed, 10 Jun 2020 07:11:57 +0200
Subject: [PATCH] checkpatch: support deprecated terms checking

Some terms could be deprecated for various reasons, but it is hard to
update the entire old usages.  That said, we could at least encourage
new patches to use the suggested replacements.  This commit adds check
of deprecated terms in the 'checkpatch.pl' for that.  The script will
get deprecated terms and suggested replacements of those from
'scripts/deprecated_terms.txt' file and warn if the deprecated terms are
used.  The mechanism and the format of the file are almost the same as
that of 'spelling.txt'.  For that reason, this commit modularizes the
reading of 'spelling.txt' and reuses it.

Signed-off-by: SeongJae Park 
---
 scripts/checkpatch.pl| 61 +++-
 scripts/deprecated_terms.txt |  5 +++
 2 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 scripts/deprecated_terms.txt

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 524df88f9364..226f24e1f1f3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -57,6 +57,7 @@ my $max_line_length = 100;
 my $ignore_perl_version = 0;
 my $minimum_perl_version = 5.10.0;
 my $min_conf_desc_length = 4;
+my $deprecated_terms_file = "$D/deprecated_terms.txt";
 my $spelling_file = "$D/spelling.txt";
 my $codespell = 0;
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
@@ -692,29 +693,41 @@ our $allowed_asm_includes = qr{(?x:
 )};
 # memory.h: ARM has a custom one
 
-# Load common spelling mistakes and build regular expression list.
-my $misspellings;
-my %spelling_fix;
+sub read_word_corrections {
+   my ($file) = @_;
+   my $suspects;
+   my %fixes;
 
-if (open(my $spelling, '<', $spelling_file)) {
-   while (<$spelling>) {
-   my $line = $_;
+   if (open(my $corrections, '<', $file)) {
+   while (<$corrections>) {
+   my $line = $_;
 
-   $line =~ s/\s*\n?$//g;
-   $line =~ s/^\s*//g;
+   $line =~ s/\s*\n?$//g;
+   $line =~ s/^\s*//g;
 
-   next if ($line =~ m/^\s*#/);
-   next if ($line =~ m/^\s*$/);
+   next if ($line =~ m/^\s*#/);
+   next if ($line =~ m/^\s*$/);
 
-   my ($suspect, $fix) = split(/\|\|/, $line);
+   my ($suspect, $fix) = split(/\|\|/, $line);
 
-   $spelling_fix{$suspect} = $fix;
+   $fixes{$suspect} = $fix;
+   }
+   close($corrections);
+   } else {
+   warn "No correction will be found - file '$file': $!\n";
}
-   close($spelling);
-} else {
-   warn "

Re: Re: Re: [PATCH v3 1/2] checkpatch: support deprecated terms checking

2020-06-10 Thread SeongJae Park
On Wed, 10 Jun 2020 01:45:41 -0700 Joe Perches  wrote:

> On Wed, 2020-06-10 at 10:01 +0200, SeongJae Park wrote:
> > On Wed, 10 Jun 2020 00:13:42 -0700 Joe Perches  wrote:
> []
> > > This is a direct copy of the spelling dictionary
> > > loading code, so maybe these could be consolidated.
> > 
> > Agreed, how about below one?
> []
> > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> > index 524df88f9364..226f24e1f1f3 100755
> []
> > +sub read_word_corrections {
> > +   my ($file) = @_;
> > +   my $suspects;
> > +   my %fixes;
> 
> Right.
> 
> But I think this should take a hash reference
> as the second argument so the complete hash
> isn't created and returned.
> 
> []
> 
> > +# Load deprecated terms and build regular expression list.
> > +my %deprecated_terms_fix = read_word_corrections($deprecated_terms_file);
> 
> So this might be something like:
> 
> my %deprecated_terms;
> read_word_corrections($deprecated_terms_file, \%deprecated_terms);
> 
> etc...

Appreciate your nice suggestion!  So, I updated the patch as below:

================================ >8 ================================
>From 0bcba551f429b0ccec4183437098b3b961d0a724 Mon Sep 17 00:00:00 2001
From: SeongJae Park 
Date: Wed, 10 Jun 2020 07:11:57 +0200
Subject: [PATCH] checkpatch: support deprecated terms checking

Some terms could be deprecated for various reasons, but it is hard to
update the entire old usages.  That said, we could at least encourage
new patches to use the suggested replacements.  This commit adds check
of deprecated terms in the 'checkpatch.pl' for that.  The script will
get deprecated terms and suggested replacements of those from
'scripts/deprecated_terms.txt' file and warn if the deprecated terms are
used.  The mechanism and the format of the file are almost the same as
that of 'spelling.txt'.  For that reason, this commit modularizes the
reading of 'spelling.txt' and reuses it.

Signed-off-by: SeongJae Park 
---
 scripts/checkpatch.pl| 60 +++-
 scripts/deprecated_terms.txt |  5 +++
 2 files changed, 50 insertions(+), 15 deletions(-)
 create mode 100644 scripts/deprecated_terms.txt

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 524df88f9364..c672091932bb 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -57,6 +57,7 @@ my $max_line_length = 100;
 my $ignore_perl_version = 0;
 my $minimum_perl_version = 5.10.0;
 my $min_conf_desc_length = 4;
+my $deprecated_terms_file = "$D/deprecated_terms.txt";
 my $spelling_file = "$D/spelling.txt";
 my $codespell = 0;
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
@@ -692,29 +693,40 @@ our $allowed_asm_includes = qr{(?x:
 )};
 # memory.h: ARM has a custom one
 
-# Load common spelling mistakes and build regular expression list.
-my $misspellings;
-my %spelling_fix;
+sub read_word_corrections {
+   my ($file, $fixesRef) = @_;
+   my $suspects;
 
-if (open(my $spelling, '<', $spelling_file)) {
-   while (<$spelling>) {
-   my $line = $_;
+   if (open(my $corrections, '<', $file)) {
+   while (<$corrections>) {
+   my $line = $_;
 
-   $line =~ s/\s*\n?$//g;
-   $line =~ s/^\s*//g;
+   $line =~ s/\s*\n?$//g;
+   $line =~ s/^\s*//g;
 
-   next if ($line =~ m/^\s*#/);
-   next if ($line =~ m/^\s*$/);
+   next if ($line =~ m/^\s*#/);
+   next if ($line =~ m/^\s*$/);
 
-   my ($suspect, $fix) = split(/\|\|/, $line);
+   my ($suspect, $fix) = split(/\|\|/, $line);
 
-   $spelling_fix{$suspect} = $fix;
+   $fixesRef->{$suspect} = $fix;
+   }
+   close($corrections);
+   } else {
+   warn "No correction will be found - file '$file': $!\n";
}
-   close($spelling);
-} else {
-   warn "No typos will be found - file '$spelling_file': $!\n";
 }
 
+# Load deprecated terms and build regular expression list.
+my %deprecated_terms_fix;
+read_word_corrections($deprecated_terms_file, \%deprecated_terms_fix);
+my $deprecated_terms = join("|", sort keys %deprecated_terms_fix) if keys 
%deprecated_terms_fix;
+
+# Load common spelling mistakes and build regular expression list.
+my $misspellings;
+my %spelling_fix;
+read_word_corrections($spelling_file, \%spelling_fix);
+
 if ($codespell) {
if (open(my $spelling, '<', $codespellfile)) {
while (<$spelling>) {
@@ -2957,6 +2969,24 @@ sub process {
}
}
 
+# Check for deprecated terms
+   if (defined($deprecate

Re: Re: [PATCH v2] scripts/spelling: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-10 Thread SeongJae Park
On Wed, 10 Jun 2020 10:50:24 +0200 Jiri Slaby  wrote:

> On 09. 06. 20, 14:25, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > This commit recommends the patches to replace 'blacklist' and
> > 'whitelist' with the 'blocklist' and 'allowlist', because the new
> > suggestions are incontrovertible, doesn't make people hurt, and more
> > self-explanatory.
> 
> Sorry, but no, it's definitely not.
> 
> > Signed-off-by: SeongJae Park 
> > ---
> >  scripts/spelling.txt | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/scripts/spelling.txt b/scripts/spelling.txt
> > index d9cd24cf0d40..ea785568d8b8 100644
> > --- a/scripts/spelling.txt
> > +++ b/scripts/spelling.txt
> > @@ -230,6 +230,7 @@ beter||better
> >  betweeen||between
> >  bianries||binaries
> >  bitmast||bitmask
> > +blacklist||blocklist
> 
> Blocklist means a list of blocks here.

How about 'denylist', then?

> 
> >  boardcast||broadcast
> >  borad||board
> >  boundry||boundary
> > @@ -1495,6 +1496,7 @@ whcih||which
> >  whenver||whenever
> >  wheter||whether
> >  whe||when
> > +whitelist||allowlist
> 
> Wut? allowlist I am seeing for the 1st time.

Wouldn't it be easy to infer the intention, though?

> 
> Some purists, linguists, and politicians are true fellows at times, or
> at least they think so. This comes in waves and even if they try hard,
> people won't adopt their nonsense. Like we, Czechs, still call piano by
> German Klavier, and not břinkoklapka, suggested in 19th century (among
> many others) by the horny extremists.
> 
> Shall we stop using black, white, blue, and other colors only because
> they relate to skin color of avatars now? I doubt that.

Well, I have no strong opinion on this, but... if some people are really being
hurt by the use of some terms and we could avoid the spread of those terms at
only a little cost, I believe it's worth making the change.


Thanks,
SeongJae Park

> 
> thanks,
> -- 
> js
> suse labs


Re: [PATCH v15 04/14] mm/damon: Adaptively adjust regions

2020-06-10 Thread SeongJae Park
On Mon, 8 Jun 2020 13:40:37 +0200 SeongJae Park  wrote:

> From: SeongJae Park 
> 
> At the beginning of the monitoring, DAMON constructs the initial regions
> by evenly splitting the memory mapped address space of the process into
> the user-specified minimal number of regions.  In this initial state,
> the assumption of the regions (pages in same region have similar access
> frequencies) is normally not kept and thus the monitoring quality could
> be low.  To keep the assumption as much as possible, DAMON adaptively
> merges and splits each region.
> 
> For each ``aggregation interval``, it compares the access frequencies of
> adjacent regions and merges those if the frequency difference is small.
> Then, after it reports and clears the aggregated access frequency of
> each region, it splits each region into two regions if the total number
> of regions is smaller than the half of the user-specified maximum number
> of regions.

I recently realized that only the 'maximum number of regions' is respected,
while the 'minimum number of regions' isn't.  In the next spin, I will
update the code to 1) set a new internal variable, 'max_sz_region', to the size
of the entire monitoring target regions divided by the 'minimum number of
regions', and 2) avoid merging regions if it would result in a region larger
than that.

This change would make DAMON more flexible for special cases.  For example,
some use cases would need static granularity monitoring.  In such cases, users
will be able to adjust the granularity by controlling the 'minimum number of
regions', and avoid the split/merge of regions by setting the 'maximum number
of regions' to the same value as the 'minimum number of regions'.


Thanks,
SeongJae Park

> 
> In this way, DAMON provides its best-effort quality and minimal overhead
> while keeping the bounds users set for their trade-off.
> 
> Signed-off-by: SeongJae Park 
> Reviewed-by: Leonard Foerster 


Re: [PATCH v3 2/2] scripts/deprecated_terms: Recommend blocklist/allowlist instead of blacklist/whitelist

2020-06-10 Thread SeongJae Park
Based on Jiri's feedback[1], I'm updating the suggested replacement for
'blacklist' from 'blocklist' to 'denylist', as the previous one might be
confusing to block layer people.  Also, the new recommendation is shorter ;)

[1] https://lore.kernel.org/lkml/20200610091655.4682-1-sjp...@amazon.com/

================================ >8 ================================
>From 1376e327de8316ef30c393507b29d70d38bffd05 Mon Sep 17 00:00:00 2001
From: SeongJae Park 
Date: Wed, 10 Jun 2020 07:23:33 +0200
Subject: [PATCH v3.1] scripts/deprecated_terms: Recommend denylist/allowlist
 instead of blacklist/whitelist

This commit recommends that patches replace 'blacklist' and
'whitelist' with 'denylist' and 'allowlist', because the new
suggestions are incontrovertible, do not hurt people, and are more
self-explanatory.

Signed-off-by: SeongJae Park 
---
 scripts/deprecated_terms.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/deprecated_terms.txt b/scripts/deprecated_terms.txt
index 6faa06451c3d..4512ef5d5ffa 100644
--- a/scripts/deprecated_terms.txt
+++ b/scripts/deprecated_terms.txt
@@ -3,3 +3,5 @@
 # The format of each line is:
 # deprecated||suggested
 #
+blacklist||denylist
+whitelist||allowlist
-- 
2.17.1



[PATCH v18 00/14] Introduce Data Access MONitor (DAMON)

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

Changes from Previous Version
=============================

- Reorganize the doc and remove png blobs (Mike Rapoport)
- Wordsmith mechanisms doc and commit messages
- tools/wss: Set default working set access frequency threshold
- Avoid race in damon daemon start

Introduction
============

DAMON is a data access monitoring framework subsystem for the Linux kernel.
The core mechanisms of DAMON called 'region based sampling' and 'adaptive
regions adjustment' (refer to 'mechanisms.rst' in the 11th patch of this
patchset for the detail) make it

 - accurate (The monitored information is useful for DRAM level memory
   management. It might not be appropriate for cache-level accuracy, though.),
 - light-weight (The monitoring overhead is low enough to be applied online
   while making no impact on the performance of the target workloads.), and
 - scalable (the upper-bound of the instrumentation overhead is controllable
   regardless of the size of target workloads.).

Using this framework, therefore, the kernel's core memory management mechanisms
such as reclamation and THP can be optimized for better memory management.  The
experimental memory management optimization works that incur high
instrumentation overhead will be able to have another try.  In user space,
meanwhile, users who have some special workloads will be able to write
personalized tools or applications for deeper understanding and specialized
optimizations of their systems.

Evaluations
===========

We evaluated DAMON's overhead, monitoring quality and usefulness using 25
realistic workloads on my QEMU/KVM based virtual machine running a kernel that
v16 DAMON patchset is applied.

DAMON is lightweight. It increases system memory usage by only -0.25% and
consumes less than 1% CPU time in most cases. It slows target workloads down by
only 0.94%.

DAMON is accurate and useful for memory management optimizations. An
experimental DAMON-based operation scheme for THP, 'ethp', removes 31.29% of
THP memory overheads while preserving 60.64% of THP speedup. Another
experimental DAMON-based 'proactive reclamation' implementation, 'prcl',
reduces 87.95% of resident sets and 29.52% of system memory footprint while
incurring only 2.15% runtime overhead in the best case (parsec3/freqmine).

NOTE that the experimental THP optimization and proactive reclamation are not
for production, but only for proof of concept.

Please refer to the official document[1] or "Documentation/admin-guide/mm: Add
a document for DAMON" patch in this patchset for detailed evaluation setup and
results.

[1] 
https://damonitor.github.io/doc/html/latest-damon/admin-guide/mm/damon/eval.html

More Information
================

We prepared a showcase web site[1] where you can get more information.  There
are

- the official documentations[2],
- the heatmap format dynamic access pattern of various realistic workloads for
  heap area[3], mmap()-ed area[4], and stack[5] area,
- the dynamic working set size distribution[6] and chronological working set
  size changes[7], and
- the latest performance test results[8].

[1] https://damonitor.github.io/_index
[2] https://damonitor.github.io/doc/html/latest-damon
[3] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.0.png.html
[4] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.1.png.html
[5] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.2.png.html
[6] https://damonitor.github.io/test/result/visual/latest/rec.wss_sz.png.html
[7] https://damonitor.github.io/test/result/visual/latest/rec.wss_time.png.html
[8] https://damonitor.github.io/test/result/perf/latest/html/index.html

Baseline and Complete Git Trees
===============================

The patches are based on the v5.7.  You can also clone the complete git
tree:

$ git clone git://github.com/sjp38/linux -b damon/patches/v18

The web is also available:
https://github.com/sjp38/linux/releases/tag/damon/patches/v18

There are a couple of trees for entire DAMON patchset series.  It includes
future features.  The first one[1] contains the changes for latest release,
while the other one[2] contains the changes for next release.

[1] https://github.com/sjp38/linux/tree/damon/master
[2] https://github.com/sjp38/linux/tree/damon/next

Sequence Of Patches
===================

The 1st patch exports 'lookup_page_ext()' to GPL modules so that it can be used
by DAMON even though it is built as a loadable module.

The next four patches implement the target address space independent core logic
of DAMON and its programming interface.  The 2nd patch introduces the DAMON
module, its data structures, and data structure related common functions.  The
following three patches (3rd to 5th) implement the core mechanisms of DAMON,
namely region based sampling (patch 3), adaptive regions adjustment (patch 4),
and dynamic memory mapping change adoption (patch 5).

The following one (patch 6) implements the virtual memory address spac

[PATCH v18 02/14] mm: Introduce Data Access MONitor (DAMON)

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

DAMON is a data access monitoring framework subsystem for the Linux
kernel.  The core mechanisms of DAMON make it

 - accurate (the monitoring output is useful enough for DRAM level
   memory management; it might not be appropriate for CPU cache levels,
   though),
 - light-weight (the monitoring overhead is low enough to be applied
   online), and
 - scalable (the upper-bound of the overhead is in constant range
   regardless of the size of target workloads).

Using this framework, therefore, the kernel's memory management
mechanisms can make advanced decisions.  Experimental memory management
optimization works that incur high data access monitoring overhead
could be implemented again.  In user space, meanwhile, users who have some
special workloads can write personalized applications for better
understanding and optimizations of their workloads and systems.

This commit is implementing only the stub for the module load/unload,
basic data structures, and simple manipulation functions of the
structures to keep the size of commit small.  The core mechanisms of
DAMON will be implemented one by one by following commits.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
Reviewed-by: Varad Gautam 
---
 include/linux/damon.h |  63 ++
 mm/Kconfig|  12 +++
 mm/Makefile   |   1 +
 mm/damon.c| 188 ++
 4 files changed, 264 insertions(+)
 create mode 100644 include/linux/damon.h
 create mode 100644 mm/damon.c

diff --git a/include/linux/damon.h b/include/linux/damon.h
new file mode 100644
index ..c8f8c1c41a45
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Copyright 2019-2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: SeongJae Park 
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include 
+#include 
+
+/**
+ * struct damon_addr_range - Represents an address region of [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end:   End address of the region (exclusive).
+ */
+struct damon_addr_range {
+   unsigned long start;
+   unsigned long end;
+};
+
+/**
+ * struct damon_region - Represents a monitoring target region.
+ * @ar:The address range of the region.
+ * @sampling_addr: Address of the sample for the next access check.
+ * @nr_accesses:   Access frequency of this region.
+ * @list:  List head for siblings.
+ */
+struct damon_region {
+   struct damon_addr_range ar;
+   unsigned long sampling_addr;
+   unsigned int nr_accesses;
+   struct list_head list;
+};
+
+/**
+ * struct damon_task - Represents a monitoring target task.
+ * @pid:   Process id of the task.
+ * @regions_list:  Head of the monitoring target regions of this task.
+ * @list:  List head for siblings.
+ *
+ * If the monitoring target address space is task independent (e.g., physical
+ * memory address space monitoring), @pid should be '-1'.
+ */
+struct damon_task {
+   int pid;
+   struct list_head regions_list;
+   struct list_head list;
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring.
+ * @tasks_list:Head of monitoring target tasks (&damon_task) list.
+ */
+struct damon_ctx {
+   struct list_head tasks_list;/* 'damon_task' objects */
+};
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index c1acc34c1c35..464e9594dcec 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -867,4 +867,16 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
 bool
 
+config DAMON
+   tristate "Data Access Monitor"
+   depends on MMU
+   help
+ This feature allows to monitor access frequency of each memory
+ region. The information can be useful for performance-centric DRAM
+ level memory management.
+
+ See https://damonitor.github.io/doc/html/latest-damon/index.html for
+ more information.
+ If unsure, say N.
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index fccd3756b25f..230e545b6e07 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -112,3 +112,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
+obj-$(CONFIG_DAMON) += damon.o
diff --git a/mm/damon.c b/mm/damon.c
new file mode 100644
index ..5ab13b1c15cf
--- /dev/null
+++ b/mm/damon.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Copyright 2019-2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: SeongJae Park 
+ *
+ * This file is constructed in below parts.
+ *
+ * - Functions and macros for DAMON data structures
+ * - Functions for the module loading/unloading
+ *
+ * The core parts are not implemented yet.
+ */
+
+#define pr_fmt(fmt) &qu

[PATCH v18 04/14] mm/damon: Adaptively adjust regions

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

Even if the initial monitoring target regions are somehow well constructed
to fulfill the assumption (pages in the same region have similar access
frequencies), the data access pattern can change dynamically.  This
will result in low monitoring quality.  To keep the assumption as much
as possible, DAMON adaptively merges and splits each region based on
their access frequency.

For each ``aggregation interval``, it compares the access frequencies of
adjacent regions and merges those if the frequency difference is small.
Then, after it reports and clears the aggregated access frequency of
each region, it splits each region into two or three regions if the
total number of regions will not exceed the user-specified maximum
number of regions after the split.

In this way, DAMON provides its best-effort quality and minimal overhead
while keeping the upper-bound overhead that users set.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/linux/damon.h |  11 ++-
 mm/damon.c| 191 --
 2 files changed, 189 insertions(+), 13 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 7adc7b6b3507..97ddc74e207f 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -61,7 +61,8 @@ struct damon_task {
  *
  * @sample_interval:   The time between access samplings.
  * @aggr_interval: The time between monitor results aggregations.
- * @nr_regions:The number of monitoring regions.
+ * @min_nr_regions:The minimum number of monitoring regions.
+ * @max_nr_regions:The maximum number of monitoring regions.
  *
  * For each @sample_interval, DAMON checks whether each region is accessed or
  * not.  It aggregates and keeps the access information (number of accesses to
@@ -114,7 +115,8 @@ struct damon_task {
 struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
-   unsigned long nr_regions;
+   unsigned long min_nr_regions;
+   unsigned long max_nr_regions;
 
struct timespec64 last_aggregation;
 
@@ -133,8 +135,9 @@ struct damon_ctx {
 };
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
-int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
-   unsigned long aggr_int, unsigned long min_nr_reg);
+int damon_set_attrs(struct damon_ctx *ctx,
+   unsigned long sample_int, unsigned long aggr_int,
+   unsigned long min_nr_reg, unsigned long max_nr_reg);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
 
diff --git a/mm/damon.c b/mm/damon.c
index 29d82c2d65be..02bc7542a76f 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -176,6 +176,26 @@ static unsigned int nr_damon_regions(struct damon_task *t)
return nr_regions;
 }
 
+/* Returns the size upper limit for each monitoring region */
+static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
+{
+   struct damon_task *t;
+   struct damon_region *r;
+   unsigned long sz = 0;
+
+   damon_for_each_task(t, ctx) {
+   damon_for_each_region(r, t)
+   sz += r->ar.end - r->ar.start;
+   }
+
+   if (ctx->min_nr_regions)
+   sz /= ctx->min_nr_regions;
+   if (sz < MIN_REGION)
+   sz = MIN_REGION;
+
+   return sz;
+}
+
 /*
  * Functions for DAMON core logics and features
  */
@@ -226,6 +246,145 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
}
 }
 
+#define sz_damon_region(r) (r->ar.end - r->ar.start)
+
+/*
+ * Merge two adjacent regions into one region
+ */
+static void damon_merge_two_regions(struct damon_region *l,
+   struct damon_region *r)
+{
+   l->nr_accesses = (l->nr_accesses * sz_damon_region(l) +
+   r->nr_accesses * sz_damon_region(r)) /
+   (sz_damon_region(l) + sz_damon_region(r));
+   l->ar.end = r->ar.end;
+   damon_destroy_region(r);
+}
+
+#define diff_of(a, b) (a > b ? a - b : b - a)
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * t   task affected by merge operation
+ * thres   '->nr_accesses' diff threshold for the merge
+ * sz_limitsize upper limit of each region
+ */
+static void damon_merge_regions_of(struct damon_task *t, unsigned int thres,
+  unsigned long sz_limit)
+{
+   struct damon_region *r, *prev = NULL, *next;
+
+   damon_for_each_region_safe(r, next, t) {
+   if (prev && prev->ar.end == r->ar.start &&
+   diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
+   sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
+   damon_merge_two_regions(prev, r);
+   else
+ 

[PATCH v18 03/14] mm/damon: Implement region based sampling

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

DAMON separates its monitoring target address space independent high
level logics from the target space dependent low level primitives for
flexible support of various address spaces.

This commit implements DAMON's target address space independent high
level logics for basic access check and region based sampling.  Hence,
without the target address space specific parts implementations, this
doesn't work alone.  A reference implementation of those will be
provided by a later commit.

Basic Access Check
==================

The output of DAMON says how frequently which pages are accessed for a
given duration.  The resolution of the access frequency is controlled by
setting ``sampling interval`` and ``aggregation interval``.  In detail,
DAMON checks access to each page per ``sampling interval`` and
aggregates the results.  In other words, it counts the number of
accesses to each page.  After each ``aggregation interval`` passes,
DAMON calls callback functions that were previously registered by users so
that users can read the aggregated results, and then clears the results.
This can be described in the simple pseudo-code below::

    while monitoring_on:
        for page in monitoring_target:
            if accessed(page):
                nr_accesses[page] += 1
        if time() % aggregation_interval == 0:
            for callback in user_registered_callbacks:
                callback(monitoring_target, nr_accesses)
            for page in monitoring_target:
                nr_accesses[page] = 0
        sleep(sampling interval)

The monitoring overhead of this mechanism will arbitrarily increase as
the size of the target workload grows.

Region Based Sampling
=====================

To avoid the unbounded increase of the overhead, DAMON groups adjacent
pages that are assumed to have the same access frequencies into a region.
As long as the assumption (pages in a region have the same access
frequencies) is kept, only one page in the region is required to be
checked.  Thus, for each ``sampling interval``, DAMON randomly picks one
page in each region, waits for one ``sampling interval``, checks whether
the page is accessed meanwhile, and increases the access frequency of
the region if so.  Therefore, the monitoring overhead is controllable by
setting the number of regions.  DAMON allows users to set the minimum
and the maximum number of regions for the trade-off.

This scheme, however, cannot preserve the quality of the output if the
assumption is not guaranteed.  Next commit will address this problem.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/linux/damon.h |  80 -
 mm/damon.c| 260 +-
 2 files changed, 337 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index c8f8c1c41a45..7adc7b6b3507 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -11,6 +11,8 @@
 #define _DAMON_H_
 
 #include 
+#include 
+#include 
 #include 
 
 /**
@@ -53,11 +55,87 @@ struct damon_task {
 };
 
 /**
- * struct damon_ctx - Represents a context for each monitoring.
+ * struct damon_ctx - Represents a context for each monitoring.  This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval:   The time between access samplings.
+ * @aggr_interval: The time between monitor results aggregations.
+ * @nr_regions:The number of monitoring regions.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not.  It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time.  All time intervals are in
+ * micro-seconds.
+ *
+ * @kdamond:   Kernel thread who does the monitoring.
+ * @kdamond_stop:  Notifies whether kdamond should stop.
+ * @kdamond_lock:  Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring request (damon_start()), a kernel thread for the
+ * monitoring is created.  The pointer to the thread is stored in @kdamond.
+ *
+ * The monitoring thread sets @kdamond to NULL when it terminates.  Therefore,
+ * users can know whether the monitoring is ongoing or terminated by reading
+ * @kdamond.  Also, users can ask @kdamond to be terminated by writing non-zero
+ * to @kdamond_stop.  Reads and writes to @kdamond and @kdamond_stop from
+ * outside of the monitoring thread must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock.  Accesses to other fields must be protected by themselves.
+ *
  * @tasks_list:Head of monitoring target tasks (_task) 
list.
+ *
+ * @init_target_regions:   Constructs initial monitoring target regions.
+ * @prepare_access_checks: Prepares next access check of target regions.
+ * @check_accesses:Checks the access

[PATCH v18 01/14] mm/page_ext: Export lookup_page_ext() to GPL modules

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit exports 'lookup_page_ext()' to GPL modules.  It will be used
by DAMON in following commit for the implementation of the region based
sampling.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
Reviewed-by: Varad Gautam 
---
 mm/page_ext.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/page_ext.c b/mm/page_ext.c
index a3616f7a0e9e..9d802d01fcb5 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -131,6 +131,7 @@ struct page_ext *lookup_page_ext(const struct page *page)
MAX_ORDER_NR_PAGES);
return get_entry(base, index);
 }
+EXPORT_SYMBOL_GPL(lookup_page_ext);
 
 static int __init alloc_node_page_ext(int nid)
 {
-- 
2.17.1



[PATCH v18 07/14] mm/damon: Implement access pattern recording

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit implements the recording feature of DAMON.  If this feature
is enabled, DAMON writes the monitored access patterns in its binary
format into a file which is specified by the user.  This is already able to
be implemented by each user using the callbacks.  However, as the
recording is expected to be widely used, this commit implements the
feature in the DAMON, for more convenience.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/linux/damon.h |  15 +
 mm/damon.c| 141 +-
 2 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 310d36d123b3..b0e7e31a22b3 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -72,6 +72,14 @@ struct damon_task {
  * in case of virtual memory monitoring) and applies the changes for each
  * @regions_update_interval.  All time intervals are in micro-seconds.
  *
+ * @rbuf: In-memory buffer for monitoring result recording.
+ * @rbuf_len: The length of @rbuf.
+ * @rbuf_offset: The offset for next write to @rbuf.
+ * @rfile_path: Record file path.
+ *
+ * If @rbuf, @rbuf_len, and @rfile_path are set, the monitored results are
+ * automatically stored in @rfile_path file.
+ *
  * @kdamond:   Kernel thread who does the monitoring.
  * @kdamond_stop:  Notifies whether kdamond should stop.
  * @kdamond_lock:  Mutex for the synchronizations with @kdamond.
@@ -129,6 +137,11 @@ struct damon_ctx {
struct timespec64 last_aggregation;
struct timespec64 last_regions_update;
 
+   unsigned char *rbuf;
+   unsigned int rbuf_len;
+   unsigned int rbuf_offset;
+   char *rfile_path;
+
struct task_struct *kdamond;
bool kdamond_stop;
struct mutex kdamond_lock;
@@ -154,6 +167,8 @@ int damon_set_pids(struct damon_ctx *ctx, int *pids, 
ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_set_recording(struct damon_ctx *ctx,
+   unsigned int rbuf_len, char *rfile_path);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
 
diff --git a/mm/damon.c b/mm/damon.c
index 386780739007..55ecfab64220 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -58,6 +58,10 @@
 #define damon_for_each_task_safe(t, next, ctx) \
list_for_each_entry_safe(t, next, &(ctx)->tasks_list, list)
 
+#define MIN_RECORD_BUFFER_LEN  1024
+#define MAX_RECORD_BUFFER_LEN  (4 * 1024 * 1024)
+#define MAX_RFILE_PATH_LEN 256
+
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
@@ -707,16 +711,88 @@ static bool kdamond_aggregate_interval_passed(struct 
damon_ctx *ctx)
 }
 
 /*
- * Reset the aggregated monitoring results
+ * Flush the content in the result buffer to the result file
+ */
+static void damon_flush_rbuffer(struct damon_ctx *ctx)
+{
+   ssize_t sz;
+   loff_t pos = 0;
+   struct file *rfile;
+
+   if (!ctx->rbuf_offset)
+   return;
+
+   rfile = filp_open(ctx->rfile_path, O_CREAT | O_RDWR | O_APPEND, 0644);
+   if (IS_ERR(rfile)) {
+   pr_err("Cannot open the result file %s\n",
+   ctx->rfile_path);
+   return;
+   }
+
+   while (ctx->rbuf_offset) {
+   sz = kernel_write(rfile, ctx->rbuf, ctx->rbuf_offset, );
+   if (sz < 0)
+   break;
+   ctx->rbuf_offset -= sz;
+   }
+   filp_close(rfile, NULL);
+}
+
+/*
+ * Write a data into the result buffer
+ */
+static void damon_write_rbuf(struct damon_ctx *ctx, void *data, ssize_t size)
+{
+   if (!ctx->rbuf_len || !ctx->rbuf || !ctx->rfile_path)
+   return;
+   if (ctx->rbuf_offset + size > ctx->rbuf_len)
+   damon_flush_rbuffer(ctx);
+   if (ctx->rbuf_offset + size > ctx->rbuf_len) {
+   pr_warn("%s: flush failed, or wrong size given(%u, %zu)\n",
+   __func__, ctx->rbuf_offset, size);
+   return;
+   }
+
+   memcpy(>rbuf[ctx->rbuf_offset], data, size);
+   ctx->rbuf_offset += size;
+}
+
+/*
+ * Flush the aggregated monitoring results to the result buffer
+ *
+ * Stores current tracking results to the result buffer and reset 'nr_accesses'
+ * of each region.  The format for the result buffer is as below:
+ *
+ * 
+ *
+ *   task info:   
+ *   region info:   
  */
 static void kdamond_reset_aggregated(struct damon_ctx *c)
 {
struct damon_task *t;
-   struct damon_region *r;
+   struct timespec64 now;
+   unsigned int nr;
+
+   ktime_get_coarse_ts64();
+
+   damon_write_rbuf(

[PATCH v18 05/14] mm/damon: Track dynamic monitoring target regions update

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

The monitoring target address range can be dynamically changed.  For
example, virtual memory could be dynamically mapped and unmapped.
Physical memory could be hot-plugged.

As the changes could be quite frequent in some cases, DAMON checks the
dynamic memory mapping changes and applies them to the abstracted target
area only once per user-specified time interval, the ``regions update
interval``.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/linux/damon.h | 20 +++-
 mm/damon.c| 23 +--
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 97ddc74e207f..3c0b92a679e8 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -61,13 +61,16 @@ struct damon_task {
  *
  * @sample_interval:   The time between access samplings.
  * @aggr_interval: The time between monitor results aggregations.
+ * @regions_update_interval:   The time between monitor regions updates.
  * @min_nr_regions:The minimum number of monitoring regions.
  * @max_nr_regions:The maximum number of monitoring regions.
  *
  * For each @sample_interval, DAMON checks whether each region is accessed or
  * not.  It aggregates and keeps the access information (number of accesses to
- * each region) for @aggr_interval time.  All time intervals are in
- * micro-seconds.
+ * each region) for @aggr_interval time.  DAMON also checks whether the target
+ * memory regions need update (e.g., by ``mmap()`` calls from the application,
+ * in case of virtual memory monitoring) and applies the changes for each
+ * @regions_update_interval.  All time intervals are in micro-seconds.
  *
  * @kdamond:   Kernel thread who does the monitoring.
  * @kdamond_stop:  Notifies whether kdamond should stop.
@@ -88,6 +91,7 @@ struct damon_task {
  * @tasks_list:Head of monitoring target tasks (_task) 
list.
  *
  * @init_target_regions:   Constructs initial monitoring target regions.
+ * @update_target_regions: Updates monitoring target regions.
  * @prepare_access_checks: Prepares next access check of target regions.
  * @check_accesses:Checks the access of target regions.
  * @sample_cb: Called for each sampling interval.
@@ -96,11 +100,14 @@ struct damon_task {
  * DAMON can be extended for various address spaces by users.  For this, users
  * can register the target address space dependent low level functions for
  * their usecases via the callback pointers of the context.  The monitoring
- * thread calls @init_target_regions before starting the monitoring, and
+ * thread calls @init_target_regions before starting the monitoring,
+ * @update_target_regions for each @regions_update_interval, and
  * @prepare_access_checks and @check_accesses for each @sample_interval.
  *
  * @init_target_regions should construct proper monitoring target regions and
  * link those to the DAMON context struct.
+ * @update_target_regions should update the monitoring target regions for
+ * current status.
  * @prepare_access_checks should manipulate the monitoring regions to be
  * prepare for the next access check.
  * @check_accesses should check the accesses to each region that made after the
@@ -115,10 +122,12 @@ struct damon_task {
 struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
+   unsigned long regions_update_interval;
unsigned long min_nr_regions;
unsigned long max_nr_regions;
 
struct timespec64 last_aggregation;
+   struct timespec64 last_regions_update;
 
struct task_struct *kdamond;
bool kdamond_stop;
@@ -128,6 +137,7 @@ struct damon_ctx {
 
/* callbacks */
void (*init_target_regions)(struct damon_ctx *context);
+   void (*update_target_regions)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
unsigned int (*check_accesses)(struct damon_ctx *context);
void (*sample_cb)(struct damon_ctx *context);
@@ -135,8 +145,8 @@ struct damon_ctx {
 };
 
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
-int damon_set_attrs(struct damon_ctx *ctx,
-   unsigned long sample_int, unsigned long aggr_int,
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+   unsigned long aggr_int, unsigned long regions_update_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
 int damon_start(struct damon_ctx *ctx);
 int damon_stop(struct damon_ctx *ctx);
diff --git a/mm/damon.c b/mm/damon.c
index 02bc7542a76f..b844924b9fdb 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -385,6 +385,17 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
last_nr_regions = nr_regions;
 }
 
+/*
+ * Check whether it is time to check and apply the target monitoring regions

[PATCH v18 08/14] mm/damon: Add a tracepoint

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit adds a tracepoint for DAMON.  It traces the monitoring
results of each region for each aggregation interval.  Using this, DAMON
can be easily integrated with tracepoint-supporting tools such as perf.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/trace/events/damon.h | 43 
 mm/damon.c   |  4 
 2 files changed, 47 insertions(+)
 create mode 100644 include/trace/events/damon.h

diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
new file mode 100644
index ..40b249a28b30
--- /dev/null
+++ b/include/trace/events/damon.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM damon
+
+#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DAMON_H
+
+#include 
+#include 
+#include 
+
+TRACE_EVENT(damon_aggregated,
+
+   TP_PROTO(struct damon_task *t, struct damon_region *r,
+   unsigned int nr_regions),
+
+   TP_ARGS(t, r, nr_regions),
+
+   TP_STRUCT__entry(
+   __field(int, pid)
+   __field(unsigned int, nr_regions)
+   __field(unsigned long, start)
+   __field(unsigned long, end)
+   __field(unsigned int, nr_accesses)
+   ),
+
+   TP_fast_assign(
+   __entry->pid = t->pid;
+   __entry->nr_regions = nr_regions;
+   __entry->start = r->ar.start;
+   __entry->end = r->ar.end;
+   __entry->nr_accesses = r->nr_accesses;
+   ),
+
+   TP_printk("pid=%d nr_regions=%u %lu-%lu: %u", __entry->pid,
+   __entry->nr_regions, __entry->start,
+   __entry->end, __entry->nr_accesses)
+);
+
+#endif /* _TRACE_DAMON_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/mm/damon.c b/mm/damon.c
index 55ecfab64220..00df1a4c3d5c 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -19,6 +19,8 @@
 
 #define pr_fmt(fmt) "damon: " fmt
 
+#define CREATE_TRACE_POINTS
+
 #include 
 #include 
 #include 
@@ -29,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Minimal region size.  Every damon_region is aligned by this. */
 #define MIN_REGION PAGE_SIZE
@@ -791,6 +794,7 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
damon_write_rbuf(c, >ar.end, sizeof(r->ar.end));
damon_write_rbuf(c, >nr_accesses,
sizeof(r->nr_accesses));
+   trace_damon_aggregated(t, r, nr);
r->nr_accesses = 0;
}
}
-- 
2.17.1



[PATCH v18 06/14] mm/damon: Implement callbacks for the virtual memory address spaces

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit introduces a reference implementation of the address space
specific low level primitives for the virtual address space, so that
users of DAMON can easily monitor the data accesses on virtual address
spaces of specific processes by simply configuring the implementation to
be used by DAMON.

The low level primitives for the fundamental access monitoring are
defined in two parts:
1. Identification of the monitoring target address range for the address
space.
2. Access check of specific address range in the target space.

The reference implementation for the virtual address space provided by
this commit is designed as below.

PTE Accessed-bit Based Access Check
-----------------------------------

The implementation uses PTE Accessed-bit for basic access checks.  That
is, it clears the bit for the next sampling target page and checks whether
it is set again after one sampling period.  To avoid disturbing other
Accessed bit users such as the reclamation logic, the implementation
adjusts ``PG_Idle`` and ``PG_Young`` appropriately, in the same way as
'Idle Page Tracking'.

VMA-based Target Address Range Construction
-------------------------------------------

Only small parts in the super-huge virtual address space of the
processes are mapped to physical memory and accessed.  Thus, tracking
the unmapped address regions is just wasteful.  However, because DAMON
can deal with some level of noise using the adaptive regions adjustment
mechanism, tracking every mapping is not strictly required but could
even incur a high overhead in some cases.  That said, too huge unmapped
areas inside the monitoring target should be removed to not take the
time for the adaptive mechanism.

For this reason, this implementation converts the complex mappings to
three distinct regions that cover every mapped area of the address
space.  Also, the two gaps between the three regions are the two biggest
unmapped areas in the given address space.  The two biggest unmapped
areas would be the gap between the heap and the uppermost mmap()-ed
region, and the gap between the lowermost mmap()-ed region and the stack
in most of the cases.  Because these gaps are exceptionally huge in
usual address spaces, excluding these will be sufficient to make a
reasonable trade-off.  Below shows this in detail::

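    <heap>
    <BIG UNMAPPED REGION 1>
    <uppermost mmap()-ed region>
    (small mmap()-ed regions and munmap()-ed regions)
    <lowermost mmap()-ed region>
    <BIG UNMAPPED REGION 2>
    <stack>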

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 include/linux/damon.h |   6 +
 mm/damon.c| 474 ++
 2 files changed, 480 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 3c0b92a679e8..310d36d123b3 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -144,6 +144,12 @@ struct damon_ctx {
void (*aggregate_cb)(struct damon_ctx *context);
 };
 
+/* Reference callback implementations for virtual memory */
+void kdamond_init_vm_regions(struct damon_ctx *ctx);
+void kdamond_update_vm_regions(struct damon_ctx *ctx);
+void kdamond_prepare_vm_access_checks(struct damon_ctx *ctx);
+unsigned int kdamond_check_vm_accesses(struct damon_ctx *ctx);
+
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
diff --git a/mm/damon.c b/mm/damon.c
index b844924b9fdb..386780739007 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -9,6 +9,9 @@
  * This file is constructed in below parts.
  *
  * - Functions and macros for DAMON data structures
+ * - Functions for the initial monitoring target regions construction
+ * - Functions for the dynamic monitoring target regions update
+ * - Functions for the access checking of the regions
  * - Functions for DAMON core logics and features
  * - Functions for the DAMON programming interface
  * - Functions for the module loading/unloading
@@ -196,6 +199,477 @@ static unsigned long damon_region_sz_limit(struct 
damon_ctx *ctx)
return sz;
 }
 
+/*
+ * Get the mm_struct of the given task
+ *
+ * Caller _must_ put the mm_struct after use, unless it is NULL.
+ *
+ * Returns the mm_struct of the task on success, NULL on failure
+ */
+static struct mm_struct *damon_get_mm(struct damon_task *t)
+{
+   struct task_struct *task;
+   struct mm_struct *mm;
+
+   task = damon_get_task_struct(t);
+   if (!task)
+   return NULL;
+
+   mm = get_task_mm(task);
+   put_task_struct(task);
+   return mm;
+}
+
+/*
+ * Functions for the initial monitoring target regions construction
+ */
+
+/*
+ * Size-evenly split a region into 'nr_pieces' small regions
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
+static int damon_split_region_evenly(struct damon_ctx *ctx,
+   struct damon_region *r, unsigned int nr_pieces)
+{
+   unsigned long sz_orig, sz_piece, orig_end;
+   struct damon_region *n

[PATCH v18 10/14] tools: Introduce a minimal user-space tool for DAMON

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit introduces a shallow wrapper python script,
``/tools/damon/damo`` that provides more convenient interface.  Note
that it is only aimed to be used for minimal reference of the DAMON's
debugfs interfaces and for debugging of the DAMON itself.

Signed-off-by: SeongJae Park 
---
 tools/damon/.gitignore|   1 +
 tools/damon/_damon.py | 129 ++
 tools/damon/_dist.py  |  36 
 tools/damon/_recfile.py   |  23 +++
 tools/damon/bin2txt.py|  67 +++
 tools/damon/damo  |  37 
 tools/damon/heats.py  | 362 ++
 tools/damon/nr_regions.py |  91 ++
 tools/damon/record.py | 106 +++
 tools/damon/report.py |  45 +
 tools/damon/wss.py| 100 +++
 11 files changed, 997 insertions(+)
 create mode 100644 tools/damon/.gitignore
 create mode 100644 tools/damon/_damon.py
 create mode 100644 tools/damon/_dist.py
 create mode 100644 tools/damon/_recfile.py
 create mode 100644 tools/damon/bin2txt.py
 create mode 100755 tools/damon/damo
 create mode 100644 tools/damon/heats.py
 create mode 100644 tools/damon/nr_regions.py
 create mode 100644 tools/damon/record.py
 create mode 100644 tools/damon/report.py
 create mode 100644 tools/damon/wss.py

diff --git a/tools/damon/.gitignore b/tools/damon/.gitignore
new file mode 100644
index ..96403d36ff93
--- /dev/null
+++ b/tools/damon/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/tools/damon/_damon.py b/tools/damon/_damon.py
new file mode 100644
index ..2a08468ad27e
--- /dev/null
+++ b/tools/damon/_damon.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Contains core functions for DAMON debugfs control.
+"""
+
+import os
+import subprocess
+
+debugfs_attrs = None
+debugfs_record = None
+debugfs_pids = None
+debugfs_monitor_on = None
+
+def set_target_pid(pid):
+return subprocess.call('echo %s > %s' % (pid, debugfs_pids), shell=True,
+executable='/bin/bash')
+
+def turn_damon(on_off):
+return subprocess.call("echo %s > %s" % (on_off, debugfs_monitor_on),
+shell=True, executable="/bin/bash")
+
+def is_damon_running():
+with open(debugfs_monitor_on, 'r') as f:
+return f.read().strip() == 'on'
+
+class Attrs:
+sample_interval = None
+aggr_interval = None
+regions_update_interval = None
+min_nr_regions = None
+max_nr_regions = None
+rbuf_len = None
+rfile_path = None
+
+def __init__(self, s, a, r, n, x, l, f):
+self.sample_interval = s
+self.aggr_interval = a
+self.regions_update_interval = r
+self.min_nr_regions = n
+self.max_nr_regions = x
+self.rbuf_len = l
+self.rfile_path = f
+
+def __str__(self):
+return "%s %s %s %s %s %s %s" % (self.sample_interval,
+self.aggr_interval, self.regions_update_interval,
+self.min_nr_regions, self.max_nr_regions, self.rbuf_len,
+self.rfile_path)
+
+def attr_str(self):
+return "%s %s %s %s %s " % (self.sample_interval, self.aggr_interval,
+self.regions_update_interval, self.min_nr_regions,
+self.max_nr_regions)
+
+def record_str(self):
+return '%s %s ' % (self.rbuf_len, self.rfile_path)
+
+def apply(self):
+ret = subprocess.call('echo %s > %s' % (self.attr_str(), 
debugfs_attrs),
+shell=True, executable='/bin/bash')
+if ret:
+return ret
+ret = subprocess.call('echo %s > %s' % (self.record_str(),
+debugfs_record), shell=True, executable='/bin/bash')
+if ret:
+return ret
+
+def current_attrs():
+with open(debugfs_attrs, 'r') as f:
+attrs = f.read().split()
+attrs = [int(x) for x in attrs]
+
+with open(debugfs_record, 'r') as f:
+rattrs = f.read().split()
+attrs.append(int(rattrs[0]))
+attrs.append(rattrs[1])
+
+return Attrs(*attrs)
+
+def chk_update_debugfs(debugfs):
+global debugfs_attrs
+global debugfs_record
+global debugfs_pids
+global debugfs_monitor_on
+
+debugfs_damon = os.path.join(debugfs, 'damon')
+debugfs_attrs = os.path.join(debugfs_damon, 'attrs')
+debugfs_record = os.path.join(debugfs_damon, 'record')
+debugfs_pids = os.path.join(debugfs_damon, 'pids')
+debugfs_monitor_on = os.path.join(debugfs_damon, 'monitor_on')
+
+if not os.path.isdir(debugfs_damon):
+print("damon debugfs dir (%s) not found", debugfs_damon)
+exit(1)
+
+for f in [debugfs_attrs, debugfs_record, debugfs_pids, debugfs_monitor_on]:
+if not os.path.isfile(f):
+print("damon debugfs file (%s) not found" % f)
+exit(1)
+
+def cmd_args_to_attrs(args):
+"Generate attributes with specified arguments&

[PATCH v18 09/14] mm/damon: Implement a debugfs interface

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit implements a debugfs interface for DAMON.  It works for the
virtual address spaces monitoring.

DAMON exports four files, ``attrs``, ``pids``, ``record``, and
``monitor_on`` under its debugfs directory, ``<debugfs>/damon/``.

Attributes
----------

Users can read and write the ``sampling interval``, ``aggregation
interval``, ``regions update interval``, and min/max number of
monitoring target regions by reading from and writing to the ``attrs``
file.  For example, below commands set those values to 5 ms, 100 ms,
1,000 ms, 10, 1000 and check it again::

    # cd <debugfs>/damon
    # echo 5000 100000 1000000 10 1000 > attrs
    # cat attrs
    5000 100000 1000000 10 1000

Target PIDs
-----------

Users can read and write the pids of current monitoring target processes
by reading from and writing to the ``pids`` file.  For example, below
commands set processes having pids 42 and 4242 as the processes to be
monitored and check it again::

    # cd <debugfs>/damon
    # echo 42 4242 > pids
    # cat pids
    42 4242

Note that setting the pids doesn't start the monitoring.

Record
------

DAMON supports direct monitoring result record feature.  The recorded
results are first written to a buffer and flushed to a file in batch.
Users can set the size of the buffer and the path to the result file by
reading from and writing to the ``record`` file.  For example, below
commands set the buffer to be 4 KiB and the result to be saved in
'/damon.data'::

    # cd <debugfs>/damon
    # echo 4096 /damon.data > record
    # cat record
    4096 /damon.data

Turning On/Off
--------------

You can check current status, start and stop the monitoring by reading
from and writing to the ``monitor_on`` file.  Writing ``on`` to the file
starts DAMON to monitor the target processes with the attributes.
Writing ``off`` to the file stops DAMON.  DAMON also stops if every
target process is terminated.  Below example commands turn on, off,
and check status of DAMON::

    # cd <debugfs>/damon
    # echo on > monitor_on
    # echo off > monitor_on
    # cat monitor_on
    off

Please note that you cannot write to the ``attrs`` and ``pids`` files
while the monitoring is turned on.  If you write to the files while
DAMON is running, ``-EINVAL`` will be returned.

Signed-off-by: SeongJae Park 
Reviewed-by: Leonard Foerster 
---
 mm/damon.c | 381 -
 1 file changed, 380 insertions(+), 1 deletion(-)

diff --git a/mm/damon.c b/mm/damon.c
index 00df1a4c3d5c..df05bd821ff8 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -14,6 +14,7 @@
  * - Functions for the access checking of the regions
  * - Functions for DAMON core logics and features
  * - Functions for the DAMON programming interface
+ * - Functions for the DAMON debugfs interface
  * - Functions for the module loading/unloading
  */
 
@@ -22,6 +23,7 @@
 #define CREATE_TRACE_POINTS
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -68,6 +70,20 @@
 /* Get a random number in [l, r) */
 #define damon_rand(l, r) (l + prandom_u32() % (r - l))
 
+/* A monitoring context for debugfs interface users. */
+static struct damon_ctx damon_user_ctx = {
+   .sample_interval = 5 * 1000,
+   .aggr_interval = 100 * 1000,
+   .regions_update_interval = 1000 * 1000,
+   .min_nr_regions = 10,
+   .max_nr_regions = 1000,
+
+   .init_target_regions = kdamond_init_vm_regions,
+   .update_target_regions = kdamond_update_vm_regions,
+   .prepare_access_checks = kdamond_prepare_vm_access_checks,
+   .check_accesses = kdamond_check_vm_accesses,
+};
+
 /*
  * Construct a damon_region struct
  *
@@ -1228,17 +1244,380 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned 
long sample_int,
return 0;
 }
 
+/*
+ * Functions for the DAMON debugfs interface
+ */
+
+static ssize_t debugfs_monitor_on_read(struct file *file,
+   char __user *buf, size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char monitor_on_buf[5];
+   bool monitor_on;
+   int len;
+
+   monitor_on = damon_kdamond_running(ctx);
+   len = snprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n");
+
+   return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len);
+}
+
+/*
+ * Returns non-empty string on success, negative error code otherwise.
+ */
+static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos)
+{
+   char *kbuf;
+   ssize_t ret;
+
+   /* We do not accept continuous write */
+   if (*ppos)
+   return ERR_PTR(-EINVAL);
+
+   kbuf = kmalloc(count + 1, GFP_KERNEL);
+   if (!kbuf)
+   return ERR_PTR(-ENOMEM);
+
+   ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count);
+   if (ret != count) {
+   kfree(kbuf);
+   return ERR_PTR(-EIO);
+   }
+   kbuf[ret] = '\0';
+
+   return kbuf;
+}
+
+static ssize_t debugfs_monitor_on_write(struc

[PATCH v18 13/14] mm/damon: Add user space selftests

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit adds simple user space tests for DAMON.  The tests are
using kselftest framework.

Signed-off-by: SeongJae Park 
---
 tools/testing/selftests/damon/Makefile|   7 +
 .../selftests/damon/_chk_dependency.sh|  28 
 tools/testing/selftests/damon/_chk_record.py  | 108 ++
 .../testing/selftests/damon/debugfs_attrs.sh  | 139 ++
 .../testing/selftests/damon/debugfs_record.sh |  50 +++
 5 files changed, 332 insertions(+)
 create mode 100644 tools/testing/selftests/damon/Makefile
 create mode 100644 tools/testing/selftests/damon/_chk_dependency.sh
 create mode 100644 tools/testing/selftests/damon/_chk_record.py
 create mode 100755 tools/testing/selftests/damon/debugfs_attrs.sh
 create mode 100755 tools/testing/selftests/damon/debugfs_record.sh

diff --git a/tools/testing/selftests/damon/Makefile 
b/tools/testing/selftests/damon/Makefile
new file mode 100644
index ..cfd5393a4639
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_FILES = _chk_dependency.sh _chk_record_file.py
+TEST_PROGS = debugfs_attrs.sh debugfs_record.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh 
b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index ..814dcadd5e96
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=/sys/kernel/debug/damon
+
+if [ $EUID -ne 0 ];
+then
+   echo "Run as root"
+   exit $ksft_skip
+fi
+
+if [ ! -d $DBGFS ]
+then
+   echo "$DBGFS not found"
+   exit $ksft_skip
+fi
+
+for f in attrs record pids monitor_on
+do
+   if [ ! -f "$DBGFS/$f" ]
+   then
+   echo "$f not found"
+   exit 1
+   fi
+done
diff --git a/tools/testing/selftests/damon/_chk_record.py 
b/tools/testing/selftests/damon/_chk_record.py
new file mode 100644
index ..5cfcf4161404
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_record.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"Check whether the DAMON record file is valid"
+
+import argparse
+import struct
+import sys
+
+fmt_version = 0
+
+def set_fmt_version(f):
+global fmt_version
+
+mark = f.read(16)
+if mark == b'damon_recfmt_ver':
+fmt_version = struct.unpack('i', f.read(4))[0]
+else:
+fmt_version = 0
+f.seek(0)
+return fmt_version
+
+def read_pid(f):
+if fmt_version == 0:
+pid = struct.unpack('L', f.read(8))[0]
+else:
+pid = struct.unpack('i', f.read(4))[0]
+def err_percent(val, expected):
+return abs(val - expected) / expected * 100
+
+def chk_task_info(f):
+pid = read_pid(f)
+nr_regions = struct.unpack('I', f.read(4))[0]
+
+if nr_regions > max_nr_regions:
+print('too many regions: %d > %d' % (nr_regions, max_nr_regions))
+exit(1)
+
+nr_gaps = 0
+eaddr = 0
+for r in range(nr_regions):
+saddr = struct.unpack('L', f.read(8))[0]
+if eaddr and saddr != eaddr:
+nr_gaps += 1
+eaddr = struct.unpack('L', f.read(8))[0]
+nr_accesses = struct.unpack('I', f.read(4))[0]
+
+if saddr >= eaddr:
+print('wrong region [%d,%d)' % (saddr, eaddr))
+exit(1)
+
+max_nr_accesses = aint / sint
+if nr_accesses > max_nr_accesses:
+if err_percent(nr_accesses, max_nr_accesses) > 15:
+print('too high nr_access: expected %d but %d' %
+(max_nr_accesses, nr_accesses))
+exit(1)
+if nr_gaps != 2:
+print('number of gaps are not two but %d' % nr_gaps)
+exit(1)
+
+def parse_time_us(bindat):
+sec = struct.unpack('l', bindat[0:8])[0]
+nsec = struct.unpack('l', bindat[8:16])[0]
+return (sec * 10 + nsec) / 1000
+
+def main():
+global sint
+global aint
+global min_nr
+global max_nr_regions
+
+parser = argparse.ArgumentParser()
+parser.add_argument('file', metavar='',
+help='path to the record file')
+parser.add_argument('--attrs', metavar='',
+default='5000 10 100 10 1000',
+help='content of debugfs attrs file')
+args = parser.parse_args()
+file_path = args.file
+attrs = [int(x) for x in args.attrs.split()]
+sint, aint, rint, min_nr, max_nr_regions = attrs
+
+with open(file_path, 'rb') as f:
+set_fmt_version(f)
+last_aggr_time = None
+while True:
+timebin = f.read(16)
+if len(timebin) != 16:
+break
+
+now = parse_time_us(timebin)
+if not last_aggr_time:
+ 

[PATCH v18 12/14] mm/damon: Add kunit tests

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit adds kunit based unit tests for DAMON.

Signed-off-by: SeongJae Park 
Reviewed-by: Brendan Higgins 
---
 mm/Kconfig  |  11 +
 mm/damon-test.h | 661 
 mm/damon.c  |   6 +
 3 files changed, 678 insertions(+)
 create mode 100644 mm/damon-test.h

diff --git a/mm/Kconfig b/mm/Kconfig
index 464e9594dcec..e32761985611 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -879,4 +879,15 @@ config DAMON
  more information.
  If unsure, say N.
 
+config DAMON_KUNIT_TEST
+   bool "Test for damon"
+   depends on DAMON=y && KUNIT
+   help
+ This builds the DAMON Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
 endmenu
diff --git a/mm/damon-test.h b/mm/damon-test.h
new file mode 100644
index ..b31c7fe913ca
--- /dev/null
+++ b/mm/damon-test.h
@@ -0,0 +1,661 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.  All rights reserved.
+ *
+ * Author: SeongJae Park 
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_TEST_H
+#define _DAMON_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_test_str_to_pids(struct kunit *test)
+{
+   char *question;
+   int *answers;
+   int expected[] = {12, 35, 46};
+   ssize_t nr_integers = 0, i;
+
+   question = "123";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "123abc";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+   KUNIT_EXPECT_EQ(test, 123, answers[0]);
+   kfree(answers);
+
+   question = "a123";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, answers, (int *)NULL);
+
+   question = "12 35";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 46";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
+   for (i = 0; i < nr_integers; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "12 35 abc 46";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+   for (i = 0; i < 2; i++)
+   KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+   kfree(answers);
+
+   question = "";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+
+   question = "\n";
+   answers = str_to_pids(question, strnlen(question, 128), &nr_integers);
+   KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+   KUNIT_EXPECT_PTR_EQ(test, (int *)NULL, answers);
+   kfree(answers);
+}
+
+static void damon_test_regions(struct kunit *test)
+{
+   struct damon_region *r;
+   struct damon_task *t;
+
+   r = damon_new_region(1, 2);
+   KUNIT_EXPECT_EQ(test, 1ul, r->ar.start);
+   KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
+   KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_add_region(r, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_regions(t));
+
+   damon_del_region(r);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_regions(t));
+
+   damon_free_task(t);
+}
+
+static void damon_test_tasks(struct kunit *test)
+{
+   struct damon_ctx *c = &damon_user_ctx;
+   struct damon_task *t;
+
+   t = damon_new_task(42);
+   KUNIT_EXPECT_EQ(test, 42, t->pid);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+
+   damon_add_task(&damon_user_ctx, t);
+   KUNIT_EXPECT_EQ(test, 1u, nr_damon_tasks(c));
+
+   damon_destroy_task(t);
+   KUNIT_EXPECT_EQ(test, 0u, nr_damon_tasks(c));
+}
+
+static void damon_test_set_pids(struct kunit *test)
+{
+   struct damon_ctx *ctx = &damon_user_ctx;
+   int pids[] = {1, 2, 3};
+   char buf[64];
+
+   damon_set_pids(ctx, pids, 3);
+   damon_sprint_pids(ctx, buf, 64);
+   KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n");
+

[PATCH v18 11/14] Documentation: Add documents for DAMON

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit adds documents for DAMON under
`Documentation/admin-guide/mm/damon/` and `Documentation/vm/damon/`.

Signed-off-by: SeongJae Park 
---
 Documentation/admin-guide/mm/damon/guide.rst | 157 ++
 Documentation/admin-guide/mm/damon/index.rst |  15 +
 Documentation/admin-guide/mm/damon/plans.rst |  29 ++
 Documentation/admin-guide/mm/damon/start.rst |  98 ++
 Documentation/admin-guide/mm/damon/usage.rst | 298 +++
 Documentation/admin-guide/mm/index.rst   |   1 +
 Documentation/vm/damon/api.rst   |  20 ++
 Documentation/vm/damon/eval.rst  | 222 ++
 Documentation/vm/damon/faq.rst   |  59 
 Documentation/vm/damon/index.rst |  32 ++
 Documentation/vm/damon/mechanisms.rst| 165 ++
 Documentation/vm/index.rst   |   1 +
 12 files changed, 1097 insertions(+)
 create mode 100644 Documentation/admin-guide/mm/damon/guide.rst
 create mode 100644 Documentation/admin-guide/mm/damon/index.rst
 create mode 100644 Documentation/admin-guide/mm/damon/plans.rst
 create mode 100644 Documentation/admin-guide/mm/damon/start.rst
 create mode 100644 Documentation/admin-guide/mm/damon/usage.rst
 create mode 100644 Documentation/vm/damon/api.rst
 create mode 100644 Documentation/vm/damon/eval.rst
 create mode 100644 Documentation/vm/damon/faq.rst
 create mode 100644 Documentation/vm/damon/index.rst
 create mode 100644 Documentation/vm/damon/mechanisms.rst

diff --git a/Documentation/admin-guide/mm/damon/guide.rst 
b/Documentation/admin-guide/mm/damon/guide.rst
new file mode 100644
index ..c51fb843efaa
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/guide.rst
@@ -0,0 +1,157 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Optimization Guide
+==================
+
+This document helps you estimate the amount of benefit that you could get
+from DAMON-based optimizations, and describes how you could achieve it.  You
+are assumed to have already read :doc:`start`.
+
+
+Check The Signs
+===============
+
+No optimization can provide the same extent of benefit in every case.  Therefore
+you should first guess how much improvement you could get using DAMON.  If some
+of the conditions below match your situation, you could consider using DAMON.
+
+- *Low IPC and High Cache Miss Ratios.*  Low IPC means most of the CPU time is
+  spent waiting for the completion of time-consuming operations such as memory
+  access, while high cache miss ratios mean the caches don't help it well.
+  DAMON is not for cache level optimization, but DRAM level.  However,
+  improving DRAM management will also help this case by reducing the memory
+  operation latency.
+- *Memory Over-commitment and Unknown Users.*  If you are doing memory
+  overcommitment and you cannot control every user of your system, a memory
+  bank run could happen at any time.  You can estimate when it will happen
+  based on DAMON's monitoring results and act earlier to avoid or deal better
+  with the crisis.
+- *Frequent Memory Pressure.*  Frequent memory pressure means your system has
+  wrong configurations or memory hogs.  DAMON will help you find the right
+  configuration and/or the criminals.
+- *Heterogeneous Memory System.*  If your system is utilizing memory devices
+  that are placed between DRAM and traditional hard disks, such as non-volatile
+  memory or fast SSDs, DAMON could help you utilize the devices more
+  efficiently.
+
+
+Profile
+=======
+
+If you found some positive signals, you could start by profiling your workloads
+using DAMON.  Find major workloads on your systems and analyze their data
+access pattern to find something that is wrong or can be improved.  The DAMON user
+space tool (``damo``) will be useful for this.
+
+We recommend you to start from working set size distribution check using ``damo
+report wss``.  If the distribution is non-uniform or quite different from what
+you estimated, you could consider `Memory Configuration`_ optimization.
+
+Then, review the overall access pattern in heatmap form using ``damo report
+heats``.  If it shows a simple pattern consisting of a small number of memory
+regions having high contrast of access temperature, you could consider manual
+`Program Modification`_.
+
+If you still want to absorb more benefits, you should develop `Personalized
+DAMON Application`_ for your special case.
+
+You don't need to take only one approach among the above plans, but you could
+use multiple of the above approaches to maximize the benefit.
+
+
+Optimize
+========
+
+If the profiling result also says it's worth trying some optimization, you
+could consider below approaches.  Note that some of the below approaches assume
+that your systems are configured with swap devices or other types of auxiliary
+memory so that you aren't strictly required to accommodate the whole working set
+in the main memory.  Most of the detailed optimization should be made on your
+concrete

[PATCH v18 14/14] MAINTAINERS: Update for DAMON

2020-07-13 Thread SeongJae Park
From: SeongJae Park 

This commit updates MAINTAINERS file for DAMON related files.

Signed-off-by: SeongJae Park 
---
 MAINTAINERS | 13 +
 1 file changed, 13 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 50659d76976b..23348005f5bd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4686,6 +4686,19 @@ F:   net/ax25/ax25_out.c
 F: net/ax25/ax25_timer.c
 F: net/ax25/sysctl_net_ax25.c
 
+DATA ACCESS MONITOR
+M: SeongJae Park 
+L: linux...@kvack.org
+S: Maintained
+F: Documentation/admin-guide/mm/damon/*
+F: Documentation/vm/damon/*
+F: include/linux/damon.h
+F: include/trace/events/damon.h
+F: mm/damon-test.h
+F: mm/damon.c
+F: tools/damon/*
+F: tools/testing/selftests/damon/*
+
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
 L: net...@vger.kernel.org
 S: Orphan
-- 
2.17.1



Re: Re: [PATCH v18 01/14] mm/page_ext: Export lookup_page_ext() to GPL modules

2020-07-13 Thread SeongJae Park
On Mon, 13 Jul 2020 15:08:42 +0300 Mike Rapoport  wrote:

> Hi,
> 
> On Mon, Jul 13, 2020 at 10:41:31AM +0200, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > This commit exports 'lookup_page_ext()' to GPL modules.  It will be used
> > by DAMON in following commit for the implementation of the region based
> > sampling.
> 
> Maybe I'm missing something, but why is DAMON a module?

I made it loadable just for easier adoption from downstream kernels.  I could
drop the module build support if asked.


Thanks,
SeongJae Park

> 
> > Signed-off-by: SeongJae Park 
> > Reviewed-by: Leonard Foerster 
> > Reviewed-by: Varad Gautam 
> > ---
> >  mm/page_ext.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index a3616f7a0e9e..9d802d01fcb5 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -131,6 +131,7 @@ struct page_ext *lookup_page_ext(const struct page 
> > *page)
> > MAX_ORDER_NR_PAGES);
> > return get_entry(base, index);
> >  }
> > +EXPORT_SYMBOL_GPL(lookup_page_ext);
> >  
> >  static int __init alloc_node_page_ext(int nid)
> >  {
> > -- 
> > 2.17.1
> > 
> 
> -- 
> Sincerely yours,
> Mike.


Re: Re: Re: [PATCH v18 01/14] mm/page_ext: Export lookup_page_ext() to GPL modules

2020-07-13 Thread SeongJae Park
On Mon, 13 Jul 2020 20:19:09 +0300 Mike Rapoport  wrote:

> On Mon, Jul 13, 2020 at 02:21:43PM +0200, SeongJae Park wrote:
> > On Mon, 13 Jul 2020 15:08:42 +0300 Mike Rapoport  wrote:
> > 
> > > Hi,
> > > 
> > > On Mon, Jul 13, 2020 at 10:41:31AM +0200, SeongJae Park wrote:
> > > > From: SeongJae Park 
> > > > 
> > > > This commit exports 'lookup_page_ext()' to GPL modules.  It will be used
> > > > by DAMON in following commit for the implementation of the region based
> > > > sampling.
> > > 
> > > Maybe I'm missing something, but why is DAMON a module?
> > 
> > I made it loadable just for easier adoption from downstream kernels.  I 
> > could
> > drop the module build support if asked.
>  
> Well, exporting core mm symbols to modules should be considered very
> carefully. 

Agreed.  I will drop the module support from the next spin.

> 
> Why lookup_page_ext() is required for DAMON? It is not used anywhere in
> this patchset.

It's indirectly used.  In the 6th patch, DAMON uses 'set_page_young()' to not
interfere with other PTE Accessed bit users.  And, 'set_page_young()' uses
'lookup_page_ext()' if !CONFIG_64BIT.  That's why I exported it.


Thanks,
SeongJae Park


Re: [PATCH v17 03/15] mm/damon: Implement region based sampling

2020-07-07 Thread SeongJae Park
On Mon, 6 Jul 2020 13:53:10 +0200 SeongJae Park  wrote:

> From: SeongJae Park 
> 
> This commit implements DAMON's target address space independent high
> level logics for basic access check and region based sampling.  This
> doesn't work alone, but needs the target address space specific low
> level primitives implementation for the monitoring target address ranges
> construction and the access check, though.  A reference implementation
> of those will be provided by a later commit.  Nevertheless, users can
> implement and use their own versions for their specific use cases.
>
[...]
> +/**
> + * damon_start() - Starts monitoring with given context.
> + * @ctx: monitoring context
> + *
> + * Return: 0 on success, negative error code otherwise.
> + */
> +int damon_start(struct damon_ctx *ctx)
> +{
> + int err = -EBUSY;
> +
> + mutex_lock(&ctx->kdamond_lock);
> + if (!ctx->kdamond) {
> + err = 0;
> + ctx->kdamond_stop = false;
> + ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond");

Oops, this means 'kdamond_fn' could see the unset '->kdamond'.  I will use
'kthread_create()' and 'wake_up_process()' in the next spin.

> + if (IS_ERR(ctx->kdamond))
> + err = PTR_ERR(ctx->kdamond);
> + }
> + mutex_unlock(&ctx->kdamond_lock);
> +
> + return err;
> +}

So, the change would be something like below:

--- a/mm/damon.c
+++ b/mm/damon.c
@@ -1464,9 +1464,11 @@ int damon_start(struct damon_ctx *ctx)
if (!ctx->kdamond) {
err = 0;
ctx->kdamond_stop = false;
-   ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond");
+   ctx->kdamond = kthread_create(kdamond_fn, ctx, "kdamond");
if (IS_ERR(ctx->kdamond))
    err = PTR_ERR(ctx->kdamond);
+   else
+   wake_up_process(ctx->kdamond);
}
mutex_unlock(&ctx->kdamond_lock);


Thanks,
SeongJae Park


Re: Re: [PATCH v17 12/15] Documentation/admin-guide/mm: Add a document for DAMON

2020-07-07 Thread SeongJae Park
On Tue, 7 Jul 2020 10:49:06 +0300 Mike Rapoport  wrote:

> Hello SeongJae,
> 
> On Mon, Jul 06, 2020 at 01:53:19PM +0200, SeongJae Park wrote:
> > From: SeongJae Park 
> > 
> > This commit adds a document for DAMON under
> > `Documentation/admin-guide/mm/damon/`.
> > 
> > Signed-off-by: SeongJae Park 
> > ---
> >  Documentation/admin-guide/mm/damon/api.rst|  20 ++
> >  .../admin-guide/mm/damon/damon_heatmap.png| Bin 0 -> 8366 bytes
> >  .../admin-guide/mm/damon/damon_wss_change.png | Bin 0 -> 7211 bytes
> >  .../admin-guide/mm/damon/damon_wss_dist.png   | Bin 0 -> 6173 bytes
> >  Documentation/admin-guide/mm/damon/eval.rst   | 222 +
> >  Documentation/admin-guide/mm/damon/faq.rst|  59 
> >  .../admin-guide/mm/damon/freqmine_heatmap.png | Bin 0 ->  bytes
> >  .../admin-guide/mm/damon/freqmine_wss_sz.png  | Bin 0 -> 5589 bytes
> >  .../mm/damon/freqmine_wss_time.png| Bin 0 -> 6550 bytes
> >  Documentation/admin-guide/mm/damon/guide.rst  | 194 
> >  Documentation/admin-guide/mm/damon/index.rst  |  35 +++
> >  .../admin-guide/mm/damon/mechanisms.rst   | 159 ++
> >  Documentation/admin-guide/mm/damon/plans.rst  |  29 ++
> >  Documentation/admin-guide/mm/damon/start.rst  | 117 +++
> >  .../mm/damon/streamcluster_heatmap.png| Bin 0 -> 42210 bytes
> >  .../mm/damon/streamcluster_wss_sz.png | Bin 0 -> 6327 bytes
> >  .../mm/damon/streamcluster_wss_time.png   | Bin 0 -> 8830 bytes
> >  Documentation/admin-guide/mm/damon/usage.rst  | 296 ++
> >  Documentation/admin-guide/mm/index.rst|   1 +
> 
> It's really cool to see documentation along with the code!
> 
> I'd suggest to reorganize the DAMON docs to better match the current
> structure of Documentation/ directory.
> 
> The description of DAMON usage from the userspace and reference for the
> userland tools does belong to Documentation/admin-guide/mm. However, the
> kernel APIs are better to be placed in Documentation/core-api or even
> Documentation/dev-tools. As for the detailed description of the internal
> DAMON operation, this would naturally belong to Documentation/vm.
> 
> Another thing is that this patch is really hard to review because of the
> encoded png blobs. In general, we try to keep Documentation/ readable in
> plain text, but if you think that the images are essential and must be a
> part of DAMON in-tree docs rather than links to an external resource,
> please split them to a separate patch.

Totally agreed!  Thanks for the kind and wise comments.  I will try to remove
the blobs or at least separate those in the next spin.


Thanks,
SeongJae Park

> 
> -- 
> Sincerely yours,
> Mike.
> 


[RFC v13 1/8] mm/madvise: Export do_madvise() to external GPL modules

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit exports 'do_madvise()' to external GPL modules, so that
other modules including DAMON could use the function.

Signed-off-by: SeongJae Park 
---
 mm/madvise.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 1ad7522567d4..fcd951694ebc 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1171,6 +1171,7 @@ int do_madvise(struct task_struct *target_task, struct 
mm_struct *mm,
 
return error;
 }
+EXPORT_SYMBOL_GPL(do_madvise);
 
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
-- 
2.17.1



[RFC v13 0/8] Implement Data Access Monitoring-based Memory Operation Schemes

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

Changes from Previous Version
=============================

- Wordsmith the document, comment, commit messages
- Support a scheme of max access count 0
- Use 'unsigned long' for (min|max)_sz_region

Introduction
============

DAMON[1] can be used as a primitive for data access aware memory management
optimizations.  For that, users who want such optimizations should run DAMON,
read the monitoring results, analyze it, plan a new memory management scheme,
and apply the new scheme by themselves.  Such efforts will be inevitable for
some complicated optimizations.

However, in many other cases, the users would simply want the system to apply a
memory management action to a memory region of a specific size having a
specific access frequency for a specific time.  For example, "page out a memory
region larger than 100 MiB keeping only rare accesses more than 2 minutes", or
"Do not use THP for a memory region larger than 2 MiB rarely accessed for more
than 1 seconds".

This RFC patchset makes DAMON to handle such data access monitoring-based
operation schemes.  With this change, users can do the data access aware
optimizations by simply specifying their schemes to DAMON.

[1] https://lore.kernel.org/linux-mm/20200706115322.29598-1-sjp...@amazon.com/

Evaluations
===========

We evaluated DAMON's overhead, monitoring quality and usefulness using 25
realistic workloads on my QEMU/KVM based virtual machine running a kernel that
v12 of this patchset is applied.

An experimental DAMON-based operation scheme for THP, ‘ethp’, removes 31.29% of
THP memory overheads while preserving 60.64% of THP speedup. Another
experimental DAMON-based ‘proactive reclamation’ implementation, ‘prcl’,
reduces 87.95% of residential sets and 29.52% of system memory footprint while
incurring only 2.15% runtime overhead in the best case (parsec3/freqmine).

NOTE that the experimental THP optimization and proactive reclamation are not
for production, but only for proof of concept.

Please refer to the official document[1] or "Documentation/admin-guide/mm: Add
a document for DAMON" patch in the latest DAMON patchset for detailed
evaluation setup and results.

[1] https://damonitor.github.io/doc/html/latest-damos

More Information
================

We prepared a showcase web site[1] where you can get more information.  There
are

- the official documentations[2],
- the heatmap format dynamic access pattern of various realistic workloads for
  heap area[3], mmap()-ed area[4], and stack[5] area,
- the dynamic working set size distribution[6] and chronological working set
  size changes[7], and
- the latest performance test results[8].

[1] https://damonitor.github.io/_index
[2] https://damonitor.github.io/doc/html/latest-damos
[3] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.0.html
[4] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.1.html
[5] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.2.html
[6] https://damonitor.github.io/test/result/visual/latest/rec.wss_sz.html
[7] https://damonitor.github.io/test/result/visual/latest/rec.wss_time.html
[8] https://damonitor.github.io/test/result/perf/latest/html/index.html

Baseline and Complete Git Tree
==============================

The patches are based on the v5.7 plus v17 DAMON patchset[1] and Minchan's
``do_madvise()`` patch[2], which was retrieved from the -next tree and slightly
modified for backporting on v5.7.  You can also clone the complete git tree:

$ git clone git://github.com/sjp38/linux -b damos/rfc/v13

The web is also available:
https://github.com/sjp38/linux/releases/tag/damos/rfc/v13

There are a couple of trees for entire DAMON patchset series that future
features are included.  The first one[3] contains the changes for latest
release, while the other one[4] contains the changes for next release.

[1] https://lore.kernel.org/linux-mm/20200706115322.29598-1-sjp...@amazon.com/
[2] https://lore.kernel.org/linux-mm/20200302193630.68771-2-minc...@kernel.org/
[3] https://github.com/sjp38/linux/tree/damon/master
[4] https://github.com/sjp38/linux/tree/damon/next

Sequence Of Patches
===================

The 1st patch allows DAMON to reuse ``madvise()`` code for the actions.  The
2nd patch accounts age of each region.  The 3rd patch implements the handling
of the schemes in DAMON and exports a kernel space programming interface for
it.  The 4th patch implements a debugfs interface for the privileged people and
user programs.  The 5th patch implements schemes statistics feature for easier
tuning of the schemes and runtime access pattern analysis.  The 6th patch adds
selftests for these changes, and the 7th patch adds human friendly schemes
support to the user space tool for DAMON.  Finally, the 8th patch documents
this new feature in the document.

Patch History
=============

Changes from RFC v12
(https://lore.kernel.org/linux-mm/20200616073828.16509-1-sjp...@amazon.com/)
 - W

[RFC v13 4/8] mm/damon/schemes: Implement a debugfs interface

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit implements a debugfs interface for the data access
monitoring oriented memory management schemes.  It is supposed to be
used by administrators and/or privileged user space programs.  Users can
read and update the rules using ``<debugfs>/damon/schemes`` file.  The
format is::

    <min/max size> <min/max access frequency> <min/max age> <action>

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 172 -
 1 file changed, 170 insertions(+), 2 deletions(-)

diff --git a/mm/damon.c b/mm/damon.c
index c08b8c80c517..28b1e119e521 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -194,6 +194,29 @@ static void damon_destroy_task(struct damon_task *t)
damon_free_task(t);
 }
 
+static struct damos *damon_new_scheme(
+   unsigned long min_sz_region, unsigned long max_sz_region,
+   unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+   unsigned int min_age_region, unsigned int max_age_region,
+   enum damos_action action)
+{
+   struct damos *scheme;
+
+   scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+   if (!scheme)
+   return NULL;
+   scheme->min_sz_region = min_sz_region;
+   scheme->max_sz_region = max_sz_region;
+   scheme->min_nr_accesses = min_nr_accesses;
+   scheme->max_nr_accesses = max_nr_accesses;
+   scheme->min_age_region = min_age_region;
+   scheme->max_age_region = max_age_region;
+   scheme->action = action;
+   INIT_LIST_HEAD(&scheme->list);
+
+   return scheme;
+}
+
 static void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 {
list_add_tail(&s->list, &ctx->schemes_list);
@@ -1477,6 +1500,145 @@ static ssize_t debugfs_monitor_on_write(struct file 
*file,
return ret;
 }
 
+static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
+{
+   struct damos *s;
+   int written = 0;
+   int rc;
+
+   damon_for_each_scheme(s, c) {
+   rc = snprintf(&buf[written], len - written,
+   "%lu %lu %u %u %u %u %d\n",
+   s->min_sz_region, s->max_sz_region,
+   s->min_nr_accesses, s->max_nr_accesses,
+   s->min_age_region, s->max_age_region,
+   s->action);
+   if (!rc)
+   return -ENOMEM;
+
+   written += rc;
+   }
+   return written;
+}
+
+static ssize_t debugfs_schemes_read(struct file *file, char __user *buf,
+   size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = &damon_user_ctx;
+   char *kbuf;
+   ssize_t len;
+
+   kbuf = kmalloc(count, GFP_KERNEL);
+   if (!kbuf)
+   return -ENOMEM;
+
+   mutex_lock(&ctx->kdamond_lock);
+   len = sprint_schemes(ctx, kbuf, count);
+   mutex_unlock(&ctx->kdamond_lock);
+   if (len < 0)
+   goto out;
+   len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+   kfree(kbuf);
+   return len;
+}
+
+static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes)
+{
+   ssize_t i;
+
+   for (i = 0; i < nr_schemes; i++)
+   kfree(schemes[i]);
+   kfree(schemes);
+}
+
+/*
+ * Converts a string into an array of struct damos pointers
+ *
+ * Returns an array of struct damos pointers that converted if the conversion
+ * success, or NULL otherwise.
+ */
+static struct damos **str_to_schemes(const char *str, ssize_t len,
+   ssize_t *nr_schemes)
+{
+   struct damos *scheme, **schemes;
+   const int max_nr_schemes = 256;
+   int pos = 0, parsed, ret;
+   unsigned long min_sz, max_sz;
+   unsigned int min_nr_a, max_nr_a, min_age, max_age;
+   unsigned int action;
+
+   schemes = kmalloc_array(max_nr_schemes, sizeof(scheme),
+   GFP_KERNEL);
+   if (!schemes)
+   return NULL;
+
+   *nr_schemes = 0;
+   while (pos < len && *nr_schemes < max_nr_schemes) {
+   ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u%n",
+   &min_sz, &max_sz, &min_nr_a, &max_nr_a,
+   &min_age, &max_age, &action, &parsed);
+   if (ret != 7)
+   break;
+   if (action >= DAMOS_ACTION_LEN) {
+   pr_err("wrong action %d\n", action);
+   goto fail;
+   }
+
+   pos += parsed;
+   scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
+   min_age, max_age, action);
+   if (!scheme)
+   goto fail;
+
+   schemes[*nr_schemes] = scheme;
+   *nr_schemes += 1;
+   }
+   return schemes;
+fail:
+   free_schemes_arr(schemes, *nr_schemes);
+   return NULL;
+}
+
+static ssize_t debugfs_schemes_

[RFC v13 3/8] mm/damon: Implement data access monitoring-based operation schemes

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

In many cases, users might use DAMON for simple data access aware
memory management optimizations such as applying an operation scheme to
a memory region of a specific size having a specific access frequency
for a specific time.  For example, "page out a memory region larger than
100 MiB but having a low access frequency more than 10 minutes", or "Use
THP for a memory region larger than 2 MiB having a high access frequency
for more than 2 seconds".

Most simple form of the solution would be doing offline data access
pattern profiling using DAMON and modifying the application source code
or system configuration based on the profiling results.  Or, developing
a daemon constructed with two modules (one for access monitoring and the
other for applying memory management actions via mlock(), madvise(),
sysctl, etc) is imaginable.

To minimize users from spending their time for implementation of such
simple data access monitoring-based operation schemes, this commit makes
DAMON to handle such schemes directly.  With this commit, users can
simply specify their desired schemes to DAMON.  Then, DAMON will
automatically apply the schemes to the user-specified target
processes.

Each of the schemes is composed with conditions for filtering of the
target memory regions and desired memory management action for the
target.  Specifically, the format is::

    <min/max size> <min/max access frequency> <min/max age> <action>

The filtering conditions are size of memory region, number of accesses
to the region monitored by DAMON, and the age of the region.  The age of
region is incremented periodically but reset when its addresses or
access frequency has significantly changed or the action of a scheme was
applied.  For the action, current implementation supports only a few of
madvise() hints, ``MADV_WILLNEED``, ``MADV_COLD``, ``MADV_PAGEOUT``,
``MADV_HUGEPAGE``, and ``MADV_NOHUGEPAGE``.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  50 +++
 mm/damon.c| 145 ++
 2 files changed, 195 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e94dab4edab6..76e6ea2f97a7 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -64,6 +64,52 @@ struct damon_task {
struct list_head list;
 };
 
+/**
+ * enum damos_action - Represents an action of a Data Access Monitoring-based
+ * Operation Scheme.
+ *
+ * @DAMOS_WILLNEED:Call ``madvise()`` for the region with MADV_WILLNEED.
+ * @DAMOS_COLD:Call ``madvise()`` for the region with 
MADV_COLD.
+ * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT.
+ * @DAMOS_HUGEPAGE:Call ``madvise()`` for the region with MADV_HUGEPAGE.
+ * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_ACTION_LEN:  Number of supported actions.
+ */
+enum damos_action {
+   DAMOS_WILLNEED,
+   DAMOS_COLD,
+   DAMOS_PAGEOUT,
+   DAMOS_HUGEPAGE,
+   DAMOS_NOHUGEPAGE,
+   DAMOS_ACTION_LEN,
+};
+
+/**
+ * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * @min_sz_region: Minimum size of target regions.
+ * @max_sz_region: Maximum size of target regions.
+ * @min_nr_accesses:   Minimum ``->nr_accesses`` of target regions.
+ * @max_nr_accesses:   Maximum ``->nr_accesses`` of target regions.
+ * @min_age_region:Minimum age of target regions.
+ * @max_age_region:Maximum age of target regions.
+ * @action:   &damos_action to be applied to the target regions.
+ * @list:  List head for siblings.
+ *
+ * For each aggregation interval, DAMON applies @action to monitoring target
+ * regions fit in the condition and updates the statistics.  Note that both
+ * the minimums and the maximums are inclusive.
+ */
+struct damos {
+   unsigned long min_sz_region;
+   unsigned long max_sz_region;
+   unsigned int min_nr_accesses;
+   unsigned int max_nr_accesses;
+   unsigned int min_age_region;
+   unsigned int max_age_region;
+   enum damos_action action;
+   struct list_head list;
+};
+
 /**
  * struct damon_ctx - Represents a context for each monitoring.  This is the
  * main interface that allows users to set the attributes and get the results
@@ -107,6 +153,7 @@ struct damon_task {
  * @kdamond_lock.  Accesses to other fields must be protected by themselves.
  *
  * @tasks_list:Head of monitoring target tasks (&damon_task) 
list.
+ * @schemes_list:  Head of schemes (&damos) list.
  *
  * @init_target_regions:   Constructs initial monitoring target regions.
  * @update_target_regions: Updates monitoring target regions.
@@ -157,6 +204,7 @@ struct damon_ctx {
struct mutex kdamond_lock;
 
struct list_head tasks_list;/* 'damon_task' objects */
+   struct list_head schemes_list;  /* 'damos' objects */
 
/* callbacks */
void (*init_target_regions)(struct damon_ctx *context

[RFC v13 2/8] mm/damon: Account age of target regions

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

DAMON can be used as a primitive for data access pattern aware memory
management optimizations.  However, users who want such optimizations
should run DAMON, read the monitoring results, analyze it, plan a new
memory management scheme, and apply the new scheme by themselves.  It
would not be too hard, but still require some level of effort.  For
complicated optimizations, this effort is inevitable.

That said, in many cases, users would simply want to apply an action to
a memory region of a specific size having a specific access frequency
for a specific time.  For example, "page out a memory region larger than
100 MiB but having a low access frequency more than 10 minutes", or "Use
THP for a memory region larger than 2 MiB having a high access frequency
for more than 2 seconds".

For such optimizations, users will need to first account the age of each
region themselves.  To reduce such efforts, this commit implements a
simple age account of each region in DAMON.  For each aggregation step,
DAMON compares the access frequency with that from the last aggregation and
resets the age of the region if the change is significant.  Else, the age
is incremented.  Also, in case of the merge of regions, the region
size-weighted average of the ages is set as the age of merged new
region.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h | 10 ++
 mm/damon.c| 20 +---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index b0e7e31a22b3..e94dab4edab6 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -31,12 +31,22 @@ struct damon_addr_range {
  * @sampling_addr: Address of the sample for the next access check.
  * @nr_accesses:   Access frequency of this region.
  * @list:  List head for siblings.
+ * @age:   Age of this region.
+ * @last_nr_accesses:  Internal value for age calculation.
+ *
+ * @age is initially zero, increased for each aggregation interval, and reset
+ * to zero again if the access frequency is significantly changed.  If two
+ * regions are merged into a new region, both @nr_accesses and @age of the new
+ * region are set as region size-weighted average of those of the two regions.
  */
 struct damon_region {
struct damon_addr_range ar;
unsigned long sampling_addr;
unsigned int nr_accesses;
struct list_head list;
+
+   unsigned int age;
+   unsigned int last_nr_accesses;
 };
 
 /**
diff --git a/mm/damon.c b/mm/damon.c
index 0f906126d21f..f46844540b37 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -107,6 +107,9 @@ static struct damon_region *damon_new_region(unsigned long 
start,
region->nr_accesses = 0;
INIT_LIST_HEAD(>list);
 
+   region->age = 0;
+   region->last_nr_accesses = 0;
+
return region;
 }
 
@@ -815,6 +818,7 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
damon_write_rbuf(c, >nr_accesses,
sizeof(r->nr_accesses));
trace_damon_aggregated(t, r, nr);
+   r->last_nr_accesses = r->nr_accesses;
r->nr_accesses = 0;
}
}
@@ -828,9 +832,11 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 static void damon_merge_two_regions(struct damon_region *l,
struct damon_region *r)
 {
-   l->nr_accesses = (l->nr_accesses * sz_damon_region(l) +
-   r->nr_accesses * sz_damon_region(r)) /
-   (sz_damon_region(l) + sz_damon_region(r));
+   unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
+
+   l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
+   (sz_l + sz_r);
+   l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
l->ar.end = r->ar.end;
damon_destroy_region(r);
 }
@@ -850,6 +856,11 @@ static void damon_merge_regions_of(struct damon_task *t, 
unsigned int thres,
struct damon_region *r, *prev = NULL, *next;
 
damon_for_each_region_safe(r, next, t) {
+   if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres)
+   r->age = 0;
+   else
+   r->age++;
+
if (prev && prev->ar.end == r->ar.start &&
diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
@@ -893,6 +904,9 @@ static void damon_split_region_at(struct damon_ctx *ctx,
new = damon_new_region(r->ar.start + sz_r, r->ar.end);
r->ar.end = new->ar.start;
 
+   new->age = r->age;
+   new->last_nr_accesses = r->last_nr_accesses;
+
damon_insert_region(new, r, damon_next_region(r));
 }
 
-- 
2.17.1



[RFC v13 6/8] mm/damon/selftests: Add 'schemes' debugfs tests

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit adds simple selftests for 'schemes' debugfs file of DAMON.

Signed-off-by: SeongJae Park 
---
 .../testing/selftests/damon/debugfs_attrs.sh  | 29 +++
 1 file changed, 29 insertions(+)

diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh 
b/tools/testing/selftests/damon/debugfs_attrs.sh
index d5188b0f71b1..4aeb2037a67e 100755
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -97,6 +97,35 @@ fi
 
 echo $ORIG_CONTENT > $file
 
+# Test schemes file
+file="$DBGFS/schemes"
+
+ORIG_CONTENT=$(cat $file)
+echo "1 2 3 4 5 6 3" > $file
+if [ $? -ne 0 ]
+then
+   echo "$file write fail"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
+echo "1 2
+3 4 5 6 3" > $file
+if [ $? -eq 0 ]
+then
+   echo "$file multi line write success (expected fail)"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
+echo > $file
+if [ $? -ne 0 ]
+then
+   echo "$file empty string writing fail"
+   echo $ORIG_CONTENT > $file
+   exit 1
+fi
+
 # Test pids file
 file="$DBGFS/pids"
 
-- 
2.17.1



[RFC v13 5/8] mm/damon/schemes: Implement statistics feature

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

To tune the DAMON-based operation schemes, knowing how many and how
large regions are affected by each of the schemes will be helpful.  Those
stats could be used for not only the tuning, but also monitoring of the
working set size and the number of regions, if the scheme does not
change the program behavior too much.

For this reason, this commit implements the statistics for the schemes.
The total number and size of the regions that each scheme is applied to are
exported to users via '->stat_count' and '->stat_sz' of 'struct damos'.
Admins can also check the numbers by reading 'schemes' debugfs file.  The
last two integers now represent the stats.  To allow collecting the
stats without changing the program behavior, this commit also adds a new
scheme action, 'DAMOS_STAT'.  Note that 'DAMOS_STAT' not only performs no
memory operation, but also does not reset the age of regions.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |  6 ++
 mm/damon.c| 13 ++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 76e6ea2f97a7..f176a2b6e67c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -73,6 +73,7 @@ struct damon_task {
  * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT.
  * @DAMOS_HUGEPAGE:Call ``madvise()`` for the region with MADV_HUGEPAGE.
  * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_STAT:Do nothing but count the stat.
  * @DAMOS_ACTION_LEN:  Number of supported actions.
  */
 enum damos_action {
@@ -81,6 +82,7 @@ enum damos_action {
DAMOS_PAGEOUT,
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
+   DAMOS_STAT, /* Do nothing but only record the stat */
DAMOS_ACTION_LEN,
 };
 
@@ -93,6 +95,8 @@ enum damos_action {
  * @min_age_region:Minimum age of target regions.
  * @max_age_region:Maximum age of target regions.
  * @action:_action to be applied to the target regions.
+ * @stat_count:Total number of regions that this scheme is 
applied.
+ * @stat_sz:   Total size of regions that this scheme is applied.
  * @list:  List head for siblings.
  *
  * For each aggregation interval, DAMON applies @action to monitoring target
@@ -107,6 +111,8 @@ struct damos {
unsigned int min_age_region;
unsigned int max_age_region;
enum damos_action action;
+   unsigned long stat_count;
+   unsigned long stat_sz;
struct list_head list;
 };
 
diff --git a/mm/damon.c b/mm/damon.c
index 28b1e119e521..937b6bccb7b8 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -212,6 +212,8 @@ static struct damos *damon_new_scheme(
scheme->min_age_region = min_age_region;
scheme->max_age_region = max_age_region;
scheme->action = action;
+   scheme->stat_count = 0;
+   scheme->stat_sz = 0;
INIT_LIST_HEAD(>list);
 
return scheme;
@@ -927,6 +929,8 @@ static int damos_do_action(struct damon_task *task, struct 
damon_region *r,
case DAMOS_NOHUGEPAGE:
madv_action = MADV_NOHUGEPAGE;
break;
+   case DAMOS_STAT:
+   return 0;
default:
pr_warn("Wrong action %d\n", action);
return -EINVAL;
@@ -950,8 +954,11 @@ static void damon_do_apply_schemes(struct damon_ctx *c, 
struct damon_task *t,
continue;
if (r->age < s->min_age_region || s->max_age_region < r->age)
continue;
+   s->stat_count++;
+   s->stat_sz += sz;
damos_do_action(t, r, s->action);
-   r->age = 0;
+   if (s->action != DAMOS_STAT)
+   r->age = 0;
}
 }
 
@@ -1508,11 +1515,11 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char 
*buf, ssize_t len)
 
damon_for_each_scheme(s, c) {
rc = snprintf([written], len - written,
-   "%lu %lu %u %u %u %u %d\n",
+   "%lu %lu %u %u %u %u %d %lu %lu\n",
s->min_sz_region, s->max_sz_region,
s->min_nr_accesses, s->max_nr_accesses,
s->min_age_region, s->max_age_region,
-   s->action);
+   s->action, s->stat_count, s->stat_sz);
if (!rc)
return -ENOMEM;
 
-- 
2.17.1



[RFC v13 7/8] damon/tools: Support more human friendly 'schemes' control

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit implements 'schemes' subcommand of the damon userspace tool.
It can be used to describe and apply the data access monitoring-based
operation schemes in more human friendly fashion.

Signed-off-by: SeongJae Park 
---
 tools/damon/_convert_damos.py | 141 ++
 tools/damon/_damon.py |  27 +--
 tools/damon/damo  |   7 ++
 tools/damon/schemes.py| 110 ++
 4 files changed, 280 insertions(+), 5 deletions(-)
 create mode 100755 tools/damon/_convert_damos.py
 create mode 100644 tools/damon/schemes.py

diff --git a/tools/damon/_convert_damos.py b/tools/damon/_convert_damos.py
new file mode 100755
index ..0fd84b3701c9
--- /dev/null
+++ b/tools/damon/_convert_damos.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Change human readable data access monitoring-based operation schemes to the low
+level input for the '/damon/schemes' file.  Below is an example of the
+schemes written in the human readable format:
+
+# format is:
+# <min/max size> <min/max frequency (0-100)> <min/max age> <action>
+#
+# B/K/M/G/T for Bytes/KiB/MiB/GiB/TiB
+# us/ms/s/m/h/d for micro-seconds/milli-seconds/seconds/minutes/hours/days
+# 'min/max' for possible min/max value.
+
+# if a region keeps a high access frequency for >=100ms, put the region on
+# the head of the LRU list (call madvise() with MADV_WILLNEED).
+min max 80  max 100ms   max willneed
+
+# if a region keeps a low access frequency for >=200ms and <=one hour, put
+# the region on the tail of the LRU list (call madvise() with MADV_COLD).
+min max 10  20  200ms   1h  cold
+
+# if a region keeps a very low access frequency for >=60 seconds, swap out
+# the region immediately (call madvise() with MADV_PAGEOUT).
+min max 0   10  60s max pageout
+
+# if a region of a size >=2MiB keeps a very high access frequency for
+# >=100ms, let the region to use huge pages (call madvise() with
+# MADV_HUGEPAGE).
+2M  max 90  100 100ms   max hugepage
+
+# If a regions of a size >=2MiB keeps small access frequency for >=100ms,
+# avoid the region using huge pages (call madvise() with MADV_NOHUGEPAGE).
+2M  max 0   25  100ms   max nohugepage
+"""
+
+import argparse
+import platform
+
+uint_max = 2**32 - 1
+ulong_max = 2**64 - 1
+if platform.architecture()[0] != '64bit':
+ulong_max = 2**32 - 1
+
+unit_to_bytes = {'B': 1, 'K': 1024, 'M': 1024 * 1024, 'G': 1024 * 1024 * 1024,
+'T': 1024 * 1024 * 1024 * 1024}
+
+def text_to_bytes(txt):
+if txt == 'min':
+return 0
+if txt == 'max':
+return ulong_max
+
+unit = txt[-1]
+number = float(txt[:-1])
+return int(number * unit_to_bytes[unit])
+
+unit_to_usecs = {'us': 1, 'ms': 1000, 's': 1000 * 1000, 'm': 60 * 1000 * 1000,
+'h': 60 * 60 * 1000 * 1000, 'd': 24 * 60 * 60 * 1000 * 1000}
+
+def text_to_aggr_intervals(txt, aggr_interval):
+if txt == 'min':
+return 0
+if txt == 'max':
+return uint_max
+
+unit = txt[-2:]
+if unit in ['us', 'ms']:
+number = float(txt[:-2])
+else:
+unit = txt[-1]
+number = float(txt[:-1])
+return int(number * unit_to_usecs[unit]) / aggr_interval
+
+damos_action_to_int = {'DAMOS_WILLNEED': 0, 'DAMOS_COLD': 1,
+'DAMOS_PAGEOUT': 2, 'DAMOS_HUGEPAGE': 3, 'DAMOS_NOHUGEPAGE': 4,
+'DAMOS_STAT': 5}
+
+def text_to_damos_action(txt):
+return damos_action_to_int['DAMOS_' + txt.upper()]
+
+def text_to_nr_accesses(txt, max_nr_accesses):
+if txt == 'min':
+return 0
+if txt == 'max':
+return max_nr_accesses
+
+return int(float(txt) * max_nr_accesses / 100)
+
+def debugfs_scheme(line, sample_interval, aggr_interval):
+fields = line.split()
+if len(fields) != 7:
+print('wrong input line: %s' % line)
+exit(1)
+
+limit_nr_accesses = aggr_interval / sample_interval
+try:
+min_sz = text_to_bytes(fields[0])
+max_sz = text_to_bytes(fields[1])
+min_nr_accesses = text_to_nr_accesses(fields[2], limit_nr_accesses)
+max_nr_accesses = text_to_nr_accesses(fields[3], limit_nr_accesses)
+min_age = text_to_aggr_intervals(fields[4], aggr_interval)
+max_age = text_to_aggr_intervals(fields[5], aggr_interval)
+action = text_to_damos_action(fields[6])
+except:
+print('wrong input field')
+raise
+return '%d\t%d\t%d\t%d\t%d\t%d\t%d' % (min_sz, max_sz, min_nr_accesses,
+max_nr_accesses, min_age, max_age, action)
+
+def convert(schemes_file, sample_interval, aggr_interval):
+lines = []
+with open(schemes_file, 'r') as f:
+for line in f:
+if line.startswith('#'):
+continue
+line = line.strip()

[RFC v13 8/8] Documentation/admin-guide/mm: Document DAMON-based operation schemes

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit documents DAMON-based operation schemes in the DAMON
document.

Signed-off-by: SeongJae Park 
---
 Documentation/admin-guide/mm/damon/guide.rst |  41 +-
 Documentation/admin-guide/mm/damon/plans.rst |  24 +---
 Documentation/admin-guide/mm/damon/start.rst |  11 ++
 Documentation/admin-guide/mm/damon/usage.rst | 124 +--
 4 files changed, 165 insertions(+), 35 deletions(-)

diff --git a/Documentation/admin-guide/mm/damon/guide.rst 
b/Documentation/admin-guide/mm/damon/guide.rst
index 5b73f015..783fef558f3b 100644
--- a/Documentation/admin-guide/mm/damon/guide.rst
+++ b/Documentation/admin-guide/mm/damon/guide.rst
@@ -53,6 +53,11 @@ heats``.  If it shows a simple pattern consists of a small 
number of memory
 regions having high contrast of access temperature, you could consider manual
 `Program Modification`_.
 
+If the access pattern is very frequently changing so that you cannot figure out
+what is the performance important region using your human eye, `Automated
+DAMON-based Memory Operations`_ might help the case owing to its machine-level
+microscope view.
+
 If you still want to absorb more benefits, you should develop `Personalized
 DAMON Application`_ for your special case.
 
@@ -158,6 +163,36 @@ hot object.
   The chronological changes of working set size.
 
 
+Automated DAMON-based Memory Operations
+---------------------------------------
+
+Though `Manual Program Optimization` works well in many cases and DAMON can
+help it, modifying the source code is not a good option in many cases.  First
+of all, the source code could be too old or unavailable.  Also, many workloads
+have data access patterns so complex that it is hard to distinguish hot memory
+objects from cold memory objects with the human eye.  Finding the mapping from
+the visualized access pattern to the source code and injecting the hinting
+system calls inside the code will also be quite challenging.
+
+By using DAMON-based operation schemes (DAMOS) via ``damo schemes``, you will
+be able to easily optimize your workload in such a case.  Our example schemes
+called 'efficient THP' and 'proactive reclamation' achieved significant speedup
+and memory space saves against 25 realistic workloads [2]_.
+
+That said, note that you need to carefully tune the schemes (e.g., target
+region size and age) and the monitoring attributes to use this approach
+successfully.  Because the optimal values of the parameters depend on each
+system and workload, misconfiguring the parameters could result in worse
+memory management.
+
+For the tuning, you could measure performance metrics such as IPC, TLB
+misses, and swap in/out events and adjust the parameters based on their
+changes.  The total number and the total size of the regions that each scheme
+is applied to, which are provided via the debugfs interface and the
+programming interface, can also be useful.  Writing a program that automates
+this parameter tuning could be an option.
+
+
 Personalized DAMON Application
 --
 
@@ -183,9 +218,9 @@ Referencing previously done successful practices could help 
you getting the
 sense for this kind of optimizations.  There is an academic paper [1]_
 reporting the visualized access pattern and manual `Program
 Modification`_ results for a number of realistic workloads.  You can also get
-the visualized access patterns [3]_ [4]_ [5]_ and automated DAMON-based
-memory operations results for other realistic workloads that collected with
-latest version of DAMON [2]_.
+the visualized access patterns [3]_ [4]_ [5]_ and
+`Automated DAMON-based Memory Operations`_ results for other realistic
+workloads that collected with latest version of DAMON [2]_ .
 
 .. [1] https://dl.acm.org/doi/10.1145/3366626.3368125
 .. [2] https://damonitor.github.io/test/result/perf/latest/html/
diff --git a/Documentation/admin-guide/mm/damon/plans.rst 
b/Documentation/admin-guide/mm/damon/plans.rst
index e3aa5ab96c29..765344f02eb3 100644
--- a/Documentation/admin-guide/mm/damon/plans.rst
+++ b/Documentation/admin-guide/mm/damon/plans.rst
@@ -4,26 +4,4 @@
 Future Plans
 
 
-DAMON is still on its first stage.  Below plans are still under development.
-
-
-Automate Data Access Monitoring-based Memory Operation Schemes Execution
-
-
-The ultimate goal of DAMON is to be used as a building block for the data
-access pattern aware kernel memory management optimization.  It will make
-system just works efficiently.  However, some users having very special
-workloads will want to further do their own optimization.  DAMON will automate
-most of the tasks for such manual optimizations in near future.  Users will be
-required to only describe what kind of data access pattern-based operation
-schemes they want in a simple form.
-
-By applying a very simple scheme for THP promotion/demotion with a prototype

[RFC v5 00/11] DAMON: Support Physical Memory Address Space Monitoring

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

DAMON[1] programming interface users can extend DAMON for any address space by
configuring the address-space specific low level primitives with appropriate
ones including their own implementations.  However, because the implementation
for the virtual address space is only available now, the users should implement
their own for other address spaces.  Worse yet, the user space users who rely
on the debugfs interface and user space tool, cannot implement their own.

This patchset implements another reference implementation of the low level
primitives for the physical memory address space.  With this change, hence, the
kernel space users can monitor both the virtual and the physical address spaces
by simply changing the configuration in the runtime.  Further, this patchset
links the implementation to the debugfs interface and the user space tool for
the user space users.

Note that the implementation supports only the user memory, the same as the
idle page access tracking feature.

[1] https://lore.kernel.org/linux-mm/20200706115322.29598-1-sjp...@amazon.com/

Baseline and Complete Git Trees
===

The patches are based on the v5.7 plus DAMON v17 patchset[1] and DAMOS RFC v13
patchset[2].  You can also clone the complete git tree:

$ git clone git://github.com/sjp38/linux -b cdamon/rfc/v5

The web is also available:
https://github.com/sjp38/linux/releases/tag/cdamon/rfc/v5

[1] https://lore.kernel.org/linux-mm/20200706115322.29598-1-sjp...@amazon.com/
[2] https://lore.kernel.org/linux-mm/20200707093805.4775-1-sjp...@amazon.com/

Sequence of Patches
===

The sequence of patches is as follows.

The first 5 patches allow the user space users to manually set the monitoring
regions.  The 1st and 2nd patches implement the feature in the debugfs
interface and the user space tool.  The following two patches implement
unit tests (the 3rd patch) and selftests (the 4th patch) for the new feature.
Finally, the 5th patch documents this new feature.

The following 6 patches implement the physical memory monitoring.  The 6th
patch exports rmap essential functions to GPL modules as those will be used by
the DAMON's implementation of the low level primitives for the physical memory
address space.  The 7th patch implements the low level primitives.  The 8th and
the 9th patches link the feature to the debugfs and the user space tool,
respectively.  The 10th patch further implements a handy NUMA specific memory
monitoring feature on the user space tool.  Finally, the 11th patch documents
these new features.

Patch History
=

Changes from RFC v4
(https://lore.kernel.org/linux-mm/20200616140813.17863-1-sjp...@amazon.com/)
 - Support NUMA specific physical memory monitoring

Changes from RFC v3
(https://lore.kernel.org/linux-mm/20200609141941.19184-1-sjp...@amazon.com/)
 - Export rmap functions
 - Reorganize for physical memory monitoring support only
 - Clean up debugfs code

Changes from RFC v2
(https://lore.kernel.org/linux-mm/20200603141135.10575-1-sjp...@amazon.com/)
 - Support the physical memory monitoring with the user space tool
 - Use 'pfn_to_online_page()' (David Hildenbrand)
 - Document more detail on random 'pfn' and its safeness (David Hildenbrand)

Changes from RFC v1
(https://lore.kernel.org/linux-mm/20200409094232.29680-1-sjp...@amazon.com/)
 - Provide the reference primitive implementations for the physical memory
 - Connect the extensions with the debugfs interface

SeongJae Park (11):
  mm/damon/debugfs: Allow users to set initial monitoring target regions
  tools/damon: Support init target regions specification
  mm/damon-test: Add more unit tests for 'init_regions'
  selftests/damon/_chk_record: Do not check number of gaps
  Docs/damon: Document 'initial_regions' feature
  mm/rmap: Export essential functions for rmap_run
  mm/damon: Implement callbacks for physical memory monitoring
  mm/damon/debugfs: Support physical memory monitoring
  tools/damon/record: Support physical memory monitoring
  tools/damon/record: Support NUMA specific recording
  Docs/damon: Document physical memory monitoring support

 Documentation/admin-guide/mm/damon/faq.rst|   7 +-
 Documentation/admin-guide/mm/damon/index.rst  |   1 -
 .../admin-guide/mm/damon/mechanisms.rst   |  29 +-
 Documentation/admin-guide/mm/damon/plans.rst  |   7 -
 Documentation/admin-guide/mm/damon/usage.rst  |  80 +++-
 include/linux/damon.h |   5 +
 mm/damon-test.h   |  53 +++
 mm/damon.c| 374 +-
 mm/rmap.c |   2 +
 mm/util.c |   1 +
 tools/damon/_damon.py |  41 ++
 tools/damon/_paddr_layout.py  | 158 
 tools/damon/heats.py  |   2 +-
 tools/damon/record.py |  60 ++-
 tools/damon/schemes.py

[RFC v5 01/11] mm/damon/debugfs: Allow users to set initial monitoring target regions

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

Some users would want to monitor only a part of the entire virtual
memory address space.  The '->init_target_regions' callback is therefore
provided, but only the programming interface users can use it.

For this reason, this commit introduces a new debugfs file,
'init_regions'.  Users can specify which initial monitoring target
address regions they want by writing special input to the file.  The
input should describe each region in each line in the below form:

    <pid> <start address> <end address>

This commit also makes the default '->init_target_regions' callback,
'kdamon_init_vm_regions()' to do nothing if the user has set the initial
target regions already.

Note that the regions will be updated to cover entire memory mapped
regions after 'regions update interval'.  If you want the regions to not
be updated after the initial setting, you could set the interval as a
very long time, say, a few decades.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 156 +++--
 1 file changed, 152 insertions(+), 4 deletions(-)

diff --git a/mm/damon.c b/mm/damon.c
index 937b6bccb7b8..3aecdef4c841 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -1800,6 +1800,147 @@ static ssize_t debugfs_record_write(struct file *file,
return ret;
 }
 
+static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len)
+{
+   struct damon_task *t;
+   struct damon_region *r;
+   int written = 0;
+   int rc;
+
+   damon_for_each_task(t, c) {
+   damon_for_each_region(r, t) {
+   rc = snprintf([written], len - written,
+   "%d %lu %lu\n",
+   t->pid, r->ar.start, r->ar.end);
+   if (!rc)
+   return -ENOMEM;
+   written += rc;
+   }
+   }
+   return written;
+}
+
+static ssize_t debugfs_init_regions_read(struct file *file, char __user *buf,
+   size_t count, loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char *kbuf;
+   ssize_t len;
+
+   kbuf = kmalloc(count, GFP_KERNEL);
+   if (!kbuf)
+   return -ENOMEM;
+
+   mutex_lock(>kdamond_lock);
+   if (ctx->kdamond) {
+   mutex_unlock(>kdamond_lock);
+   return -EBUSY;
+   }
+
+   len = sprint_init_regions(ctx, kbuf, count);
+   mutex_unlock(>kdamond_lock);
+   if (len < 0)
+   goto out;
+   len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+   kfree(kbuf);
+   return len;
+}
+
+static int add_init_region(struct damon_ctx *c,
+int pid, struct damon_addr_range *ar)
+{
+   struct damon_task *t;
+   struct damon_region *r, *prev;
+   int rc = -EINVAL;
+
+   if (ar->start >= ar->end)
+   return -EINVAL;
+
+   damon_for_each_task(t, c) {
+   if (t->pid == pid) {
+   r = damon_new_region(ar->start, ar->end);
+   if (!r)
+   return -ENOMEM;
+   damon_add_region(r, t);
+   if (nr_damon_regions(t) > 1) {
+   prev = damon_prev_region(r);
+   if (prev->ar.end > r->ar.start) {
+   damon_destroy_region(r);
+   return -EINVAL;
+   }
+   }
+   rc = 0;
+   }
+   }
+   return rc;
+}
+
+static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len)
+{
+   struct damon_task *t;
+   struct damon_region *r, *next;
+   int pos = 0, parsed, ret;
+   int pid;
+   struct damon_addr_range ar;
+   int err;
+
+   damon_for_each_task(t, c) {
+   damon_for_each_region_safe(r, next, t)
+   damon_destroy_region(r);
+   }
+
+   while (pos < len) {
+   ret = sscanf([pos], "%d %lu %lu%n",
+   , , , );
+   if (ret != 3)
+   break;
+   err = add_init_region(c, pid, );
+   if (err)
+   goto fail;
+   pos += parsed;
+   }
+
+   return 0;
+
+fail:
+   damon_for_each_task(t, c) {
+   damon_for_each_region_safe(r, next, t)
+   damon_destroy_region(r);
+   }
+   return err;
+}
+
+static ssize_t debugfs_init_regions_write(struct file *file,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   char *kbuf;
+   ssize_t ret = count;
+   int err;
+
+   kbuf = user_input_s

[RFC v5 02/11] tools/damon: Support init target regions specification

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit updates the damon user space tool to support the initial
monitoring target regions specification.

Signed-off-by: SeongJae Park 
---
 tools/damon/_damon.py  | 39 +++
 tools/damon/record.py  | 12 +++-
 tools/damon/schemes.py | 12 +++-
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/tools/damon/_damon.py b/tools/damon/_damon.py
index 3620ef12a5ea..ad476cc61421 100644
--- a/tools/damon/_damon.py
+++ b/tools/damon/_damon.py
@@ -12,12 +12,25 @@ debugfs_attrs = None
 debugfs_record = None
 debugfs_schemes = None
 debugfs_pids = None
+debugfs_init_regions = None
 debugfs_monitor_on = None
 
 def set_target_pid(pid):
 return subprocess.call('echo %s > %s' % (pid, debugfs_pids), shell=True,
 executable='/bin/bash')
 
+def set_target(pid, init_regions=[]):
+rc = set_target_pid(pid)
+if rc:
+return rc
+
+if not os.path.exists(debugfs_init_regions):
+return 0
+
+string = ' '.join(['%s %d %d' % (pid, r[0], r[1]) for r in init_regions])
+return subprocess.call('echo "%s" > %s' % (string, debugfs_init_regions),
+shell=True, executable='/bin/bash')
+
 def turn_damon(on_off):
 return subprocess.call("echo %s > %s" % (on_off, debugfs_monitor_on),
 shell=True, executable="/bin/bash")
@@ -97,6 +110,7 @@ def chk_update_debugfs(debugfs):
 global debugfs_record
 global debugfs_schemes
 global debugfs_pids
+global debugfs_init_regions
 global debugfs_monitor_on
 
 debugfs_damon = os.path.join(debugfs, 'damon')
@@ -104,6 +118,7 @@ def chk_update_debugfs(debugfs):
 debugfs_record = os.path.join(debugfs_damon, 'record')
 debugfs_schemes = os.path.join(debugfs_damon, 'schemes')
 debugfs_pids = os.path.join(debugfs_damon, 'pids')
+debugfs_init_regions = os.path.join(debugfs_damon, 'init_regions')
 debugfs_monitor_on = os.path.join(debugfs_damon, 'monitor_on')
 
 if not os.path.isdir(debugfs_damon):
@@ -131,6 +146,26 @@ def cmd_args_to_attrs(args):
 return Attrs(sample_interval, aggr_interval, regions_update_interval,
 min_nr_regions, max_nr_regions, rbuf_len, rfile_path, schemes)
 
+def cmd_args_to_init_regions(args):
+regions = []
+for arg in args.regions.split():
+addrs = arg.split('-')
+try:
+if len(addrs) != 2:
+raise Exception('two addresses not given')
+start = int(addrs[0])
+end = int(addrs[1])
+if start >= end:
+raise Exception('start >= end')
+if regions and regions[-1][1] > start:
+raise Exception('regions overlap')
+except Exception as e:
+print('Wrong \'--regions\' argument (%s)' % e)
+exit(1)
+
+regions.append([start, end])
+return regions
+
 def set_attrs_argparser(parser):
 parser.add_argument('-d', '--debugfs', metavar='', type=str,
 default='/sys/kernel/debug', help='debugfs mounted path')
@@ -144,3 +179,7 @@ def set_attrs_argparser(parser):
 default=10, help='minimal number of regions')
 parser.add_argument('-m', '--maxr', metavar='<# regions>', type=int,
 default=1000, help='maximum number of regions')
+
+def set_init_regions_argparser(parser):
+parser.add_argument('-r', '--regions', metavar='"- ..."',
+type=str, default='', help='monitoring target address regions')
diff --git a/tools/damon/record.py b/tools/damon/record.py
index 44fa3a12af35..6ce8721d782a 100644
--- a/tools/damon/record.py
+++ b/tools/damon/record.py
@@ -13,7 +13,7 @@ import time
 
 import _damon
 
-def do_record(target, is_target_cmd, attrs, old_attrs):
+def do_record(target, is_target_cmd, init_regions, attrs, old_attrs):
 if os.path.isfile(attrs.rfile_path):
 os.rename(attrs.rfile_path, attrs.rfile_path + '.old')
 
@@ -24,8 +24,8 @@ def do_record(target, is_target_cmd, attrs, old_attrs):
 if is_target_cmd:
 p = subprocess.Popen(target, shell=True, executable='/bin/bash')
 target = p.pid
-if _damon.set_target_pid(target):
-print('pid setting (%s) failed' % target)
+if _damon.set_target(target, init_regions):
+print('target setting (%s, %s) failed' % (target, init_regions))
 cleanup_exit(old_attrs, -2)
 if _damon.turn_damon('on'):
 print('could not turn on damon' % target)
@@ -65,6 +65,7 @@ def chk_permission():
 
 def set_argparser(parser):
 _damon.set_attrs_argparser(parser)
+_damon.set_init_regions_argparser(parser)
 parser.add_argument('target', type=str, metavar='',
 help='the target command or the pid to record')
 parser.add_argument('-l', '--rbuf', metavar='', type=int,
@@ -88,19 +89,20 @@ def main(args=None):
 
 args.schemes = ''
 new_attrs = _damon.cmd_args_to_attrs(args)
+

[RFC v5 03/11] mm/damon-test: Add more unit tests for 'init_regions'

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit adds more test cases for the new feature, 'init_regions'.

Signed-off-by: SeongJae Park 
---
 mm/damon-test.h | 53 +
 1 file changed, 53 insertions(+)

diff --git a/mm/damon-test.h b/mm/damon-test.h
index b31c7fe913ca..716594342ff1 100644
--- a/mm/damon-test.h
+++ b/mm/damon-test.h
@@ -152,6 +152,58 @@ static void damon_test_set_recording(struct kunit *test)
KUNIT_EXPECT_STREQ(test, ctx->rfile_path, "foo");
 }
 
+static void damon_test_set_init_regions(struct kunit *test)
+{
+   struct damon_ctx *ctx = _user_ctx;
+   int pids[] = {1, 2, 3};
+   /* Each line represents one region in ``  `` */
+   char * const valid_inputs[] = {"2 10 20\n 2   20 30\n2 35 45",
+   "2 10 20\n",
+   "2 10 20\n1 39 59\n1 70 134\n  2  20 25\n",
+   ""};
+   /* Reading the file again will show sorted, clean output */
+   char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n",
+   "2 10 20\n",
+   "1 39 59\n1 70 134\n2 10 20\n2 20 25\n",
+   ""};
+   char * const invalid_inputs[] = {"4 10 20\n",   /* pid not exists */
+   "2 10 20\n 2 14 26\n",  /* regions overlap */
+   "1 10 20\n2 30 40\n 1 5 8"};/* not sorted by address */
+   char *input, *expect;
+   int i, rc;
+   char buf[256];
+
+   damon_set_pids(ctx, pids, 3);
+
+   /* Put valid inputs and check the results */
+   for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) {
+   input = valid_inputs[i];
+   expect = valid_expects[i];
+
+   rc = set_init_regions(ctx, input, strnlen(input, 256));
+   KUNIT_EXPECT_EQ(test, rc, 0);
+
+   memset(buf, 0, 256);
+   sprint_init_regions(ctx, buf, 256);
+
+   KUNIT_EXPECT_STREQ(test, (char *)buf, expect);
+   }
+   /* Put invalid inputs and check the return error code */
+   for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) {
+   input = invalid_inputs[i];
+   pr_info("input: %s\n", input);
+   rc = set_init_regions(ctx, input, strnlen(input, 256));
+   KUNIT_EXPECT_EQ(test, rc, -EINVAL);
+
+   memset(buf, 0, 256);
+   sprint_init_regions(ctx, buf, 256);
+
+   KUNIT_EXPECT_STREQ(test, (char *)buf, "");
+   }
+
+   damon_set_pids(ctx, NULL, 0);
+}
+
 static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas)
 {
int i, j;
@@ -635,6 +687,7 @@ static struct kunit_case damon_test_cases[] = {
KUNIT_CASE(damon_test_regions),
KUNIT_CASE(damon_test_set_pids),
KUNIT_CASE(damon_test_set_recording),
+   KUNIT_CASE(damon_test_set_init_regions),
KUNIT_CASE(damon_test_three_regions_in_vmas),
KUNIT_CASE(damon_test_aggregate),
KUNIT_CASE(damon_test_write_rbuf),
-- 
2.17.1



[RFC v5 04/11] selftests/damon/_chk_record: Do not check number of gaps

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

Now the regions can be explicitly set as users want.  Therefore checking
the number of gaps doesn't make sense.  Remove the condition.

Signed-off-by: SeongJae Park 
---
 tools/testing/selftests/damon/_chk_record.py | 6 --
 1 file changed, 6 deletions(-)

diff --git a/tools/testing/selftests/damon/_chk_record.py 
b/tools/testing/selftests/damon/_chk_record.py
index 5cfcf4161404..ef28d4f7aca1 100644
--- a/tools/testing/selftests/damon/_chk_record.py
+++ b/tools/testing/selftests/damon/_chk_record.py
@@ -36,12 +36,9 @@ def chk_task_info(f):
 print('too many regions: %d > %d' % (nr_regions, max_nr_regions))
 exit(1)
 
-nr_gaps = 0
 eaddr = 0
 for r in range(nr_regions):
 saddr = struct.unpack('L', f.read(8))[0]
-if eaddr and saddr != eaddr:
-nr_gaps += 1
 eaddr = struct.unpack('L', f.read(8))[0]
 nr_accesses = struct.unpack('I', f.read(4))[0]
 
@@ -55,9 +52,6 @@ def chk_task_info(f):
 print('too high nr_access: expected %d but %d' %
 (max_nr_accesses, nr_accesses))
 exit(1)
-if nr_gaps != 2:
-print('number of gaps are not two but %d' % nr_gaps)
-exit(1)
 
 def parse_time_us(bindat):
 sec = struct.unpack('l', bindat[0:8])[0]
-- 
2.17.1



[RFC v5 06/11] mm/rmap: Export essential functions for rmap_run

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit exports the three essential functions for rmap walk,
'page_lock_anon_vma_read()', 'rmap_walk()', and 'page_rmapping()', to
GPL modules.  Those will be used by DAMON for the physical memory
address based access monitoring in the following commit.

Signed-off-by: SeongJae Park 
---
 mm/rmap.c | 2 ++
 mm/util.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/mm/rmap.c b/mm/rmap.c
index f79a206b271a..20ac37b27a7d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -579,6 +579,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
rcu_read_unlock();
return anon_vma;
 }
+EXPORT_SYMBOL_GPL(page_lock_anon_vma_read);
 
 void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 {
@@ -1934,6 +1935,7 @@ void rmap_walk(struct page *page, struct 
rmap_walk_control *rwc)
else
rmap_walk_file(page, rwc, false);
 }
+EXPORT_SYMBOL_GPL(rmap_walk);
 
 /* Like rmap_walk, but caller holds relevant rmap lock */
 void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
diff --git a/mm/util.c b/mm/util.c
index 988d11e6c17c..1df32546fe28 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -620,6 +620,7 @@ void *page_rmapping(struct page *page)
page = compound_head(page);
return __page_rmapping(page);
 }
+EXPORT_SYMBOL_GPL(page_rmapping);
 
 /*
  * Return true if this page is mapped into pagetables.
-- 
2.17.1



[RFC v5 07/11] mm/damon: Implement callbacks for physical memory monitoring

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit implements the four callbacks (->init_target_regions,
->update_target_regions, ->prepare_access_checks, and ->check_accesses)
for the basic access monitoring of the physical memory address space.
By setting the callback pointers to point to those, users can easily
monitor the accesses to the physical memory.

Internally, it uses the PTE Accessed bit, similarly to the virtual
memory support.  Also, it supports only user memory pages, as idle page
tracking also does, for the same reason.  If the monitoring target
physical memory address range contains non-user memory pages, the access
check of those pages will do nothing but simply treat the pages as not
accessed.

Users who want to use other access check primitives and/or monitor the
non-user memory regions could implement and use their own callbacks.

Signed-off-by: SeongJae Park 
---
 include/linux/damon.h |   5 ++
 mm/damon.c| 201 ++
 2 files changed, 206 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index f176a2b6e67c..eb7a5595b616 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -227,6 +227,11 @@ void kdamond_update_vm_regions(struct damon_ctx *ctx);
 void kdamond_prepare_vm_access_checks(struct damon_ctx *ctx);
 unsigned int kdamond_check_vm_accesses(struct damon_ctx *ctx);
 
+void kdamond_init_phys_regions(struct damon_ctx *ctx);
+void kdamond_update_phys_regions(struct damon_ctx *ctx);
+void kdamond_prepare_phys_access_checks(struct damon_ctx *ctx);
+unsigned int kdamond_check_phys_accesses(struct damon_ctx *ctx);
+
 int damon_set_pids(struct damon_ctx *ctx, int *pids, ssize_t nr_pids);
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long regions_update_int,
diff --git a/mm/damon.c b/mm/damon.c
index 3aecdef4c841..fb533b2ee4bf 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -27,10 +27,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -535,6 +538,18 @@ void kdamond_init_vm_regions(struct damon_ctx *ctx)
}
 }
 
+/*
+ * The initial regions construction function for the physical address space.
+ *
+ * This default version actually does nothing.  Users should set the initial
+ * regions by themselves before passing their damon_ctx to 'damon_start()', or
+ * implement their version of this and set '->init_target_regions' of their
+ * damon_ctx to point to it.
+ */
+void kdamond_init_phys_regions(struct damon_ctx *ctx)
+{
+}
+
 /*
  * Functions for the dynamic monitoring target regions update
  */
@@ -618,6 +633,19 @@ void kdamond_update_vm_regions(struct damon_ctx *ctx)
}
 }
 
+/*
+ * The dynamic monitoring target regions update function for the physical
+ * address space.
+ *
+ * This default version actually does nothing.  Users should update the
+ * regions in other callbacks such as '->aggregate_cb', or implement their
+ * version of this and set the '->update_target_regions' of their damon_ctx to
+ * point to it.
+ */
+void kdamond_update_phys_regions(struct damon_ctx *ctx)
+{
+}
+
 /*
  * Functions for the access checking of the regions
  */
@@ -753,6 +781,179 @@ unsigned int kdamond_check_vm_accesses(struct damon_ctx 
*ctx)
return max_nr_accesses;
 }
 
+/* access check functions for physical address based regions */
+
+/*
+ * Get a page by pfn if it is in the LRU list.  Otherwise, returns NULL.
+ *
+ * The body of this function is stolen from 'page_idle_get_page()'.  We
+ * steal rather than reuse it because the code is quite simple.
+ */
+static struct page *damon_phys_get_page(unsigned long pfn)
+{
+   struct page *page = pfn_to_online_page(pfn);
+   pg_data_t *pgdat;
+
+   if (!page || !PageLRU(page) ||
+   !get_page_unless_zero(page))
+   return NULL;
+
+   pgdat = page_pgdat(page);
+   spin_lock_irq(&pgdat->lru_lock);
+   if (unlikely(!PageLRU(page))) {
+   put_page(page);
+   page = NULL;
+   }
+   spin_unlock_irq(&pgdat->lru_lock);
+   return page;
+}
+
+static bool damon_page_mkold(struct page *page, struct vm_area_struct *vma,
+   unsigned long addr, void *arg)
+{
+   damon_mkold(vma->vm_mm, addr);
+   return true;
+}
+
+static void damon_phys_mkold(unsigned long paddr)
+{
+   struct page *page = damon_phys_get_page(PHYS_PFN(paddr));
+   struct rmap_walk_control rwc = {
+   .rmap_one = damon_page_mkold,
+   .anon_lock = page_lock_anon_vma_read,
+   };
+   bool need_lock;
+
+   if (!page)
+   return;
+
+   if (!page_mapped(page) || !page_rmapping(page))
+   return;
+
+   need_lock = !PageAnon(page) || PageKsm(page);
+   if (need_lock && !trylock_page(page))
+   return;
+
+   rmap

[RFC v5 05/11] Docs/damon: Document 'initial_regions' feature

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit documents the 'initial_regions' feature.

Signed-off-by: SeongJae Park 
---
 Documentation/admin-guide/mm/damon/usage.rst | 35 
 1 file changed, 35 insertions(+)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst 
b/Documentation/admin-guide/mm/damon/usage.rst
index 153f07da9368..573fcb4c57a7 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -315,6 +315,41 @@ having pids 42 and 4242 as the processes to be monitored 
and check it again::
 Note that setting the pids doesn't start the monitoring.
 
 
+Initial Monitoring Target Regions
+---------------------------------
+
+In case of the debugfs based monitoring, DAMON automatically sets and updates
+the monitoring target regions so that entire memory mappings of target
+processes can be covered. However, users might want to limit the monitoring
+region to specific address ranges, such as the heap, the stack, or specific
+file-mapped area. Or, some users might know the initial access pattern of their
+workloads and therefore want to set optimal initial regions for the 'adaptive
+regions adjustment'.
+
+In such cases, users can explicitly set the initial monitoring target regions
+as they want, by writing proper values to the ``init_regions`` file.  Each line
+of the input should represent one region in below form.::
+
+    <pid> <start address> <end address>
+
+The ``pid`` should already be in the ``pids`` file, and the regions should be
+passed in address order.  For example, below commands will set a couple of
+address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
+region of process 42, and another couple of address ranges, ``20-40`` and
+``50-100`` as that of process 4242.::
+
+# cd <debugfs>/damon
+# echo "42   1   100
+42   100 200
+4242 20  40
+4242 50  100" > init_regions
+
+Note that this sets the initial monitoring target regions only.  DAMON will
+automatically update the boundary of the regions after one ``regions update
+interval``.  Therefore, users should set the ``regions update interval`` large
+enough.
+
+
 Record
 ------
 
-- 
2.17.1



[RFC v5 08/11] mm/damon/debugfs: Support physical memory monitoring

2020-07-07 Thread SeongJae Park
From: SeongJae Park 

This commit makes the debugfs interface support the physical memory
monitoring, in addition to the virtual memory monitoring.

Users can do the physical memory monitoring by writing a special
keyword, 'paddr\n' to the 'pids' debugfs file.  Then, DAMON will check
the special keyword and configure the callbacks of the monitoring
context for the debugfs user for physical memory.  This will internally
add one fake monitoring target process, which has pid as -1.

Unlike the virtual memory monitoring, DAMON debugfs will not
automatically set the monitoring target region.  Therefore, users should
also set the monitoring target address region using the 'init_regions'
debugfs file.  While doing this, the 'pid' in the input should be '-1'.

Finally, the physical memory monitoring will not be automatically
terminated because it has a fake monitoring target process.  The user
should explicitly turn off the monitoring by writing 'off' to the
'monitor_on' debugfs file.

Signed-off-by: SeongJae Park 
---
 mm/damon.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/mm/damon.c b/mm/damon.c
index fb533b2ee4bf..34c418ef4e5f 100644
--- a/mm/damon.c
+++ b/mm/damon.c
@@ -1928,6 +1928,23 @@ static ssize_t debugfs_pids_write(struct file *file,
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
 
+   if (!strncmp(kbuf, "paddr\n", count)) {
+   /* Configure the context for physical memory monitoring */
+   ctx->init_target_regions = kdamond_init_phys_regions;
+   ctx->update_target_regions = kdamond_update_phys_regions;
+   ctx->prepare_access_checks = kdamond_prepare_phys_access_checks;
+   ctx->check_accesses = kdamond_check_phys_accesses;
+
+   /* Set the fake target task pid as -1 */
+   snprintf(kbuf, count, "-1");
+   } else {
+   /* Configure the context for virtual memory monitoring */
+   ctx->init_target_regions = kdamond_init_vm_regions;
+   ctx->update_target_regions = kdamond_update_vm_regions;
+   ctx->prepare_access_checks = kdamond_prepare_vm_access_checks;
+   ctx->check_accesses = kdamond_check_vm_accesses;
+   }
+
targets = str_to_pids(kbuf, ret, _targets);
if (!targets) {
ret = -ENOMEM;
-- 
2.17.1



<    4   5   6   7   8   9   10   11   12   13   >