[PATCH 1/3] tools lib traceevent: Add -O2 option to traceevent

2016-10-17 Thread Honggyu Kim
Since current traceevent somehow does not have an optimization flag,
this patch just adds -O2 to optimize its code.

Signed-off-by: Honggyu Kim <hong.gyu@lge.com>
---
 tools/lib/traceevent/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 7851df1..56d223b 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -124,7 +124,7 @@ else
 endif
 
 # Append required CFLAGS
-override CFLAGS += -fPIC
+override CFLAGS += -O2 -fPIC
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
 
-- 
2.10.0.rc2.dirty



[PATCH 2/3] tools lib traceevent: Check the return value of asprintf

2016-10-17 Thread Honggyu Kim
Since asprintf generates a compiler warning when its return value is
not properly handled, this patch checks whether each asprintf call is
successful or not.

Signed-off-by: Honggyu Kim <hong.gyu@lge.com>
---
 tools/lib/traceevent/parse-filter.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/tools/lib/traceevent/parse-filter.c 
b/tools/lib/traceevent/parse-filter.c
index 7c214ce..f0fcdcd 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -2122,7 +2122,8 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
default:
break;
}
-   asprintf(&str, val ? "TRUE" : "FALSE");
+   if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+   return NULL;
break;
}
}
@@ -2140,7 +2141,8 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
break;
}
 
-   asprintf(&str, "(%s) %s (%s)", left, op, right);
+   if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+   return NULL;
break;
 
case FILTER_OP_NOT:
@@ -2156,10 +2158,12 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
-   asprintf(&str, right_val ? "FALSE" : "TRUE");
+   if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+   return NULL;
break;
}
-   asprintf(&str, "%s(%s)", op, right);
+   if (asprintf(&str, "%s(%s)", op, right) < 0)
+   return NULL;
break;
 
default:
@@ -2175,7 +2179,8 @@ static char *val_to_str(struct event_filter *filter, 
struct filter_arg *arg)
 {
char *str = NULL;
 
-   asprintf(&str, "%lld", arg->value.val);
+   if (asprintf(&str, "%lld", arg->value.val) < 0)
+   return NULL;
 
return str;
 }
@@ -2233,7 +2238,8 @@ static char *exp_to_str(struct event_filter *filter, 
struct filter_arg *arg)
break;
}
 
-   asprintf(&str, "%s %s %s", lstr, op, rstr);
+   if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+   return NULL;
 out:
free(lstr);
free(rstr);
@@ -2277,7 +2283,8 @@ static char *num_to_str(struct event_filter *filter, 
struct filter_arg *arg)
if (!op)
op = "<=";
 
-   asprintf(&str, "%s %s %s", lstr, op, rstr);
+   if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+   return NULL;
break;
 
default:
@@ -2312,8 +2319,9 @@ static char *str_to_str(struct event_filter *filter, 
struct filter_arg *arg)
if (!op)
op = "!~";
 
-   asprintf(&str, "%s %s \"%s\"",
-arg->str.field->name, op, arg->str.val);
+   if (asprintf(&str, "%s %s \"%s\"",
+arg->str.field->name, op, arg->str.val) < 0)
+   return NULL;
break;
 
default:
@@ -2329,7 +2337,8 @@ static char *arg_to_str(struct event_filter *filter, 
struct filter_arg *arg)
 
switch (arg->type) {
case FILTER_ARG_BOOLEAN:
-   asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+   if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+   return NULL;
return str;
 
case FILTER_ARG_OP:
-- 
2.10.0.rc2.dirty



[PATCH 3/3] tools lib traceevent: Fix to set uninitialized variables

2016-10-17 Thread Honggyu Kim
This patch fixes uninitialized variables to remove remaining compiler
warnings as follows:

  event-parse.c: In function ‘pevent_find_event_by_name’:
  event-parse.c:3513:21: warning: ‘event’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
pevent->last_event = event;
   ^
  event-parse.c: In function ‘pevent_data_lat_fmt’:
  event-parse.c:5156:20: warning: ‘migrate_disable’ may be used uninitialized 
in this function [-Wmaybe-uninitialized]
  trace_seq_printf(s, "%d", migrate_disable);
  ^
  event-parse.c:5163:20: warning: ‘lock_depth’ may be used uninitialized in 
this function [-Wmaybe-uninitialized]
  trace_seq_printf(s, "%d", lock_depth);
  ^
  event-parse.c: In function ‘pevent_event_info’:
  event-parse.c:5060:18: warning: ‘len_arg’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
   print_str_arg(&p, data, size, event,
^
  event-parse.c:4846:6: note: ‘len_arg’ was declared here
int len_arg;
^
  ...
  kbuffer-parse.c: In function ‘__old_next_event’:
  kbuffer-parse.c:339:27: warning: ‘length’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
kbuf->next = kbuf->index + length;
 ^
  kbuffer-parse.c:297:15: note: ‘length’ was declared here
unsigned int length;
         ^

Signed-off-by: Honggyu Kim <hong.gyu@lge.com>
---
 tools/lib/traceevent/event-parse.c | 8 
 tools/lib/traceevent/kbuffer-parse.c   | 2 +-
 tools/lib/traceevent/plugin_function.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c 
b/tools/lib/traceevent/event-parse.c
index 664c90c..2fb9338 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3490,7 +3490,7 @@ struct event_format *
 pevent_find_event_by_name(struct pevent *pevent,
  const char *sys, const char *name)
 {
-   struct event_format *event;
+   struct event_format *event = NULL;
int i;
 
if (pevent->last_event &&
@@ -4843,7 +4843,7 @@ static void pretty_print(struct trace_seq *s, void *data, 
int size, struct event
char format[32];
int show_func;
int len_as_arg;
-   int len_arg;
+   int len_arg = 0;
int len;
int ls;
 
@@ -5102,8 +5102,8 @@ void pevent_data_lat_fmt(struct pevent *pevent,
static int migrate_disable_exists;
unsigned int lat_flags;
unsigned int pc;
-   int lock_depth;
-   int migrate_disable;
+   int lock_depth = -1;
+   int migrate_disable = 0;
int hardirq;
int softirq;
void *data = record->data;
diff --git a/tools/lib/traceevent/kbuffer-parse.c 
b/tools/lib/traceevent/kbuffer-parse.c
index 65984f1..fc8f20c 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -294,7 +294,7 @@ static unsigned int old_update_pointers(struct kbuffer 
*kbuf)
unsigned int type;
unsigned int len;
unsigned int delta;
-   unsigned int length;
+   unsigned int length = 0;
void *ptr = kbuf->data + kbuf->curr;
 
type_len_ts = read_4(kbuf, ptr);
diff --git a/tools/lib/traceevent/plugin_function.c 
b/tools/lib/traceevent/plugin_function.c
index a00ec19..42dbf73 100644
--- a/tools/lib/traceevent/plugin_function.c
+++ b/tools/lib/traceevent/plugin_function.c
@@ -130,7 +130,7 @@ static int function_handler(struct trace_seq *s, struct 
pevent_record *record,
unsigned long long pfunction;
const char *func;
const char *parent;
-   int index;
+   int index = 0;
 
	if (pevent_get_field_val(s, event, "ip", record, &function, 1))
return trace_seq_putc(s, '!');
-- 
2.10.0.rc2.dirty



[PATCH 1/3] tools lib traceevent: Add -O2 option to traceevent

2016-10-17 Thread Honggyu Kim
Since current traceevent somehow does not have an optimization flag,
this patch just adds -O2 to optimize its code.

Signed-off-by: Honggyu Kim 
---
 tools/lib/traceevent/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 7851df1..56d223b 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -124,7 +124,7 @@ else
 endif
 
 # Append required CFLAGS
-override CFLAGS += -fPIC
+override CFLAGS += -O2 -fPIC
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
 
-- 
2.10.0.rc2.dirty



[PATCH 2/3] tools lib traceevent: Check the return value of asprintf

2016-10-17 Thread Honggyu Kim
Since asprintf generates a compiler warning when its return value is
not properly handled, this patch checks whether each asprintf call is
successful or not.

Signed-off-by: Honggyu Kim 
---
 tools/lib/traceevent/parse-filter.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/tools/lib/traceevent/parse-filter.c 
b/tools/lib/traceevent/parse-filter.c
index 7c214ce..f0fcdcd 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -2122,7 +2122,8 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
default:
break;
}
-   asprintf(&str, val ? "TRUE" : "FALSE");
+   if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+   return NULL;
break;
}
}
@@ -2140,7 +2141,8 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
break;
}
 
-   asprintf(&str, "(%s) %s (%s)", left, op, right);
+   if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+   return NULL;
break;
 
case FILTER_OP_NOT:
@@ -2156,10 +2158,12 @@ static char *op_to_str(struct event_filter *filter, 
struct filter_arg *arg)
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
-   asprintf(&str, right_val ? "FALSE" : "TRUE");
+   if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+   return NULL;
break;
}
-   asprintf(&str, "%s(%s)", op, right);
+   if (asprintf(&str, "%s(%s)", op, right) < 0)
+   return NULL;
break;
 
default:
@@ -2175,7 +2179,8 @@ static char *val_to_str(struct event_filter *filter, 
struct filter_arg *arg)
 {
char *str = NULL;
 
-   asprintf(&str, "%lld", arg->value.val);
+   if (asprintf(&str, "%lld", arg->value.val) < 0)
+   return NULL;
 
return str;
 }
@@ -2233,7 +2238,8 @@ static char *exp_to_str(struct event_filter *filter, 
struct filter_arg *arg)
break;
}
 
-   asprintf(&str, "%s %s %s", lstr, op, rstr);
+   if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+   return NULL;
 out:
free(lstr);
free(rstr);
@@ -2277,7 +2283,8 @@ static char *num_to_str(struct event_filter *filter, 
struct filter_arg *arg)
if (!op)
op = "<=";
 
-   asprintf(&str, "%s %s %s", lstr, op, rstr);
+   if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+   return NULL;
break;
 
default:
@@ -2312,8 +2319,9 @@ static char *str_to_str(struct event_filter *filter, 
struct filter_arg *arg)
if (!op)
op = "!~";
 
-   asprintf(&str, "%s %s \"%s\"",
-arg->str.field->name, op, arg->str.val);
+   if (asprintf(&str, "%s %s \"%s\"",
+arg->str.field->name, op, arg->str.val) < 0)
+   return NULL;
break;
 
default:
@@ -2329,7 +2337,8 @@ static char *arg_to_str(struct event_filter *filter, 
struct filter_arg *arg)
 
switch (arg->type) {
case FILTER_ARG_BOOLEAN:
-   asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+   if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+   return NULL;
return str;
 
case FILTER_ARG_OP:
-- 
2.10.0.rc2.dirty



[PATCH 3/3] tools lib traceevent: Fix to set uninitialized variables

2016-10-17 Thread Honggyu Kim
This patch fixes uninitialized variables to remove remaining compiler
warnings as follows:

  event-parse.c: In function ‘pevent_find_event_by_name’:
  event-parse.c:3513:21: warning: ‘event’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
pevent->last_event = event;
   ^
  event-parse.c: In function ‘pevent_data_lat_fmt’:
  event-parse.c:5156:20: warning: ‘migrate_disable’ may be used uninitialized 
in this function [-Wmaybe-uninitialized]
  trace_seq_printf(s, "%d", migrate_disable);
  ^
  event-parse.c:5163:20: warning: ‘lock_depth’ may be used uninitialized in 
this function [-Wmaybe-uninitialized]
  trace_seq_printf(s, "%d", lock_depth);
  ^
  event-parse.c: In function ‘pevent_event_info’:
  event-parse.c:5060:18: warning: ‘len_arg’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
   print_str_arg(&p, data, size, event,
^
  event-parse.c:4846:6: note: ‘len_arg’ was declared here
int len_arg;
^
  ...
  kbuffer-parse.c: In function ‘__old_next_event’:
  kbuffer-parse.c:339:27: warning: ‘length’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
kbuf->next = kbuf->index + length;
 ^
  kbuffer-parse.c:297:15: note: ‘length’ was declared here
unsigned int length;
         ^

Signed-off-by: Honggyu Kim 
---
 tools/lib/traceevent/event-parse.c | 8 
 tools/lib/traceevent/kbuffer-parse.c   | 2 +-
 tools/lib/traceevent/plugin_function.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c 
b/tools/lib/traceevent/event-parse.c
index 664c90c..2fb9338 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3490,7 +3490,7 @@ struct event_format *
 pevent_find_event_by_name(struct pevent *pevent,
  const char *sys, const char *name)
 {
-   struct event_format *event;
+   struct event_format *event = NULL;
int i;
 
if (pevent->last_event &&
@@ -4843,7 +4843,7 @@ static void pretty_print(struct trace_seq *s, void *data, 
int size, struct event
char format[32];
int show_func;
int len_as_arg;
-   int len_arg;
+   int len_arg = 0;
int len;
int ls;
 
@@ -5102,8 +5102,8 @@ void pevent_data_lat_fmt(struct pevent *pevent,
static int migrate_disable_exists;
unsigned int lat_flags;
unsigned int pc;
-   int lock_depth;
-   int migrate_disable;
+   int lock_depth = -1;
+   int migrate_disable = 0;
int hardirq;
int softirq;
void *data = record->data;
diff --git a/tools/lib/traceevent/kbuffer-parse.c 
b/tools/lib/traceevent/kbuffer-parse.c
index 65984f1..fc8f20c 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -294,7 +294,7 @@ static unsigned int old_update_pointers(struct kbuffer 
*kbuf)
unsigned int type;
unsigned int len;
unsigned int delta;
-   unsigned int length;
+   unsigned int length = 0;
void *ptr = kbuf->data + kbuf->curr;
 
type_len_ts = read_4(kbuf, ptr);
diff --git a/tools/lib/traceevent/plugin_function.c 
b/tools/lib/traceevent/plugin_function.c
index a00ec19..42dbf73 100644
--- a/tools/lib/traceevent/plugin_function.c
+++ b/tools/lib/traceevent/plugin_function.c
@@ -130,7 +130,7 @@ static int function_handler(struct trace_seq *s, struct 
pevent_record *record,
unsigned long long pfunction;
const char *func;
const char *parent;
-   int index;
+   int index = 0;
 
	if (pevent_get_field_val(s, event, "ip", record, &function, 1))
return trace_seq_putc(s, '!');
-- 
2.10.0.rc2.dirty



[RFC PATCH 0/4] DAMON based 2-tier memory management for CXL memory

2024-01-14 Thread Honggyu Kim
 - - - -| 1.18
  default  | - 1.16  1.15  1.17  1.18  1.16  1.18  1.15 | 1.17
  DAMON 2-tier | - 1.04  1.04  1.05  1.05  1.06  1.05  1.06 | 1.05
  =++=
  CXL usage of redis-server in GB   | AVERAGE
  -++-
  DRAM-only|  0.0 - - - - - - - |  0.0
  CXL-only | 52.6 - - - - - - - | 52.6
  default  |-  19.3  26.1  32.2  38.5  44.6  50.5  50.6 | 37.4
  DAMON 2-tier |-   1.3   3.8   7.0   4.1   9.4  12.5  16.7 |  7.8
  =++=

In summary of both results, our evaluation shows that "DAMON 2-tier"
memory management reduces the performance slowdown compared to the
"default" memory policy from 15~17% to 4~5% when the system runs with
high memory pressure on its fast tier DRAM nodes.

The similar evaluation was done in another machine that has 256GB of
local DRAM and 96GB of CXL memory.  The performance slowdown is reduced
from 20~24% for "default" to 5~7% for "DAMON 2-tier".

Having these DAMOS_DEMOTE and DAMOS_PROMOTE actions can make 2-tier
memory systems run more efficiently under high memory pressures.

Signed-off-by: Honggyu Kim 
Signed-off-by: Hyeongtak Ji 
Signed-off-by: Rakie Kim 

[1] https://lore.kernel.org/damon/20231112195602.61525-1...@kernel.org
[2] https://github.com/skhynix/hmsdk
[3] https://github.com/redis/redis/tree/7.0.0
[4] https://github.com/brianfrankcooper/YCSB/tree/0.17.0
[5] https://dl.acm.org/doi/10.1145/3503222.3507731
[6] https://dl.acm.org/doi/10.1145/3582016.3582063

Honggyu Kim (2):
  mm/vmscan: refactor reclaim_pages with reclaim_or_migrate_folios
  mm/damon: introduce DAMOS_DEMOTE action for demotion

Hyeongtak Ji (2):
  mm/memory-tiers: add next_promotion_node to find promotion target
  mm/damon: introduce DAMOS_PROMOTE action for promotion

 include/linux/damon.h  |   4 +
 include/linux/memory-tiers.h   |  11 ++
 include/linux/migrate_mode.h   |   1 +
 include/linux/vm_event_item.h  |   1 +
 include/trace/events/migrate.h |   3 +-
 mm/damon/paddr.c   |  46 ++-
 mm/damon/sysfs-schemes.c   |   2 +
 mm/internal.h  |   2 +
 mm/memory-tiers.c  |  43 ++
 mm/vmscan.c| 231 +++--
 mm/vmstat.c|   1 +
 11 files changed, 330 insertions(+), 15 deletions(-)


base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
-- 
2.34.1




[RFC PATCH 1/4] mm/vmscan: refactor reclaim_pages with reclaim_or_migrate_folios

2024-01-14 Thread Honggyu Kim
Since we will introduce reclaim_pages like functions such as
demote_pages and promote_pages, the most of the code can be shared.

This is a preparation patch that introduces reclaim_or_migrate_folios()
to cover all the logics, but it provides a handler for the different
actions.

No functional changes applied.

Signed-off-by: Honggyu Kim 
---
 mm/vmscan.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index bba207f41b14..7ca2396ccc3b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2107,15 +2107,16 @@ static unsigned int reclaim_folio_list(struct list_head 
*folio_list,
return nr_reclaimed;
 }
 
-unsigned long reclaim_pages(struct list_head *folio_list)
+static unsigned long reclaim_or_migrate_folios(struct list_head *folio_list,
+   unsigned int (*handler)(struct list_head *, struct pglist_data 
*))
 {
int nid;
-   unsigned int nr_reclaimed = 0;
+   unsigned int nr_folios = 0;
LIST_HEAD(node_folio_list);
unsigned int noreclaim_flag;
 
if (list_empty(folio_list))
-   return nr_reclaimed;
+   return nr_folios;
 
noreclaim_flag = memalloc_noreclaim_save();
 
@@ -2129,15 +2130,20 @@ unsigned long reclaim_pages(struct list_head 
*folio_list)
continue;
}
 
-   nr_reclaimed += reclaim_folio_list(_folio_list, 
NODE_DATA(nid));
+   nr_folios += handler(_folio_list, NODE_DATA(nid));
nid = folio_nid(lru_to_folio(folio_list));
} while (!list_empty(folio_list));
 
-   nr_reclaimed += reclaim_folio_list(_folio_list, NODE_DATA(nid));
+   nr_folios += handler(_folio_list, NODE_DATA(nid));
 
memalloc_noreclaim_restore(noreclaim_flag);
 
-   return nr_reclaimed;
+   return nr_folios;
+}
+
+unsigned long reclaim_pages(struct list_head *folio_list)
+{
+   return reclaim_or_migrate_folios(folio_list, reclaim_folio_list);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-- 
2.34.1




[RFC PATCH 4/4] mm/damon: introduce DAMOS_PROMOTE action for promotion

2024-01-14 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch introduces DAMOS_PROMOTE action for paddr mode.

It includes renaming alloc_demote_folio to alloc_migrate_folio to use it
for promotion as well.

The execution sequence of DAMOS_DEMOTE and DAMOS_PROMOTE look as
follows for comparison.

  DAMOS_DEMOTE action
damo_pa_apply_scheme
-> damon_pa_reclaim
-> demote_pages
-> do_demote_folio_list
-> __demote_folio_list
-> demote_folio_list

  DAMOS_PROMOTE action
damo_pa_apply_scheme
-> damon_pa_promote
-> promote_pages
-> do_promote_folio_list
-> __promote_folio_list
-> promote_folio_list

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h  |   2 +
 include/linux/migrate_mode.h   |   1 +
 include/linux/vm_event_item.h  |   1 +
 include/trace/events/migrate.h |   3 +-
 mm/damon/paddr.c   |  29 
 mm/damon/sysfs-schemes.c   |   1 +
 mm/internal.h  |   1 +
 mm/vmscan.c| 129 -
 mm/vmstat.c|   1 +
 9 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 4c0a0fef09c5..477060bb6718 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -107,6 +107,7 @@ struct damon_target {
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
  * @DAMOS_STAT:Do nothing but count the stat.
  * @DAMOS_DEMOTE:  Do demotion for the current region.
+ * @DAMOS_PROMOTE: Do promotion if possible, otherwise do nothing.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
  *
  * The support of each action is up to running  damon_operations.
@@ -125,6 +126,7 @@ enum damos_action {
DAMOS_LRU_DEPRIO,
DAMOS_STAT, /* Do nothing but only record the stat */
DAMOS_DEMOTE,
+   DAMOS_PROMOTE,
NR_DAMOS_ACTIONS,
 };
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index f37cc03f9369..63f75eb9abf3 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -29,6 +29,7 @@ enum migrate_reason {
MR_CONTIG_RANGE,
MR_LONGTERM_PIN,
MR_DEMOTION,
+   MR_PROMOTION,
MR_TYPES
 };
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8abfa1240040..63cf920afeaa 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+   PGPROMOTE,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 0190ef725b43..f0dd569c1e62 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -22,7 +22,8 @@
EM( MR_NUMA_MISPLACED,  "numa_misplaced")   \
EM( MR_CONTIG_RANGE,"contig_range") \
EM( MR_LONGTERM_PIN,"longterm_pin") \
-   EMe(MR_DEMOTION,"demotion")
+   EM( MR_DEMOTION,"demotion") \
+   EMe(MR_PROMOTION,   "promotion")
 
 /*
  * First define the enums in the above macros to be exported to userspace
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index d3e3f077cd00..360ce69d5898 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -257,6 +257,32 @@ static unsigned long damon_pa_reclaim(struct damon_region 
*r, struct damos *s, b
return applied * PAGE_SIZE;
 }
 
+static unsigned long damon_pa_promote(struct damon_region *r, struct damos *s)
+{
+   unsigned long addr, applied;
+   LIST_HEAD(folio_list);
+
+   for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+   struct folio *folio = damon_get_folio(PHYS_PFN(addr));
+
+   if (!folio)
+   continue;
+
+   if (damos_pa_filter_out(s, folio))
+   goto put_folio;
+
+   if (!folio_isolate_lru(folio))
+   goto put_folio;
+
+   list_add(>lru, _list);
+put_folio:
+   folio_put(folio);
+   }
+   applied = promote_pages(_list);
+   cond_resched();
+   return applied * PAGE_SIZE;
+}
+
 static inline unsigned long damon_pa_mark_accessed_or_deactivate(
struct damon_region *r, struct damos *s, bool mark_accessed)
 {
@@ -309,6 +335,8 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx 
*ctx,
break;
case DAMOS_DEMOTE:
return damon_pa_reclaim(r, scheme, true);
+   case DAMOS_PROMOTE:
+   return damon_pa_promote(r, scheme);
default:
/* DAM

[RFC PATCH 3/4] mm/memory-tiers: add next_promotion_node to find promotion target

2024-01-14 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch adds next_promotion_node that can be used to identify the
appropriate promotion target based on memory tiers.  When multiple
promotion target nodes are available, the nearest node is selected based
on numa distance.

Signed-off-by: Hyeongtak Ji 
---
 include/linux/memory-tiers.h | 11 +
 mm/memory-tiers.c| 43 
 2 files changed, 54 insertions(+)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 1e39d27bee41..0788e435fc50 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -50,6 +50,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs 
*perf,
 int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist);
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node);
+int next_promotion_node(int node);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
 #else
@@ -58,6 +59,11 @@ static inline int next_demotion_node(int node)
return NUMA_NO_NODE;
 }
 
+static inline int next_promotion_node(int node)
+{
+   return NUMA_NO_NODE;
+}
+
 static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t 
*targets)
 {
*targets = NODE_MASK_NONE;
@@ -101,6 +107,11 @@ static inline int next_demotion_node(int node)
return NUMA_NO_NODE;
 }
 
+static inline int next_promotion_node(int node)
+{
+   return NUMA_NO_NODE;
+}
+
 static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t 
*targets)
 {
*targets = NODE_MASK_NONE;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 8d5291add2bc..0060ee571cf4 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -335,6 +335,49 @@ int next_demotion_node(int node)
return target;
 }
 
+/*
+ * Select a promotion target that is close to the from node among the given
+ * two nodes.
+ *
+ * TODO: consider other decision policy as node_distance may not be precise.
+ */
+static int select_promotion_target(int a, int b, int from)
+{
+   if (node_distance(from, a) < node_distance(from, b))
+   return a;
+   else
+   return b;
+}
+
+/**
+ * next_promotion_node() - Get the next node in the promotion path
+ * @node: The starting node to lookup the next node
+ *
+ * Return: node id for next memory node in the promotion path hierarchy
+ * from @node; NUMA_NO_NODE if @node is the toptier.
+ */
+int next_promotion_node(int node)
+{
+   int target = NUMA_NO_NODE;
+   int nid;
+
+   if (node_is_toptier(node))
+   return NUMA_NO_NODE;
+
+   rcu_read_lock();
+   for_each_node_state(nid, N_MEMORY) {
+   if (node_isset(node, node_demotion[nid].preferred)) {
+   if (target == NUMA_NO_NODE)
+   target = nid;
+   else
+   target = select_promotion_target(nid, target, 
node);
+   }
+   }
+   rcu_read_unlock();
+
+   return target;
+}
+
 static void disable_all_demotion_targets(void)
 {
struct memory_tier *memtier;
-- 
2.34.1




[RFC PATCH 2/4] mm/damon: introduce DAMOS_DEMOTE action for demotion

2024-01-14 Thread Honggyu Kim
This patch introduces DAMOS_DEMOTE action, which is similar to
DAMOS_PAGEOUT, but demote folios instead of swapping them out.

Since there are some common routines with pageout, many functions have
similar logics between pageout and demote.

The execution sequence of DAMOS_PAGEOUT and DAMOS_DEMOTE look as follows.

  DAMOS_PAGEOUT action
damo_pa_apply_scheme
-> damon_pa_reclaim
-> reclaim_pages
-> reclaim_folio_list
-> shrink_folio_list

  DAMOS_DEMOTE action
damo_pa_apply_scheme
-> damon_pa_reclaim
-> demote_pages
-> do_demote_folio_list
-> __demote_folio_list
-> demote_folio_list

__demote_folio_list() is a minimized version of shrink_folio_list(), but
it's minified only for demotion.

Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h|  2 +
 mm/damon/paddr.c | 17 +---
 mm/damon/sysfs-schemes.c |  1 +
 mm/internal.h|  1 +
 mm/vmscan.c  | 84 
 5 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e00ddf1ed39c..4c0a0fef09c5 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -106,6 +106,7 @@ struct damon_target {
  * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
  * @DAMOS_STAT:Do nothing but count the stat.
+ * @DAMOS_DEMOTE:  Do demotion for the current region.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
  *
  * The support of each action is up to running  damon_operations.
@@ -123,6 +124,7 @@ enum damos_action {
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
DAMOS_STAT, /* Do nothing but only record the stat */
+   DAMOS_DEMOTE,
NR_DAMOS_ACTIONS,
 };
 
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 081e2a325778..d3e3f077cd00 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -224,7 +224,7 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
return false;
 }
 
-static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
+static unsigned long damon_pa_reclaim(struct damon_region *r, struct damos *s, 
bool is_demote)
 {
unsigned long addr, applied;
LIST_HEAD(folio_list);
@@ -242,14 +242,17 @@ static unsigned long damon_pa_pageout(struct damon_region 
*r, struct damos *s)
folio_test_clear_young(folio);
if (!folio_isolate_lru(folio))
goto put_folio;
-   if (folio_test_unevictable(folio))
+   if (folio_test_unevictable(folio) && !is_demote)
folio_putback_lru(folio);
else
list_add(>lru, _list);
 put_folio:
folio_put(folio);
}
-   applied = reclaim_pages(_list);
+   if (is_demote)
+   applied = demote_pages(_list);
+   else
+   applied = reclaim_pages(_list);
cond_resched();
return applied * PAGE_SIZE;
 }
@@ -297,13 +300,15 @@ static unsigned long damon_pa_apply_scheme(struct 
damon_ctx *ctx,
 {
switch (scheme->action) {
case DAMOS_PAGEOUT:
-   return damon_pa_pageout(r, scheme);
+   return damon_pa_reclaim(r, scheme, false);
case DAMOS_LRU_PRIO:
return damon_pa_mark_accessed(r, scheme);
case DAMOS_LRU_DEPRIO:
return damon_pa_deactivate_pages(r, scheme);
case DAMOS_STAT:
break;
+   case DAMOS_DEMOTE:
+   return damon_pa_reclaim(r, scheme, true);
default:
/* DAMOS actions that not yet supported by 'paddr'. */
break;
@@ -317,11 +322,11 @@ static int damon_pa_scheme_score(struct damon_ctx 
*context,
 {
switch (scheme->action) {
case DAMOS_PAGEOUT:
+   case DAMOS_LRU_DEPRIO:
+   case DAMOS_DEMOTE:
return damon_cold_score(context, r, scheme);
case DAMOS_LRU_PRIO:
return damon_hot_score(context, r, scheme);
-   case DAMOS_LRU_DEPRIO:
-   return damon_cold_score(context, r, scheme);
default:
break;
}
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index fe0fe2562000..ac7cd3f17b12 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1187,6 +1187,7 @@ static const char * const damon_sysfs_damos_action_strs[] 
= {
"lru_prio",
"lru_deprio",
"stat",
+   "demote",
 };
 
 static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
diff --git a/mm/internal.h b/mm/internal.h
index b61034bd50f5..2380397ec2f3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -869,6 +869,7 @@ extern void set_pageblock_order(void);
 unsigned long reclaim_pages(struct l

Re: [RFC PATCH 0/4] DAMON based 2-tier memory management for CXL memory

2024-01-17 Thread Honggyu Kim
Hi SeongJae,

Thanks very much for your detailed comments.

On Tue, 16 Jan 2024 12:31:59 -0800 SeongJae Park  wrote:

> Thank you so much for this great patches and the above nice test results.  I
> believe the test setup and results make sense, and merging a revised version 
> of
> this patchset would provide real benefits to the users.

Glad to hear that!

> In a high level, I think it might better to separate DAMON internal changes
> from DAMON external changes.

I agree.  I can't guarantee that I can move all the external changes
inside mm/damon, but I will try that as much as possible.

> For DAMON part changes, I have no big concern other than trivial coding style
> level comments.

Sure.  I will fix those.

> For DAMON-external changes that implementing demote_pages() and
> promote_pages(), I'm unsure if the implementation is reusing appropriate
> functions, and if those are placed in the right source file.  Especially, I'm
> unsure if vmscan.c is the right place for promotion code.  Also I don't know 
> if
> there is a good agreement on the promotion/demotion target node decision.  
> That
> should be because I'm not that familiar with the areas and the files, but I
> feel this might because our discussions on the promotion and the demotion
> operations are having rooms for being more matured.  Because I'm not very
> familiar with the part, I'd like to hear others' comments, too.

I would also like to hear others' comments, but this might not be needed
if most of the external code can be moved to mm/damon.

> To this end, I feel the problem might be able to be simpler, because this
> patchset is trying to provide two sophisticated operations, while I think a
> simpler approach might be possible.  My humble simpler idea is adding a DAMOS
> operation for moving pages to a given node (like sys_move_phy_pages RFC[1]),
> instead of the promote/demote.  Because the general pages migration can handle
> multiple cases including the promote/demote in my humble assumption.

My initial implementation was similar but I found that it's not accurate
enough due to the nature of inaccuracy of DAMON regions.  I saw that
many pages were demoted and promoted back and forth because migration
target regions include both hot and cold pages together.

So I have implemented the demotion and promotion logics based on the
shrink_folio_list, which contains many corner case handling logics for
reclaim.

Having the current demotion and promotion logics makes the hot/cold
migration pretty accurate as expected.  We made a simple program called
"hot_cold" and it receives 2 arguments for hot size and cold size in MB.
For example, "hot_cold 200 500" allocates 200MB of hot memory and 500MB
of cold memory.  It basically allocates 2 large blocks of memory with
mmap, then repeat memset for the initial 200MB to make it accessed in an
infinite loop.

Let's say there are 3 nodes in the system and the first node0 and node1
are the first tier, and node2 is the second tier.

  $ cat /sys/devices/virtual/memory_tiering/memory_tier4/nodelist
  0-1

  $ cat /sys/devices/virtual/memory_tiering/memory_tier22/nodelist
  2

Here is the result of partitioning hot/cold memory, and I put the execution
command at the right side of the numastat result.  I initially ran each
hot_cold program with preferred setting so that they initially allocate
memory on one of node0 or node2, but they gradually migrated based on
their access frequencies.

  $ numastat -c -p hot_cold
  Per-node process memory usage (in MBs) 
  PID  Node 0 Node 1 Node 2 Total 
  ---  -- -- -- - 
  754 (hot_cold) 1800  0   2000  3800<- hot_cold 1800 2000 
  1184 (hot_cold) 300  0500   800<- hot_cold 300 500 
  1818 (hot_cold) 801  0   3199  4000<- hot_cold 800 3200 
  30289 (hot_cold)  4  0  510<- hot_cold 3 5 
  30325 (hot_cold) 31  0 5181<- hot_cold 30 50 
  ---  -- -- -- - 
  Total  2938  0   5756  8695

The final node placement result shows that DAMON accurately migrated
pages by their hotness for multiple processes.

> In more detail, users could decide which is the appropriate node for promotion
> or demotion and use the new DAMOS action to do promotion and demotion.  Users
> would requested to decide which node is the proper promotion/demotion target
> nodes, but that decision wouldn't be that hard in my opinion.
> 
> For this, 'struct damos' would need to be updated for such argument-dependent
> actions, like 'struct damos_filter' is having a union.

That might be a better solution.  I will think about it.

> In future, we could extend the operation to the promotion and the demotion
> after the discussion around the promotion and demotion is matured, if required.
> And assuming DAMON be extended for originating CPU-aware access monitoring, 
> the
> new DAMOS action would also cover more use cases such as general NUMA nodes
> balancing 

[PATCH v2 2/7] mm: make alloc_demote_folio externally invokable for migration

2024-02-26 Thread Honggyu Kim
The alloc_demote_folio function can be used outside of vmscan.c, so it'd be
better to remove the static keyword from it.

This function can also be used for both demotion and promotion so it'd
be better to rename it from alloc_demote_folio to alloc_migrate_folio.

Signed-off-by: Honggyu Kim 
---
 mm/internal.h |  1 +
 mm/vmscan.c   | 10 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index b61034bd50f5..61af6641235d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -866,6 +866,7 @@ extern unsigned long  __must_check vm_mmap_pgoff(struct 
file *, unsigned long,
 unsigned long, unsigned long);
 
 extern void set_pageblock_order(void);
+struct folio *alloc_migrate_folio(struct folio *src, unsigned long private);
 unsigned long reclaim_pages(struct list_head *folio_list);
 unsigned int reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *folio_list);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bba207f41b14..b8a1a1599e3c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -910,8 +910,7 @@ static void folio_check_dirty_writeback(struct folio *folio,
mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
 }
 
-static struct folio *alloc_demote_folio(struct folio *src,
-   unsigned long private)
+struct folio *alloc_migrate_folio(struct folio *src, unsigned long private)
 {
struct folio *dst;
nodemask_t *allowed_mask;
@@ -935,6 +934,11 @@ static struct folio *alloc_demote_folio(struct folio *src,
if (dst)
return dst;
 
+   /*
+* Allocation failed from the target node so try to allocate from
+* fallback nodes based on allowed_mask.
+* See fallback_alloc() at mm/slab.c.
+*/
mtc->gfp_mask &= ~__GFP_THISNODE;
mtc->nmask = allowed_mask;
 
@@ -973,7 +977,7 @@ static unsigned int demote_folio_list(struct list_head 
*demote_folios,
node_get_allowed_targets(pgdat, _mask);
 
/* Demotion ignores all cpuset and mempolicy settings */
-   migrate_pages(demote_folios, alloc_demote_folio, NULL,
+   migrate_pages(demote_folios, alloc_migrate_folio, NULL,
  (unsigned long), MIGRATE_ASYNC, MR_DEMOTION,
  _succeeded);
 
-- 
2.34.1




[PATCH v2 3/7] mm/damon: introduce DAMOS_DEMOTE action for demotion

2024-02-26 Thread Honggyu Kim
This patch introduces DAMOS_DEMOTE action, which is similar to
DAMOS_PAGEOUT, but demote folios instead of swapping them out.

Since there are some common routines with pageout, many functions have
similar logic between pageout and demote.

damon_pa_migrate_folio_list() is a minimized version of
shrink_folio_list(), but it's minimized only for demotion.

Signed-off-by: Honggyu Kim 
Signed-off-by: Hyeongtak Ji 
---
 include/linux/damon.h|   2 +
 mm/damon/paddr.c | 222 ++-
 mm/damon/sysfs-schemes.c |   1 +
 3 files changed, 224 insertions(+), 1 deletion(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e00ddf1ed39c..86e66772766b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
+ * @DAMOS_DEMOTE:   Do demotion for the given region.
  * @DAMOS_STAT:Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
  *
@@ -122,6 +123,7 @@ enum damos_action {
DAMOS_NOHUGEPAGE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
+   DAMOS_DEMOTE,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
 };
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 277a1c4d833c..23e37ce57202 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -12,6 +12,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "../internal.h"
 #include "ops-common.h"
@@ -226,8 +229,214 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
 
 enum migration_mode {
MIG_PAGEOUT,
+   MIG_DEMOTE,
 };
 
+/*
+ * XXX: This is copied from demote_folio_list as renamed as migrate_folio_list.
+ * Take folios on @migrate_folios and attempt to migrate them to another node.
+ * Folios which are not migrated are left on @migrate_folios.
+ */
+static unsigned int migrate_folio_list(struct list_head *migrate_folios,
+  struct pglist_data *pgdat,
+  enum migration_mode mm)
+{
+   int target_nid = next_demotion_node(pgdat->node_id);
+   unsigned int nr_succeeded;
+   nodemask_t allowed_mask;
+
+   struct migration_target_control mtc = {
+   /*
+* Allocate from 'node', or fail quickly and quietly.
+* When this happens, 'page' will likely just be discarded
+* instead of migrated.
+*/
+   .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 
__GFP_NOWARN |
+   __GFP_NOMEMALLOC | GFP_NOWAIT,
+   .nid = target_nid,
+   .nmask = _mask
+   };
+
+   if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
+   return 0;
+
+   if (list_empty(migrate_folios))
+   return 0;
+
+   node_get_allowed_targets(pgdat, _mask);
+
+   /* Migration ignores all cpuset and mempolicy settings */
+   migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
+ (unsigned long), MIGRATE_ASYNC, MR_DEMOTION,
+ _succeeded);
+
+   __count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+
+   return nr_succeeded;
+}
+
+enum folio_references {
+   FOLIOREF_RECLAIM,
+   FOLIOREF_KEEP,
+   FOLIOREF_ACTIVATE,
+};
+
+/*
+ * XXX: This is just copied and simplified from folio_check_references at
+ *  mm/vmscan.c but without having scan_control.
+ */
+static enum folio_references folio_check_references(struct folio *folio)
+{
+   int referenced_ptes, referenced_folio;
+   unsigned long vm_flags;
+
+   referenced_ptes = folio_referenced(folio, 1, NULL, _flags);
+   referenced_folio = folio_test_clear_referenced(folio);
+
+   /* rmap lock contention: rotate */
+   if (referenced_ptes == -1)
+   return FOLIOREF_KEEP;
+
+   if (referenced_ptes) {
+   /*
+* All mapped folios start out with page table
+* references from the instantiating fault, so we need
+* to look twice if a mapped file/anon folio is used more
+* than once.
+*
+* Mark it and spare it for another trip around the
+* inactive list.  Another page table reference will
+* lead to its activation.
+*
+* Note: the mark is set for activated folios as well
+* so that recently deactivated but used folios are
+* quickly recovered.
+*/
+   folio_set_referenced(folio);
+
+   if (referenced_folio || referenced_ptes > 

[RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-02-26 Thread Honggyu Kim
- - - - | 1.18
  default  |-  1.16  1.15  1.17  1.18  1.16  1.18  1.15 | 1.17
  DAMON 2-tier |-  1.04  1.04  1.05  1.05  1.06  1.05  1.06 | 1.05
  =++=
  CXL usage of redis-server in GB   | AVERAGE
  -++-
  DRAM-only|  0.0 - - - - - - - |  0.0
  CXL-only | 52.6 - - - - - - - | 52.6
  default  |-  19.3  26.1  32.2  38.5  44.6  50.5  50.6 | 37.4
  DAMON 2-tier |-   1.3   3.8   7.0   4.1   9.4  12.5  16.7 |  7.8
  =++=

In summary of both results, our evaluation shows that "DAMON 2-tier"
memory management reduces the performance slowdown compared to the
"default" memory policy from 15~17% to 4~5% when the system runs with
high memory pressure on its fast tier DRAM nodes.

The similar evaluation was done in another machine that has 256GB of
local DRAM and 96GB of CXL memory.  The performance slowdown is reduced
from 20~24% for "default" to 5~7% for "DAMON 2-tier".

Having these DAMOS_DEMOTE and DAMOS_PROMOTE actions can make 2-tier
memory systems run more efficiently under high memory pressures.

Signed-off-by: Honggyu Kim 
Signed-off-by: Hyeongtak Ji 
Signed-off-by: Rakie Kim 

[1] https://lore.kernel.org/damon/20231112195602.61525-1...@kernel.org
[2] https://github.com/skhynix/hmsdk
[3] https://github.com/redis/redis/tree/7.0.0
[4] https://github.com/brianfrankcooper/YCSB/tree/0.17.0
[5] https://dl.acm.org/doi/10.1145/3503222.3507731
[6] https://dl.acm.org/doi/10.1145/3582016.3582063

Changes from RFC:
  1. Move most of implementation from mm/vmscan.c to mm/damon/paddr.c.
  2. Simplify some functions of vmscan.c and used in paddr.c, but need
 to be reviewed more in depth.
  3. Refactor most functions for common usage for both promote and
 demote actions and introduce an enum migration_mode for its control.
  4. Add "target_nid" sysfs knob for migration destination node for both
 promote and demote actions.
  5. Move DAMOS_PROMOTE before DAMOS_DEMOTE and move then even above
 DAMOS_STAT.

Honggyu Kim (3):
  mm/damon: refactor DAMOS_PAGEOUT with migration_mode
  mm: make alloc_demote_folio externally invokable for migration
  mm/damon: introduce DAMOS_DEMOTE action for demotion

Hyeongtak Ji (4):
  mm/memory-tiers: add next_promotion_node to find promotion target
  mm/damon: introduce DAMOS_PROMOTE action for promotion
  mm/damon/sysfs-schemes: add target_nid on sysfs-schemes
  mm/damon/sysfs-schemes: apply target_nid for promote and demote
actions

 include/linux/damon.h  |  15 +-
 include/linux/memory-tiers.h   |  11 ++
 include/linux/migrate_mode.h   |   1 +
 include/linux/vm_event_item.h  |   1 +
 include/trace/events/migrate.h |   3 +-
 mm/damon/core.c|   5 +-
 mm/damon/dbgfs.c   |   2 +-
 mm/damon/lru_sort.c|   3 +-
 mm/damon/paddr.c   | 282 -
 mm/damon/reclaim.c |   3 +-
 mm/damon/sysfs-schemes.c   |  39 -
 mm/internal.h  |   1 +
 mm/memory-tiers.c  |  43 +
 mm/vmscan.c|  10 +-
 mm/vmstat.c|   1 +
 15 files changed, 404 insertions(+), 16 deletions(-)


base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
-- 
2.34.1




[PATCH v2 1/7] mm/damon: refactor DAMOS_PAGEOUT with migration_mode

2024-02-26 Thread Honggyu Kim
This is a preparation patch that introduces migration modes.

The damon_pa_pageout is renamed to damon_pa_migrate and it receives an
extra argument for migration_mode.

No functional changes applied.

Signed-off-by: Honggyu Kim 
---
 mm/damon/paddr.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 081e2a325778..277a1c4d833c 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -224,7 +224,12 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
return false;
 }
 
-static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
+enum migration_mode {
+   MIG_PAGEOUT,
+};
+
+static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
+ enum migration_mode mm)
 {
unsigned long addr, applied;
LIST_HEAD(folio_list);
@@ -249,7 +254,14 @@ static unsigned long damon_pa_pageout(struct damon_region 
*r, struct damos *s)
 put_folio:
folio_put(folio);
}
-   applied = reclaim_pages(_list);
+   switch (mm) {
+   case MIG_PAGEOUT:
+   applied = reclaim_pages(_list);
+   break;
+   default:
+   /* Unexpected migration mode. */
+   return 0;
+   }
cond_resched();
return applied * PAGE_SIZE;
 }
@@ -297,7 +309,7 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx 
*ctx,
 {
switch (scheme->action) {
case DAMOS_PAGEOUT:
-   return damon_pa_pageout(r, scheme);
+   return damon_pa_migrate(r, scheme, MIG_PAGEOUT);
case DAMOS_LRU_PRIO:
return damon_pa_mark_accessed(r, scheme);
case DAMOS_LRU_DEPRIO:
-- 
2.34.1




[PATCH v2 6/7] mm/damon/sysfs-schemes: add target_nid on sysfs-schemes

2024-02-26 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch adds target_nid under
  /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes/

target_nid can be used as the destination node for DAMOS actions such as
DAMOS_DEMOTE or DAMOS_PROMOTE in the future.

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h| 11 ++-
 mm/damon/core.c  |  5 -
 mm/damon/dbgfs.c |  2 +-
 mm/damon/lru_sort.c  |  3 ++-
 mm/damon/reclaim.c   |  3 ++-
 mm/damon/sysfs-schemes.c | 37 -
 6 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index d7e52d0228b4..4d270956dbd0 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -321,6 +321,7 @@ struct damos_access_pattern {
  * @apply_interval_us: The time between applying the @action.
  * @quota: Control the aggressiveness of this scheme.
  * @wmarks:Watermarks for automated (in)activation of this scheme.
+ * @target_nid:Destination node if @action is "promote" or 
"demote".
  * @filters:   Additional set of  damos_filter for 
  * @stat:  Statistics of this scheme.
  * @list:  List head for siblings.
@@ -336,6 +337,10 @@ struct damos_access_pattern {
  * monitoring context are inactive, DAMON stops monitoring either, and just
  * repeatedly checks the watermarks.
  *
+ * @target_nid is used to set the destination node for promote or demote
+ * actions, which means it's only meaningful when @action is either "promote" 
or
+ * "demote".
+ *
  * Before applying the  to a memory region,  damon_operations
  * implementation could check pages of the region and skip  to respect
  * 
@@ -357,6 +362,9 @@ struct damos {
 /* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
+   union {
+   int target_nid;
+   };
struct list_head filters;
struct damos_stat stat;
struct list_head list;
@@ -661,7 +669,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
-   struct damos_watermarks *wmarks);
+   struct damos_watermarks *wmarks,
+   int target_nid);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3a05e71509b9..0c2472818fb9 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -316,7 +316,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
-   struct damos_watermarks *wmarks)
+   struct damos_watermarks *wmarks,
+   int target_nid)
 {
struct damos *scheme;
 
@@ -341,6 +342,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
scheme->wmarks = *wmarks;
scheme->wmarks.activated = true;
 
+   scheme->target_nid = target_nid;
+
return scheme;
 }
 
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index dc0ea1fc30ca..29b427dd1186 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -279,7 +279,7 @@ static struct damos **str_to_schemes(const char *str, 
ssize_t len,
 
pos += parsed;
scheme = damon_new_scheme(, action, 0, ,
-   );
+   , NUMA_NO_NODE);
if (!scheme)
goto fail;
 
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index f2e5f9431892..fd0492637fce 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -163,7 +163,8 @@ static struct damos *damon_lru_sort_new_scheme(
/* under the quota. */
,
/* (De)activate this according to the watermarks. */
-   _lru_sort_wmarks);
+   _lru_sort_wmarks,
+   NUMA_NO_NODE);
 }
 
 /* Create a DAMON-based operation scheme for hot memory regions */
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index ab974e477d2f..973ac5df84eb 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -147,7 +147,8 @@ static struct damos *damon_reclaim_new_scheme(void)
/* under the quota. */
_reclaim_quota,
/* (De)activate this according to the watermarks. */
-   _reclaim_wmarks);
+   _reclaim_wmarks,
+   NUMA_NO_NODE);
 }
 
 static int damon_reclaim_apply_parameters(void)
diff --git a/mm/damon

[PATCH v2 5/7] mm/damon: introduce DAMOS_PROMOTE action for promotion

2024-02-26 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch introduces DAMOS_PROMOTE action for paddr mode.

It includes renaming alloc_demote_folio to alloc_migrate_folio to use it
for promotion as well.

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h  |  2 ++
 include/linux/migrate_mode.h   |  1 +
 include/linux/vm_event_item.h  |  1 +
 include/trace/events/migrate.h |  3 ++-
 mm/damon/paddr.c   | 45 --
 mm/damon/sysfs-schemes.c   |  1 +
 mm/vmstat.c|  1 +
 7 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 86e66772766b..d7e52d0228b4 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
+ * @DAMOS_PROMOTE:  Do promotion for the given region.
  * @DAMOS_DEMOTE:   Do demotion for the given region.
  * @DAMOS_STAT:Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
@@ -123,6 +124,7 @@ enum damos_action {
DAMOS_NOHUGEPAGE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
+   DAMOS_PROMOTE,
DAMOS_DEMOTE,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index f37cc03f9369..63f75eb9abf3 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -29,6 +29,7 @@ enum migrate_reason {
MR_CONTIG_RANGE,
MR_LONGTERM_PIN,
MR_DEMOTION,
+   MR_PROMOTION,
MR_TYPES
 };
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8abfa1240040..63cf920afeaa 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+   PGPROMOTE,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 0190ef725b43..f0dd569c1e62 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -22,7 +22,8 @@
EM( MR_NUMA_MISPLACED,  "numa_misplaced")   \
EM( MR_CONTIG_RANGE,"contig_range") \
EM( MR_LONGTERM_PIN,"longterm_pin") \
-   EMe(MR_DEMOTION,"demotion")
+   EM( MR_DEMOTION,"demotion") \
+   EMe(MR_PROMOTION,   "promotion")
 
 /*
  * First define the enums in the above macros to be exported to userspace
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 23e37ce57202..37a7b34a36dd 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -229,6 +229,7 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
 
 enum migration_mode {
MIG_PAGEOUT,
+   MIG_PROMOTE,
MIG_DEMOTE,
 };
 
@@ -241,9 +242,26 @@ static unsigned int migrate_folio_list(struct list_head 
*migrate_folios,
   struct pglist_data *pgdat,
   enum migration_mode mm)
 {
-   int target_nid = next_demotion_node(pgdat->node_id);
+   int target_nid;
unsigned int nr_succeeded;
nodemask_t allowed_mask;
+   int reason;
+   enum vm_event_item vm_event;
+
+   switch (mm) {
+   case MIG_PROMOTE:
+   target_nid = next_promotion_node(pgdat->node_id);
+   reason = MR_PROMOTION;
+   vm_event = PGPROMOTE;
+   break;
+   case MIG_DEMOTE:
+   target_nid = next_demotion_node(pgdat->node_id);
+   reason = MR_DEMOTION;
+   vm_event = PGDEMOTE_DIRECT;
+   break;
+   default:
+   return 0;
+   }
 
struct migration_target_control mtc = {
/*
@@ -263,14 +281,19 @@ static unsigned int migrate_folio_list(struct list_head 
*migrate_folios,
if (list_empty(migrate_folios))
return 0;
 
-   node_get_allowed_targets(pgdat, _mask);
+   if (mm == MIG_DEMOTE) {
+   node_get_allowed_targets(pgdat, _mask);
+   } else if (mm == MIG_PROMOTE) {
+   /* TODO: Need to add upper_tier_mask at struct memory_tier. */
+   allowed_mask = NODE_MASK_NONE;
+   }
 
/* Migration ignores all cpuset and mempolicy settings */
migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
- (unsigned long), MIGRATE_ASY

[PATCH v2 7/7] mm/damon/sysfs-schemes: apply target_nid for promote and demote actions

2024-02-26 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch changes DAMOS_PROMOTE and DAMOS_DEMOTE to use target_nid of
sysfs as the destination NUMA node of migration.  This has been tested
on qemu as follows:

  $ cd /sys/kernel/mm/damon/admin/kdamonds/
  $ cat contexts//schemes//action
  promote
  $ echo 1 > contexts//schemes//target_nid
  $ echo commit > state
  $ numactl -p 2 ./hot_cold 500M 600M &
  $ numastat -c -p hot_cold

  Per-node process memory usage (in MBs)
  PID Node 0 Node 1 Node 2 Total
  --  -- -- -- -
  701 (hot_cold)   0501601  1101

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 mm/damon/paddr.c | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 37a7b34a36dd..5e057a69464f 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -240,9 +240,9 @@ enum migration_mode {
  */
 static unsigned int migrate_folio_list(struct list_head *migrate_folios,
   struct pglist_data *pgdat,
-  enum migration_mode mm)
+  enum migration_mode mm,
+  int target_nid)
 {
-   int target_nid;
unsigned int nr_succeeded;
nodemask_t allowed_mask;
int reason;
@@ -250,12 +250,14 @@ static unsigned int migrate_folio_list(struct list_head 
*migrate_folios,
 
switch (mm) {
case MIG_PROMOTE:
-   target_nid = next_promotion_node(pgdat->node_id);
+   if (target_nid == NUMA_NO_NODE)
+   target_nid = next_promotion_node(pgdat->node_id);
reason = MR_PROMOTION;
vm_event = PGPROMOTE;
break;
case MIG_DEMOTE:
-   target_nid = next_demotion_node(pgdat->node_id);
+   if (target_nid == NUMA_NO_NODE)
+   target_nid = next_demotion_node(pgdat->node_id);
reason = MR_DEMOTION;
vm_event = PGDEMOTE_DIRECT;
break;
@@ -358,7 +360,8 @@ static enum folio_references folio_check_references(struct 
folio *folio)
  */
 static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list,
struct pglist_data *pgdat,
-   enum migration_mode mm)
+   enum migration_mode mm,
+   int target_nid)
 {
unsigned int nr_migrated = 0;
struct folio *folio;
@@ -399,7 +402,7 @@ static unsigned int damon_pa_migrate_folio_list(struct 
list_head *folio_list,
/* 'folio_list' is always empty here */
 
/* Migrate folios selected for migration */
-   nr_migrated += migrate_folio_list(_folios, pgdat, mm);
+   nr_migrated += migrate_folio_list(_folios, pgdat, mm, 
target_nid);
/* Folios that could not be migrated are still in @migrate_folios */
if (!list_empty(_folios)) {
/* Folios which weren't migrated go back on @folio_list */
@@ -426,7 +429,8 @@ static unsigned int damon_pa_migrate_folio_list(struct 
list_head *folio_list,
  *  common function for both cases.
  */
 static unsigned long damon_pa_migrate_pages(struct list_head *folio_list,
-   enum migration_mode mm)
+   enum migration_mode mm,
+   int target_nid)
 {
int nid;
unsigned int nr_migrated = 0;
@@ -449,12 +453,14 @@ static unsigned long damon_pa_migrate_pages(struct 
list_head *folio_list,
}
 
nr_migrated += damon_pa_migrate_folio_list(_folio_list,
-  NODE_DATA(nid), mm);
+  NODE_DATA(nid), mm,
+  target_nid);
nid = folio_nid(lru_to_folio(folio_list));
} while (!list_empty(folio_list));
 
nr_migrated += damon_pa_migrate_folio_list(_folio_list,
-  NODE_DATA(nid), mm);
+  NODE_DATA(nid), mm,
+  target_nid);
 
memalloc_noreclaim_restore(noreclaim_flag);
 
@@ -499,7 +505,8 @@ static unsigned long damon_pa_migrate(struct damon_region 
*r, struct damos *s,
break;
case MIG_PROMOTE:
case MIG_DEMOTE:
-   applied = damon_pa_migrate_pages(_list, mm);
+   applied = damon_pa_migrate_pages(_list, mm,
+s->target_nid);
break;
default:
/* Unexpected migration mode. */
-- 
2.34.1




[PATCH v2 4/7] mm/memory-tiers: add next_promotion_node to find promotion target

2024-02-26 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch adds next_promotion_node that can be used to identify the
appropriate promotion target based on memory tiers.  When multiple
promotion target nodes are available, the nearest node is selected based
on numa distance.

Signed-off-by: Hyeongtak Ji 
---
 include/linux/memory-tiers.h | 11 +
 mm/memory-tiers.c| 43 
 2 files changed, 54 insertions(+)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 1e39d27bee41..0788e435fc50 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -50,6 +50,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs 
*perf,
 int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist);
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node);
+int next_promotion_node(int node);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
 #else
@@ -58,6 +59,11 @@ static inline int next_demotion_node(int node)
return NUMA_NO_NODE;
 }
 
+static inline int next_promotion_node(int node)
+{
+   return NUMA_NO_NODE;
+}
+
 static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t 
*targets)
 {
*targets = NODE_MASK_NONE;
@@ -101,6 +107,11 @@ static inline int next_demotion_node(int node)
return NUMA_NO_NODE;
 }
 
+static inline int next_promotion_node(int node)
+{
+   return NUMA_NO_NODE;
+}
+
 static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t 
*targets)
 {
*targets = NODE_MASK_NONE;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 8d5291add2bc..0060ee571cf4 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -335,6 +335,49 @@ int next_demotion_node(int node)
return target;
 }
 
+/*
+ * Select a promotion target that is close to the from node among the given
+ * two nodes.
+ *
+ * TODO: consider other decision policy as node_distance may not be precise.
+ */
+static int select_promotion_target(int a, int b, int from)
+{
+   if (node_distance(from, a) < node_distance(from, b))
+   return a;
+   else
+   return b;
+}
+
+/**
+ * next_promotion_node() - Get the next node in the promotion path
+ * @node: The starting node to lookup the next node
+ *
+ * Return: node id for next memory node in the promotion path hierarchy
+ * from @node; NUMA_NO_NODE if @node is the toptier.
+ */
+int next_promotion_node(int node)
+{
+   int target = NUMA_NO_NODE;
+   int nid;
+
+   if (node_is_toptier(node))
+   return NUMA_NO_NODE;
+
+   rcu_read_lock();
+   for_each_node_state(nid, N_MEMORY) {
+   if (node_isset(node, node_demotion[nid].preferred)) {
+   if (target == NUMA_NO_NODE)
+   target = nid;
+   else
+   target = select_promotion_target(nid, target, 
node);
+   }
+   }
+   rcu_read_unlock();
+
+   return target;
+}
+
 static void disable_all_demotion_targets(void)
 {
struct memory_tier *memtier;
-- 
2.34.1




Re: Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-08 Thread Honggyu Kim
Hi SeongJae,

I couldn't send email to LKML properly due to internal system issues,
but it's better now so I will be able to communicate better.

On Wed,  6 Mar 2024 19:05:50 -0800 SeongJae Park  wrote:
> 
> Hello,
> 
> On Tue, 27 Feb 2024 15:51:20 -0800 SeongJae Park  wrote:
> 
> > On Mon, 26 Feb 2024 23:05:46 +0900 Honggyu Kim  wrote:
> > 
> > > There was an RFC IDEA "DAMOS-based Tiered-Memory Management" previously
> > > posted at [1].
> > > 
> > > It says there is no implementation of the demote/promote DAMOS action
> > > are made.  This RFC is about its implementation for physical address
> > > space.
> [...]
> > Honggyu joined DAMON Beer/Coffee/Tea Chat[1] yesterday, and we discussed 
> > about
> > this patchset in high level.  Sharing the summary here for open discussion. 
> >  As
> > also discussed on the first version of this patchset[2], we want to make 
> > single
> > action for general page migration with minimum changes, but would like to 
> > keep
> > page level access re-check.  We also agreed the previously proposed DAMOS
> > filter-based approach could make sense for the purpose.
> > 
> > Because I was anyway planning making such DAMOS filter for not only
> > promotion/demotion but other types of DAMOS action, I will start developing 
> > the
> > page level access re-check results based DAMOS filter.  Once the 
> > implementation
> > of the prototype is done, I will share the early implementation.  Then, 
> > Honggyu
> > will adjust their implementation based on the filter, and run their tests 
> > again
> > and share the results.
> 
> I just posted an RFC patchset for the page level access re-check DAMOS filter:
> https://lore.kernel.org/r/20240307030013.47041-1...@kernel.org
> 
> I hope it to help you better understanding and testing the idea.

Thanks very much for your work! I will test it based on your changes.

Thanks,
Honggyu

> 
> > 
> > [1] https://lore.kernel.org/damon/20220810225102.124459-1...@kernel.org/
> > [2] https://lore.kernel.org/damon/20240118171756.80356-1...@kernel.org
> 
> 
> Thanks,
> SJ
> 
> [...]
> 
> 
> 



Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-18 Thread Honggyu Kim
Hi SeongJae,

On Sun, 17 Mar 2024 08:31:44 -0700 SeongJae Park  wrote:
> Hi Honggyu,
> 
> On Sun, 17 Mar 2024 17:36:29 +0900 Honggyu Kim  wrote:
> 
> > Hi SeongJae,
> > 
> > Thanks for the confirmation.  I have a few comments on young filter so
> > please read the inline comments again.
> > 
> > On Wed, 12 Mar 2024 08:53:00 -0800 SeongJae Park  wrote:
> > > Hi Honggyu,
> > > 
> > > > > -Original Message-
> > > > > From: SeongJae Park 
> > > > > Sent: Tuesday, March 12, 2024 3:33 AM
> > > > > To: Honggyu Kim 
> > > > > Cc: SeongJae Park ; kernel_team 
> > > > > 
> > > > > Subject: RE: Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory 
> > > > > management for CXL memory
> > > > >
> > > > > Hi Honggyu,
> > > > >
> > > > > On Mon, 11 Mar 2024 12:51:12 + "honggyu@sk.com" 
> > > > >  wrote:
> > > > >
> > > > > > Hi SeongJae,
> > > > > >
> > > > > > I've tested it again and found that "young" filter has to be set
> > > > > > differently as follows.
> > > > > > - demote action: set "young" filter with "matching" true
> > > > > > - promote action: set "young" filter with "matching" false

Thinking it again, I feel like "matching" true or false looks quite
vague to me as a general user.

Instead, I would like to have more meaningful names for "matching" as
follows.

- matching "true" can be either (filter) "out" or "skip".
- matching "false" can be either (filter) "in" or "apply".

Internally, the type of "matching" can be boolean, but it'd be better
for general users have another ways to set it such as "out"/"in" or
"skip"/"apply" via sysfs interface.  I prefer "skip" and "apply" looks
more intuitive, but I don't have strong objection on "out" and "in" as
well.

I also feel the filter name "young" is more for developers not for
general users.  I think this can be changed to "accessed" filter
instead.

The demote and promote filters can be set as follows using these.

- demote action: set "accessed" filter with "matching" to "skip"
- promote action: set "accessed" filter with "matching" to "apply"

I also think that you can feel this is more complicated so I would like
to hear how you think about this.

> > > > >
> > > > > DAMOS filter is basically for filtering "out" memory regions that 
> > > > > matches to
> > > > > the condition.

Right.  In other tools, I see filters more often used as filtering "in"
rather than filtering "out", which makes this more confusing to me.

> > > > > Hence in your setup, young pages are not filtered out from
> > > > > demote action target.
> > > > 
> > > > I thought young filter true means "young pages ARE filtered out" for 
> > > > demotion.
> > > 
> > > You're correct.
> > 
> > Ack.
> > 
> > > > 
> > > > > That is, you're demoting pages that "not" young.
> > > > 
> > > > Your explanation here looks opposite to the previous statement.
> > > 
> > > Again, you're correct.  My intention was "non-young pages are not ..." but
> > > maybe I was out of my mind and mistakenly removed "non-" part.  Sorry for 
> > > the
> > > confusion.
> > 
> > No problem.  I also think it's quite confusing.
> > 
> > > > 
> > > > > And vice versa, so you're applying promote to non-non-young (young) 
> > > > > pages.
> > > > 
> > > > Yes, I understand "promote non-non-young pages" means "promote young 
> > > > pages".
> > > > This might be understood as "young pages are NOT filtered out" for 
> > > > promotion
> > > > but it doesn't mean that "old pages are filtered out" instead.
> > > > And we just rely hot detection only on DAMOS logics such as access 
> > > > frequency
> > > > and age. Am I correct?
> > > 
> > > You're correct.
> > 
> > Ack.  But if it doesn't mean that "old pages are filtered out" instead,
> 
> It does mean that.  Here, f

Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-18 Thread Honggyu Kim
Hi SeongJae,

On Sun, 17 Mar 2024 12:13:57 -0700 SeongJae Park  wrote:
> On Sun, 17 Mar 2024 08:31:44 -0700 SeongJae Park  wrote:
> 
> > Hi Honggyu,
> > 
> > On Sun, 17 Mar 2024 17:36:29 +0900 Honggyu Kim  wrote:
> > 
> > > Hi SeongJae,
> > > 
> > > Thanks for the confirmation.  I have a few comments on young filter so
> > > please read the inline comments again.
> > > 
> > > On Wed, 12 Mar 2024 08:53:00 -0800 SeongJae Park  wrote:
> > > > Hi Honggyu,
> [...]
> > > Thanks.  I see that it works fine, but I would like to have more
> > > discussion about "young" filter.  What I think about filter is that if I
> > > apply "young" filter "true" for demotion, then the action applies only
> > > for "young" pages, but the current implementation works opposite.
> > > 
> > > I understand the function name of internal implementation is
> > > "damos_pa_filter_out" so the basic action is filtering out, but the
> > > cgroup filter works in the opposite way for now.
> > 
> > Does memcg filter works in the opposite way?  I don't think so because
> > __damos_pa_filter_out() sets 'matches' as 'true' only if the the given 
> > folio is
> > contained in the given memcg.  'young' filter also simply sets 'matches' as
> > 'true' only if the given folio is young.
> > 
> > If it works in the opposite way, it's a bug that need to be fixed.  Please 
> > let
> > me know if I'm missing something.
> 
> I just read the DAMOS filters part of the documentation for DAMON sysfs
> interface again.  I believe it is explaining the meaning of 'matching' as I
> intended to, as below:
> 
> You can write ``Y`` or ``N`` to ``matching`` file to filter out pages 
> that does
> or does not match to the type, respectively.  Then, the scheme's action 
> will
> not be applied to the pages that specified to be filtered out.
> 
> But, I found the following example for memcg filter is wrong, as below:
> 
> For example, below restricts a DAMOS action to be applied to only 
> non-anonymous
> pages of all memory cgroups except ``/having_care_already``.::
> 
> # echo 2 > nr_filters
> # # filter out anonymous pages
> echo anon > 0/type
> echo Y > 0/matching
> # # further filter out all cgroups except one at 
> '/having_care_already'
> echo memcg > 1/type
> echo /having_care_already > 1/memcg_path
> echo N > 1/matching
> 
> Specifically, the last line of the commands should write 'Y' instead of 'N' to
> do what explained.  Without the fix, the action will be applied to only
> non-anonymous pages of 'having_care_already' memcg.  This is definitely wrong.
> I will fix this soon.  I'm unsure if this is what made you to believe memcg
> DAMOS filter is working in the opposite way, though.

I got confused not because of this.  I just think it again that this
user interface is better to be more intuitive as I mentioned in the
previous thread.

Thanks,
Honggyu

> 
> Thanks,
> SJ
> 
> [...]



Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-22 Thread Honggyu Kim
Hi SeongJae,

On Wed, 20 Mar 2024 09:56:19 -0700 SeongJae Park  wrote:
> Hi Honggyu,
> 
> On Wed, 20 Mar 2024 16:07:48 +0900 Honggyu Kim  wrote:
> 
> > Hi SeongJae,
> > 
> > On Mon, 18 Mar 2024 12:07:21 -0700 SeongJae Park  wrote:
> > > On Mon, 18 Mar 2024 22:27:45 +0900 Honggyu Kim  wrote:
> > > 
> > > > Hi SeongJae,
> > > > 
> > > > On Sun, 17 Mar 2024 08:31:44 -0700 SeongJae Park  
> > > > wrote:
> > > > > Hi Honggyu,
> > > > > 
> > > > > On Sun, 17 Mar 2024 17:36:29 +0900 Honggyu Kim  
> > > > > wrote:
> > > > > 
> > > > > > Hi SeongJae,
> > > > > > 
> > > > > > Thanks for the confirmation.  I have a few comments on young filter 
> > > > > > so
> > > > > > please read the inline comments again.
> > > > > > 
> > > > > > On Wed, 12 Mar 2024 08:53:00 -0800 SeongJae Park  
> > > > > > wrote:
> > > > > > > Hi Honggyu,
> > > > > > > 
> > > > > > > > > -Original Message-
> > > > > > > > > From: SeongJae Park 
> > > > > > > > > Sent: Tuesday, March 12, 2024 3:33 AM
> > > > > > > > > To: Honggyu Kim 
> > > > > > > > > Cc: SeongJae Park ; kernel_team 
> > > > > > > > > 
> > > > > > > > > Subject: RE: Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory 
> > > > > > > > > management for CXL memory
> > > > > > > > >
> > > > > > > > > Hi Honggyu,
> > > > > > > > >
> > > > > > > > > On Mon, 11 Mar 2024 12:51:12 + "honggyu@sk.com" 
> > > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > > Hi SeongJae,
> > > > > > > > > >
> > > > > > > > > > I've tested it again and found that "young" filter has to 
> > > > > > > > > > be set
> > > > > > > > > > differently as follows.
> > > > > > > > > > - demote action: set "young" filter with "matching" true
> > > > > > > > > > - promote action: set "young" filter with "matching" false
> > > > 
> > > > Thinking it again, I feel like "matching" true or false looks quite
> > > > vague to me as a general user.
> > > > 
> > > > Instead, I would like to have more meaningful names for "matching" as
> > > > follows.
> > > > 
> > > > - matching "true" can be either (filter) "out" or "skip".
> > > > - matching "false" can be either (filter) "in" or "apply".
> > > 
> > > I agree the naming could be done much better.  And thank you for the nice
> > > suggestions.  I have a few concerns, though.
> > 
> > I don't think my suggestion is best.  I just would like to have more
> > discussion about it.
> 
> I also understand my naming sense is far from good :)  I'm grateful to have
> this constructive discussion!

Yeah, naming is always difficult. Thanks anyway :)

> > 
> > > Firstly, increasing the number of behavioral concepts.  DAMOS filter 
> > > feature
> > > has only single behavior: excluding some types of memory from DAMOS action
> > > target.  The "matching" is to provide a flexible way for further 
> > > specifying the
> > > target to exclude in a bit detail.  Without it, we would need non-variant 
> > > for
> > > each filter type.  Comapred to the current terms, the new terms feel like
> > > implying there are two types of behaviors.  I think one behavior is 
> > > easier to
> > > understand than two behaviors, and better match what internal code is 
> > > doing.
> > > 
> > > Secondly, ambiguity in "in" and "apply".  To me, the terms sound like 
> > > _adding_
> > > something more than _excluding_.
> > 
> > I understood that young filter "matching" "false" means apply action
> > only to young pages.  Do I misunderstood something here?  If not,
> 
> Technically speaking, having a DAMOS filter with 'matching' parameter as
> 

Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-22 Thread Honggyu Kim
Hi SeongJae,

On Tue, 27 Feb 2024 15:51:20 -0800 SeongJae Park  wrote:
> On Mon, 26 Feb 2024 23:05:46 +0900 Honggyu Kim  wrote:
> 
> > There was an RFC IDEA "DAMOS-based Tiered-Memory Management" previously
> > posted at [1].
> > 
> > It says there is no implementation of the demote/promote DAMOS action
> > are made.  This RFC is about its implementation for physical address
> > space.
> > 
> > 
> > Introduction
> > 
> > 
> > With the advent of CXL/PCIe attached DRAM, which will be called simply
> > as CXL memory in this cover letter, some systems are becoming more
> > heterogeneous having memory systems with different latency and bandwidth
> > characteristics.  They are usually handled as different NUMA nodes in
> > separate memory tiers and CXL memory is used as slow tiers because of
> > its protocol overhead compared to local DRAM.
> > 
> > In this kind of systems, we need to be careful placing memory pages on
> > proper NUMA nodes based on the memory access frequency.  Otherwise, some
> > frequently accessed pages might reside on slow tiers and it makes
> > performance degradation unexpectedly.  Moreover, the memory access
> > patterns can be changed at runtime.
> > 
> > To handle this problem, we need a way to monitor the memory access
> > patterns and migrate pages based on their access temperature.  The
> > DAMON(Data Access MONitor) framework and its DAMOS(DAMON-based Operation
> > Schemes) can be useful features for monitoring and migrating pages.
> > DAMOS provides multiple actions based on DAMON monitoring results and it
> > can be used for proactive reclaim, which means swapping cold pages out
> > with DAMOS_PAGEOUT action, but it doesn't support migration actions such
> > as demotion and promotion between tiered memory nodes.
> > 
> > This series supports two new DAMOS actions; DAMOS_DEMOTE for demotion
> > from fast tiers and DAMOS_PROMOTE for promotion from slow tiers.  This
> > prevents hot pages from being stuck on slow tiers, which makes
> > performance degradation and cold pages can be proactively demoted to
> > slow tiers so that the system can increase the chance to allocate more
> > hot pages to fast tiers.
> > 
> > The DAMON provides various tuning knobs but we found that the proactive
> > demotion for cold pages is especially useful when the system is running
> > out of memory on its fast tier nodes.
> > 
> > Our evaluation result shows that it reduces the performance slowdown
> > compared to the default memory policy from 15~17% to 4~5% when the
> > system runs under high memory pressure on its fast tier DRAM nodes.
> > 
> > 
> > DAMON configuration
> > ===
> > 
> > The specific DAMON configuration doesn't have to be in the scope of this
> > patch series, but some rough idea is better to be shared to explain the
> > evaluation result.
> > 
> > The DAMON provides many knobs for fine tuning but its configuration file
> > is generated by HMSDK[2].  It includes gen_config.py script that
> > generates a json file with the full config of DAMON knobs and it creates
> > multiple kdamonds for each NUMA node when the DAMON is enabled so that
> > it can run hot/cold based migration for tiered memory.
> 
> I was feeling a bit confused from here since DAMON doesn't receive parameters
> via a file.  To my understanding, the 'configuration file' means the input 
> file
> for DAMON user-space tool, damo, not DAMON.  Just a trivial thing, but making
> it clear if possible could help readers in my opinion.
> 
> > 
> > 
> > Evaluation Workload
> > ===
> > 
> > The performance evaluation is done with redis[3], which is a widely used
> > in-memory database and the memory access patterns are generated via
> > YCSB[4].  We have measured two different workloads with zipfian and
> > latest distributions but their configs are slightly modified to make
> > memory usage higher and execution time longer for better evaluation.
> > 
> > The idea of evaluation using these demote and promote actions covers
> > system-wide memory management rather than partitioning hot/cold pages of
> > a single workload.  The default memory allocation policy creates pages
> > to the fast tier DRAM node first, then allocates newly created pages to
> > the slow tier CXL node when the DRAM node has insufficient free space.
> > Once the page allocation is done then those pages never move between
> > NUMA nodes.  It's not true when using numa balancing, but it is not the
> > scope 

Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-17 Thread Honggyu Kim
Hi SeongJae,

Thanks for the confirmation.  I have a few comments on young filter so
please read the inline comments again.

On Wed, 12 Mar 2024 08:53:00 -0800 SeongJae Park  wrote:
> Hi Honggyu,
> 
> > > -Original Message-
> > > From: SeongJae Park 
> > > Sent: Tuesday, March 12, 2024 3:33 AM
> > > To: Honggyu Kim 
> > > Cc: SeongJae Park ; kernel_team 
> > > Subject: RE: Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management 
> > > for CXL memory
> > >
> > > Hi Honggyu,
> > >
> > > On Mon, 11 Mar 2024 12:51:12 + "honggyu@sk.com" 
> > >  wrote:
> > >
> > > > Hi SeongJae,
> > > >
> > > > I've tested it again and found that "young" filter has to be set
> > > > differently as follows.
> > > > - demote action: set "young" filter with "matching" true
> > > > - promote action: set "young" filter with "matching" false
> > >
> > > DAMOS filter is basically for filtering "out" memory regions that matches 
> > > to
> > > the condition.  Hence in your setup, young pages are not filtered out from
> > > demote action target.
> > 
> > I thought young filter true means "young pages ARE filtered out" for 
> > demotion.
> 
> You're correct.

Ack.

> > 
> > > That is, you're demoting pages that "not" young.
> > 
> > Your explanation here looks opposite to the previous statement.
> 
> Again, you're correct.  My intention was "non-young pages are not ..." but
> maybe I was out of my mind and mistakenly removed "non-" part.  Sorry for the
> confusion.

No problem.  I also think it's quite confusing.

> > 
> > > And vice versa, so you're applying promote to non-non-young (young) pages.
> > 
> > Yes, I understand "promote non-non-young pages" means "promote young pages".
> > This might be understood as "young pages are NOT filtered out" for promotion
> > but it doesn't mean that "old pages are filtered out" instead.
> > And we just rely hot detection only on DAMOS logics such as access frequency
> > and age. Am I correct?
> 
> You're correct.

Ack.  But if it doesn't mean that "old pages are filtered out" instead,
then do we really need this filter for promotion?  If not, maybe we
should create another "old" filter for promotion?  As of now, the promotion
is mostly done inaccurately, but the accurate migration is done at
demotion level.  To avoid this issue, I feel we should promote only
"young" pages after filtering "old" pages out.

> > 
> > > I understand this is somewhat complex, but what we have for now.
> > 
> > Thanks for the explanation. I guess you mean my filter setup is correct.
> > Is it correct?
> 
> Again, you're correct.  Your filter setup is what I expected to :)

Thanks.  I see that it works fine, but I would like to have more
discussion about "young" filter.  What I think about filter is that if I
apply "young" filter "true" for demotion, then the action applies only
for "young" pages, but the current implementation works opposite.

I understand the function name of internal implementation is
"damos_pa_filter_out" so the basic action is filtering out, but the
cgroup filter works in the opposite way for now.

I would like to hear how you think about this.

> > 
> > > > Then, I see that "hot_cold" migrates hot/cold memory correctly.
> > >
> > > Thank you so much for sharing this great news!  My tests also show no bad
> > > signal so far.
> > >
> > > >
> > > > Could you please upload the "damon_folio_mkold" patch to LKML?
> > > > Then I will rebase our changes based on it and run the redis test again.
> > >
> > > I will do that soon.
> > 
> > Thanks a lot for sharing the RFC v2 for DAMOS young filter.
> > https://lore.kernel.org/damon/20240311204545.47097-1...@kernel.org/
> > 
> > I will rebase our work based on it and share the result.
> 
> Cool, looking forward to it!  Hopefully we will make it be merged into the
> mainline by v6.10!

I hope so.  Thanks for your help!

Honggyu



Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-20 Thread Honggyu Kim
Hi SeongJae,

On Mon, 18 Mar 2024 12:07:21 -0700 SeongJae Park  wrote:
> On Mon, 18 Mar 2024 22:27:45 +0900 Honggyu Kim  wrote:
> 
> > Hi SeongJae,
> > 
> > On Sun, 17 Mar 2024 08:31:44 -0700 SeongJae Park  wrote:
> > > Hi Honggyu,
> > > 
> > > On Sun, 17 Mar 2024 17:36:29 +0900 Honggyu Kim  wrote:
> > > 
> > > > Hi SeongJae,
> > > > 
> > > > Thanks for the confirmation.  I have a few comments on young filter so
> > > > please read the inline comments again.
> > > > 
> > > > On Wed, 12 Mar 2024 08:53:00 -0800 SeongJae Park  
> > > > wrote:
> > > > > Hi Honggyu,
> > > > > 
> > > > > > > -Original Message-
> > > > > > > From: SeongJae Park 
> > > > > > > Sent: Tuesday, March 12, 2024 3:33 AM
> > > > > > > To: Honggyu Kim 
> > > > > > > Cc: SeongJae Park ; kernel_team 
> > > > > > > 
> > > > > > > Subject: RE: Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory 
> > > > > > > management for CXL memory
> > > > > > >
> > > > > > > Hi Honggyu,
> > > > > > >
> > > > > > > On Mon, 11 Mar 2024 12:51:12 + "honggyu@sk.com" 
> > > > > > >  wrote:
> > > > > > >
> > > > > > > > Hi SeongJae,
> > > > > > > >
> > > > > > > > I've tested it again and found that "young" filter has to be set
> > > > > > > > differently as follows.
> > > > > > > > - demote action: set "young" filter with "matching" true
> > > > > > > > - promote action: set "young" filter with "matching" false
> > 
> > Thinking it again, I feel like "matching" true or false looks quite
> > vague to me as a general user.
> > 
> > Instead, I would like to have more meaningful names for "matching" as
> > follows.
> > 
> > - matching "true" can be either (filter) "out" or "skip".
> > - matching "false" can be either (filter) "in" or "apply".
> 
> I agree the naming could be done much better.  And thank you for the nice
> suggestions.  I have a few concerns, though.

I don't think my suggestion is best.  I just would like to have more
discussion about it.

> Firstly, increasing the number of behavioral concepts.  DAMOS filter feature
> has only single behavior: excluding some types of memory from DAMOS action
> target.  The "matching" is to provide a flexible way for further specifying 
> the
> target to exclude in a bit detail.  Without it, we would need non-variant for
> each filter type.  Comapred to the current terms, the new terms feel like
> implying there are two types of behaviors.  I think one behavior is easier to
> understand than two behaviors, and better match what internal code is doing.
> 
> Secondly, ambiguity in "in" and "apply".  To me, the terms sound like _adding_
> something more than _excluding_.

I understood that young filter "matching" "false" means apply the action
only to young pages.  Did I misunderstand something here?  If not,
"apply" means _adding_ or _including_ only the matched pages for the action.
It looks like you thought about _excluding_ non matched pages here.

> I think that might confuse people in some
> cases.  Actually, I have used the term "filter-out" and "filter-in" on
> this  and several threads.  When saying about "filter-in" scenario, I had to
> emphasize the fact that it is not adding something but excluding others.

Excluding others also means including matched pages.  I think we better
focus on what to do only for the matched pages.

> I now think that was not a good approach.
> 
> Finally, "apply" sounds a bit deterministic.  I think it could be a bit
> confusing in some cases such as when using multiple filters in a combined way.
> For example, if we have two filters for 1) "apply" a memcg and 2) skip anon
> pages, the given DAMOS action will not be applied to anon pages of the memcg.
> I think this might be a bit confusing.

No objection on this.  If so, I think "in" sounds better than "apply".

> > 
> > Internally, the type of "matching" can be boolean, but it'd be better
> > for general users have another ways to set it such as "out"/"in" or
> > "s

Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-03-25 Thread Honggyu Kim
Hi SeongJae,

On Fri, 22 Mar 2024 09:32:23 -0700 SeongJae Park  wrote:
> On Fri, 22 Mar 2024 18:02:23 +0900 Honggyu Kim  wrote:
> 
> > Hi SeongJae,
> > 
> > On Tue, 27 Feb 2024 15:51:20 -0800 SeongJae Park  wrote:
> > > On Mon, 26 Feb 2024 23:05:46 +0900 Honggyu Kim  wrote:
> > > 
> > > > There was an RFC IDEA "DAMOS-based Tiered-Memory Management" previously
> > > > posted at [1].
> > > > 
> > > > It says there is no implementation of the demote/promote DAMOS action
> > > > are made.  This RFC is about its implementation for physical address
> > > > space.
> > > > 
> > > > 
> [...]
> > > Thank you for running the tests again with the new version of the patches 
> > > and
> > > sharing the results!
> > 
> > It's a bit late answer, but the result was from the previous evaluation.
> > I ran it again with RFC v2, but didn't see much difference so just
> > pasted the same result here.
> 
> No problem, thank you for clarifying :)
> 
> [...]
> > > > Honggyu Kim (3):
> > > >   mm/damon: refactor DAMOS_PAGEOUT with migration_mode
> > > >   mm: make alloc_demote_folio externally invokable for migration
> > > >   mm/damon: introduce DAMOS_DEMOTE action for demotion
> > > > 
> > > > Hyeongtak Ji (4):
> > > >   mm/memory-tiers: add next_promotion_node to find promotion target
> > > >   mm/damon: introduce DAMOS_PROMOTE action for promotion
> > > >   mm/damon/sysfs-schemes: add target_nid on sysfs-schemes
> > > >   mm/damon/sysfs-schemes: apply target_nid for promote and demote
> > > > actions
> > > 
> > > Honggyu joined DAMON Beer/Coffee/Tea Chat[1] yesterday, and we discussed 
> > > about
> > > this patchset in high level.  Sharing the summary here for open 
> > > discussion.  As
> > > also discussed on the first version of this patchset[2], we want to make 
> > > single
> > > action for general page migration with minimum changes, but would like to 
> > > keep
> > > page level access re-check.  We also agreed the previously proposed DAMOS
> > > filter-based approach could make sense for the purpose.
> > 
> > Thanks very much for the summary.  I have been trying to merge promote
> > and demote actions into a single migrate action, but I found an issue
> > regarding damon_pa_scheme_score.  It currently calls damon_cold_score()
> > for demote action and damon_hot_score() for promote action, but what
> > should we call when we use a single migrate action?
> 
> Good point!  This is what I didn't think about when suggesting that.  Thank 
> you
> for letting me know this gap!  I think there could be two approach, off the 
> top
> of my head.
> 
> The first one would be extending the interface so that the user can select the
> score function.  This would let flexible usage, but I'm bit concerned if this
> could make things unnecessarily complex, and would really useful in many
> general use case.

I also think this looks complicated and may not be useful for general
users.

> The second approach would be letting DAMON infer the intention.  In this case,
> I think we could know the intention is the demotion if the scheme has a youg
> pages exclusion filter.  Then, we could use the cold_score().  And vice versa.
> To cover a case that there is no filter at all, I think we could have one
> assumption.  My humble intuition says the new action (migrate) may be used 
> more
> for promotion use case.  So, in damon_pa_scheme_score(), if the action of the
> given scheme is the new one (say, MIGRATE), the function will further check if
> the scheme has a filter for excluding young pages.  If so, the function will
> use cold_score().  Otherwise, the function will use hot_score().

Thanks for suggesting many ideas but I'm afraid that I feel this doesn't
look good.  Thinking it again, I think we can think about keep using
DAMOS_PROMOTE and DAMOS_DEMOTE, but I can make them directly call
damon_folio_young() for access check instead of using young filter.

And we can internally handle the complicated combination, such as having
the demote action set the "young" filter with "matching" true and the
promote action set the "young" filter with "matching" false.  IMHO, this
will make the usage simpler.

I would like to hear how you think about this.

> So I'd more prefer the second approach.  I think it would be not too late to
> consider the first approach after waiting for it turns out more actions have
> such ambiguity and need more general interface for explicitly set the score
> function.

I will join the DAMON Beer/Coffee/Tea Chat tomorrow as scheduled so I
can talk more about this issue.

Thanks,
Honggyu

> 
> Thanks,
> SJ
> 
> [...]



Re: [RFC PATCH v3 5/7] mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion

2024-04-09 Thread Honggyu Kim
Hi SeongJae,

On Mon,  8 Apr 2024 10:52:28 -0700 SeongJae Park  wrote:
> On Mon,  8 Apr 2024 21:06:44 +0900 Honggyu Kim  wrote:
> 
> > On Fri,  5 Apr 2024 12:24:30 -0700 SeongJae Park  wrote:
> > > On Fri,  5 Apr 2024 15:08:54 +0900 Honggyu Kim  wrote:
> [...]
> > > > Here is one of the example usage of this 'migrate_cold' action.
> > > > 
> > > >   $ cd /sys/kernel/mm/damon/admin/kdamonds/
> > > >   $ cat contexts//schemes//action
> > > >   migrate_cold
> > > >   $ echo 2 > contexts//schemes//target_nid
> > > >   $ echo commit > state
> > > >   $ numactl -p 0 ./hot_cold 500M 600M &
> > > >   $ numastat -c -p hot_cold
> > > > 
> > > >   Per-node process memory usage (in MBs)
> > > >   PID Node 0 Node 1 Node 2 Total
> > > >   --  -- -- -- -
> > > >   701 (hot_cold) 501  0601  1101
> > > > 
> > > > Since there are some common routines with pageout, many functions have
> > > > similar logics between pageout and migrate cold.
> > > > 
> > > > damon_pa_migrate_folio_list() is a minimized version of
> > > > shrink_folio_list(), but it's minified only for demotion.
> > > 
> > > MIGRATE_COLD is not only for demotion, right?  I think the last two words 
> > > are
> > > better to be removed for reducing unnecessary confuses.
> > 
> > You mean the last two sentences?  I will remove them if you feel it's
> > confusing.
> 
> Yes.  My real intended suggestion was 's/only for demotion/only for
> migration/', but entirely removing the sentences is also ok for me.

Ack.

> > 
> > > > 
> > > > Signed-off-by: Honggyu Kim 
> > > > Signed-off-by: Hyeongtak Ji 
> > > > ---
> > > >  include/linux/damon.h|   2 +
> > > >  mm/damon/paddr.c | 146 ++-
> > > >  mm/damon/sysfs-schemes.c |   4 ++
> > > >  3 files changed, 151 insertions(+), 1 deletion(-)
> [...]
> > > > --- a/mm/damon/paddr.c
> > > > +++ b/mm/damon/paddr.c
> [...]
> > > > +{
> > > > +   unsigned int nr_succeeded;
> > > > +   nodemask_t allowed_mask = NODE_MASK_NONE;
> > > > +
> > > 
> > > I personally prefer not having empty lines in the middle of variable
> > > declarations/definitions.  Could we remove this empty line?
> > 
> > I can remove it, but I would like to have more discussion about this
> > issue.  The current implementation allows only a single migration
> > target with "target_nid", but users might want to provide fall back
> > migration target nids.
> > 
> > For example, if more than two CXL nodes exist in the system, users might
> > want to migrate cold pages to any CXL nodes.  In such cases, we might
> > have to make "target_nid" accept comma separated node IDs.  nodemask can
> > be better but we should provide a way to change the scanning order.
> > 
> > I would like to hear how you think about this.
> 
> Good point.  I think we could later extend the sysfs file to receive the
> comma-separated numbers, or even mask.  For simplicity, adding sysfs files
> dedicated for the different format of inputs could also be an option (e.g.,
> target_nids_list, target_nids_mask).  But starting from this single node as is
> now looks ok to me.

If you think we can start from a single node, then I will keep it as is.
But are you okay if I change the same 'target_nid' to accept
comma-separated numbers later?  Or do you want to introduce another knob
such as 'target_nids_list'?  What about renaming 'target_nid' to
'target_nids' in the first place?

> [...]
> > > > +   /* 'folio_list' is always empty here */
> > > > +
> > > > +   /* Migrate folios selected for migration */
> > > > +   nr_migrated += migrate_folio_list(_folios, pgdat, 
> > > > target_nid);
> > > > +   /* Folios that could not be migrated are still in 
> > > > @migrate_folios */
> > > > +   if (!list_empty(_folios)) {
> > > > +   /* Folios which weren't migrated go back on @folio_list 
> > > > */
> > > > +   list_splice_init(_folios, folio_list);
> > > > +   }
> > > 
> > > Let's not use braces for single statement
> > > (https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces).
> > 
> > Hmm.. I know the conv

Re: [RFC PATCH v3 0/7] DAMON based tiered memory management for CXL

2024-04-09 Thread Honggyu Kim
On Mon,  8 Apr 2024 22:41:04 +0900 Honggyu Kim  wrote:
[...]
> To explain this, I better share more test results.  In the section of
> "Evaluation Workload", the test sequence can be summarized as follows.
> 
>   *. "Turn on DAMON."
>   1. Allocate cold memory(mmap+memset) at DRAM node, then make the
>  process sleep.
>   2. Launch redis-server and load prebaked snapshot image, dump.rdb.
>  (85GB consumed: 52GB for anon and 33GB for file cache)
>   3. Run YCSB to make zipfian distribution of memory accesses to
>  redis-server, then measure execution time.
>   4. Repeat 4 over 50 times to measure the average execution time for
>  each run.

Sorry, "Repeat 4 over 50 times" is incorrect.  This should be "Repeat 3
over 50 times".

>   5. Increase the cold memory size then repeat goes to 2.
> 
> I didn't want to make the evaluation too long in the cover letter, but
> I have also evaluated another scenario, which lazily enables DAMON just
> before the YCSB run at step 4.  I will call this test "DAMON lazy".  This
> part is missing from the cover letter.
> 
>   1. Allocate cold memory(mmap+memset) at DRAM node, then make the
>  process sleep.
>   2. Launch redis-server and load prebaked snapshot image, dump.rdb.
>  (85GB consumed: 52GB for anon and 33GB for file cache)
>   *. "Turn on DAMON."
>   4. Run YCSB to make zipfian distribution of memory accesses to
>  redis-server, then measure execution time.
>   5. Repeat 4 over 50 times to measure the average execution time for
>  each run.
>   6. Increase the cold memory size then repeat goes to 2.
> 
> In the "DAMON lazy" senario, DAMON started monitoring late so the
> initial redis-server placement is same as "default", but started to
> demote cold data and promote redis data just before YCSB run.
[...]

Thanks,
Honggyu



[RFC PATCH v3 3/7] mm/damon/sysfs-schemes: add target_nid on sysfs-schemes

2024-04-05 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch adds target_nid under
  /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//

The 'target_nid' can be used as the destination node for DAMOS actions
such as DAMOS_MIGRATE_{HOT,COLD} in the follow up patches.

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h| 11 ++-
 mm/damon/core.c  |  5 -
 mm/damon/dbgfs.c |  2 +-
 mm/damon/lru_sort.c  |  3 ++-
 mm/damon/reclaim.c   |  3 ++-
 mm/damon/sysfs-schemes.c | 33 -
 6 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5881e4ac30be..24ea33a03d5d 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -337,6 +337,7 @@ struct damos_access_pattern {
  * @apply_interval_us: The time between applying the @action.
  * @quota: Control the aggressiveness of this scheme.
  * @wmarks:Watermarks for automated (in)activation of this scheme.
+ * @target_nid:Destination node if @action is 
"migrate_{hot,cold}".
  * @filters:   Additional set of  damos_filter for 
  * @stat:  Statistics of this scheme.
  * @list:  List head for siblings.
@@ -352,6 +353,10 @@ struct damos_access_pattern {
  * monitoring context are inactive, DAMON stops monitoring either, and just
  * repeatedly checks the watermarks.
  *
+ * @target_nid is used to set the migration target node for migrate_hot or
+ * migrate_cold actions, which means it's only meaningful when @action is 
either
+ * "migrate_hot" or "migrate_cold".
+ *
  * Before applying the  to a memory region,  damon_operations
  * implementation could check pages of the region and skip  to respect
  * 
@@ -373,6 +378,9 @@ struct damos {
 /* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
+   union {
+   int target_nid;
+   };
struct list_head filters;
struct damos_stat stat;
struct list_head list;
@@ -677,7 +685,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
-   struct damos_watermarks *wmarks);
+   struct damos_watermarks *wmarks,
+   int target_nid);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 5b325749fc12..7ff0259d9fa6 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -316,7 +316,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
-   struct damos_watermarks *wmarks)
+   struct damos_watermarks *wmarks,
+   int target_nid)
 {
struct damos *scheme;
 
@@ -341,6 +342,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern 
*pattern,
scheme->wmarks = *wmarks;
scheme->wmarks.activated = true;
 
+   scheme->target_nid = target_nid;
+
return scheme;
 }
 
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 7dac24e69e3b..d04fdccfa65b 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -279,7 +279,7 @@ static struct damos **str_to_schemes(const char *str, 
ssize_t len,
 
pos += parsed;
scheme = damon_new_scheme(, action, 0, ,
-   );
+   , NUMA_NO_NODE);
if (!scheme)
goto fail;
 
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 3de2916a65c3..3775f0f2743d 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -163,7 +163,8 @@ static struct damos *damon_lru_sort_new_scheme(
/* under the quota. */
,
/* (De)activate this according to the watermarks. */
-   _lru_sort_wmarks);
+   _lru_sort_wmarks,
+   NUMA_NO_NODE);
 }
 
 /* Create a DAMON-based operation scheme for hot memory regions */
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 66e190f0374a..84e6e96b5dcc 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -147,7 +147,8 @@ static struct damos *damon_reclaim_new_scheme(void)
/* under the quota. */
_reclaim_quota,
/* (De)activate this according to the watermarks. */
-   _reclaim_wmarks);
+   _reclaim_wmarks,
+   NUMA_NO_NODE);
 }
 
 static void damon_reclaim_copy_quota_status(s

[RFC PATCH v3 4/7] mm/migrate: add MR_DAMON to migrate_reason

2024-04-05 Thread Honggyu Kim
The current patch series introduces DAMON based migration across NUMA
nodes so it'd be better to have a new migrate_reason in trace events.

Signed-off-by: Honggyu Kim 
---
 include/linux/migrate_mode.h   | 1 +
 include/trace/events/migrate.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index f37cc03f9369..cec36b7e7ced 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -29,6 +29,7 @@ enum migrate_reason {
MR_CONTIG_RANGE,
MR_LONGTERM_PIN,
MR_DEMOTION,
+   MR_DAMON,
MR_TYPES
 };
 
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 0190ef725b43..cd01dd7b3640 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -22,7 +22,8 @@
EM( MR_NUMA_MISPLACED,  "numa_misplaced")   \
EM( MR_CONTIG_RANGE,"contig_range") \
EM( MR_LONGTERM_PIN,"longterm_pin") \
-   EMe(MR_DEMOTION,"demotion")
+   EM( MR_DEMOTION,"demotion") \
+   EMe(MR_DAMON,   "damon")
 
 /*
  * First define the enums in the above macros to be exported to userspace
-- 
2.34.1




[RFC PATCH v3 5/7] mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion

2024-04-05 Thread Honggyu Kim
This patch introduces DAMOS_MIGRATE_COLD action, which is similar to
DAMOS_PAGEOUT, but migrate folios to the given 'target_nid' in the sysfs
instead of swapping them out.

The 'target_nid' sysfs knob is created by this patch to inform the
migration target node ID.

Here is one of the example usage of this 'migrate_cold' action.

  $ cd /sys/kernel/mm/damon/admin/kdamonds/
  $ cat contexts//schemes//action
  migrate_cold
  $ echo 2 > contexts//schemes//target_nid
  $ echo commit > state
  $ numactl -p 0 ./hot_cold 500M 600M &
  $ numastat -c -p hot_cold

  Per-node process memory usage (in MBs)
  PID Node 0 Node 1 Node 2 Total
  --  -- -- -- -
  701 (hot_cold) 501  0601  1101

Since there are some common routines with pageout, many functions have
similar logics between pageout and migrate cold.

damon_pa_migrate_folio_list() is a minimized version of
shrink_folio_list(), but it's minified only for demotion.

Signed-off-by: Honggyu Kim 
Signed-off-by: Hyeongtak Ji 
---
 include/linux/damon.h|   2 +
 mm/damon/paddr.c | 146 ++-
 mm/damon/sysfs-schemes.c |   4 ++
 3 files changed, 151 insertions(+), 1 deletion(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 24ea33a03d5d..df8671e69a70 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
+ * @DAMOS_MIGRATE_COLD: Migrate for the given cold region.
  * @DAMOS_STAT:Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
  *
@@ -122,6 +123,7 @@ enum damos_action {
DAMOS_NOHUGEPAGE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
+   DAMOS_MIGRATE_COLD,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
 };
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 277a1c4d833c..fe217a26f788 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -12,6 +12,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "../internal.h"
 #include "ops-common.h"
@@ -226,8 +229,137 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
 
 enum migration_mode {
MIG_PAGEOUT,
+   MIG_MIGRATE_COLD,
 };
 
+static unsigned int migrate_folio_list(struct list_head *migrate_folios,
+  struct pglist_data *pgdat,
+  int target_nid)
+{
+   unsigned int nr_succeeded;
+   nodemask_t allowed_mask = NODE_MASK_NONE;
+
+   struct migration_target_control mtc = {
+   /*
+* Allocate from 'node', or fail quickly and quietly.
+* When this happens, 'page' will likely just be discarded
+* instead of migrated.
+*/
+   .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 
__GFP_NOWARN |
+   __GFP_NOMEMALLOC | GFP_NOWAIT,
+   .nid = target_nid,
+   .nmask = _mask
+   };
+
+   if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
+   return 0;
+
+   if (list_empty(migrate_folios))
+   return 0;
+
+   /* Migration ignores all cpuset and mempolicy settings */
+   migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
+ (unsigned long), MIGRATE_ASYNC, MR_DAMON,
+ _succeeded);
+
+   return nr_succeeded;
+}
+
+static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list,
+   struct pglist_data *pgdat,
+   enum migration_mode mm,
+   int target_nid)
+{
+   unsigned int nr_migrated = 0;
+   struct folio *folio;
+   LIST_HEAD(ret_folios);
+   LIST_HEAD(migrate_folios);
+
+   cond_resched();
+
+   while (!list_empty(folio_list)) {
+   struct folio *folio;
+
+   cond_resched();
+
+   folio = lru_to_folio(folio_list);
+   list_del(>lru);
+
+   if (!folio_trylock(folio))
+   goto keep;
+
+   VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
+
+   /* Relocate its contents to another node. */
+   list_add(>lru, _folios);
+   folio_unlock(folio);
+   continue;
+keep:
+   list_add(>lru, _folios);
+   VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+   }
+   /* 'folio_list' is always empty here */
+
+   /* Migrate folios selected for migration */
+   nr_migrated +=

[RFC PATCH v3 7/7] mm/damon: Add "damon_migrate_{hot,cold}" vmstat

2024-04-05 Thread Honggyu Kim
This patch adds "damon_migrate_{hot,cold}" under node specific vmstat
counters at the following location.

  /sys/devices/system/node/node*/vmstat

The counted values are accumulated into the global vmstat, so this patch
also introduces the same counters at /proc/vmstat.

Signed-off-by: Honggyu Kim 
---
 include/linux/mmzone.h |  4 
 mm/damon/paddr.c   | 17 -
 mm/vmstat.c|  4 
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a497f189d988..0005372c5503 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -214,6 +214,10 @@ enum node_stat_item {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+#ifdef CONFIG_DAMON_PADDR
+   DAMON_MIGRATE_HOT,
+   DAMON_MIGRATE_COLD,
+#endif
NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index fd9d35b5cc83..d559c242d151 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -235,10 +235,23 @@ enum migration_mode {
 
 static unsigned int migrate_folio_list(struct list_head *migrate_folios,
   struct pglist_data *pgdat,
+  enum migration_mode mm,
   int target_nid)
 {
unsigned int nr_succeeded;
nodemask_t allowed_mask = NODE_MASK_NONE;
+   enum node_stat_item node_stat;
+
+   switch (mm) {
+   case MIG_MIGRATE_HOT:
+   node_stat = DAMON_MIGRATE_HOT;
+   break;
+   case MIG_MIGRATE_COLD:
+   node_stat = DAMON_MIGRATE_COLD;
+   break;
+   default:
+   return 0;
+   }
 
struct migration_target_control mtc = {
/*
@@ -263,6 +276,8 @@ static unsigned int migrate_folio_list(struct list_head 
*migrate_folios,
  (unsigned long), MIGRATE_ASYNC, MR_DAMON,
  _succeeded);
 
+   mod_node_page_state(pgdat, node_stat, nr_succeeded);
+
return nr_succeeded;
 }
 
@@ -302,7 +317,7 @@ static unsigned int damon_pa_migrate_folio_list(struct 
list_head *folio_list,
/* 'folio_list' is always empty here */
 
/* Migrate folios selected for migration */
-   nr_migrated += migrate_folio_list(_folios, pgdat, target_nid);
+   nr_migrated += migrate_folio_list(_folios, pgdat, mm, 
target_nid);
/* Folios that could not be migrated are still in @migrate_folios */
if (!list_empty(_folios)) {
/* Folios which weren't migrated go back on @folio_list */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..be9ba89fede1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1252,6 +1252,10 @@ const char * const vmstat_text[] = {
"pgdemote_kswapd",
"pgdemote_direct",
"pgdemote_khugepaged",
+#ifdef CONFIG_DAMON_PADDR
+   "damon_migrate_hot",
+   "damon_migrate_cold",
+#endif
 
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
-- 
2.34.1




[RFC PATCH v3 6/7] mm/damon/paddr: introduce DAMOS_MIGRATE_HOT action for promotion

2024-04-05 Thread Honggyu Kim
From: Hyeongtak Ji 

This patch introduces DAMOS_MIGRATE_HOT action, which is similar to
DAMOS_MIGRATE_COLD, but it is targeted to migrate hot pages.

It migrates pages inside the given region to the NUMA node specified by
the 'target_nid' sysfs knob.

Here is one of the example usage of this 'migrate_hot' action.

  $ cd /sys/kernel/mm/damon/admin/kdamonds/
  $ cat contexts//schemes//action
  migrate_hot
  $ echo 0 > contexts//schemes//target_nid
  $ echo commit > state
  $ numactl -p 2 ./hot_cold 500M 600M &
  $ numastat -c -p hot_cold

  Per-node process memory usage (in MBs)
  PID Node 0 Node 1 Node 2 Total
  --  -- -- -- -
  701 (hot_cold) 501  0601  1101

Signed-off-by: Hyeongtak Ji 
Signed-off-by: Honggyu Kim 
---
 include/linux/damon.h|  2 ++
 mm/damon/paddr.c | 12 ++--
 mm/damon/sysfs-schemes.c |  4 +++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index df8671e69a70..934c95a7c042 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
+ * @DAMOS_MIGRATE_HOT:  Migrate for the given hot region.
  * @DAMOS_MIGRATE_COLD: Migrate for the given cold region.
  * @DAMOS_STAT:Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
@@ -123,6 +124,7 @@ enum damos_action {
DAMOS_NOHUGEPAGE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
+   DAMOS_MIGRATE_HOT,
DAMOS_MIGRATE_COLD,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index fe217a26f788..fd9d35b5cc83 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -229,6 +229,7 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
 
 enum migration_mode {
MIG_PAGEOUT,
+   MIG_MIGRATE_HOT,
MIG_MIGRATE_COLD,
 };
 
@@ -375,8 +376,10 @@ static unsigned long damon_pa_migrate(struct damon_region 
*r, struct damos *s,
if (damos_pa_filter_out(s, folio))
goto put_folio;
 
-   folio_clear_referenced(folio);
-   folio_test_clear_young(folio);
+   if (mm != MIG_MIGRATE_HOT) {
+   folio_clear_referenced(folio);
+   folio_test_clear_young(folio);
+   }
if (!folio_isolate_lru(folio))
goto put_folio;
/*
@@ -394,6 +397,7 @@ static unsigned long damon_pa_migrate(struct damon_region 
*r, struct damos *s,
case MIG_PAGEOUT:
applied = reclaim_pages(_list);
break;
+   case MIG_MIGRATE_HOT:
case MIG_MIGRATE_COLD:
applied = damon_pa_migrate_pages(_list, mm,
 s->target_nid);
@@ -454,6 +458,8 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx 
*ctx,
return damon_pa_mark_accessed(r, scheme);
case DAMOS_LRU_DEPRIO:
return damon_pa_deactivate_pages(r, scheme);
+   case DAMOS_MIGRATE_HOT:
+   return damon_pa_migrate(r, scheme, MIG_MIGRATE_HOT);
case DAMOS_MIGRATE_COLD:
return damon_pa_migrate(r, scheme, MIG_MIGRATE_COLD);
case DAMOS_STAT:
@@ -476,6 +482,8 @@ static int damon_pa_scheme_score(struct damon_ctx *context,
return damon_hot_score(context, r, scheme);
case DAMOS_LRU_DEPRIO:
return damon_cold_score(context, r, scheme);
+   case DAMOS_MIGRATE_HOT:
+   return damon_hot_score(context, r, scheme);
case DAMOS_MIGRATE_COLD:
return damon_cold_score(context, r, scheme);
default:
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 18b7d054c748..1d2f62aa79ca 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1406,6 +1406,7 @@ static const char * const damon_sysfs_damos_action_strs[] 
= {
"nohugepage",
"lru_prio",
"lru_deprio",
+   "migrate_hot",
"migrate_cold",
"stat",
 };
@@ -1660,7 +1661,8 @@ static ssize_t target_nid_store(struct kobject *kobj,
struct damon_sysfs_scheme, kobj);
int err = 0;
 
-if (scheme->action != DAMOS_MIGRATE_COLD)
+if (scheme->action != DAMOS_MIGRATE_HOT &&
+scheme->action != DAMOS_MIGRATE_COLD)
 return -EINVAL;
 
/* TODO: error handling for target_nid range. */
-- 
2.34.1




[RFC PATCH v3 0/7] DAMON based tiered memory management for CXL memory

2024-04-05 Thread Honggyu Kim
ver to fast DRAM node.
Moreover, DAMOS_MIGRATE_HOT action at CXL node also promotes hot pages
of redis-server to DRAM node actively.

As a result, it makes more memory of redis-server stay in DRAM node
compared to "default" memory policy and this makes the performance
improvement.

The following result of latest distribution workload shows similar data.

  2. YCSB latest distribution read only workload
  memory pressure with cold memory on node0 with 512GB of local DRAM.
  =++=
   |   cold memory occupied by mmap and memset  |
   |   0G  440G  450G  460G  470G  480G  490G  500G |
  =++=
  Execution time normalized to DRAM-only values | GEOMEAN
  -++-
  DRAM-only| 1.00 - - - - - - - | 1.00
  CXL-only | 1.18 - - - - - - - | 1.18
  default  |-  1.18  1.19  1.18  1.18  1.17  1.19  1.18 | 1.18 
  DAMON tiered |-  1.04  1.04  1.04  1.05  1.04  1.05  1.05 | 1.04 
  =++=
  CXL usage of redis-server in GB   | AVERAGE
  -++-
  DRAM-only|  0.0 - - - - - - - |  0.0
  CXL-only | 52.6 - - - - - - - | 52.6
  default  |-  20.5  27.1  33.2  39.5  45.5  50.4  50.5 | 38.1
  DAMON tiered |-   0.2   0.4   0.7   1.6   1.2   1.1   3.4 |  1.2
  =++=

In summary of both results, our evaluation shows that "DAMON tiered"
memory management reduces the performance slowdown compared to the
"default" memory policy from 17~18% to 4~5% when the system runs with
high memory pressure on its fast tier DRAM nodes.

Having these DAMOS_MIGRATE_HOT and DAMOS_MIGRATE_COLD actions can make
tiered memory systems run more efficiently under high memory pressures.

Signed-off-by: Honggyu Kim 
Signed-off-by: Hyeongtak Ji 
Signed-off-by: Rakie Kim 

[1] https://lore.kernel.org/damon/20231112195602.61525-1...@kernel.org
[2] https://lore.kernel.org/damon/20240311204545.47097-1...@kernel.org
[3] https://github.com/skhynix/hmsdk
[4] https://github.com/redis/redis/tree/7.0.0
[5] https://github.com/brianfrankcooper/YCSB/tree/0.17.0
[6] https://dl.acm.org/doi/10.1145/3503222.3507731
[7] https://dl.acm.org/doi/10.1145/3582016.3582063


Honggyu Kim (5):
  mm/damon/paddr: refactor DAMOS_PAGEOUT with migration_mode
  mm: make alloc_demote_folio externally invokable for migration
  mm/migrate: add MR_DAMON to migrate_reason
  mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion
  mm/damon: Add "damon_migrate_{hot,cold}" vmstat

Hyeongtak Ji (2):
  mm/damon/sysfs-schemes: add target_nid on sysfs-schemes
  mm/damon/paddr: introduce DAMOS_MIGRATE_HOT action for promotion

 include/linux/damon.h  |  15 ++-
 include/linux/migrate_mode.h   |   1 +
 include/linux/mmzone.h |   4 +
 include/trace/events/migrate.h |   3 +-
 mm/damon/core.c|   5 +-
 mm/damon/dbgfs.c   |   2 +-
 mm/damon/lru_sort.c|   3 +-
 mm/damon/paddr.c   | 191 +++--
 mm/damon/reclaim.c |   3 +-
 mm/damon/sysfs-schemes.c   |  39 ++-
 mm/internal.h  |   1 +
 mm/vmscan.c|  10 +-
 mm/vmstat.c|   4 +
 13 files changed, 265 insertions(+), 16 deletions(-)


base-commit: e8f897f4afef0031fe618a8e94127a0934896aba
-- 
2.34.1




[RFC PATCH v3 2/7] mm: make alloc_demote_folio externally invokable for migration

2024-04-05 Thread Honggyu Kim
The alloc_demote_folio function can be used outside of vmscan.c, so it'd
be better to remove the static keyword from it.

This function can also be used for both demotion and promotion so it'd
be better to rename it from alloc_demote_folio to alloc_migrate_folio.

Signed-off-by: Honggyu Kim 
---
 mm/internal.h |  1 +
 mm/vmscan.c   | 10 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index f309a010d50f..c96ff9bc82d0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -866,6 +866,7 @@ extern unsigned long  __must_check vm_mmap_pgoff(struct 
file *, unsigned long,
 unsigned long, unsigned long);
 
 extern void set_pageblock_order(void);
+struct folio *alloc_migrate_folio(struct folio *src, unsigned long private);
 unsigned long reclaim_pages(struct list_head *folio_list);
 unsigned int reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *folio_list);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4255619a1a31..9e456cac03b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -910,8 +910,7 @@ static void folio_check_dirty_writeback(struct folio *folio,
mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
 }
 
-static struct folio *alloc_demote_folio(struct folio *src,
-   unsigned long private)
+struct folio *alloc_migrate_folio(struct folio *src, unsigned long private)
 {
struct folio *dst;
nodemask_t *allowed_mask;
@@ -935,6 +934,11 @@ static struct folio *alloc_demote_folio(struct folio *src,
if (dst)
return dst;
 
+   /*
+* Allocation failed from the target node so try to allocate from
+* fallback nodes based on allowed_mask.
+* See fallback_alloc() at mm/slab.c.
+*/
mtc->gfp_mask &= ~__GFP_THISNODE;
mtc->nmask = allowed_mask;
 
@@ -973,7 +977,7 @@ static unsigned int demote_folio_list(struct list_head 
*demote_folios,
node_get_allowed_targets(pgdat, _mask);
 
/* Demotion ignores all cpuset and mempolicy settings */
-   migrate_pages(demote_folios, alloc_demote_folio, NULL,
+   migrate_pages(demote_folios, alloc_migrate_folio, NULL,
  (unsigned long), MIGRATE_ASYNC, MR_DEMOTION,
  _succeeded);
 
-- 
2.34.1




[RFC PATCH v3 1/7] mm/damon/paddr: refactor DAMOS_PAGEOUT with migration_mode

2024-04-05 Thread Honggyu Kim
This is a preparation patch that introduces migration modes.

The damon_pa_pageout is renamed to damon_pa_migrate and it receives an
extra argument for migration_mode.

No functional changes applied.

Signed-off-by: Honggyu Kim 
---
 mm/damon/paddr.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 081e2a325778..277a1c4d833c 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -224,7 +224,12 @@ static bool damos_pa_filter_out(struct damos *scheme, 
struct folio *folio)
return false;
 }
 
-static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
+enum migration_mode {
+   MIG_PAGEOUT,
+};
+
+static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
+ enum migration_mode mm)
 {
unsigned long addr, applied;
LIST_HEAD(folio_list);
@@ -249,7 +254,14 @@ static unsigned long damon_pa_pageout(struct damon_region 
*r, struct damos *s)
 put_folio:
folio_put(folio);
}
-   applied = reclaim_pages(_list);
+   switch (mm) {
+   case MIG_PAGEOUT:
+   applied = reclaim_pages(_list);
+   break;
+   default:
+   /* Unexpected migration mode. */
+   return 0;
+   }
cond_resched();
return applied * PAGE_SIZE;
 }
@@ -297,7 +309,7 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx 
*ctx,
 {
switch (scheme->action) {
case DAMOS_PAGEOUT:
-   return damon_pa_pageout(r, scheme);
+   return damon_pa_migrate(r, scheme, MIG_PAGEOUT);
case DAMOS_LRU_PRIO:
return damon_pa_mark_accessed(r, scheme);
case DAMOS_LRU_DEPRIO:
-- 
2.34.1




Re: [RFC PATCH v2 0/7] DAMON based 2-tier memory management for CXL memory

2024-04-05 Thread Honggyu Kim
Hi SeongJae,

On Tue, 26 Mar 2024 16:03:09 -0700 SeongJae Park  wrote:
> On Mon, 25 Mar 2024 15:53:03 -0700 SeongJae Park  wrote:
> 
> > On Mon, 25 Mar 2024 21:01:04 +0900 Honggyu Kim  wrote:
> [...]
> > > On Fri, 22 Mar 2024 09:32:23 -0700 SeongJae Park  wrote:
> > > > On Fri, 22 Mar 2024 18:02:23 +0900 Honggyu Kim  
> > > > wrote:
> [...]
> > >
> > > I would like to hear how you think about this.
> > 
> > So, to summarize my humble opinion,
> > 
> > 1. I like the idea of having two actions.  But I'd like to use names other 
> > than
> >'promote' and 'demote'.
> > 2. I still prefer having a filter for the page granularity access re-check.
> > 
> [...]
> > > I will join the DAMON Beer/Coffee/Tea Chat tomorrow as scheduled so I
> > > can talk more about this issue.
> > 
> > Looking forward to chatting with you :)
> 
> We met and discussed about this topic in the chat series yesterday.  Sharing
> the summary for keeping the open discussion.
> 
> Honggyu thankfully accepted my humble suggestions on the last reply.  Honggyu
> will post the third version of this patchset soon.  The patchset will 
> implement
> two new DAMOS actions, namely MIGRATE_HOT and MIGRATE_COLD.  Those will 
> migrate
> the DAMOS target regions to a user-specified NUMA node, but will have 
> different
> prioritization score function.  As name implies, they will prioritize more hot
> regions and cold regions, respectively.

It's a late answer but thanks very much for the summary.  It was really
helpful discussion in the chat series.

I'm leaving a message so that anyone can follow for the update.  The v3
is posted at 
https://lore.kernel.org/damon/20240405060858.2818-1-honggyu@sk.com.

> Honggyu, please feel free to fix if there is anything wrong or missed.

I don't have anything to fix from your summary.

> And thanks to Honggyu again for patiently keeping this productive discussion
> and their awesome work.

I appreciate your persistent support and kind reviews. 

Thanks,
Honggyu

> 
> Thanks,
> SJ
> 
> [...]



Re: [RFC PATCH v3 0/7] DAMON based tiered memory management for CXL

2024-04-08 Thread Honggyu Kim
Hi Gregory,

On Fri, 5 Apr 2024 12:56:14 -0400 Gregory Price  
wrote:
> On Fri, Apr 05, 2024 at 03:08:49PM +0900, Honggyu Kim wrote:
> > There was an RFC IDEA "DAMOS-based Tiered-Memory Management" previously
> > posted at [1].
> > 
> >   1. YCSB zipfian distribution read only workload
> >   memory pressure with cold memory on node0 with 512GB of local DRAM.
> >   =++=
> >|   cold memory occupied by mmap and memset  |
> >|   0G  440G  450G  460G  470G  480G  490G  500G |
> >   =++=
> >   Execution time normalized to DRAM-only values | GEOMEAN
> >   -++-
> >   DRAM-only| 1.00 - - - - - - - | 1.00
> >   CXL-only | 1.22 - - - - - - - | 1.22
> >   default  |-  1.12  1.13  1.14  1.16  1.19  1.21  1.21 | 1.17 
> >   DAMON tiered |-  1.04  1.03  1.04  1.06  1.05  1.05  1.05 | 1.05 
> >   =++=
> >   CXL usage of redis-server in GB   | AVERAGE
> >   -++-
> >   DRAM-only|  0.0 - - - - - - - |  0.0
> >   CXL-only | 52.6 - - - - - - - | 52.6
> >   default  |-  20.4  27.0  33.1  39.5  45.6  50.5  50.3 | 38.1
> >   DAMON tiered |-   0.1   0.3   0.8   0.6   0.7   1.3   0.9 |  0.7
> >   =++=
> > 
> > Each test result is based on the execution environment as follows.
> > 
> >   DRAM-only   : redis-server uses only local DRAM memory.
> >   CXL-only: redis-server uses only CXL memory.
> >   default : default memory policy(MPOL_DEFAULT).
> > numa balancing disabled.
> >   DAMON tiered: DAMON enabled with DAMOS_MIGRATE_COLD for DRAM nodes and
> > DAMOS_MIGRATE_HOT for CXL nodes.
> > 
> > The above result shows the "default" execution time goes up as the size
> > of cold memory is increased from 440G to 500G because the more cold
> > memory used, the more CXL memory is used for the target redis workload
> > and this makes the execution time increase.
> > 
> > However, "DAMON tiered" result shows less slowdown because the
> > DAMOS_MIGRATE_COLD action at DRAM node proactively demotes pre-allocated
> > cold memory to CXL node and this free space at DRAM increases more
> > chance to allocate hot or warm pages of redis-server to fast DRAM node.
> > Moreover, DAMOS_MIGRATE_HOT action at CXL node also promotes hot pages
> > of redis-server to DRAM node actively.
> > 
> > As a result, it makes more memory of redis-server stay in DRAM node
> > compared to "default" memory policy and this makes the performance
> > improvement.
> > 
> > The following result of latest distribution workload shows similar data.
> > 
> >   2. YCSB latest distribution read only workload
> >   memory pressure with cold memory on node0 with 512GB of local DRAM.
> >   =++=
> >|   cold memory occupied by mmap and memset  |
> >|   0G  440G  450G  460G  470G  480G  490G  500G |
> >   =++=
> >   Execution time normalized to DRAM-only values | GEOMEAN
> >   -++-
> >   DRAM-only| 1.00 - - - - - - - | 1.00
> >   CXL-only | 1.18 - - - - - - - | 1.18
> >   default  |-  1.18  1.19  1.18  1.18  1.17  1.19  1.18 | 1.18 
> >   DAMON tiered |-  1.04  1.04  1.04  1.05  1.04  1.05  1.05 | 1.04 
> >   =++=
> >   CXL usage of redis-server in GB   | AVERAGE
> >   -++-
> >   DRAM-only|  0.0 - - - - - - - |  0.0
> >   CXL-only | 52.6 - - - - - - - | 52.6
> >   default  |-  20.5  27.1  33.2  39.5  45.5  50.4  50.5 | 38.1
> >   DAMON tiered |-   0.2   0.4   0.7   1.6   1.2   1.1   3.4 |  1.2
> &g

Re: [RFC PATCH v3 5/7] mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion

2024-04-08 Thread Honggyu Kim
On Fri,  5 Apr 2024 12:24:30 -0700 SeongJae Park  wrote:
> On Fri,  5 Apr 2024 15:08:54 +0900 Honggyu Kim  wrote:
> 
> > This patch introduces DAMOS_MIGRATE_COLD action, which is similar to
> > DAMOS_PAGEOUT, but migrate folios to the given 'target_nid' in the sysfs
> > instead of swapping them out.
> > 
> > The 'target_nid' sysfs knob is created by this patch to inform the
> > migration target node ID.
> 
> Isn't it created by the previous patch?

Right.  I didn't fix the commit message after splitting this patch.  I will
fix it.

> > 
> > Here is one of the example usage of this 'migrate_cold' action.
> > 
> >   $ cd /sys/kernel/mm/damon/admin/kdamonds/
> >   $ cat contexts//schemes//action
> >   migrate_cold
> >   $ echo 2 > contexts//schemes//target_nid
> >   $ echo commit > state
> >   $ numactl -p 0 ./hot_cold 500M 600M &
> >   $ numastat -c -p hot_cold
> > 
> >   Per-node process memory usage (in MBs)
> >   PID Node 0 Node 1 Node 2 Total
> >   --  -- -- -- -
> >   701 (hot_cold) 501  0601  1101
> > 
> > Since there are some common routines with pageout, many functions have
> > similar logics between pageout and migrate cold.
> > 
> > damon_pa_migrate_folio_list() is a minimized version of
> > shrink_folio_list(), but it's minified only for demotion.
> 
> MIGRATE_COLD is not only for demotion, right?  I think the last two words are
> better to be removed for reducing unnecessary confuses.

You mean the last two sentences?  I will remove them if you feel they're
confusing.

> > 
> > Signed-off-by: Honggyu Kim 
> > Signed-off-by: Hyeongtak Ji 
> > ---
> >  include/linux/damon.h|   2 +
> >  mm/damon/paddr.c | 146 ++-
> >  mm/damon/sysfs-schemes.c |   4 ++
> >  3 files changed, 151 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/damon.h b/include/linux/damon.h
> > index 24ea33a03d5d..df8671e69a70 100644
> > --- a/include/linux/damon.h
> > +++ b/include/linux/damon.h
> > @@ -105,6 +105,7 @@ struct damon_target {
> >   * @DAMOS_NOHUGEPAGE:  Call ``madvise()`` for the region with 
> > MADV_NOHUGEPAGE.
> >   * @DAMOS_LRU_PRIO:Prioritize the region on its LRU lists.
> >   * @DAMOS_LRU_DEPRIO:  Deprioritize the region on its LRU lists.
> > + * @DAMOS_MIGRATE_COLD: Migrate for the given cold region.
> 
> Whether it will be for cold region or not is depending on the target access
> pattern.  What about 'Migrate the regions in coldest regions first manner.'?
> Or, simply 'Migrate the regions (prioritize cold)' here, and explain about the
> prioritization under quota on the detailed comments part?

"Migrate the regions in coldest regions first manner under quota" sounds
better.  I will change it.

> Also, let's use tab consistently.

Yeah, it's a mistake.  will fix it.

> >   * @DAMOS_STAT:Do nothing but count the stat.
> >   * @NR_DAMOS_ACTIONS:  Total number of DAMOS actions
> >   *
> > @@ -122,6 +123,7 @@ enum damos_action {
> > DAMOS_NOHUGEPAGE,
> > DAMOS_LRU_PRIO,
> > DAMOS_LRU_DEPRIO,
> > +   DAMOS_MIGRATE_COLD,
> > DAMOS_STAT, /* Do nothing but only record the stat */
> > NR_DAMOS_ACTIONS,
> >  };
> > diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
> > index 277a1c4d833c..fe217a26f788 100644
> > --- a/mm/damon/paddr.c
> > +++ b/mm/damon/paddr.c
> > @@ -12,6 +12,9 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> >  
> >  #include "../internal.h"
> >  #include "ops-common.h"
> > @@ -226,8 +229,137 @@ static bool damos_pa_filter_out(struct damos *scheme, 
> > struct folio *folio)
> >  
> >  enum migration_mode {
> > MIG_PAGEOUT,
> > +   MIG_MIGRATE_COLD,
> >  };
> >  
> > +static unsigned int migrate_folio_list(struct list_head *migrate_folios,
> > +  struct pglist_data *pgdat,
> > +  int target_nid)
> 
> To avoid name collisions, I'd prefer having damon_pa_prefix.  I show this 
> patch
> is defining damon_pa_migrate_folio_list() below, though.  What about
> __damon_pa_migrate_folio_list()?

Ack.  I will change it to __damon_pa_migrate_folio_list().

> > +{
> > +   unsigned int nr_succeeded;
> > +   nodemask_t allowed_mask = NODE_MASK_NONE;
> > +
> 
> I personally prefer not having empty lines in the middle of variable

Re: [RFC PATCH v3 0/7] DAMON based tiered memory management for CXL memory

2024-04-08 Thread Honggyu Kim
Hi SeongJae,

On Fri,  5 Apr 2024 12:28:00 -0700 SeongJae Park  wrote:
> Hello Honggyu,
> 
> On Fri,  5 Apr 2024 15:08:49 +0900 Honggyu Kim  wrote:
> 
> > There was an RFC IDEA "DAMOS-based Tiered-Memory Management" previously
> > posted at [1].
> > 
> > It says there is no implementation of the demote/promote DAMOS action
> > are made.  This RFC is about its implementation for physical address
> > space.
> > 
> > 
> > Changes from RFC v2:
> >   1. Rename DAMOS_{PROMOTE,DEMOTE} actions to DAMOS_MIGRATE_{HOT,COLD}.
> >   2. Create 'target_nid' to set the migration target node instead of
> >  depending on node distance based information.
> >   3. Instead of having page level access check in this patch series,
> >  delegate the job to a new DAMOS filter type YOUNG[2].
> >   4. Introduce vmstat counters "damon_migrate_{hot,cold}".
> >   5. Rebase from v6.7 to v6.8.
> 
> Thank you for patiently keeping discussion and making this great version!  I
> left comments on each patch, but found no special concerns.  Per-page access
> recheck for MIGRATE_HOT and vmstat change are taking my eyes, though.  I doubt
> if those really needed.  It would be nice if you could answer to the comments.

I will answer them where you made the comments.

> Once my comments on this version are addressed, I would have no reason to
> object at dropping the RFC tag from this patchset.

Thanks.  I will drop the RFC after addressing your comments.

> Nonetheless, I show some warnings and errors from checkpatch.pl.  I don't
> really care about those for RFC patches, so no problem at all.  But if you
> agree to my opinion about RFC tag dropping, and therefore if you will send 
> next
> version without RFC tag, please make sure you also run checkpatch.pl before
> posting.

Sure.  I will run checkpatch.pl from the next revision.

Thanks,
Honggyu

> 
> Thanks,
> SJ
> 
> [...]