[PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-14 Thread peter.enderborg
From: Peter Enderborg 

Fundamental changes:
1 Does NOT take any RCU lock in shrinker functions.
2 It returns the same result for scan and count, so we don't need to do
  needless scans: the shrinker will know when it is pointless to call scan.
3 It does not lock any other process than the one that is
  going to be killed.

Background.
The low memory killer scans for processes that can be killed to free
memory. This can be CPU-consuming when there is a high demand for
memory. This can be seen by analysing the kswapd0 task's work.
The stats function added in an earlier patch adds a counter for wasted work.

How it works.
This patch creates a structure within the low memory killer that caches
the user-space processes that it might kill. It is done with a
sorted rbtree so we can very easily find the candidate to be killed
and know its properties, such as memory usage. The tree is sorted by
oom_score_adj so we can look up the task with the highest oom_score_adj.
To be able to achieve this it uses oom_score_notify events.

This patch also has another effect: we are now free to use other
lowmemorykiller configurations. Without the patch there is a need
for a tradeoff between freed memory and task and RCU locks. This
is no longer a concern when tuning lmk. This patch is not intended
to make any calculation changes, other than that we now use the cache to
calculate the count values, and that makes kswapd0 shrink other
areas.

Signed-off-by: Peter Enderborg 
---
 drivers/staging/android/Kconfig |   1 +
 drivers/staging/android/Makefile|   1 +
 drivers/staging/android/lowmemorykiller.c   | 294 +++-
 drivers/staging/android/lowmemorykiller.h   |  15 ++
 drivers/staging/android/lowmemorykiller_stats.c |  24 ++
 drivers/staging/android/lowmemorykiller_stats.h |  14 +-
 drivers/staging/android/lowmemorykiller_tasks.c | 220 ++
 drivers/staging/android/lowmemorykiller_tasks.h |  35 +++
 8 files changed, 498 insertions(+), 106 deletions(-)
 create mode 100644 drivers/staging/android/lowmemorykiller.h
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.c
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.h

diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 96e86c7..899186c 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -16,6 +16,7 @@ config ASHMEM
 
 config ANDROID_LOW_MEMORY_KILLER
bool "Android Low Memory Killer"
+   select OOM_SCORE_NOTIFIER
---help---
  Registers processes to be killed when low memory conditions, this is 
useful
  as there is no particular swap space on android.
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index d710eb2..b7a8036 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -4,4 +4,5 @@ obj-y   += ion/
 
 obj-$(CONFIG_ASHMEM)   += ashmem.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller.o
+obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller_tasks.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER_STATS)  += lowmemorykiller_stats.o
diff --git a/drivers/staging/android/lowmemorykiller.c 
b/drivers/staging/android/lowmemorykiller.c
index 15c1b38..1e275b1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,10 +41,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include "lowmemorykiller.h"
 #include "lowmemorykiller_stats.h"
+#include "lowmemorykiller_tasks.h"
 
-static u32 lowmem_debug_level = 1;
+u32 lowmem_debug_level = 1;
 static short lowmem_adj[6] = {
0,
1,
@@ -62,135 +66,212 @@ static int lowmem_minfree[6] = {
 
 static int lowmem_minfree_size = 4;
 
-static unsigned long lowmem_deathpending_timeout;
-
-#define lowmem_print(level, x...)  \
-   do {\
-   if (lowmem_debug_level >= (level))  \
-   pr_info(x); \
-   } while (0)
-
-static unsigned long lowmem_count(struct shrinker *s,
- struct shrink_control *sc)
-{
-   lmk_inc_stats(LMK_COUNT);
-   return global_node_page_state(NR_ACTIVE_ANON) +
-   global_node_page_state(NR_ACTIVE_FILE) +
-   global_node_page_state(NR_INACTIVE_ANON) +
-   global_node_page_state(NR_INACTIVE_FILE);
-}
+struct calculated_params {
+   long selected_tasksize;
+   long minfree;
+   int other_file;
+   int other_free;
+   int dynamic_max_queue_len;
+   short selected_oom_score_adj;
+   short min_score_adj;
+};
 
-static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
+static int kill_needed(int level, struct shrink_control *sc,
+  struct calculated_params 

[PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-14 Thread peter.enderborg
From: Peter Enderborg 

Fundamental changes:
1 Does NOT take any RCU lock in shrinker functions.
2 It returns the same result for scan and count, so we don't need to do
  needless scans: the shrinker will know when it is pointless to call scan.
3 It does not lock any other process than the one that is
  going to be killed.

Background.
The low memory killer scans for processes that can be killed to free
memory. This can be CPU-consuming when there is a high demand for
memory. This can be seen by analysing the kswapd0 task's work.
The stats function added in an earlier patch adds a counter for wasted work.

How it works.
This patch creates a structure within the low memory killer that caches
the user-space processes that it might kill. It is done with a
sorted rbtree so we can very easily find the candidate to be killed
and know its properties, such as memory usage. The tree is sorted by
oom_score_adj so we can look up the task with the highest oom_score_adj.
To be able to achieve this it uses oom_score_notify events.

This patch also has another effect: we are now free to use other
lowmemorykiller configurations. Without the patch there is a need
for a tradeoff between freed memory and task and RCU locks. This
is no longer a concern when tuning lmk. This patch is not intended
to make any calculation changes, other than that we now use the cache to
calculate the count values, and that makes kswapd0 shrink other
areas.

Signed-off-by: Peter Enderborg 
---
 drivers/staging/android/Kconfig |   1 +
 drivers/staging/android/Makefile|   1 +
 drivers/staging/android/lowmemorykiller.c   | 294 +++-
 drivers/staging/android/lowmemorykiller.h   |  15 ++
 drivers/staging/android/lowmemorykiller_stats.c |  24 ++
 drivers/staging/android/lowmemorykiller_stats.h |  14 +-
 drivers/staging/android/lowmemorykiller_tasks.c | 220 ++
 drivers/staging/android/lowmemorykiller_tasks.h |  35 +++
 8 files changed, 498 insertions(+), 106 deletions(-)
 create mode 100644 drivers/staging/android/lowmemorykiller.h
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.c
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.h

diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 96e86c7..899186c 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -16,6 +16,7 @@ config ASHMEM
 
 config ANDROID_LOW_MEMORY_KILLER
bool "Android Low Memory Killer"
+   select OOM_SCORE_NOTIFIER
---help---
  Registers processes to be killed when low memory conditions, this is 
useful
  as there is no particular swap space on android.
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index d710eb2..b7a8036 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -4,4 +4,5 @@ obj-y   += ion/
 
 obj-$(CONFIG_ASHMEM)   += ashmem.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller.o
+obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller_tasks.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER_STATS)  += lowmemorykiller_stats.o
diff --git a/drivers/staging/android/lowmemorykiller.c 
b/drivers/staging/android/lowmemorykiller.c
index 15c1b38..1e275b1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,10 +41,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include "lowmemorykiller.h"
 #include "lowmemorykiller_stats.h"
+#include "lowmemorykiller_tasks.h"
 
-static u32 lowmem_debug_level = 1;
+u32 lowmem_debug_level = 1;
 static short lowmem_adj[6] = {
0,
1,
@@ -62,135 +66,212 @@ static int lowmem_minfree[6] = {
 
 static int lowmem_minfree_size = 4;
 
-static unsigned long lowmem_deathpending_timeout;
-
-#define lowmem_print(level, x...)  \
-   do {\
-   if (lowmem_debug_level >= (level))  \
-   pr_info(x); \
-   } while (0)
-
-static unsigned long lowmem_count(struct shrinker *s,
- struct shrink_control *sc)
-{
-   lmk_inc_stats(LMK_COUNT);
-   return global_node_page_state(NR_ACTIVE_ANON) +
-   global_node_page_state(NR_ACTIVE_FILE) +
-   global_node_page_state(NR_INACTIVE_ANON) +
-   global_node_page_state(NR_INACTIVE_FILE);
-}
+struct calculated_params {
+   long selected_tasksize;
+   long minfree;
+   int other_file;
+   int other_free;
+   int dynamic_max_queue_len;
+   short selected_oom_score_adj;
+   short min_score_adj;
+};
 
-static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
+static int kill_needed(int level, struct shrink_control *sc,
+  struct calculated_params *cp)
 {
-   struct task_struct *tsk;
-   struct 

Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread Michal Hocko
On Fri 10-02-17 08:39:11, peter enderborg wrote:
> On 02/09/2017 09:05 PM, Michal Hocko wrote:
> > On Thu 09-02-17 14:21:52, peter enderborg wrote:
> >> Fundamental changes:
> >> 1 Does NOT take any RCU lock in shrinker functions.
> >> 2 It returns same result for scan and counts, so  we dont need to do
> >>   shinker will know when it is pointless to call scan.
> >> 3 It does not lock any other process than the one that is
> >>   going to be killed.
> >>
> >> Background.
> >> The low memory killer scans for process that can be killed to free
> >> memory. This can be cpu consuming when there is a high demand for
> >> memory. This can be seen by analysing the kswapd0 task work.
> >> The stats function added in earler patch adds a counter for waste work.
> >>
> >> How it works.
> >> This patch create a structure within the lowmemory killer that caches
> >> the user spaces processes that it might kill. It is done with a
> >> sorted rbtree so we can very easy find the candidate to be killed,
> >> and knows its properies as memory usage and sorted by oom_score_adj
> >> to look up the task with highest oom_score_adj. To be able to achive
> >> this it uses oom_score_notify events.
> >>
> >> This patch also as a other effect, we are now free to do other
> >> lowmemorykiller configurations.  Without the patch there is a need
> >> for a tradeoff between freed memory and task and rcu locks. This
> >> is no longer a concern for tuning lmk. This patch is not intended
> >> to do any calculation changes other than we do use the cache for
> >> calculate the count values and that makes kswapd0 to shrink other
> >> areas.
> > I have to admit I really do not understand big part of the above
> > paragraph as well as how this all is supposed to work. A quick glance
> > over the implementation. __lmk_task_insert seems to be only called from
> > the oom_score notifier context. If nobody updates the value then no task
> > will get into the tree. Or am I missing something really obvious here?
> > Moreover oom scores tend to be mostly same for tasks. That means that
> > your sorted tree will become sorted by pids in most cases. I do not see
> > any sorting based on the rss nor any updates that would reflect updates
> > of rss. How can this possibly work?
> 
> The task tree nodes are created,updated or removed from the notifier when
> there is a relevant oom_score_adj change. If no one create a task that
> is in the range for the lowmemorykiller the tree will be empty. This is
> an android feature so the score will be updated very often. It is
> part of activity manager to prioritise tasks.  Why should we do sort of
> rss?
 
Because the current lmk selects the tasks based on rss. And the patch
doesn't explain why this is no longer suitable and why a different metric
should be used. If you also consider that the scale of oom_score_adj is
quite small, there will be collisions, and then you simply sort based on
pids, which is more than questionable. I really fail to see how this can
work reasonably and why the change of the lmk semantic is even acceptable.

-- 
Michal Hocko
SUSE Labs


Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread Michal Hocko
On Fri 10-02-17 08:39:11, peter enderborg wrote:
> On 02/09/2017 09:05 PM, Michal Hocko wrote:
> > On Thu 09-02-17 14:21:52, peter enderborg wrote:
> >> Fundamental changes:
> >> 1 Does NOT take any RCU lock in shrinker functions.
> >> 2 It returns same result for scan and counts, so  we dont need to do
> >>   shinker will know when it is pointless to call scan.
> >> 3 It does not lock any other process than the one that is
> >>   going to be killed.
> >>
> >> Background.
> >> The low memory killer scans for process that can be killed to free
> >> memory. This can be cpu consuming when there is a high demand for
> >> memory. This can be seen by analysing the kswapd0 task work.
> >> The stats function added in earler patch adds a counter for waste work.
> >>
> >> How it works.
> >> This patch create a structure within the lowmemory killer that caches
> >> the user spaces processes that it might kill. It is done with a
> >> sorted rbtree so we can very easy find the candidate to be killed,
> >> and knows its properies as memory usage and sorted by oom_score_adj
> >> to look up the task with highest oom_score_adj. To be able to achive
> >> this it uses oom_score_notify events.
> >>
> >> This patch also as a other effect, we are now free to do other
> >> lowmemorykiller configurations.  Without the patch there is a need
> >> for a tradeoff between freed memory and task and rcu locks. This
> >> is no longer a concern for tuning lmk. This patch is not intended
> >> to do any calculation changes other than we do use the cache for
> >> calculate the count values and that makes kswapd0 to shrink other
> >> areas.
> > I have to admit I really do not understand big part of the above
> > paragraph as well as how this all is supposed to work. A quick glance
> > over the implementation. __lmk_task_insert seems to be only called from
> > the oom_score notifier context. If nobody updates the value then no task
> > will get into the tree. Or am I missing something really obvious here?
> > Moreover oom scores tend to be mostly same for tasks. That means that
> > your sorted tree will become sorted by pids in most cases. I do not see
> > any sorting based on the rss nor any updates that would reflect updates
> > of rss. How can this possibly work?
> 
> The task tree nodes are created,updated or removed from the notifier when
> there is a relevant oom_score_adj change. If no one create a task that
> is in the range for the lowmemorykiller the tree will be empty. This is
> an android feature so the score will be updated very often. It is
> part of activity manager to prioritise tasks.  Why should we do sort of
> rss?
 
Because the current lmk selects the tasks based on rss. And the patch
doesn't explain why this is no longer suitable and why a different metric
should be used. If you also consider that the scale of oom_score_adj is
quite small, there will be collisions, and then you simply sort based on
pids, which is more than questionable. I really fail to see how this can
work reasonably and why the change of the lmk semantic is even acceptable.

-- 
Michal Hocko
SUSE Labs


Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread peter enderborg
On 02/09/2017 09:05 PM, Michal Hocko wrote:
> On Thu 09-02-17 14:21:52, peter enderborg wrote:
>> Fundamental changes:
>> 1 Does NOT take any RCU lock in shrinker functions.
>> 2 It returns same result for scan and counts, so  we dont need to do
>>   shinker will know when it is pointless to call scan.
>> 3 It does not lock any other process than the one that is
>>   going to be killed.
>>
>> Background.
>> The low memory killer scans for process that can be killed to free
>> memory. This can be cpu consuming when there is a high demand for
>> memory. This can be seen by analysing the kswapd0 task work.
>> The stats function added in earler patch adds a counter for waste work.
>>
>> How it works.
>> This patch create a structure within the lowmemory killer that caches
>> the user spaces processes that it might kill. It is done with a
>> sorted rbtree so we can very easy find the candidate to be killed,
>> and knows its properies as memory usage and sorted by oom_score_adj
>> to look up the task with highest oom_score_adj. To be able to achive
>> this it uses oom_score_notify events.
>>
>> This patch also as a other effect, we are now free to do other
>> lowmemorykiller configurations.  Without the patch there is a need
>> for a tradeoff between freed memory and task and rcu locks. This
>> is no longer a concern for tuning lmk. This patch is not intended
>> to do any calculation changes other than we do use the cache for
>> calculate the count values and that makes kswapd0 to shrink other
>> areas.
> I have to admit I really do not understand big part of the above
> paragraph as well as how this all is supposed to work. A quick glance
> over the implementation. __lmk_task_insert seems to be only called from
> the oom_score notifier context. If nobody updates the value then no task
> will get into the tree. Or am I missing something really obvious here?
> Moreover oom scores tend to be mostly same for tasks. That means that
> your sorted tree will become sorted by pids in most cases. I do not see
> any sorting based on the rss nor any updates that would reflect updates
> of rss. How can this possibly work?

The task tree nodes are created, updated or removed from the notifier when
there is a relevant oom_score_adj change. If no one creates a task that
is in the range for the lowmemorykiller, the tree will be empty. This is
an Android feature, so the score will be updated very often. It is
part of the activity manager's job to prioritise tasks. Why should we sort
by rss?




Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread peter enderborg
On 02/09/2017 09:05 PM, Michal Hocko wrote:
> On Thu 09-02-17 14:21:52, peter enderborg wrote:
>> Fundamental changes:
>> 1 Does NOT take any RCU lock in shrinker functions.
>> 2 It returns same result for scan and counts, so  we dont need to do
>>   shinker will know when it is pointless to call scan.
>> 3 It does not lock any other process than the one that is
>>   going to be killed.
>>
>> Background.
>> The low memory killer scans for process that can be killed to free
>> memory. This can be cpu consuming when there is a high demand for
>> memory. This can be seen by analysing the kswapd0 task work.
>> The stats function added in earler patch adds a counter for waste work.
>>
>> How it works.
>> This patch create a structure within the lowmemory killer that caches
>> the user spaces processes that it might kill. It is done with a
>> sorted rbtree so we can very easy find the candidate to be killed,
>> and knows its properies as memory usage and sorted by oom_score_adj
>> to look up the task with highest oom_score_adj. To be able to achive
>> this it uses oom_score_notify events.
>>
>> This patch also as a other effect, we are now free to do other
>> lowmemorykiller configurations.  Without the patch there is a need
>> for a tradeoff between freed memory and task and rcu locks. This
>> is no longer a concern for tuning lmk. This patch is not intended
>> to do any calculation changes other than we do use the cache for
>> calculate the count values and that makes kswapd0 to shrink other
>> areas.
> I have to admit I really do not understand big part of the above
> paragraph as well as how this all is supposed to work. A quick glance
> over the implementation. __lmk_task_insert seems to be only called from
> the oom_score notifier context. If nobody updates the value then no task
> will get into the tree. Or am I missing something really obvious here?
> Moreover oom scores tend to be mostly same for tasks. That means that
> your sorted tree will become sorted by pids in most cases. I do not see
> any sorting based on the rss nor any updates that would reflect updates
> of rss. How can this possibly work?

The task tree nodes are created, updated or removed from the notifier when
there is a relevant oom_score_adj change. If no one creates a task that
is in the range for the lowmemorykiller, the tree will be empty. This is
an Android feature, so the score will be updated very often. It is
part of the activity manager's job to prioritise tasks. Why should we sort
by rss?




Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread Michal Hocko
On Thu 09-02-17 14:21:52, peter enderborg wrote:
> Fundamental changes:
> 1 Does NOT take any RCU lock in shrinker functions.
> 2 It returns same result for scan and counts, so  we dont need to do
>   shinker will know when it is pointless to call scan.
> 3 It does not lock any other process than the one that is
>   going to be killed.
> 
> Background.
> The low memory killer scans for process that can be killed to free
> memory. This can be cpu consuming when there is a high demand for
> memory. This can be seen by analysing the kswapd0 task work.
> The stats function added in earler patch adds a counter for waste work.
> 
> How it works.
> This patch create a structure within the lowmemory killer that caches
> the user spaces processes that it might kill. It is done with a
> sorted rbtree so we can very easy find the candidate to be killed,
> and knows its properies as memory usage and sorted by oom_score_adj
> to look up the task with highest oom_score_adj. To be able to achive
> this it uses oom_score_notify events.
> 
> This patch also as a other effect, we are now free to do other
> lowmemorykiller configurations.  Without the patch there is a need
> for a tradeoff between freed memory and task and rcu locks. This
> is no longer a concern for tuning lmk. This patch is not intended
> to do any calculation changes other than we do use the cache for
> calculate the count values and that makes kswapd0 to shrink other
> areas.

I have to admit I really do not understand a big part of the above
paragraph, nor how this all is supposed to work. From a quick glance
over the implementation, __lmk_task_insert seems to be only called from
the oom_score notifier context. If nobody updates the value then no task
will get into the tree. Or am I missing something really obvious here?
Moreover, oom scores tend to be mostly the same for tasks. That means that
your sorted tree will become sorted by pids in most cases. I do not see
any sorting based on the rss nor any updates that would reflect changes
of rss. How can this possibly work?
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread Michal Hocko
On Thu 09-02-17 14:21:52, peter enderborg wrote:
> Fundamental changes:
> 1 Does NOT take any RCU lock in shrinker functions.
> 2 It returns same result for scan and counts, so  we dont need to do
>   shinker will know when it is pointless to call scan.
> 3 It does not lock any other process than the one that is
>   going to be killed.
> 
> Background.
> The low memory killer scans for process that can be killed to free
> memory. This can be cpu consuming when there is a high demand for
> memory. This can be seen by analysing the kswapd0 task work.
> The stats function added in earler patch adds a counter for waste work.
> 
> How it works.
> This patch create a structure within the lowmemory killer that caches
> the user spaces processes that it might kill. It is done with a
> sorted rbtree so we can very easy find the candidate to be killed,
> and knows its properies as memory usage and sorted by oom_score_adj
> to look up the task with highest oom_score_adj. To be able to achive
> this it uses oom_score_notify events.
> 
> This patch also as a other effect, we are now free to do other
> lowmemorykiller configurations.  Without the patch there is a need
> for a tradeoff between freed memory and task and rcu locks. This
> is no longer a concern for tuning lmk. This patch is not intended
> to do any calculation changes other than we do use the cache for
> calculate the count values and that makes kswapd0 to shrink other
> areas.

I have to admit I really do not understand a big part of the above
paragraph, nor how this all is supposed to work. From a quick glance
over the implementation, __lmk_task_insert seems to be only called from
the oom_score notifier context. If nobody updates the value then no task
will get into the tree. Or am I missing something really obvious here?
Moreover, oom scores tend to be mostly the same for tasks. That means that
your sorted tree will become sorted by pids in most cases. I do not see
any sorting based on the rss nor any updates that would reflect changes
of rss. How can this possibly work?
-- 
Michal Hocko
SUSE Labs


[PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread peter enderborg

Fundamental changes:
1 Does NOT take any RCU lock in shrinker functions.
2 It returns the same result for scan and count, so we don't need to do
  needless scans: the shrinker will know when it is pointless to call scan.
3 It does not lock any other process than the one that is
  going to be killed.

Background.
The low memory killer scans for processes that can be killed to free
memory. This can be CPU-consuming when there is a high demand for
memory. This can be seen by analysing the kswapd0 task's work.
The stats function added in an earlier patch adds a counter for wasted work.

How it works.
This patch creates a structure within the low memory killer that caches
the user-space processes that it might kill. It is done with a
sorted rbtree so we can very easily find the candidate to be killed
and know its properties, such as memory usage. The tree is sorted by
oom_score_adj so we can look up the task with the highest oom_score_adj.
To be able to achieve this it uses oom_score_notify events.

This patch also has another effect: we are now free to use other
lowmemorykiller configurations. Without the patch there is a need
for a tradeoff between freed memory and task and RCU locks. This
is no longer a concern when tuning lmk. This patch is not intended
to make any calculation changes, other than that we now use the cache to
calculate the count values, and that makes kswapd0 shrink other
areas.

Signed-off-by: Peter Enderborg 
---
 drivers/staging/android/Kconfig |   1 +
 drivers/staging/android/Makefile|   1 +
 drivers/staging/android/lowmemorykiller.c   | 294 +++-
 drivers/staging/android/lowmemorykiller.h   |  15 ++
 drivers/staging/android/lowmemorykiller_stats.c |  24 ++
 drivers/staging/android/lowmemorykiller_stats.h |  14 +-
 drivers/staging/android/lowmemorykiller_tasks.c | 220 ++
 drivers/staging/android/lowmemorykiller_tasks.h |  35 +++
 8 files changed, 498 insertions(+), 106 deletions(-)
 create mode 100644 drivers/staging/android/lowmemorykiller.h
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.c
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.h

diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 96e86c7..899186c 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -16,6 +16,7 @@ config ASHMEM

 config ANDROID_LOW_MEMORY_KILLER
 bool "Android Low Memory Killer"
+select OOM_SCORE_NOTIFIER
 ---help---
   Registers processes to be killed when low memory conditions, this is 
useful
   as there is no particular swap space on android.
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index d710eb2..b7a8036 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -4,4 +4,5 @@ obj-y+= ion/

 obj-$(CONFIG_ASHMEM)+= ashmem.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller.o
+obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller_tasks.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER_STATS)+= lowmemorykiller_stats.o
diff --git a/drivers/staging/android/lowmemorykiller.c 
b/drivers/staging/android/lowmemorykiller.c
index 15c1b38..1e275b1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,10 +41,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include "lowmemorykiller.h"
 #include "lowmemorykiller_stats.h"
+#include "lowmemorykiller_tasks.h"

-static u32 lowmem_debug_level = 1;
+u32 lowmem_debug_level = 1;
 static short lowmem_adj[6] = {
 0,
 1,
@@ -62,135 +66,212 @@ static int lowmem_minfree[6] = {

 static int lowmem_minfree_size = 4;

-static unsigned long lowmem_deathpending_timeout;
-
-#define lowmem_print(level, x...)\
-do {\
-if (lowmem_debug_level >= (level))\
-pr_info(x);\
-} while (0)
-
-static unsigned long lowmem_count(struct shrinker *s,
-  struct shrink_control *sc)
-{
-lmk_inc_stats(LMK_COUNT);
-return global_node_page_state(NR_ACTIVE_ANON) +
-global_node_page_state(NR_ACTIVE_FILE) +
-global_node_page_state(NR_INACTIVE_ANON) +
-global_node_page_state(NR_INACTIVE_FILE);
-}
+struct calculated_params {
+long selected_tasksize;
+long minfree;
+int other_file;
+int other_free;
+int dynamic_max_queue_len;
+short selected_oom_score_adj;
+short min_score_adj;
+};

-static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
+static int kill_needed(int level, struct shrink_control *sc,
+   struct calculated_params *cp)
 {
-struct task_struct *tsk;
-struct task_struct *selected = NULL;
-unsigned long rem = 0;
-int tasksize;
 int i;
-short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
-int minfree = 0;
-int selected_tasksize = 0;
-  

[PATCH 3/3 staging-next] mm: Remove RCU and tasklocks from lmk

2017-02-09 Thread peter enderborg

Fundamental changes:
1 Does NOT take any RCU lock in shrinker functions.
2 It returns the same result for scan and count, so we don't need to do
  needless scans: the shrinker will know when it is pointless to call scan.
3 It does not lock any other process than the one that is
  going to be killed.

Background.
The low memory killer scans for processes that can be killed to free
memory. This can be CPU-consuming when there is a high demand for
memory. This can be seen by analysing the kswapd0 task's work.
The stats function added in an earlier patch adds a counter for wasted work.

How it works.
This patch creates a structure within the low memory killer that caches
the user-space processes that it might kill. It is done with a
sorted rbtree so we can very easily find the candidate to be killed
and know its properties, such as memory usage. The tree is sorted by
oom_score_adj so we can look up the task with the highest oom_score_adj.
To be able to achieve this it uses oom_score_notify events.

This patch also has another effect: we are now free to use other
lowmemorykiller configurations. Without the patch there is a need
for a tradeoff between freed memory and task and RCU locks. This
is no longer a concern when tuning lmk. This patch is not intended
to make any calculation changes, other than that we now use the cache to
calculate the count values, and that makes kswapd0 shrink other
areas.

Signed-off-by: Peter Enderborg 
---
 drivers/staging/android/Kconfig |   1 +
 drivers/staging/android/Makefile|   1 +
 drivers/staging/android/lowmemorykiller.c   | 294 +++-
 drivers/staging/android/lowmemorykiller.h   |  15 ++
 drivers/staging/android/lowmemorykiller_stats.c |  24 ++
 drivers/staging/android/lowmemorykiller_stats.h |  14 +-
 drivers/staging/android/lowmemorykiller_tasks.c | 220 ++
 drivers/staging/android/lowmemorykiller_tasks.h |  35 +++
 8 files changed, 498 insertions(+), 106 deletions(-)
 create mode 100644 drivers/staging/android/lowmemorykiller.h
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.c
 create mode 100644 drivers/staging/android/lowmemorykiller_tasks.h

diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 96e86c7..899186c 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -16,6 +16,7 @@ config ASHMEM

 config ANDROID_LOW_MEMORY_KILLER
 bool "Android Low Memory Killer"
+select OOM_SCORE_NOTIFIER
 ---help---
   Registers processes to be killed when low memory conditions, this is 
useful
   as there is no particular swap space on android.
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index d710eb2..b7a8036 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -4,4 +4,5 @@ obj-y+= ion/

 obj-$(CONFIG_ASHMEM)+= ashmem.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller.o
+obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)+= lowmemorykiller_tasks.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER_STATS)+= lowmemorykiller_stats.o
diff --git a/drivers/staging/android/lowmemorykiller.c 
b/drivers/staging/android/lowmemorykiller.c
index 15c1b38..1e275b1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,10 +41,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include "lowmemorykiller.h"
 #include "lowmemorykiller_stats.h"
+#include "lowmemorykiller_tasks.h"

-static u32 lowmem_debug_level = 1;
+u32 lowmem_debug_level = 1;
 static short lowmem_adj[6] = {
 0,
 1,
@@ -62,135 +66,212 @@ static int lowmem_minfree[6] = {

 static int lowmem_minfree_size = 4;

-static unsigned long lowmem_deathpending_timeout;
-
-#define lowmem_print(level, x...)\
-do {\
-if (lowmem_debug_level >= (level))\
-pr_info(x);\
-} while (0)
-
-static unsigned long lowmem_count(struct shrinker *s,
-  struct shrink_control *sc)
-{
-lmk_inc_stats(LMK_COUNT);
-return global_node_page_state(NR_ACTIVE_ANON) +
-global_node_page_state(NR_ACTIVE_FILE) +
-global_node_page_state(NR_INACTIVE_ANON) +
-global_node_page_state(NR_INACTIVE_FILE);
-}
+struct calculated_params {
+long selected_tasksize;
+long minfree;
+int other_file;
+int other_free;
+int dynamic_max_queue_len;
+short selected_oom_score_adj;
+short min_score_adj;
+};

-static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
+static int kill_needed(int level, struct shrink_control *sc,
+   struct calculated_params *cp)
 {
-struct task_struct *tsk;
-struct task_struct *selected = NULL;
-unsigned long rem = 0;
-int tasksize;
 int i;
-short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
-int minfree = 0;
-int selected_tasksize = 0;
-short selected_oom_score_adj;