[PATCH] mm: vmalloc: Prevent use after free in _vm_unmap_aliases

2021-03-18 Thread vjitta
From: Vijayanand Jitta 

A potential use after free can occur in _vm_unmap_aliases where an
already freed vmap_area could be accessed. Consider the following
scenario:

Process 1                                    Process 2

__vm_unmap_aliases                           __vm_unmap_aliases
  purge_fragmented_blocks_allcpus              rcu_read_lock()
    rcu_read_lock()
      list_del_rcu(&vb->free_list)
                                               list_for_each_entry_rcu(vb .. )
  __purge_vmap_area_lazy
    kmem_cache_free(va)
                                               va_start = vb->va->va_start

Here Process 1 is in the purge path: it does list_del_rcu() on the
vmap_block and later frees its vmap_area. Since Process 2 holds the
RCU read lock at this time, the vmap_block is still visible to it on
the free list, so Process 2 accesses it and thereby dereferences the
vmap_area of that vmap_block which was already freed by Process 1,
resulting in a use after free.

Fix this by checking vb->dirty before accessing the vmap_area
structure: the purge path sets vb->dirty to VMAP_BBMAP_BITS, so
checking for this prevents the use after free.
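
For reference, the purge side looked roughly like this at the time of
this patch (trimmed sketch of purge_fragmented_blocks(), not the
complete function); it marks a block fully dirty under vb->lock before
unlinking it, which is why checking for a fully dirty block on the
reader side is enough:

	spin_lock(&vb->lock);
	if (vb->free + vb->dirty == VMAP_BBMAP_BITS &&
	    vb->dirty != VMAP_BBMAP_BITS) {
		vb->free = 0;                /* prevent further allocs after releasing lock */
		vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
		spin_lock(&vbq->lock);
		list_del_rcu(&vb->free_list);
		spin_unlock(&vbq->lock);
		spin_unlock(&vb->lock);
		list_add_tail(&vb->purge, &purge);
	} else
		spin_unlock(&vb->lock);

The hunk below takes vb->lock as well, so once it observes vb->dirty ==
VMAP_BBMAP_BITS it knows the block is on its way to being purged and
must not dereference vb->va.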

Signed-off-by: Vijayanand Jitta 
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d5f2a84..ebb6f57 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1762,7 +1762,7 @@ static void _vm_unmap_aliases(unsigned long start, 
unsigned long end, int flush)
rcu_read_lock();
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
spin_lock(&vb->lock);
-   if (vb->dirty) {
+   if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH] lib: stackdepot: fix ignoring return value warning

2021-01-31 Thread vjitta
From: Vijayanand Jitta 

Fix the below 'ignoring return value' warning for kstrtobool() in the
is_stack_depot_disabled() function by checking the return value and
acting on stack_depot_disable only when the parse succeeds.

lib/stackdepot.c: In function 'is_stack_depot_disabled':
lib/stackdepot.c:154:2: warning: ignoring return value of 'kstrtobool'
declared with attribute 'warn_unused_result' [-Wunused-result]

Fixes: b9779abb09a8 ("lib: stackdepot: add support to disable stack depot")
Signed-off-by: Vijayanand Jitta 
---
 lib/stackdepot.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index cc21116..49f67a0 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -151,8 +151,10 @@ static struct stack_record **stack_table;
 
 static int __init is_stack_depot_disabled(char *str)
 {
-   kstrtobool(str, &stack_depot_disable);
-   if (stack_depot_disable) {
+   int ret;
+
+   ret = kstrtobool(str, &stack_depot_disable);
+   if (!ret && stack_depot_disable) {
pr_info("Stack Depot is disabled\n");
stack_table = NULL;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4


[PATCH v6 1/2] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2021-01-27 Thread vjitta
From: Yogesh Lal 

Use CONFIG_STACK_HASH_ORDER to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case.

One example is page owner: CONFIG_PAGE_OWNER takes effect only if
page_owner=on is passed on the kernel command line of a
CONFIG_PAGE_OWNER-enabled system. Thus, unless the admin enables it
via the command line option, stackdepot just wastes 8MB of memory
without any consumer.

Making it configurable and using a lower value helps to enable
features like CONFIG_PAGE_OWNER without any significant overhead.
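
As a rough illustration of the savings (a standalone userspace sketch,
assuming 64-bit pointers so each hash bucket head is 8 bytes; the
numbers, not the program, are the point):

#include <stdio.h>

int main(void)
{
	unsigned int orders[] = { 12, 20 };

	for (unsigned int i = 0; i < 2; i++) {
		unsigned long buckets = 1UL << orders[i];

		/* one pointer-sized bucket head per hash slot */
		printf("order %u: %lu buckets, %lu KiB\n",
		       orders[i], buckets, buckets * 8 / 1024);
	}
	return 0;
}

This prints 32 KiB for order 12 and 8192 KiB (the 8MB mentioned above)
for order 20.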

Signed-off-by: Yogesh Lal 
Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
Reviewed-by: Minchan Kim 
Reviewed-by: Alexander Potapenko 
---
 lib/Kconfig  | 9 +
 lib/stackdepot.c | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index b46a9fd..96ee125 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -651,6 +651,15 @@ config STACKDEPOT
bool
select STACKTRACE
 
+config STACK_HASH_ORDER
+   int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+   range 12 20
+   default 20
+   depends on STACKDEPOT
+   help
+Select the hash size as a power of 2 for the stackdepot hash table.
+Choose a lower value to reduce the memory impact.
+
 config SBITMAP
bool
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 2caffc6..dff8521 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -142,8 +142,7 @@ static struct stack_record *depot_alloc_stack(unsigned long 
*entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v6 2/2] lib: stackdepot: Add support to disable stack depot

2021-01-27 Thread vjitta
From: Vijayanand Jitta 

Add a kernel parameter stack_depot_disable to disable stack depot,
so that the stack hash table doesn't consume any memory when stack
depot is disabled.

The use case is CONFIG_PAGE_OWNER without page_owner=on. Without
this patch, stackdepot consumes the memory for the hash table: by
default 8MB, which is far from trivial.

With this option, on a CONFIG_PAGE_OWNER-enabled system with
page_owner=off and stack_depot_disable on the kernel command line,
we can save the memory otherwise wasted on the hash table.
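
For example (an illustrative command line, not part of this patch), a
CONFIG_PAGE_OWNER-enabled kernel booted with:

    page_owner=off stack_depot_disable=on

skips the hash table allocation entirely; kstrtobool() also accepts
the usual 1/y/n/0/off spellings for the parameter value.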

Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 Documentation/admin-guide/kernel-parameters.txt |  6 +
 include/linux/stackdepot.h  |  1 +
 init/main.c |  2 ++
 lib/stackdepot.c| 32 +
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 44fde25..381fad9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5133,6 +5133,12 @@
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
 
+   stack_depot_disable= [KNL]
+   Setting this to true through kernel command line will
+   disable the stack depot thereby saving the static memory
+   consumed by the stack hash table. By default this is set
+   to false.
+
stacktrace  [FTRACE]
Enabled the stack tracer on boot up.
 
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 24d49c7..eafd9aa 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -21,4 +21,5 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 
 unsigned int filter_irq_stacks(unsigned long *entries, unsigned int 
nr_entries);
 
+int stack_depot_init(void);
 #endif
diff --git a/init/main.c b/init/main.c
index 32b2a8a..8fcf9bb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -98,6 +98,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -827,6 +828,7 @@ static void __init mm_init(void)
page_ext_init_flatmem();
init_debug_pagealloc();
report_meminit();
+   stack_depot_init();
mem_init();
kmem_cache_init();
kmemleak_init();
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index dff8521..0398658 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
 
@@ -146,9 +147,32 @@ static struct stack_record *depot_alloc_stack(unsigned 
long *entries, int size,
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
-   [0 ...  STACK_HASH_SIZE - 1] = NULL
-};
+static bool stack_depot_disable;
+static struct stack_record **stack_table;
+
+static int __init is_stack_depot_disabled(char *str)
+{
+   kstrtobool(str, &stack_depot_disable);
+   if (stack_depot_disable) {
+   pr_info("Stack Depot is disabled\n");
+   stack_table = NULL;
+   }
+   return 0;
+}
+early_param("stack_depot_disable", is_stack_depot_disabled);
+
+int __init stack_depot_init(void)
+{
+   if (!stack_depot_disable) {
+   size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+   int i;
+
+   stack_table = memblock_alloc(size, size);
+   for (i = 0; i < STACK_HASH_SIZE;  i++)
+   stack_table[i] = NULL;
+   }
+   return 0;
+}
 
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -242,7 +266,7 @@ depot_stack_handle_t stack_depot_save(unsigned long 
*entries,
unsigned long flags;
u32 hash;
 
-   if (unlikely(nr_entries == 0))
+   if (unlikely(nr_entries == 0) || stack_depot_disable)
goto fast_exit;
 
hash = hash_stack(entries, nr_entries);
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v5 2/2] lib: stackdepot: Add support to disable stack depot

2021-01-18 Thread vjitta
From: Vijayanand Jitta 

Add a kernel parameter stack_depot_disable to disable stack depot,
so that the stack hash table doesn't consume any memory when stack
depot is disabled.

Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 include/linux/stackdepot.h |  1 +
 init/main.c|  2 ++
 lib/stackdepot.c   | 33 +
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 24d49c7..eafd9aa 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -21,4 +21,5 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 
 unsigned int filter_irq_stacks(unsigned long *entries, unsigned int 
nr_entries);
 
+int stack_depot_init(void);
 #endif
diff --git a/init/main.c b/init/main.c
index 32b2a8a..8fcf9bb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -98,6 +98,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -827,6 +828,7 @@ static void __init mm_init(void)
page_ext_init_flatmem();
init_debug_pagealloc();
report_meminit();
+   stack_depot_init();
mem_init();
kmem_cache_init();
kmemleak_init();
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index dff8521..d20e6fd 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -31,6 +31,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
 
@@ -146,9 +148,32 @@ static struct stack_record *depot_alloc_stack(unsigned 
long *entries, int size,
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
-   [0 ...  STACK_HASH_SIZE - 1] = NULL
-};
+static bool stack_depot_disable;
+static struct stack_record **stack_table;
+
+static int __init is_stack_depot_disabled(char *str)
+{
+   kstrtobool(str, &stack_depot_disable);
+   if (stack_depot_disable) {
+   pr_info("Stack Depot is disabled\n");
+   stack_table = NULL;
+   }
+   return 0;
+}
+early_param("stack_depot_disable", is_stack_depot_disabled);
+
+int __init stack_depot_init(void)
+{
+   if (!stack_depot_disable) {
+   size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+   int i;
+
+   stack_table = memblock_alloc(size, size);
+   for (i = 0; i < STACK_HASH_SIZE;  i++)
+   stack_table[i] = NULL;
+   }
+   return 0;
+}
 
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -242,7 +267,7 @@ depot_stack_handle_t stack_depot_save(unsigned long 
*entries,
unsigned long flags;
u32 hash;
 
-   if (unlikely(nr_entries == 0))
+   if (unlikely(nr_entries == 0) || stack_depot_disable)
goto fast_exit;
 
hash = hash_stack(entries, nr_entries);
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v5 1/2] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2021-01-18 Thread vjitta
From: Yogesh Lal 

Use CONFIG_STACK_HASH_ORDER to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case.

One example is page owner: the default value of STACK_HASH_SIZE leads
stack depot to consume 8MB of static memory. Making it configurable
and using a lower value helps to enable features like
CONFIG_PAGE_OWNER without any significant overhead.

Signed-off-by: Yogesh Lal 
Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 lib/Kconfig  | 9 +
 lib/stackdepot.c | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index b46a9fd..96ee125 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -651,6 +651,15 @@ config STACKDEPOT
bool
select STACKTRACE
 
+config STACK_HASH_ORDER
+   int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+   range 12 20
+   default 20
+   depends on STACKDEPOT
+   help
+Select the hash size as a power of 2 for the stackdepot hash table.
+Choose a lower value to reduce the memory impact.
+
 config SBITMAP
bool
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 2caffc6..dff8521 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -142,8 +142,7 @@ static struct stack_record *depot_alloc_stack(unsigned long 
*entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v4 2/2] lib: stackdepot: Add support to disable stack depot

2020-12-30 Thread vjitta
From: Vijayanand Jitta 

Add a kernel parameter stack_depot_disable to disable stack depot,
so that the stack hash table doesn't consume any memory when stack
depot is disabled.

Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 lib/stackdepot.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 614ac28..72b9050 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
 
@@ -145,10 +146,36 @@ static struct stack_record *depot_alloc_stack(unsigned 
long *entries, int size,
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
+static struct stack_record *stack_table_tmp[STACK_HASH_SIZE] __initdata = {
[0 ...  STACK_HASH_SIZE - 1] = NULL
 };
 
+static bool stack_depot_disable;
+static struct stack_record **stack_table __refdata = stack_table_tmp;
+
+static int __init is_stack_depot_disabled(char *str)
+{
+   kstrtobool(str, &stack_depot_disable);
+   if (stack_depot_disable) {
+   pr_info("Stack Depot is disabled\n");
+   stack_table = NULL;
+   }
+   return 0;
+}
+early_param("stack_depot_disable", is_stack_depot_disabled);
+
+static int __init init_stackdepot(void)
+{
+   if (!stack_depot_disable) {
+   size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+
+   stack_table = vmalloc(size);
+   memcpy(stack_table, stack_table_tmp, size);
+   }
+   return 0;
+}
+early_initcall(init_stackdepot);
+
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
 {
@@ -230,7 +257,7 @@ depot_stack_handle_t stack_depot_save(unsigned long 
*entries,
unsigned long flags;
u32 hash;
 
-   if (unlikely(nr_entries == 0))
+   if (unlikely(nr_entries == 0) || !stack_table)
goto fast_exit;
 
hash = hash_stack(entries, nr_entries);
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v4 1/2] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2020-12-30 Thread vjitta
From: Yogesh Lal 

Use STACK_HASH_ORDER_SHIFT to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case.

One example is page owner: the default value of STACK_HASH_SIZE leads
stack depot to consume 8MB of static memory. Making it configurable
and using a lower value helps to enable features like
CONFIG_PAGE_OWNER without any significant overhead.

Signed-off-by: Yogesh Lal 
Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 lib/Kconfig  | 9 +
 lib/stackdepot.c | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index 3321d04..fd967fb 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -623,6 +623,15 @@ config STACKDEPOT
bool
select STACKTRACE
 
+config STACK_HASH_ORDER_SHIFT
+   int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+   range 12 20
+   default 20
+   depends on STACKDEPOT
+   help
+Select the hash size as a power of 2 for the stackdepot hash table.
+Choose a lower value to reduce the memory impact.
+
 config SBITMAP
bool
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 81c69c0..614ac28 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -141,8 +141,7 @@ static struct stack_record *depot_alloc_stack(unsigned long 
*entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER_SHIFT)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v3] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2020-12-09 Thread vjitta
From: Yogesh Lal 

Add a kernel parameter stack_hash_order to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case, thereby reducing the
static memory overhead.

One example is page owner: the default value of STACK_HASH_SIZE leads
stack depot to consume 8MB of static memory. Making it configurable
and using a lower value helps to enable features like
CONFIG_PAGE_OWNER without any significant overhead.
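
For example (illustrative, not part of this patch), booting with
stack_hash_order=14 would give 2^14 hash buckets, i.e. 128KB of bucket
pointers on a 64-bit system, instead of the 8MB required by the
default order of 20.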

Suggested-by: Minchan Kim 
Signed-off-by: Yogesh Lal 
Signed-off-by: Vijayanand Jitta 
---
 lib/stackdepot.c | 31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 81c69c0..e0eebfd 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
 
@@ -141,14 +142,36 @@ static struct stack_record *depot_alloc_stack(unsigned 
long *entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define MAX_STACK_HASH_ORDER 20
+#define MAX_STACK_HASH_SIZE (1L << MAX_STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << stack_hash_order)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
-   [0 ...  STACK_HASH_SIZE - 1] = NULL
+static unsigned int stack_hash_order = 20;
+static struct stack_record *stack_table_def[MAX_STACK_HASH_SIZE] __initdata = {
+   [0 ...  MAX_STACK_HASH_SIZE - 1] = NULL
 };
+static struct stack_record **stack_table __refdata = stack_table_def;
+
+static int __init setup_stack_hash_order(char *str)
+{
+   kstrtouint(str, 0, &stack_hash_order);
+   if (stack_hash_order > MAX_STACK_HASH_ORDER)
+   stack_hash_order = MAX_STACK_HASH_ORDER;
+   return 0;
+}
+early_param("stack_hash_order", setup_stack_hash_order);
+
+static int __init init_stackdepot(void)
+{
+   size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+
+   stack_table = vmalloc(size);
+   memcpy(stack_table, stack_table_def, size);
+   return 0;
+}
+early_initcall(init_stackdepot);
 
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
-- 
2.7.4
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation


[PATCH v2] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2020-11-25 Thread vjitta
From: Yogesh Lal 

Add a kernel parameter stack_hash_order to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case, thereby reducing the
static memory overhead.

One example is page owner: the default value of STACK_HASH_SIZE leads
stack depot to consume 8MB of static memory. Making it configurable
and using a lower value helps to enable features like
CONFIG_PAGE_OWNER without any significant overhead.

Suggested-by: Minchan Kim 
Signed-off-by: Yogesh Lal 
Signed-off-by: Vijayanand Jitta 
---
 lib/stackdepot.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 81c69c0..ce53598 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -141,14 +141,31 @@ static struct stack_record *depot_alloc_stack(unsigned 
long *entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+static unsigned int stack_hash_order = 20;
+#define STACK_HASH_SIZE (1L << stack_hash_order)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
-   [0 ...  STACK_HASH_SIZE - 1] = NULL
-};
+static struct stack_record **stack_table;
+
+static int __init setup_stack_hash_order(char *str)
+{
+   kstrtouint(str, 0, &stack_hash_order);
+   return 0;
+}
+early_param("stack_hash_order", setup_stack_hash_order);
+
+static int __init init_stackdepot(void)
+{
+   int i;
+
+   stack_table = kvmalloc(sizeof(struct stack_record *) * STACK_HASH_SIZE, GFP_KERNEL);
+   for (i = 0; i < STACK_HASH_SIZE; i++)
+   stack_table[i] = NULL;
+   return 0;
+}
+
+early_initcall(init_stackdepot);
 
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH] lib: stackdepot: Add support to configure STACK_HASH_SIZE

2020-10-22 Thread vjitta
From: Yogesh Lal 

Use STACK_HASH_ORDER_SHIFT to configure STACK_HASH_SIZE.

The aim is to have a configurable value for STACK_HASH_SIZE, so that
one can configure it depending on the use case.

One example is page owner: the default value of STACK_HASH_SIZE leads
stack depot to consume 8MB of static memory. Making it configurable
and using a lower value helps to enable features like
CONFIG_PAGE_OWNER without any significant overhead.

Signed-off-by: Yogesh Lal 
Signed-off-by: Vinayak Menon 
Signed-off-by: Vijayanand Jitta 
---
 lib/Kconfig  | 9 +
 lib/stackdepot.c | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index 18d76b6..b3f8259 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -651,6 +651,15 @@ config STACKDEPOT
bool
select STACKTRACE
 
+config STACK_HASH_ORDER_SHIFT
+   int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+   range 12 20
+   default 20
+   depends on STACKDEPOT
+   help
+Select the hash size as a power of 2 for the stackdepot hash table.
+Choose a lower value to reduce the memory impact.
+
 config SBITMAP
bool
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 2caffc6..413c20b 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -142,8 +142,7 @@ static struct stack_record *depot_alloc_stack(unsigned long 
*entries, int size,
return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER_SHIFT)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v5 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-09-30 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c3a1a8e..faf9b13 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -25,6 +25,7 @@ static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
 static void fq_destroy_all_entries(struct iova_domain *iovad);
 static void fq_flush_timeout(struct timer_list *t);
+static void free_global_cached_iovas(struct iova_domain *iovad);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
@@ -442,6 +443,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long 
size,
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1057,5 +1059,26 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+static void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock_irqsave(&rcache->lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(&rcache->lock, flags);
+   }
+}
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v5 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-09-30 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.
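
To make the failure concrete, the layout after step 5 looks roughly as
follows (assuming allocations are carved from the top of the 1GB space
downwards, since the allocator walks from limit_pfn towards start_pfn):

   [ 524MB .. 1024MB )  500MB allocation from step 1
   [ 512MB ..  524MB )  12MB from step 2, freed in step 4 (the only hole)
   [  13MB ..  512MB )  499MB allocation from step 3
   [   0MB ..   13MB )  13MB allocation from step 5; the cached node now
                        points at its lowest pfn

A later 2MB request starts at the cached node and only walks towards
lower pfns, so it never reaches the 12MB hole above it and fails.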

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 30d969a..c3a1a8e 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, retry_pfn;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   retry_pfn = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = retry_pfn;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v4 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-09-29 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c3a1a8e..64ce082 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -442,6 +442,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long 
size,
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1057,5 +1058,26 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+static void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock_irqsave(&rcache->lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(&rcache->lock, flags);
+   }
+}
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v4 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-09-29 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 30d969a..c3a1a8e 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, retry_pfn;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   retry_pfn = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = retry_pfn;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v3 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-09-28 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index acc8bee..b4f04fd 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -442,6 +442,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long 
size,
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1057,5 +1058,26 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+static void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock_irqsave(&rcache->lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(&rcache->lock, flags);
+   }
+}
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v3 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-09-28 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 30d969a..acc8bee 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, low_pfn_new;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   low_pfn_new = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = low_pfn_new;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
2.7.4



[PATCH v2 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-08-20 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 49fc01f..4e77116 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, low_pfn_new;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   low_pfn_new = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = low_pfn_new;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1



[PATCH v2 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-08-20 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +++
 include/linux/iova.h |  6 ++
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 4e77116..5836c87 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -442,6 +442,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned 
long pfn)
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1055,5 +1056,27 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock_irqsave(&rcache->lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(&rcache->lock, flags);
+   }
+}
+
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/iova.h b/include/linux/iova.h
index a0637ab..a905726 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -163,6 +163,7 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 struct iova *split_and_remove_iova(struct iova_domain *iovad,
struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi);
 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
+void free_global_cached_iovas(struct iova_domain *iovad);
 #else
 static inline int iova_cache_get(void)
 {
@@ -270,6 +271,11 @@ static inline void free_cpu_cached_iovas(unsigned int cpu,
 struct iova_domain *iovad)
 {
 }
+
+static inline void free_global_cached_iovas(struct iova_domain *iovad)
+{
+}
+
 #endif
 
 #endif
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1



[PATCH 2/2] iommu/iova: Free global iova rcache on iova alloc failure

2020-07-03 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Change-Id: Ib8236dc88ba5516b73d4fbf6bf8e68bbf09bbad2
Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +++
 include/linux/iova.h |  6 ++
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 4e77116..5836c87 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -442,6 +442,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned 
long pfn)
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1055,5 +1056,27 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   unsigned long flags;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock_irqsave(&rcache->lock, flags);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock_irqrestore(&rcache->lock, flags);
+   }
+}
+
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/iova.h b/include/linux/iova.h
index a0637ab..a905726 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -163,6 +163,7 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 struct iova *split_and_remove_iova(struct iova_domain *iovad,
struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi);
 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
+void free_global_cached_iovas(struct iova_domain *iovad);
 #else
 static inline int iova_cache_get(void)
 {
@@ -270,6 +271,11 @@ static inline void free_cpu_cached_iovas(unsigned int cpu,
 struct iova_domain *iovad)
 {
 }
+
+static inline void free_global_cached_iovas(struct iova_domain *iovad)
+{
+}
+
 #endif
 
 #endif
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1



[PATCH 1/2] iommu/iova: Retry from last rb tree node if iova search fails

2020-07-03 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 49fc01f..4e77116 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, low_pfn_new;
unsigned long align_mask = ~0UL;
+   unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   low_pfn_new = curr_iova->pfn_hi + 1;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   high_pfn = min(high_pfn, curr_iova->pfn_lo);
+   new_pfn = (high_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
-   } while (curr && new_pfn <= curr_iova->pfn_hi);
-
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
+
+   if (high_pfn < size || new_pfn < low_pfn) {
+   if (low_pfn == iovad->start_pfn && low_pfn_new < limit_pfn) {
+   high_pfn = limit_pfn;
+   low_pfn = low_pfn_new;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1



[PATCH] iommu/iova: Free global iova rcache on iova alloc failure

2020-05-27 Thread vjitta
From: Vijayanand Jitta 

Whenever an iova alloc request fails, we free the iova ranges
present in the percpu iova rcaches and then retry, but the global
iova rcache is not freed. As a result we could still see iova alloc
failures even after the retry, since the global rcache is holding on
to the iovas, which can cause fragmentation. So, free the global
iova rcache as well and then go for the retry.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 22 ++
 include/linux/iova.h |  6 ++
 2 files changed, 28 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0e6a953..5ae0328 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -431,6 +431,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned 
long pfn)
flush_rcache = false;
for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad);
+   free_global_cached_iovas(iovad);
goto retry;
}
 
@@ -1044,5 +1045,26 @@ void free_cpu_cached_iovas(unsigned int cpu, struct 
iova_domain *iovad)
}
 }
 
+/*
+ * free all the IOVA ranges of global cache
+ */
+void free_global_cached_iovas(struct iova_domain *iovad)
+{
+   struct iova_rcache *rcache;
+   int i, j;
+
+   for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+   rcache = &iovad->rcaches[i];
+   spin_lock(&rcache->lock);
+   for (j = 0; j < rcache->depot_size; ++j) {
+   iova_magazine_free_pfns(rcache->depot[j], iovad);
+   iova_magazine_free(rcache->depot[j]);
+   rcache->depot[j] = NULL;
+   }
+   rcache->depot_size = 0;
+   spin_unlock(&rcache->lock);
+   }
+}
+
 MODULE_AUTHOR("Anil S Keshavamurthy ");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/iova.h b/include/linux/iova.h
index a0637ab..a905726 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -163,6 +163,7 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 struct iova *split_and_remove_iova(struct iova_domain *iovad,
struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi);
 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
+void free_global_cached_iovas(struct iova_domain *iovad);
 #else
 static inline int iova_cache_get(void)
 {
@@ -270,6 +271,11 @@ static inline void free_cpu_cached_iovas(unsigned int cpu,
 struct iova_domain *iovad)
 {
 }
+
+static inline void free_global_cached_iovas(struct iova_domain *iovad)
+{
+}
+
 #endif
 
 #endif
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation


[PATCH v2] iommu/iova: Retry from last rb tree node if iova search fails

2020-05-11 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0e6a953..7d82afc 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -184,8 +184,9 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long new_pfn;
+   unsigned long new_pfn, alloc_lo_new;
unsigned long align_mask = ~0UL;
+   unsigned long alloc_hi = limit_pfn, alloc_lo = iovad->start_pfn;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,15 +199,25 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+   alloc_lo_new = curr_iova->pfn_hi;
+
+retry:
do {
-   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
-   new_pfn = (limit_pfn - size) & align_mask;
+   alloc_hi = min(alloc_hi, curr_iova->pfn_lo);
+   new_pfn = (alloc_hi - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
curr_iova = rb_entry(curr, struct iova, node);
} while (curr && new_pfn <= curr_iova->pfn_hi);
 
-   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   if (alloc_hi < size || new_pfn < alloc_lo) {
+   if (alloc_lo == iovad->start_pfn && alloc_lo_new < limit_pfn) {
+   alloc_hi = limit_pfn;
+   alloc_lo = alloc_lo_new;
+   curr = &iovad->anchor.node;
+   curr_iova = rb_entry(curr, struct iova, node);
+   goto retry;
+   }
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1


[PATCH] iommu/iova: Retry from last rb tree node if iova search fails

2020-05-06 Thread vjitta
From: Vijayanand Jitta 

Whenever a new iova alloc request comes in, the iova is always
searched from the cached node and the nodes previous to the cached
node. So even if there is free iova space available in the nodes
next to the cached node, iova allocation can still fail because of
this approach.

Consider the following sequence of iova alloc and frees on
1GB of iova space

1) alloc - 500MB
2) alloc - 12MB
3) alloc - 499MB
4) free -  12MB which was allocated in step 2
5) alloc - 13MB

After the above sequence we will have 12MB of free iova space, and
the cached node will be pointing to the iova pfn of the last 13MB
allocation, which is the lowest iova pfn of that iova space. Now if
we get an alloc request of 2MB, we only search from the cached node
and then look at lower iova pfns for a free iova; as there aren't
any, the iova alloc fails even though there is 12MB of free iova
space.

To avoid such iova search failures, do a retry from the last rb tree
node when the iova search fails; this will search the entire tree and
get an iova if one is available.

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/iova.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0e6a953..2985222 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -186,6 +186,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
unsigned long flags;
unsigned long new_pfn;
unsigned long align_mask = ~0UL;
+   bool retry = false;
 
if (size_aligned)
align_mask <<= fls_long(size - 1);
@@ -198,6 +199,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain 
*iovad,
 
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
+
+retry_search:
do {
limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
new_pfn = (limit_pfn - size) & align_mask;
@@ -207,6 +210,14 @@ static int __alloc_and_insert_iova_range(struct 
iova_domain *iovad,
} while (curr && new_pfn <= curr_iova->pfn_hi);
 
if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   if (!retry) {
+   curr = rb_last(&iovad->rbroot);
+   curr_iova = rb_entry(curr, struct iova, node);
+   limit_pfn = curr_iova->pfn_lo;
+   retry = true;
+   goto retry_search;
+   }
+
iovad->max32_alloc_size = size;
goto iova32_full;
}
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
1.9.1


Re: [PATCH] ion: Consider ion pool pages as indirectly reclaimable

2018-04-27 Thread vjitta

On 2018-04-27 10:40, vji...@codeaurora.org wrote:

On 2018-04-25 21:17, Laura Abbott wrote:

On 04/24/2018 08:43 PM, vji...@codeaurora.org wrote:

From: Vijayanand Jitta 

An issue is observed where mallocs are failing due to overcommit failure.
The failure happens when the ION page pool is large, since the ION page
pool is not considered reclaimable by the overcommit calculation code.
This change considers ion pool pages as indirectly reclaimable, and thus
they are accounted as available memory in the overcommit calculation.

Signed-off-by: Vijayanand Jitta 
---
  drivers/staging/android/ion/ion_page_pool.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/staging/android/ion/ion_page_pool.c 
b/drivers/staging/android/ion/ion_page_pool.c

index db8f614..9bc56eb 100644
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -32,6 +32,9 @@ static void ion_page_pool_add(struct ion_page_pool 
*pool, struct page *page)

list_add_tail(&page->lru, &pool->low_items);
pool->low_count++;
}
+
+	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   (1 << (PAGE_SHIFT + pool->order)));
mutex_unlock(&pool->mutex);
  }
  @@ -50,6 +53,8 @@ static struct page *ion_page_pool_remove(struct 
ion_page_pool *pool, bool high)

}
list_del(&page->lru);
+	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   -(1 << (PAGE_SHIFT + pool->order)));
return page;
  }



I'm sure this fixes the problem but I don't think we want to
start throwing page adjustments into Ion. Why isn't this
memory already considered reclaimable by existing calculations?

Thanks,
Laura


You can refer to the discussion introducing NR_INDIRECTLY_RECLAIMABLE_BYTES
for memory which is not currently considered as reclaimable here:
https://lkml.org/lkml/2018/3/5/361

Thanks,
Vijay


There was also discussion specific to ion in that thread; you can find it
here:

https://lkml.org/lkml/2018/4/25/642

Thanks,
Vijay


Re: [PATCH] ion: Consider ion pool pages as indirectly reclaimable

2018-04-26 Thread vjitta

On 2018-04-25 21:17, Laura Abbott wrote:

On 04/24/2018 08:43 PM, vji...@codeaurora.org wrote:

From: Vijayanand Jitta 

An issue is observed where mallocs are failing due to overcommit failure.
The failure happens when the ION page pool is large, since the ION page
pool is not considered reclaimable by the overcommit calculation code.
This change considers ion pool pages as indirectly reclaimable, and thus
they are accounted as available memory in the overcommit calculation.

Signed-off-by: Vijayanand Jitta 
---
  drivers/staging/android/ion/ion_page_pool.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/staging/android/ion/ion_page_pool.c 
b/drivers/staging/android/ion/ion_page_pool.c

index db8f614..9bc56eb 100644
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -32,6 +32,9 @@ static void ion_page_pool_add(struct ion_page_pool 
*pool, struct page *page)

list_add_tail(&page->lru, &pool->low_items);
pool->low_count++;
}
+
+	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   (1 << (PAGE_SHIFT + pool->order)));
mutex_unlock(&pool->mutex);
  }
  @@ -50,6 +53,8 @@ static struct page *ion_page_pool_remove(struct 
ion_page_pool *pool, bool high)

}
list_del(&page->lru);
+	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   -(1 << (PAGE_SHIFT + pool->order)));
return page;
  }



I'm sure this fixes the problem but I don't think we want to
start throwing page adjustments into Ion. Why isn't this
memory already considered reclaimable by existing calculations?

Thanks,
Laura


You can refer to the discussion introducing NR_INDIRECTLY_RECLAIMABLE_BYTES
for memory which is not currently considered as reclaimable here:
https://lkml.org/lkml/2018/3/5/361

Thanks,
Vijay


[PATCH] ion: Consider ion pool pages as indirectly reclaimable

2018-04-24 Thread vjitta
From: Vijayanand Jitta 

An issue is observed where mallocs are failing due to overcommit failure.
The failure happens when the ION page pool is large, since the ION page
pool is not considered reclaimable by the overcommit calculation code.
This change considers ion pool pages as indirectly reclaimable, and thus
they are accounted as available memory in the overcommit calculation.
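
As a purely illustrative aside (assuming 4K pages, i.e. PAGE_SHIFT == 12;
ION pool orders vary by platform), the adjustment made per pool entry in
the diff below works out to 1 << (PAGE_SHIFT + order) bytes:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed for the example; architecture dependent */

int main(void)
{
	/* bytes added to (or subtracted from) NR_INDIRECTLY_RECLAIMABLE_BYTES
	 * for each page block entering or leaving the pool */
	for (unsigned int order = 0; order <= 4; order++)
		printf("order-%u entry -> %lu KB counted as reclaimable\n",
		       order, (1UL << (PAGE_SHIFT + order)) / 1024);
	return 0;
}

So a pool holding many higher-order entries can hide a significant amount
of memory from the overcommit calculation until it is accounted this way.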

Signed-off-by: Vijayanand Jitta 
---
 drivers/staging/android/ion/ion_page_pool.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/staging/android/ion/ion_page_pool.c 
b/drivers/staging/android/ion/ion_page_pool.c
index db8f614..9bc56eb 100644
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -32,6 +32,9 @@ static void ion_page_pool_add(struct ion_page_pool *pool, 
struct page *page)
	list_add_tail(&page->lru, &pool->low_items);
pool->low_count++;
}
+
+   mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   (1 << (PAGE_SHIFT + pool->order)));
	mutex_unlock(&pool->mutex);
 }
 
@@ -50,6 +53,8 @@ static struct page *ion_page_pool_remove(struct ion_page_pool 
*pool, bool high)
}
 
	list_del(&page->lru);
+   mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
+   -(1 << (PAGE_SHIFT + pool->order)));
return page;
 }
 
-- 
1.9.1



[PATCH] iommu/arm-smmu: Use context bank TLBSTATUS registers

2017-05-22 Thread vjitta
From: Vijayanand Jitta 

There are TLBSTATUS registers in SMMU global register space as well as
context bank register space.  Currently we're polling the global
TLBSTATUS registers after TLB invalidation, even when using the TLB
invalidation registers from context bank address space.  This violates
the usage model described in the ARM SMMU spec.  Fix this by polling
context bank TLBSTATUS registers for context bank TLB operations, and
global TLBSTATUS registers for global TLB operations.
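
To make that usage model concrete, here is a toy userspace sketch of the
same "write the sync register, then poll the matching status register
until SACTIVE clears" pattern (illustration only: mmio_write()/mmio_read(),
the register variables and the poll limit are invented for the sketch; the
driver itself uses writel_relaxed() and readl_poll_timeout_atomic() as in
the diff below):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SACTIVE		(1u << 0)
#define MAX_POLLS	1000000

static uint32_t fake_status;	/* pretend hardware status register */

/* stand-ins for MMIO accessors on a real device */
static void mmio_write(volatile uint32_t *reg, uint32_t val) { *reg = val; }
static uint32_t mmio_read(volatile uint32_t *reg) { return *reg; }

/*
 * Issue a sync on one register pair and poll that pair's status until
 * SACTIVE clears; the point of the patch is that both registers must come
 * from the same address space (a context bank, or the global space).
 */
static bool tlb_sync(volatile uint32_t *sync_reg, volatile uint32_t *status_reg)
{
	mmio_write(sync_reg, 0);			/* kick off the sync */
	for (int i = 0; i < MAX_POLLS; i++)
		if (!(mmio_read(status_reg) & SACTIVE))	/* invalidation done */
			return true;
	return false;					/* caller would log a timeout */
}

int main(void)
{
	static uint32_t fake_sync;

	fake_status = 0;	/* pretend the hardware finished immediately */
	printf("sync %s\n", tlb_sync(&fake_sync, &fake_status) ? "ok" : "timed out");
	return 0;
}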

Signed-off-by: Vijayanand Jitta 
---
 drivers/iommu/arm-smmu.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7cecc37..8dc6da9 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -233,6 +233,9 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_SMMU_CB_S1_TLBIVAL 0x620
 #define ARM_SMMU_CB_S2_TLBIIPAS2   0x630
 #define ARM_SMMU_CB_S2_TLBIIPAS2L  0x638
+#define ARM_SMMU_CB_TLBSYNC    0x7f0
+#define ARM_SMMU_CB_TLBSTATUS  0x7f4
+#define TLBSTATUS_SACTIVE  (1 << 0)
 #define ARM_SMMU_CB_ATS1PR 0x800
 #define ARM_SMMU_CB_ATSR   0x8f0
 
@@ -580,6 +583,19 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int 
idx)
 }
 
 /* Wait for any pending TLB invalidations to complete */
+static void arm_smmu_tlb_sync_cb(struct arm_smmu_device *smmu,
+   int cbndx)
+{
+   void __iomem *base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cbndx);
+   u32 val;
+
+   writel_relaxed(0, base + ARM_SMMU_CB_TLBSYNC);
+   if (readl_poll_timeout_atomic(base + ARM_SMMU_CB_TLBSTATUS, val,
+ !(val & TLBSTATUS_SACTIVE),
+ 0, TLB_LOOP_TIMEOUT))
+   dev_err(smmu->dev, "TLBSYNC timeout!\n");
+}
+
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
int count = 0;
@@ -601,7 +617,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device 
*smmu)
 static void arm_smmu_tlb_sync(void *cookie)
 {
struct arm_smmu_domain *smmu_domain = cookie;
-   __arm_smmu_tlb_sync(smmu_domain->smmu);
+   arm_smmu_tlb_sync_cb(smmu_domain->smmu, smmu_domain->cfg.cbndx);
 }
 
 static void arm_smmu_tlb_inv_context(void *cookie)
@@ -616,13 +632,13 @@ static void arm_smmu_tlb_inv_context(void *cookie)
base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
   base + ARM_SMMU_CB_S1_TLBIASID);
+   arm_smmu_tlb_sync_cb(smmu, cfg->cbndx);
} else {
base = ARM_SMMU_GR0(smmu);
writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
   base + ARM_SMMU_GR0_TLBIVMID);
+   __arm_smmu_tlb_sync(smmu);
}
-
-   __arm_smmu_tlb_sync(smmu);
 }
 
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-- 
1.9.1


