Signed-off-by: Thomas Schoebel-Theuer <[email protected]>
---
 drivers/staging/mars/brick_mem.c | 1081 ++++++++++++++++++++++++++++++++++++++
 include/linux/brick/brick_mem.h  |  218 ++++++++
 2 files changed, 1299 insertions(+)
 create mode 100644 drivers/staging/mars/brick_mem.c
 create mode 100644 include/linux/brick/brick_mem.h

diff --git a/drivers/staging/mars/brick_mem.c b/drivers/staging/mars/brick_mem.c
new file mode 100644
index 0000000..03a3d28
--- /dev/null
+++ b/drivers/staging/mars/brick_mem.c
@@ -0,0 +1,1081 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+
+#include <linux/atomic.h>
+
+#include <linux/brick/brick_mem.h>
+#include <linux/brick/brick_say.h>
+#include <linux/brick/lamport.h>
+
+#define USE_KERNEL_PAGES               /*  currently mandatory (vmalloc does not work) */
+
+#define MAGIC_BLOCK                    ((int)0x8B395D7B)
+#define MAGIC_BEND                     ((int)0x8B395D7C)
+#define MAGIC_MEM1                     ((int)0x8B395D7D)
+#define MAGIC_MEM2                     ((int)0x9B395D8D)
+#define MAGIC_MEND1                    ((int)0x8B395D7E)
+#define MAGIC_MEND2                    ((int)0x9B395D8E)
+#define MAGIC_STR                      ((int)0x8B395D7F)
+#define MAGIC_SEND                     ((int)0x9B395D8F)
+
+/* Access the int located 'offset' bytes behind 'ptr'; usable as an lvalue. */
+#define INT_ACCESS(ptr, offset) (*(int *)(((char *)(ptr)) + (offset)))
+
+/*
+ * Expand to the format string plus the leading arguments of one log line:
+ * two timestamps, the class name, task name, CPU number, file, line and
+ * function, followed by the caller's own format.
+ * Relies on _s_now and _l_now being in scope, as declared by _BRICK_MSG().
+ */
+#define _BRICK_FMT(_fmt, _class)                                       \
+       "%ld.%09ld %ld.%09ld MEM_%-5s %s[%d] %s:%d %s(): "              \
+               _fmt,                                                   \
+               _s_now.tv_sec, _s_now.tv_nsec,                          \
+               _l_now.tv_sec, _l_now.tv_nsec,                          \
+               say_class[_class],                                      \
+               current->comm, (int)smp_processor_id(),                 \
+               __BASE_FILE__,                                          \
+               __LINE__,                                               \
+               __func__
+
+/*
+ * Emit one message of class _class via say(), stamped with CURRENT_TIME
+ * and the value delivered by get_lamport().  When _dump is true, a stack
+ * dump follows the message.
+ */
+#define _BRICK_MSG(_class, _dump, _fmt, _args...)                      \
+       do {                                                            \
+               struct timespec _s_now = CURRENT_TIME;                  \
+               struct timespec _l_now;                                 \
+               get_lamport(&_l_now);                                   \
+               say(_class, _BRICK_FMT(_fmt, _class), ##_args);         \
+               if (_dump)                                              \
+                       dump_stack();                                   \
+       } while (0)
+
+/* Convenience wrappers; only BRICK_ERR additionally dumps the stack. */
+#define BRICK_ERR(_fmt, _args...) _BRICK_MSG(SAY_ERROR, true,  _fmt, ##_args)
+#define BRICK_WRN(_fmt, _args...) _BRICK_MSG(SAY_WARN, false, _fmt, ##_args)
+#define BRICK_INF(_fmt, _args...) _BRICK_MSG(SAY_INFO, false, _fmt, ##_args)
+
+/***********************************************************************/
+
+/*  limit handling */
+
+#include <linux/swap.h>
+
+/* Total RAM in KiB, as computed by get_total_ram(). */
+long long brick_global_memavail;
+/* NOTE(review): never written in this part of the file; presumably a limit in KiB -- confirm. */
+long long brick_global_memlimit;
+
+/*
+ * KiB of page blocks currently held: increased in __brick_block_alloc()
+ * and decreased in __brick_block_free().
+ */
+atomic64_t brick_global_block_used = ATOMIC64_INIT(0);
+
+/*
+ * Query the kernel for the amount of RAM, store it in KiB in
+ * brick_global_memavail and log the value.
+ */
+void get_total_ram(void)
+{
+       struct sysinfo info = {};
+
+       si_meminfo(&info);
+       /* totalram is multiplied by the page size expressed in KiB */
+       brick_global_memavail = (long long)info.totalram * (PAGE_SIZE / 1024);
+
+       BRICK_INF("total RAM = %lld [KiB]\n", brick_global_memavail);
+}
+
+/***********************************************************************/
+
+/*  small memory allocation (use this only for len < PAGE_SIZE) */
+
+/*
+ * BRICK_DEBUG_MEM serves both as the debug switch and as the size of the
+ * per-line statistics arrays below (they are indexed by the caller's
+ * source line, clamped into range by _brick_mem_alloc()).
+ */
+#ifdef BRICK_DEBUG_MEM
+static atomic_t phys_mem_alloc = ATOMIC_INIT(0);
+static atomic_t mem_redirect_alloc = ATOMIC_INIT(0);
+static atomic_t mem_count[BRICK_DEBUG_MEM];
+static atomic_t mem_free[BRICK_DEBUG_MEM];
+static int  mem_len[BRICK_DEBUG_MEM];
+
+/* debug layout: 4 ints in front of the payload plus 2 guard ints behind it */
+#define PLUS_SIZE                      (6 * sizeof(int))
+#else
+/* plain layout: 2 ints in front of the payload; the first one holds the length */
+#define PLUS_SIZE                      (2 * sizeof(int))
+#endif
+
+/*
+ * Raw allocator behind _brick_mem_alloc().
+ *
+ * Requests of at least PAGE_SIZE bytes are handed to _brick_block_alloc();
+ * smaller ones are served by kmalloc(), retrying once per second until
+ * the allocation succeeds.
+ */
+static inline
+void *__brick_mem_alloc(int len)
+{
+       void *mem;
+
+       if (len >= PAGE_SIZE) {
+#ifdef BRICK_DEBUG_MEM
+               atomic_inc(&mem_redirect_alloc);
+#endif
+               return _brick_block_alloc(0, len, 0);
+       }
+
+       mem = kmalloc(len, GFP_BRICK);
+       while (unlikely(!mem)) {
+               msleep(1000);
+               mem = kmalloc(len, GFP_BRICK);
+       }
+#ifdef BRICK_DEBUG_MEM
+       atomic_inc(&phys_mem_alloc);
+#endif
+       return mem;
+}
+
+/*
+ * Counterpart of __brick_mem_alloc(): @len selects the same backend
+ * (block allocator for >= PAGE_SIZE, kfree() otherwise) that served
+ * the allocation.
+ */
+static inline
+void __brick_mem_free(void *data, int len)
+{
+       if (len < PAGE_SIZE) {
+               kfree(data);
+#ifdef BRICK_DEBUG_MEM
+               atomic_dec(&phys_mem_alloc);
+#endif
+               return;
+       }
+
+       _brick_block_free(data, len, 0);
+#ifdef BRICK_DEBUG_MEM
+       atomic_dec(&mem_redirect_alloc);
+#endif
+}
+
+/*
+ * Allocate @len bytes of payload preceded by a bookkeeping header.
+ *
+ * @len:  payload size in bytes
+ * @line: caller's source line; with BRICK_DEBUG_MEM it is clamped into
+ *        [0, BRICK_DEBUG_MEM - 1] and used as the statistics index
+ *
+ * Returns the payload pointer (behind the header).  With BRICK_DEBUG_MEM
+ * the header is { MAGIC_MEM1, len, line, MAGIC_MEM2 } and two guard ints
+ * follow the payload; otherwise only the length is stored up front.
+ */
+void *_brick_mem_alloc(int len, int line)
+{
+       void *raw;
+       void *payload;
+#ifdef BRICK_DEBUG_MEM
+       int *head;
+#endif
+
+#ifdef CONFIG_MARS_DEBUG
+       might_sleep();
+#endif
+
+       raw = __brick_mem_alloc(len + PLUS_SIZE);
+
+#ifdef BRICK_DEBUG_MEM
+       if (unlikely(line >= BRICK_DEBUG_MEM))
+               line = BRICK_DEBUG_MEM - 1;
+       else if (unlikely(line < 0))
+               line = 0;
+
+       head = raw;
+       head[0] = MAGIC_MEM1;
+       head[1] = len;
+       head[2] = line;
+       head[3] = MAGIC_MEM2;
+       payload = raw + 4 * sizeof(int);
+
+       /* trailing guard words directly behind the payload */
+       INT_ACCESS(payload, len) = MAGIC_MEND1;
+       INT_ACCESS(payload, len + sizeof(int)) = MAGIC_MEND2;
+
+       mem_len[line] = len;
+       atomic_inc(&mem_count[line]);
+#else
+       INT_ACCESS(raw, 0) = len;
+       payload = raw + PLUS_SIZE;
+#endif
+       return payload;
+}
+
+/*
+ * Release memory obtained from _brick_mem_alloc().
+ *
+ * @data:  payload pointer as returned by _brick_mem_alloc()
+ * @cline: caller's source line, used only in diagnostics
+ *
+ * The length is read back from the header in front of @data.  With
+ * BRICK_DEBUG_MEM the header and the trailing guard words are verified
+ * first; on any mismatch an error is logged and the function returns
+ * without freeing the memory.
+ */
+void _brick_mem_free(void *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+       void *test = data - 4 * sizeof(int);
+       int magic1 = INT_ACCESS(test, 0 * sizeof(int));
+       int len = INT_ACCESS(test, 1 * sizeof(int));
+       int line = INT_ACCESS(test, 2 * sizeof(int));
+       int magic2 = INT_ACCESS(test, 3 * sizeof(int));
+
+       if (unlikely(magic1 != MAGIC_MEM1)) {
+               BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+                       cline,
+                       magic1,
+                       MAGIC_MEM1,
+                       len);
+               goto _out_return;
+       }
+       if (unlikely(magic2 != MAGIC_MEM2)) {
+               BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+                       cline,
+                       magic2,
+                       MAGIC_MEM2,
+                       len);
+               goto _out_return;
+       }
+       if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+               BRICK_ERR("line %d memory corruption: alloc line = %d, len = %d\n", cline, line, len);
+               goto _out_return;
+       }
+       /* invalidate the leading magic so that a second free is detected */
+       INT_ACCESS(test, 0) = 0xffffffff;
+       magic1 = INT_ACCESS(data, len + 0 * sizeof(int));
+       if (unlikely(magic1 != MAGIC_MEND1)) {
+               BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+                       cline,
+                       magic1,
+                       MAGIC_MEND1,
+                       len);
+               goto _out_return;
+       }
+       magic2 = INT_ACCESS(data, len + 1 * sizeof(int));
+       if (unlikely(magic2 != MAGIC_MEND2)) {
+               BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+                       cline,
+                       magic2,
+                       MAGIC_MEND2,
+                       len);
+               goto _out_return;
+       }
+       /* invalidate the trailing guard as well */
+       INT_ACCESS(data, len) = 0xffffffff;
+       atomic_dec(&mem_count[line]);
+       atomic_inc(&mem_free[line]);
+#else
+       void *test = data - PLUS_SIZE;
+       int len = INT_ACCESS(test, 0 * sizeof(int));
+
+#endif
+       /* hand back the raw allocation, header included */
+       data = test;
+       __brick_mem_free(data, len + PLUS_SIZE);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/*  string memory allocation */
+
+/*
+ * STRING_PLUS is the number of extra bytes allocated per string:
+ *  - strong debugging: 3 header ints plus the STRING_CANARY text
+ *    (terminating NUL included) placed behind the string,
+ *  - plain BRICK_DEBUG_MEM: 3 header ints plus 1 trailing magic int,
+ *  - otherwise nothing.
+ */
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+# define STRING_CANARY                                                 \
+       "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+       "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+       "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+       "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+       "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+       "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+       " FILE = "      __FILE__                                        \
+       " VERSION = "   __VERSION__                                     \
+       " xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx STRING_error xxx\n"
+# define STRING_PLUS (sizeof(int) * 3 + sizeof(STRING_CANARY))
+#elif defined(BRICK_DEBUG_MEM)
+# define STRING_PLUS (sizeof(int) * 4)
+#else
+# define STRING_PLUS 0
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+/* number of string allocations currently outstanding */
+static atomic_t phys_string_alloc = ATOMIC_INIT(0);
+/* per-line statistics, indexed by the (clamped) allocating source line */
+static atomic_t string_count[BRICK_DEBUG_MEM];
+static atomic_t string_free[BRICK_DEBUG_MEM];
+
+#endif
+
+/*
+ * Allocate a zero-filled string buffer of @len bytes.
+ *
+ * @len:  requested size; values <= 0 select BRICK_STRING_LEN
+ * @line: caller's source line; with BRICK_DEBUG_MEM it is clamped into
+ *        [0, BRICK_DEBUG_MEM - 1] and used as the statistics index
+ *
+ * The allocation is retried once per second until it succeeds, so the
+ * function never returns NULL.  With BRICK_DEBUG_MEM a header
+ * { MAGIC_STR, len, line } precedes the returned pointer and either
+ * MAGIC_SEND or (strong debugging) STRING_CANARY follows the buffer.
+ */
+char *_brick_string_alloc(int len, int line)
+{
+       char *res;
+
+       /* resolve the default first so that the check below sees the effective length */
+       if (len <= 0)
+               len = BRICK_STRING_LEN;
+
+#ifdef CONFIG_MARS_DEBUG
+       might_sleep();
+       if (unlikely(len > PAGE_SIZE))
+               BRICK_WRN("line = %d string too long: len = %d\n", line, len);
+#endif
+
+       for (;;) {
+               res = kzalloc(len + STRING_PLUS, GFP_BRICK);
+               if (likely(res))
+                       break;
+               msleep(1000);
+       }
+
+#ifdef BRICK_DEBUG_MEM
+       atomic_inc(&phys_string_alloc);
+       if (unlikely(line < 0))
+               line = 0;
+       else if (unlikely(line >= BRICK_DEBUG_MEM))
+               line = BRICK_DEBUG_MEM - 1;
+       INT_ACCESS(res, 0) = MAGIC_STR;
+       INT_ACCESS(res, sizeof(int)) = len;
+       INT_ACCESS(res, sizeof(int) * 2) = line;
+       res += sizeof(int) * 3;
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       /*
+        * Poison everything but the first byte of the caller-visible
+        * buffer.  This has to happen after skipping the header:
+        * otherwise the poison is shifted by the header size, the
+        * returned string starts with '?' instead of NUL, and the tail
+        * of the buffer stays unpoisoned.
+        */
+       memset(res + 1, '?', len - 1);
+       strcpy(res + len, STRING_CANARY);
+#else
+       INT_ACCESS(res, len) = MAGIC_SEND;
+#endif
+       atomic_inc(&string_count[line]);
+#endif
+       return res;
+}
+
+/*
+ * Release a string obtained from _brick_string_alloc().
+ *
+ * @data:  pointer as returned by _brick_string_alloc()
+ * @cline: caller's source line, used only in diagnostics
+ *
+ * With BRICK_DEBUG_MEM the header in front of the string and the guard
+ * behind it are verified; on a mismatch an error is logged and the
+ * function returns without freeing the memory.
+ */
+void _brick_string_free(const char *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+       int magic;
+       int len;
+       int line;
+       char *orig = (void *)data;
+
+       /* step back to the start of the header */
+       data -= sizeof(int) * 3;
+       magic = INT_ACCESS(data, 0);
+       if (unlikely(magic != MAGIC_STR)) {
+               BRICK_ERR("cline %d stringmem corruption: magix %08x != %08x\n", cline, magic, MAGIC_STR);
+               goto _out_return;
+       }
+       len = INT_ACCESS(data, sizeof(int));
+       line = INT_ACCESS(data, sizeof(int) * 2);
+       if (unlikely(len <= 0)) {
+               BRICK_ERR("cline %d stringmem corruption: line = %d len = %d\n", cline, line, len);
+               goto _out_return;
+       }
+       /* an overlong string is reported but still freed */
+       if (unlikely(len > PAGE_SIZE))
+               BRICK_ERR("cline %d string too long: line = %d len = %d string='%s'\n", cline, line, len, orig);
+       if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+               BRICK_ERR("cline %d stringmem corruption: line = %d (len = %d)\n", cline, line, len);
+               goto _out_return;
+       }
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       if (unlikely(strcmp(orig + len, STRING_CANARY))) {
+               BRICK_ERR("cline %d stringmem corruption: bad canary '%s', line = %d len = %d\n",
+                         cline, STRING_CANARY, line, len);
+               goto _out_return;
+       }
+       /* spoil the canary and the contents so that stale users become visible */
+       orig[len]--;
+       memset(orig, '!', len);
+#else
+       magic = INT_ACCESS(orig, len);
+       if (unlikely(magic != MAGIC_SEND)) {
+               BRICK_ERR("cline %d stringmem corruption: end_magix %08x != %08x, line = %d len = %d\n",
+                         cline, magic, MAGIC_SEND, line, len);
+               goto _out_return;
+       }
+       INT_ACCESS(orig, len) = 0xffffffff;
+#endif
+       atomic_dec(&string_count[line]);
+       atomic_inc(&string_free[line]);
+       atomic_dec(&phys_string_alloc);
+#endif
+       kfree(data);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/*  block memory allocation */
+
+/*
+ * Return the smallest page order whose size (PAGE_SIZE << order) is at
+ * least @len bytes.
+ *
+ * A non-positive @len is reported and yields order 0.  A result above
+ * BRICK_MAX_ORDER is reported and capped to BRICK_MAX_ORDER; in that
+ * case the returned order is too small for @len.
+ */
+static
+int len2order(int len)
+{
+       int order = 0;
+
+       if (unlikely(len <= 0)) {
+               BRICK_ERR("trying to use %d bytes\n", len);
+               return 0;
+       }
+
+       while ((PAGE_SIZE << order) < len)
+               order++;
+
+       if (unlikely(order > BRICK_MAX_ORDER)) {
+               BRICK_ERR("trying to use %d bytes (oder = %d, max = %d)\n", len, order, BRICK_MAX_ORDER);
+               return BRICK_MAX_ORDER;
+       }
+       return order;
+}
+
+/*
+ * Per-order accounting maintained by _brick_block_alloc() and
+ * _brick_block_free(): number of blocks currently in use, a published
+ * copy of that number, and the highest value observed.
+ * brick_mem_freelist_max[] is the fill level the freelists may grow to.
+ */
+#ifdef CONFIG_MARS_MEM_PREALLOC
+static atomic_t _alloc_count[BRICK_MAX_ORDER+1];
+int brick_mem_alloc_count[BRICK_MAX_ORDER+1] = {};
+int brick_mem_alloc_max[BRICK_MAX_ORDER+1] = {};
+int brick_mem_freelist_max[BRICK_MAX_ORDER+1] = {};
+
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+/* number of page blocks currently obtained from the page allocator */
+static atomic_t phys_block_alloc = ATOMIC_INIT(0);
+
+/*  indexed by line */
+static atomic_t block_count[BRICK_DEBUG_MEM];
+static atomic_t block_free[BRICK_DEBUG_MEM];
+static int  block_len[BRICK_DEBUG_MEM];
+
+/*  indexed by order */
+static atomic_t op_count[BRICK_MAX_ORDER+1];
+static atomic_t raw_count[BRICK_MAX_ORDER+1];
+/* line and length of the most recent allocation of each order */
+static int alloc_line[BRICK_MAX_ORDER+1];
+static int alloc_len[BRICK_MAX_ORDER+1];
+
+#endif
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+
+/* number of hash buckets used for tracking page blocks */
+#define MAX_INFO_LISTS                 1024
+
+/* map a block address to its hash bucket */
+#define INFO_LIST_HASH(addr) ((unsigned long)(addr) / (PAGE_SIZE * 2) % MAX_INFO_LISTS)
+
+/* tracking record for one page block obtained by __brick_block_alloc() */
+struct mem_block_info {
+       struct list_head inf_head;      /* link within the hash bucket */
+       void *inf_data;                 /* start address of the block */
+       int inf_len;                    /* block size in bytes */
+       int inf_line;                   /* source line that last touched the block */
+       bool inf_used;                  /* false once handed to _brick_block_free() */
+};
+
+/* hash buckets and their locks; NOTE(review): initialization is not part of this section */
+static struct list_head inf_anchor[MAX_INFO_LISTS];
+static rwlock_t inf_lock[MAX_INFO_LISTS];
+
+/*
+ * Create a tracking record for the block at @data (@len bytes, allocated
+ * from source line @cline), mark it as in use and insert it into its
+ * hash bucket.  The record allocation is retried once per second until
+ * it succeeds.
+ */
+static
+void _new_block_info(void *data, int len, int cline)
+{
+       int bucket = INFO_LIST_HASH(data);
+       struct mem_block_info *rec;
+
+       rec = kmalloc(sizeof(struct mem_block_info), GFP_BRICK);
+       while (unlikely(!rec)) {
+               msleep(1000);
+               rec = kmalloc(sizeof(struct mem_block_info), GFP_BRICK);
+       }
+
+       rec->inf_used = true;
+       rec->inf_line = cline;
+       rec->inf_len = len;
+       rec->inf_data = data;
+
+       write_lock(&inf_lock[bucket]);
+       list_add(&rec->inf_head, &inf_anchor[bucket]);
+       write_unlock(&inf_lock[bucket]);
+}
+
+/*
+ * Look up the tracking record of the block starting at @data.
+ *
+ * @remove: when true, the record is also unlinked from its bucket (the
+ *          bucket is then taken with the write lock instead of the read
+ *          lock); freeing the record is left to the caller.
+ *
+ * Returns the record, or NULL when @data is not tracked.
+ */
+static
+struct mem_block_info *_find_block_info(void *data, bool remove)
+{
+       struct mem_block_info *res = NULL;
+       struct mem_block_info *inf;
+       int hash = INFO_LIST_HASH(data);
+
+       if (remove)
+               write_lock(&inf_lock[hash]);
+       else
+               read_lock(&inf_lock[hash]);
+       list_for_each_entry(inf, &inf_anchor[hash], inf_head) {
+               if (inf->inf_data != data)
+                       continue;
+               if (remove)
+                       list_del_init(&inf->inf_head);
+               res = inf;
+               break;
+       }
+       if (remove)
+               write_unlock(&inf_lock[hash]);
+       else
+               read_unlock(&inf_lock[hash]);
+       return res;
+}
+
+#endif /*  CONFIG_MARS_DEBUG_MEM_STRONG */
+
+/*
+ * Obtain a block of (PAGE_SIZE << @order) bytes from the system,
+ * retrying once per second until the allocation succeeds, and update
+ * the accounting.
+ *
+ * @gfp:   allocation flags passed through to the page allocator
+ * @order: page order of the block
+ * @cline: caller's source line, recorded when strong debugging is on
+ */
+static inline
+void *__brick_block_alloc(gfp_t gfp, int order, int cline)
+{
+       void *pages;
+
+retry:
+#ifdef USE_KERNEL_PAGES
+       pages = (void *)__get_free_pages(gfp, order);
+#else
+       pages = __vmalloc(PAGE_SIZE << order, gfp, PAGE_KERNEL_IO);
+#endif
+       if (unlikely(!pages)) {
+               msleep(1000);
+               goto retry;
+       }
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       _new_block_info(pages, PAGE_SIZE << order, cline);
+#endif
+#ifdef BRICK_DEBUG_MEM
+       atomic_inc(&phys_block_alloc);
+       atomic_inc(&raw_count[order]);
+#endif
+       /* global usage is accounted in KiB */
+       atomic64_add((PAGE_SIZE/1024) << order, &brick_global_block_used);
+
+       return pages;
+}
+
+/*
+ * Give a block of (PAGE_SIZE << @order) bytes back to the system and
+ * update the accounting.
+ *
+ * @data:  start address of the block
+ * @order: page order the block was allocated with
+ * @cline: caller's source line, used only in diagnostics
+ *
+ * With strong debugging the tracking record is removed and checked
+ * first; an unknown address or a size mismatch is logged and the pages
+ * are then not released (the counters are adjusted nevertheless).
+ */
+static inline
+void __brick_block_free(void *data, int order, int cline)
+{
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       struct mem_block_info *inf = _find_block_info(data, true);
+
+       if (likely(inf)) {
+               int inf_len = inf->inf_len;
+               int inf_line = inf->inf_line;
+
+               kfree(inf);
+               if (unlikely(inf_len != (PAGE_SIZE << order))) {
+                       BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+                               cline,
+                               data,
+                               (int)(PAGE_SIZE << order),
+                               inf_len,
+                               inf_line);
+                       goto err;
+               }
+       } else {
+               BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+               goto err;
+       }
+#endif
+#ifdef USE_KERNEL_PAGES
+       __free_pages(virt_to_page((unsigned long)data), order);
+#else
+       vfree(data);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+err:
+#endif
+#ifdef BRICK_DEBUG_MEM
+       atomic_dec(&phys_block_alloc);
+       atomic_dec(&raw_count[order]);
+#endif
+       atomic64_sub((PAGE_SIZE/1024) << order, &brick_global_block_used);
+}
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+/* when zero, _brick_block_free() does not put blocks onto the freelists */
+int brick_allow_freelist = 1;
+
+/* per order: number of blocks brick_mem_reserve() adds to the freelist target level */
+int brick_pre_reserve[BRICK_MAX_ORDER+1] = {};
+
+/* Note: we have no separate lists per CPU.
+ * This should not hurt because the freelists are only used
+ * for higher-order pages which should be rather low-frequency.
+ */
+static spinlock_t freelist_lock[BRICK_MAX_ORDER+1];
+/* per order: head of a singly linked list threaded through the free blocks themselves */
+static void *brick_freelist[BRICK_MAX_ORDER+1];
+/* per order: number of blocks on the list */
+static atomic_t freelist_count[BRICK_MAX_ORDER+1];
+
+/*
+ * Take one block of the given @order from the freelist.
+ *
+ * @cline: caller's source line, used for diagnostics and recorded in
+ *         the tracking record when strong debugging is on
+ *
+ * Returns the block, or NULL when the list is empty.  With
+ * BRICK_DEBUG_MEM the fill pattern and the redundant copy of the next
+ * pointer written by _put_free() are verified; on a mismatch the whole
+ * list of that order is dropped and NULL is returned.
+ */
+static
+void *_get_free(int order, int cline)
+{
+       void *data;
+       unsigned long flags;
+
+       spin_lock_irqsave(&freelist_lock[order], flags);
+       data = brick_freelist[order];
+       if (likely(data)) {
+               void *next = *(void **)data;
+
+#ifdef BRICK_DEBUG_MEM /*  check for corruptions */
+               /*
+                * Compare at the width of long: against the plain 64-bit
+                * constant a 32-bit long would never match.
+                */
+               const unsigned long fill = (unsigned long)0xf0f0f0f0f0f0f0f0ULL;
+               unsigned long pattern = *(((unsigned long *)data)+1);
+               void *copy = *(((void **)data)+2);
+
+               if (unlikely(pattern != fill || next != copy)) { /*  found a corruption */
+                       /*  prevent further trouble by leaving a memleak */
+                       brick_freelist[order] = NULL;
+                       spin_unlock_irqrestore(&freelist_lock[order], flags);
+                       BRICK_ERR("line %d:freelist corruption at %p (pattern = %lx next %p != %p, murdered = %d), order = %d\n",
+                                 cline, data, pattern, next, copy, atomic_read(&freelist_count[order]), order);
+                       return NULL;
+               }
+#endif
+               brick_freelist[order] = next;
+               atomic_dec(&freelist_count[order]);
+       }
+       spin_unlock_irqrestore(&freelist_lock[order], flags);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       if (data) {
+               struct mem_block_info *inf = _find_block_info(data, false);
+
+               if (likely(inf)) {
+                       if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+                               BRICK_ERR("line %d: address %p: bad freelist size %d (correct should be %d, previous line = %d)\n",
+                                         cline, data, (int)(PAGE_SIZE << order), inf->inf_len, inf->inf_line);
+                       }
+                       /* the block is in use again */
+                       inf->inf_line = cline;
+                       inf->inf_used = true;
+               } else {
+                       BRICK_ERR("line %d: freelist address %p is invalid (order = %d)\n", cline, data, order);
+               }
+       }
+#endif
+       return data;
+}
+
+/*
+ * Push the block @data onto the freelist of @order.
+ *
+ * The link to the previous head is stored in the first pointer slot of
+ * the block.  With BRICK_DEBUG_MEM the block is first filled with 0xf0
+ * bytes and a second copy of the link is kept in the third pointer
+ * slot, so that _get_free() can detect later modifications.
+ */
+static
+void _put_free(void *data, int order)
+{
+       void **slots = data;
+       void *old_head;
+       unsigned long flags;
+
+#ifdef BRICK_DEBUG_MEM /*  fill with pattern */
+       memset(data, 0xf0, PAGE_SIZE << order);
+#endif
+
+       spin_lock_irqsave(&freelist_lock[order], flags);
+       old_head = brick_freelist[order];
+       slots[0] = old_head;
+#ifdef BRICK_DEBUG_MEM /*  insert redundant copy for checking */
+       slots[2] = old_head;
+#endif
+       brick_freelist[order] = data;
+       spin_unlock_irqrestore(&freelist_lock[order], flags);
+
+       atomic_inc(&freelist_count[order]);
+}
+
+/*
+ * Drain the freelists of all orders, highest order first, releasing
+ * every block via __brick_block_free().
+ */
+static
+void _free_all(void)
+{
+       int order = BRICK_MAX_ORDER;
+
+       while (order >= 0) {
+               void *data = _get_free(order, __LINE__);
+
+               if (data) {
+                       __brick_block_free(data, order, __LINE__);
+                       continue;
+               }
+               /* this order is exhausted, go on with the next lower one */
+               order--;
+       }
+}
+
+/*
+ * Adjust the freelists to their target fill levels.
+ *
+ * For every order, brick_pre_reserve[order] is added to
+ * brick_mem_freelist_max[order] (so each call raises the target again);
+ * then blocks are allocated onto, or released from, the freelist until
+ * its count matches the target.
+ *
+ * Returns 0, or -ENOMEM when an allocation yields NULL.
+ * NOTE(review): __brick_block_alloc() as defined in this file retries
+ * until it succeeds, so the -ENOMEM branch looks unreachable -- confirm.
+ */
+int brick_mem_reserve(void)
+{
+       int order;
+       int status = 0;
+
+       for (order = BRICK_MAX_ORDER; order >= 0; order--) {
+               int max = brick_pre_reserve[order];
+               int i;
+
+               brick_mem_freelist_max[order] += max;
+               BRICK_INF("preallocating %d at order %d (new maxlevel = %d)\n",
+                       max,
+                       order,
+                       brick_mem_freelist_max[order]);
+
+               /* from here on, max is the distance between target and current level */
+               max = brick_mem_freelist_max[order] - atomic_read(&freelist_count[order]);
+               if (max >= 0) {
+                       for (i = 0; i < max; i++) {
+                               void *data = __brick_block_alloc(GFP_KERNEL, order, __LINE__);
+
+                               if (likely(data))
+                                       _put_free(data, order);
+                               else
+                                       status = -ENOMEM;
+                       }
+               } else {
+                       for (i = 0; i < -max; i++) {
+                               void *data = _get_free(order, __LINE__);
+
+                               if (likely(data))
+                                       __brick_block_free(data, order, __LINE__);
+                       }
+               }
+       }
+       return status;
+}
+#else
+/*
+ * Variant used when CONFIG_MARS_MEM_PREALLOC is not set: only logs that
+ * preallocation is unavailable and returns 0.
+ *
+ * NOTE(review): this variant takes an (unused) struct mem_reservation
+ * pointer, while the CONFIG_MARS_MEM_PREALLOC variant takes no
+ * arguments -- confirm which prototype the header declares.
+ */
+int brick_mem_reserve(struct mem_reservation *r)
+{
+       BRICK_INF("preallocation is not compiled in\n");
+       return 0;
+}
+#endif
+
+/*
+ * Allocate a block of at least @len bytes in whole pages.
+ *
+ * @pos:  not used by this function
+ * @len:  requested size in bytes
+ * @line: caller's source line, used for statistics and diagnostics
+ *
+ * With CONFIG_MARS_MEM_PREALLOC the freelist of the matching order is
+ * tried first; otherwise, or when it is empty, pages are taken from the
+ * system.  With BRICK_DEBUG_MEM, blocks of order > 1 get a guard page
+ * in front (the returned pointer is advanced by PAGE_SIZE) and a magic
+ * word behind the payload; blocks of order 1 carry their bookkeeping in
+ * the second page.
+ */
+void *_brick_block_alloc(loff_t pos, int len, int line)
+{
+       void *data;
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       int count;
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+#ifdef BRICK_DEBUG_ORDER0
+       const int plus0 = PAGE_SIZE;
+
+#else
+       const int plus0 = 0;
+
+#endif
+       const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+       const int plus = 0;
+
+#endif
+       int order = len2order(len + plus);
+
+       /* len2order() as defined in this file never returns a negative order */
+       if (unlikely(order < 0)) {
+               BRICK_ERR("trying to allocate %d bytes (max = %d)\n", len, (int)(PAGE_SIZE << order));
+               return NULL;
+       }
+
+#ifdef CONFIG_MARS_DEBUG
+       might_sleep();
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       count = atomic_add_return(1, &_alloc_count[order]);
+       brick_mem_alloc_count[order] = count;
+       if (count > brick_mem_alloc_max[order])
+               brick_mem_alloc_max[order] = count;
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+       atomic_inc(&op_count[order]);
+       /*  statistics */
+       alloc_line[order] = line;
+       alloc_len[order] = len;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       /* Dynamic increase of limits, in order to reduce
+        * fragmentation on higher-order pages.
+        * This comes on cost of higher memory usage.
+        */
+       if (order > 0 && count > brick_mem_freelist_max[order])
+               brick_mem_freelist_max[order] = count;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       data = _get_free(order, line);
+       if (!data)
+#endif
+               data = __brick_block_alloc(GFP_BRICK, order, line);
+
+#ifdef BRICK_DEBUG_MEM
+       if (order > 0) {
+               if (unlikely(line < 0))
+                       line = 0;
+               else if (unlikely(line >= BRICK_DEBUG_MEM))
+                       line = BRICK_DEBUG_MEM - 1;
+               atomic_inc(&block_count[line]);
+               block_len[line] = len;
+               if (order > 1) {
+                       /* bookkeeping at the start of the leading guard page */
+                       INT_ACCESS(data, 0 * sizeof(int)) = MAGIC_BLOCK;
+                       INT_ACCESS(data, 1 * sizeof(int)) = line;
+                       INT_ACCESS(data, 2 * sizeof(int)) = len;
+                       data += PAGE_SIZE;
+                       /* magic words directly before and behind the payload */
+                       INT_ACCESS(data, -1 * sizeof(int)) = MAGIC_BLOCK;
+                       INT_ACCESS(data, len) = MAGIC_BEND;
+               } else if (order == 1) {
+                       /* bookkeeping at the start of the second page */
+                       INT_ACCESS(data, PAGE_SIZE + 0 * sizeof(int)) = MAGIC_BLOCK;
+                       INT_ACCESS(data, PAGE_SIZE + 1 * sizeof(int)) = line;
+                       INT_ACCESS(data, PAGE_SIZE + 2 * sizeof(int)) = len;
+               }
+       }
+#endif
+       return data;
+}
+
+/*
+ * Release a block obtained from _brick_block_alloc().
+ *
+ * @data:  pointer as returned by _brick_block_alloc()
+ * @len:   the length that was passed to _brick_block_alloc()
+ * @cline: caller's source line, used for diagnostics
+ *
+ * The page order is recomputed from @len.  With the debug options the
+ * tracking record and the magic words written at allocation time are
+ * verified; on any inconsistency an error is logged and the function
+ * returns without releasing the block.  With CONFIG_MARS_MEM_PREALLOC,
+ * blocks of order > 0 are put onto the freelist while it is allowed and
+ * not above its limit; otherwise the pages go back to the system.
+ */
+void _brick_block_free(void *data, int len, int cline)
+{
+       int order;
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       struct mem_block_info *inf;
+       char *real_data;
+
+#endif
+#ifdef BRICK_DEBUG_MEM
+       int prev_line = 0;
+
+#ifdef BRICK_DEBUG_ORDER0
+       const int plus0 = PAGE_SIZE;
+
+#else
+       const int plus0 = 0;
+
+#endif
+       const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+       const int plus = 0;
+
+#endif
+
+       order = len2order(len + plus);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       /* blocks of order > 1 start one guard page before the caller's pointer */
+       real_data = data;
+       if (order > 1)
+               real_data -= PAGE_SIZE;
+       inf = _find_block_info(real_data, false);
+       if (likely(inf)) {
+               prev_line = inf->inf_line;
+               if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+                       BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+                                 cline, data, (int)(PAGE_SIZE << order), inf->inf_len, prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(!inf->inf_used)) {
+                       BRICK_ERR("line %d: address %p: double freeing (previous line = %d)\n",
+                               cline,
+                               data,
+                               prev_line);
+                       goto _out_return;
+               }
+               inf->inf_line = cline;
+               inf->inf_used = false;
+       } else {
+               BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+               goto _out_return;
+       }
+#endif
+#ifdef BRICK_DEBUG_MEM
+       if (order > 1) {
+               void *test = data - PAGE_SIZE;
+               int magic = INT_ACCESS(test, 0);
+               int line = INT_ACCESS(test, sizeof(int));
+               int oldlen = INT_ACCESS(test, sizeof(int)*2);
+               int magic1 = INT_ACCESS(data, -1 * sizeof(int));
+               int magic2;
+
+               if (unlikely(magic1 != MAGIC_BLOCK)) {
+                       BRICK_ERR("line %d memory corruption: %p magix1 %08x != %08x (previous line = %d)\n",
+                               cline,
+                               data,
+                               magic1,
+                               MAGIC_BLOCK,
+                               prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(magic != MAGIC_BLOCK)) {
+                       BRICK_ERR("line %d memory corruption: %p magix %08x != %08x (previous line = %d)\n",
+                               cline,
+                               data,
+                               magic,
+                               MAGIC_BLOCK,
+                               prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+                       BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+                               cline,
+                               data,
+                               line,
+                               prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(oldlen != len)) {
+                       BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+                               cline,
+                               data,
+                               len,
+                               oldlen,
+                               prev_line);
+                       goto _out_return;
+               }
+               magic2 = INT_ACCESS(data, len);
+               if (unlikely(magic2 != MAGIC_BEND)) {
+                       /* report the trailing word that was actually checked */
+                       BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+                               cline,
+                               data,
+                               magic2,
+                               MAGIC_BEND,
+                               prev_line);
+                       goto _out_return;
+               }
+               /* invalidate both magic words, then continue with the real block start */
+               INT_ACCESS(test, 0) = 0xffffffff;
+               INT_ACCESS(data, len) = 0xffffffff;
+               data = test;
+               atomic_dec(&block_count[line]);
+               atomic_inc(&block_free[line]);
+       } else if (order == 1) {
+               void *test = data + PAGE_SIZE;
+               int magic = INT_ACCESS(test, 0 * sizeof(int));
+               int line = INT_ACCESS(test, 1 * sizeof(int));
+               int oldlen = INT_ACCESS(test, 2 * sizeof(int));
+
+               if (unlikely(magic != MAGIC_BLOCK)) {
+                       BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+                               cline,
+                               data,
+                               magic,
+                               MAGIC_BLOCK,
+                               prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+                       BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+                               cline,
+                               data,
+                               line,
+                               prev_line);
+                       goto _out_return;
+               }
+               if (unlikely(oldlen != len)) {
+                       BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+                               cline,
+                               data,
+                               len,
+                               oldlen,
+                               prev_line);
+                       goto _out_return;
+               }
+               atomic_dec(&block_count[line]);
+               atomic_inc(&block_free[line]);
+       }
+#endif
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       if (order > 0 && brick_allow_freelist && atomic_read(&freelist_count[order]) <= brick_mem_freelist_max[order]) {
+               _put_free(data, order);
+       } else
+#endif
+               __brick_block_free(data, order, cline);
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       brick_mem_alloc_count[order] = atomic_dec_return(&_alloc_count[order]);
+#endif
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/* Look up the page behind the address @data.
+ *
+ * @offset receives the position of @data inside that page.  @len is
+ * reduced in place whenever it is larger than the number of bytes left
+ * between @data and the end of the page.
+ * Addresses for which is_vmalloc_addr() holds are resolved with
+ * vmalloc_to_page(), all others with virt_to_page().
+ */
+struct page *brick_iomap(void *data, int *offset, int *len)
+{
+       int page_off = ((unsigned long)data) & (PAGE_SIZE-1);
+
+       *offset = page_off;
+       if (*len > PAGE_SIZE - page_off)
+               *len = PAGE_SIZE - page_off;
+
+       return is_vmalloc_addr(data) ?
+               vmalloc_to_page(data) : virt_to_page(data);
+}
+
+/***********************************************************************/
+
+/*  module
+ *
+ * brick_mem_statistics() logs the allocation counters kept by this file.
+ * The whole body is compiled only under BRICK_DEBUG_MEM; without it the
+ * function does nothing.
+ *
+ * Output, in order:
+ *  - with CONFIG_MARS_MEM_PREALLOC: one line per page order
+ *    0..BRICK_MAX_ORDER;
+ *  - for each of the block, mem and string counter tables: one line per
+ *    slot whose counter is non-zero, followed by a summary line.
+ *
+ * @final: when true and a table's total is non-zero, that table's summary
+ *         is emitted with BRICK_ERR instead of BRICK_INF.
+ */
+
+void brick_mem_statistics(bool final)
+{
+#ifdef BRICK_DEBUG_MEM /* closes just before the end of the function */
+       int i;
+       int count = 0; /* sum of the non-zero counters of the current table */
+       int places = 0; /* number of slots with a non-zero counter */
+
+       BRICK_INF("======== page allocation:\n");
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       for (i = 0; i <= BRICK_MAX_ORDER; i++) {
+               BRICK_INF("pages order = %2d operations = %9d freelist_count = 
%4d / %3d raw_count = %5d alloc_count = %5d alloc_len = %5d line = %5d 
max_count = %5d\n",
+                         i,
+                         atomic_read(&op_count[i]),
+                         atomic_read(&freelist_count[i]),
+                         brick_mem_freelist_max[i],
+                         atomic_read(&raw_count[i]),
+                         brick_mem_alloc_count[i],
+                         alloc_len[i],
+                         alloc_line[i],
+                         brick_mem_alloc_max[i]);
+       }
+#endif
+       for (i = 0; i < BRICK_DEBUG_MEM; i++) { /* block table */
+               int val = atomic_read(&block_count[i]);
+
+               if (val) {
+                       count += val;
+                       places++;
+                       BRICK_INF("line %4d: %6d allocated (last size = %4d, 
freed = %6d)\n",
+                                 i,
+                                 val,
+                                 block_len[i],
+                                 atomic_read(&block_free[i]));
+               }
+       }
+       if (!final || !count) { /* only a non-zero total at final time is an error */
+               BRICK_INF("======== %d block allocations in %d places 
(phys=%d)\n",
+                         count, places, atomic_read(&phys_block_alloc));
+       } else {
+               BRICK_ERR("======== %d block allocations in %d places 
(phys=%d)\n",
+                         count, places, atomic_read(&phys_block_alloc));
+       }
+       count = places = 0; /* restart the totals for the mem table */
+       for (i = 0; i < BRICK_DEBUG_MEM; i++) {
+               int val = atomic_read(&mem_count[i]);
+
+               if (val) {
+                       count += val;
+                       places++;
+                       BRICK_INF("line %4d: %6d allocated (last size = %4d, 
freed = %6d)\n",
+                                 i,
+                                 val,
+                                 mem_len[i],
+                                 atomic_read(&mem_free[i]));
+               }
+       }
+       if (!final || !count) { /* same rule as for the block table */
+               BRICK_INF("======== %d memory allocations in %d places 
(phys=%d,redirect=%d)\n",
+                         count, places,
+                         atomic_read(&phys_mem_alloc), 
atomic_read(&mem_redirect_alloc));
+       } else {
+               BRICK_ERR("======== %d memory allocations in %d places 
(phys=%d,redirect=%d)\n",
+                         count, places,
+                         atomic_read(&phys_mem_alloc), 
atomic_read(&mem_redirect_alloc));
+       }
+       count = places = 0; /* restart the totals for the string table */
+       for (i = 0; i < BRICK_DEBUG_MEM; i++) {
+               int val = atomic_read(&string_count[i]);
+
+               if (val) {
+                       count += val;
+                       places++;
+                       BRICK_INF("line %4d: %6d allocated (freed = %6d)\n",
+                                 i,
+                                 val,
+                                 atomic_read(&string_free[i]));
+               }
+       }
+       if (!final || !count) { /* same rule as for the block table */
+               BRICK_INF("======== %d string allocations in %d places 
(phys=%d)\n",
+                         count, places, atomic_read(&phys_string_alloc));
+       } else {
+               BRICK_ERR("======== %d string allocations in %d places 
(phys=%d)\n",
+                         count, places, atomic_read(&phys_string_alloc));
+       }
+#endif
+}
+
+/*  module init stuff */
+
+/* One-time setup of the locks and list heads used by this file.
+ *
+ * With CONFIG_MARS_MEM_PREALLOC every per-order freelist spinlock is
+ * initialized; with CONFIG_MARS_DEBUG_MEM_STRONG every info-list anchor
+ * and its rwlock are initialized as well.  Finally get_total_ram() is
+ * called.  There is no failure path: the return value is always 0.
+ */
+int __init init_brick_mem(void)
+{
+       int i;
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       for (i = 0; i <= BRICK_MAX_ORDER; i++)
+               spin_lock_init(&freelist_lock[i]);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+       for (i = 0; i < MAX_INFO_LISTS; i++) {
+               rwlock_init(&inf_lock[i]);
+               INIT_LIST_HEAD(&inf_anchor[i]);
+       }
+#else
+       /* presumably keeps "unused variable" quiet when no loop is built */
+       (void)i;
+#endif
+
+       get_total_ram();
+
+       return 0;
+}
+
+/* Shutdown counterpart of init_brick_mem(): logs a notice, calls
+ * _free_all() when CONFIG_MARS_MEM_PREALLOC is set, and then prints the
+ * final statistics, where remaining allocations are reported as errors.
+ */
+void exit_brick_mem(void)
+{
+       BRICK_INF("deallocating memory...\n");
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+       _free_all();
+#endif
+       brick_mem_statistics(true);
+}
diff --git a/include/linux/brick/brick_mem.h b/include/linux/brick/brick_mem.h
new file mode 100644
index 0000000..1a2f236
--- /dev/null
+++ b/include/linux/brick/brick_mem.h
@@ -0,0 +1,218 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef BRICK_MEM_H
+#define BRICK_MEM_H
+
+#include <linux/mm_types.h>
+
+#define BRICK_DEBUG_MEM                        4096 /* number of per-line statistics slots */
+
+#ifndef CONFIG_MARS_DEBUG_MEM /* no debug accounting without this option */
+#undef BRICK_DEBUG_MEM
+#endif
+#ifdef CONFIG_MARS_DEBUG_ORDER0
+#define BRICK_DEBUG_ORDER0
+#endif
+
+#define CONFIG_MARS_MEM_PREALLOC       /* this is VITAL - disable only for experiments! */
+
+#define GFP_BRICK                      GFP_NOIO
+
+extern long long brick_global_memavail;
+extern long long brick_global_memlimit;
+extern atomic64_t brick_global_block_used;
+
+/* All brick memory allocations are guaranteed to succeed.
+ * In case of low memory, they will just retry (forever).
+ *
+ * We always prefer threads for concurrency.
+ * Therefore, in_interrupt() code does not occur, and we can
+ * always sleep in case of memory pressure.
+ *
+ * Resource deadlocks are avoided by the above memory limits.
+ * When exceeded, new memory is simply not allocated any more
+ * (except for vital memory, such as IO memory for which a
+ * low_mem_reserve must always exist, anyway).
+ */
+
+/***********************************************************************/
+
+/*  compiler tweaking */
+
+/* Some functions are known to return non-null pointer values,
+ * at least under some Kconfig conditions.
+ *
+ * In code like...
+ *
+ * void *ptr = myfunction();
+ * if (unlikely(!ptr)) {
+ *        printk("ERROR: this should not happen\n");
+ *        goto fail;
+ * }
+ *
+ * ... the dead code elimination of gcc will not remove the if clause
+ * because the function might return a NULL value, even if a human
+ * would know that myfunction() does not return a NULL value.
+ *
+ * Unfortunately, the __attribute__((nonnull)) can only be applied
+ * to input parameters, but not to the return value.
+ *
+ * Worse, a small inline wrapper does not help either: when the
+ * wrapper is optimized away, its nonnull attribute appears to be
+ * discarded along with it.
+ * I don't know whether this is a bug or a feature (or just a weakness).
+ *
+ * Following is a small hack which solves the problem at least for gcc 4.7.
+ *
+ * In order to be useful, the -fdelete-null-pointer-checks must be set.
+ * Since BRICK is superuser-only anyway, enabling this for MARS should not
+ * be a security risk
+ * (cf. upstream kernel commit a3ca86aea507904148870946d599e07a340b39bf).
+ */
+/* Returns @_ptr unchanged.  The asm statement has an empty template and
+ * only names *ptr as an input operand, so the compiler sees a use of the
+ * pointee.
+ *
+ * Declared static inline rather than extern inline: each file including
+ * this header gets its own inlinable copy and no out-of-line definition
+ * has to exist elsewhere.  extern inline either requires such a
+ * definition (gnu89 semantics) or emits one per translation unit
+ * (C99 semantics).
+ */
+static inline
+void *brick_mark_nonnull(void *_ptr)
+{
+       char *ptr = _ptr;
+
+       /*  fool gcc to believe that the pointer were dereferenced... */
+       asm("" : : "X" (*ptr));
+       return ptr;
+}
+
+/***********************************************************************/
+
+/*  small memory allocation (use this only for len < PAGE_SIZE) */
+
+/* Evaluates to the pointer returned by _brick_mem_alloc() for @_len_ and
+ * the calling source line, passed through brick_mark_nonnull().
+ */
+#define brick_mem_alloc(_len_)                                         \
+       brick_mark_nonnull(_brick_mem_alloc(_len_, __LINE__))
+
+/* Like brick_mem_alloc(), but the first @_len_ bytes of the result are
+ * cleared with memset().  @_len_ is evaluated exactly once, so an
+ * argument with side effects behaves as written at the call site.
+ */
+#define brick_zmem_alloc(_len_)                                        \
+       ({                                                              \
+               typeof(_len_) _brick_zlen_ = (_len_);                   \
+               void *_res_ = _brick_mem_alloc(_brick_zlen_, __LINE__); \
+               _res_ = brick_mark_nonnull(_res_);                      \
+               memset(_res_, 0, _brick_zlen_);                         \
+               _res_;                                                  \
+       })
+
+/* Hand @_data_ to _brick_mem_free() together with the calling source
+ * line.  A NULL pointer is ignored.  @_data_ is evaluated exactly once.
+ */
+#define brick_mem_free(_data_)                                         \
+       do {                                                            \
+               void *_brick_mf_ptr_ = (_data_);                        \
+                                                                       \
+               if (_brick_mf_ptr_)                                     \
+                       _brick_mem_free(_brick_mf_ptr_, __LINE__);      \
+       } while (0)
+
+/*  don't use the following directly:
+ *  go through brick_mem_alloc() / brick_zmem_alloc() / brick_mem_free(),
+ *  which pass __LINE__ of the call site as the line argument.
+ */
+extern void *_brick_mem_alloc(int len, int line) __attribute__((malloc)) 
__attribute__((alloc_size(1)));
+extern void _brick_mem_free(void *data, int line);
+
+/***********************************************************************/
+
+/*  string memory allocation */
+
+#define BRICK_STRING_LEN               1024 /* default value when len == 0 */
+
+/* Evaluates to the char pointer returned by _brick_string_alloc() for
+ * @_len_ and the calling source line, passed through brick_mark_nonnull().
+ */
+#define brick_string_alloc(_len_)                                      \
+       ((char *)brick_mark_nonnull(_brick_string_alloc((_len_), __LINE__)))
+
+/* Copy at most @_len_ characters of @_orig_ into a string obtained from
+ * _brick_string_alloc(@_len_ + 1).  The result is always NUL-terminated.
+ * @_orig_ and @_len_ are each evaluated exactly once.
+ */
+#define brick_strndup(_orig_, _len_)                                   \
+       ({                                                              \
+               typeof(_len_) _brick_nlen_ = (_len_);                   \
+               char *_res_ = _brick_string_alloc(_brick_nlen_ + 1, __LINE__);\
+               _res_ = brick_mark_nonnull(_res_);                      \
+               strncpy(_res_, (_orig_), _brick_nlen_ + 1);             \
+               /* always null-terminate for safety */                  \
+               _res_[_brick_nlen_] = '\0';                             \
+               (char *)brick_mark_nonnull(_res_);                      \
+       })
+
+/* Duplicate the NUL-terminated string @_orig_ into a string obtained
+ * from _brick_string_alloc(strlen + 1).  @_orig_ is evaluated exactly
+ * once, so the length and the copied data always refer to the same string.
+ */
+#define brick_strdup(_orig_)                                           \
+       ({                                                              \
+               const char *_brick_dup_src_ = (_orig_);                 \
+               int _len_ = strlen(_brick_dup_src_);                    \
+               char *_res_ = _brick_string_alloc((_len_) + 1, __LINE__);\
+               _res_ = brick_mark_nonnull(_res_);                      \
+               strncpy(_res_, _brick_dup_src_, (_len_) + 1);           \
+               (char *)brick_mark_nonnull(_res_);                      \
+       })
+
+/* Hand @_data_ to _brick_string_free() together with the calling source
+ * line.  A NULL pointer is ignored.  @_data_ is evaluated exactly once.
+ */
+#define brick_string_free(_data_)                                      \
+       do {                                                            \
+               const char *_brick_sf_ptr_ = (_data_);                  \
+                                                                       \
+               if (_brick_sf_ptr_)                                     \
+                       _brick_string_free(_brick_sf_ptr_, __LINE__);   \
+       } while (0)
+
+/*  don't use the following directly:
+ *  go through brick_string_alloc() / brick_strndup() / brick_strdup() /
+ *  brick_string_free(), which pass __LINE__ of the call site as the
+ *  line argument.
+ */
+extern char *_brick_string_alloc(int len, int line) __attribute__((malloc));
+extern void _brick_string_free(const char *data, int line);
+
+/***********************************************************************/
+
+/*  block memory allocation (for aligned multiples of 512 resp PAGE_SIZE) */
+
+/* Evaluates to the pointer returned by _brick_block_alloc() for @_pos_,
+ * @_len_ and the calling source line, passed through brick_mark_nonnull().
+ */
+#define brick_block_alloc(_pos_, _len_)                                \
+       brick_mark_nonnull(_brick_block_alloc((_pos_), (_len_), __LINE__))
+
+/* Hand @_data_ and @_len_ to _brick_block_free() together with the
+ * calling source line.  A NULL pointer is ignored, in which case @_len_
+ * is not evaluated.  @_data_ is evaluated exactly once.
+ */
+#define brick_block_free(_data_, _len_)                                \
+       do {                                                            \
+               void *_brick_bf_ptr_ = (_data_);                        \
+                                                                       \
+               if (_brick_bf_ptr_)                                     \
+                       _brick_block_free(_brick_bf_ptr_, (_len_), __LINE__); \
+       } while (0)
+
+extern struct page *brick_iomap(void *data, int *offset, int *len); /* page behind data; *offset and *len are written */
+
+/*  don't use the following directly:
+ *  go through brick_block_alloc() / brick_block_free(), which pass
+ *  __LINE__ of the call site as the last argument.
+ */
+extern void *_brick_block_alloc(loff_t pos, int len, int line) 
__attribute__((malloc)) __attribute__((alloc_size(2)));
+extern void _brick_block_free(void *data, int len, int cline);
+
+/***********************************************************************/
+
+/*  reservations / preallocation */
+
+#define BRICK_MAX_ORDER                        11 /* highest page order; per-order arrays have BRICK_MAX_ORDER+1 entries */
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+extern int brick_allow_freelist; /* tested before a freed block is put on a freelist */
+
+extern int brick_pre_reserve[BRICK_MAX_ORDER+1];
+extern int brick_mem_freelist_max[BRICK_MAX_ORDER+1]; /* per-order bound compared with freelist_count on free */
+extern int brick_mem_alloc_count[BRICK_MAX_ORDER+1]; /* per-order copy of the internal allocation counter */
+extern int brick_mem_alloc_max[BRICK_MAX_ORDER+1]; /* printed as max_count by brick_mem_statistics() */
+
+extern int brick_mem_reserve(void);
+
+#endif
+
+extern void brick_mem_statistics(bool final); /* log the counters; final makes leftover allocations an error */
+
+/***********************************************************************/
+
+/*  init: init_brick_mem() sets up the locks used by brick_mem.c and
+ *  always returns 0; exit_brick_mem() ends with brick_mem_statistics(true).
+ */
+
+extern int init_brick_mem(void);
+extern void exit_brick_mem(void);
+
+#endif
-- 
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to