Note that the whole cache can go away - the memory corruption when probing pages for write access has been fixed, and you can just call MmProbeAndLockPages.
-Fab From: [email protected] [mailto:[email protected]] On Behalf Of Leonid Keller Sent: Sunday, December 12, 2010 6:02 AM To: '[email protected]' Subject: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB The bus driver memory registration mechanism uses an internal cache that imposes a limit on the system memory size. Until now the limit was 256 GB, but one of our customers has machines with up to 1 TB of system memory... Index: hw/mlx4/kernel/bus/core/pa_cash.c =================================================================== --- hw/mlx4/kernel/bus/core/pa_cash.c (revision 3023) +++ hw/mlx4/kernel/bus/core/pa_cash.c (working copy) @@ -50,9 +50,10 @@ /////////////////////////////////////////////////////////////////////////// #ifdef _WIN64 -#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB +// be careful with setting it >= 4G. Compiler puts it into an integer, so 4*1024*1024*1024 = 0 !!! +#define MAX_PAGES_SUPPORTED ((u32)2 * 1024 * 1024 * 1024) // 8 TB #else -#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB +#define MAX_PAGES_SUPPORTED ((u32)16 * 1024 * 1024) // 64 GB #endif #define FREE_LIST_TRESHOLD 256 // max number of pages in free list @@ -63,13 +64,9 @@ // /////////////////////////////////////////////////////////////////////////// -#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) -#define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE) -#define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM) +#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) // 4 -#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) -#define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM) -#define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM) +#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) // 16 for x64 /////////////////////////////////////////////////////////////////////////// @@ -107,6 +104,11 @@ DEFINE_MUTEX(g_pa_mutex); u64 g_pa[1024]; pa_cash_t g_cash; +u32 g_max_pages_supported = 0; 
+u32 g_pa_table_entry_num = 0; +u32 g_pa_table_size = 0; +u32 g_pa_dir_entry_num = 0; +u32 g_pa_dir_size = 0; /////////////////////////////////////////////////////////////////////////// @@ -133,7 +135,7 @@ g_cash.free_nr_pages--; } else /* allocate new page */ - pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL ); + pa_te = (pa_table_entry_t *)kzalloc( g_pa_table_size, GFP_KERNEL ); return pa_te; } @@ -150,15 +152,15 @@ static pa_table_entry_t * pa_get_page(uint32_t ix) { - pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_table_entry_t *pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table - add a new one */ if (!pa_te) { pa_te = pa_alloc_page(); if (!pa_te) return NULL; - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te; - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0; + g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = pa_te; + g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0; g_cash.cur_nr_pages++; } @@ -167,8 +169,8 @@ static void pa_put_page(uint32_t ix) { - pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te); - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL; + pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te); + g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL; g_cash.cur_nr_pages--; } @@ -189,9 +191,9 @@ return -ENOMEM; /* register page address */ - if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) - ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; - ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + if (!pa_te[ix % g_pa_table_entry_num].ref_cnt) + ++g_cash.pa_dir[ix / g_pa_table_entry_num].used; + ++pa_te[ix % g_pa_table_entry_num].ref_cnt; return 0; } @@ -208,7 +210,7 @@ return -EFAULT; } - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table - error*/ if (!pa_te) { @@ -217,13 +219,13 @@ } /* deregister page address */ - --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; - ASSERT(pa_te[ix % 
PA_TABLE_ENTRY_NUM].ref_cnt >= 0); + --pa_te[ix % g_pa_table_entry_num].ref_cnt; + ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0); /* release the page on need */ - if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) - --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; - if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used) + if (!pa_te[ix % g_pa_table_entry_num].ref_cnt) + --g_cash.pa_dir[ix / g_pa_table_entry_num].used; + if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used) pa_put_page(ix); return 0; @@ -301,7 +303,7 @@ void pa_cash_release() { - int i; + u32 i; pa_cash_print(); @@ -309,7 +311,7 @@ return; /* free cash tables */ - for (i=0; i<PA_DIR_ENTRY_NUM; ++i) + for (i=0; i<g_pa_dir_entry_num; ++i) if (g_cash.pa_dir[i].pa_te) { kfree(g_cash.pa_dir[i].pa_te); g_cash.cur_nr_pages--; @@ -338,24 +340,31 @@ return -EFAULT; } - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table */ if (!pa_te) return 0; - return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + return pa_te[ix % g_pa_table_entry_num].ref_cnt; } int pa_cash_init() { void *pa_dir; - pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + g_max_pages_supported = MAX_PAGES_SUPPORTED; + g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE; + g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num; + g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num; + g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num; + + pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL); + if (!pa_dir) return -ENOMEM; g_cash.pa_dir = pa_dir; - g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; + g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num; g_cash.free_list_hdr.Next = NULL; g_cash.cur_nr_pages = 0; g_cash.free_nr_pages = 0; @@ -363,4 +372,4 @@ mutex_init(&g_pa_mutex); return 0; } - +
_______________________________________________ ofw mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
