It was fixed starting with Windows 7/Server 2008 R2. Would it make sense to detect the target OS version in the build and conditionally build the cache? I would think the simpler we can make the code, the better. Then we can mark the older-OS code with descriptive checks so that it can be deleted as we drop support for older operating systems.
-Fab From: Leonid Keller [mailto:[email protected]] Sent: Monday, December 13, 2010 2:12 AM To: Fab Tillier; '[email protected]' Subject: RE: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB Good news, I didn't know. Do you know in which version it was fixed? We still support win2k3 and winxp ... From: Fab Tillier [mailto:[email protected]] Sent: Monday, December 13, 2010 7:14 AM To: Leonid Keller; '[email protected]' Subject: RE: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB Note that the whole cache can go away - the memory corruption when probing pages for write access has been fixed, and you can just call MmProbeAndLockPages. -Fab From: [email protected] [mailto:[email protected]] On Behalf Of Leonid Keller Sent: Sunday, December 12, 2010 6:02 AM To: '[email protected]' Subject: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB The bus driver's memory registration mechanism uses an internal cache (spelled "cash" in the code) that imposes a limit on the system memory size. Until now the limit was 256 GB, but one of our customers has machines with up to 1 TB of system memory... Index: hw/mlx4/kernel/bus/core/pa_cash.c =================================================================== --- hw/mlx4/kernel/bus/core/pa_cash.c (revision 3023) +++ hw/mlx4/kernel/bus/core/pa_cash.c (working copy) @@ -50,9 +50,10 @@ /////////////////////////////////////////////////////////////////////////// #ifdef _WIN64 -#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB +// be careful with setting it >= 4G. Compiler puts it into an integer, so 4*1024*1024*1024 = 0 !!! 
+#define MAX_PAGES_SUPPORTED ((u32)2 * 1024 * 1024 * 1024) // 8 TB #else -#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB +#define MAX_PAGES_SUPPORTED ((u32)16 * 1024 * 1024) // 64 GB #endif #define FREE_LIST_TRESHOLD 256 // max number of pages in free list @@ -63,13 +64,9 @@ // /////////////////////////////////////////////////////////////////////////// -#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) -#define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE) -#define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM) +#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) // 4 -#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) -#define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM) -#define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM) +#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) // 16 for x64 /////////////////////////////////////////////////////////////////////////// @@ -107,6 +104,11 @@ DEFINE_MUTEX(g_pa_mutex); u64 g_pa[1024]; pa_cash_t g_cash; +u32 g_max_pages_supported = 0; +u32 g_pa_table_entry_num = 0; +u32 g_pa_table_size = 0; +u32 g_pa_dir_entry_num = 0; +u32 g_pa_dir_size = 0; /////////////////////////////////////////////////////////////////////////// @@ -133,7 +135,7 @@ g_cash.free_nr_pages--; } else /* allocate new page */ - pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL ); + pa_te = (pa_table_entry_t *)kzalloc( g_pa_table_size, GFP_KERNEL ); return pa_te; } @@ -150,15 +152,15 @@ static pa_table_entry_t * pa_get_page(uint32_t ix) { - pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_table_entry_t *pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table - add a new one */ if (!pa_te) { pa_te = pa_alloc_page(); if (!pa_te) return NULL; - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te; - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0; + g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = pa_te; + g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0; 
g_cash.cur_nr_pages++; } @@ -167,8 +169,8 @@ static void pa_put_page(uint32_t ix) { - pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te); - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL; + pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te); + g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL; g_cash.cur_nr_pages--; } @@ -189,9 +191,9 @@ return -ENOMEM; /* register page address */ - if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) - ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; - ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + if (!pa_te[ix % g_pa_table_entry_num].ref_cnt) + ++g_cash.pa_dir[ix / g_pa_table_entry_num].used; + ++pa_te[ix % g_pa_table_entry_num].ref_cnt; return 0; } @@ -208,7 +210,7 @@ return -EFAULT; } - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table - error*/ if (!pa_te) { @@ -217,13 +219,13 @@ } /* deregister page address */ - --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; - ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0); + --pa_te[ix % g_pa_table_entry_num].ref_cnt; + ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0); /* release the page on need */ - if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) - --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; - if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used) + if (!pa_te[ix % g_pa_table_entry_num].ref_cnt) + --g_cash.pa_dir[ix / g_pa_table_entry_num].used; + if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used) pa_put_page(ix); return 0; @@ -301,7 +303,7 @@ void pa_cash_release() { - int i; + u32 i; pa_cash_print(); @@ -309,7 +311,7 @@ return; /* free cash tables */ - for (i=0; i<PA_DIR_ENTRY_NUM; ++i) + for (i=0; i<g_pa_dir_entry_num; ++i) if (g_cash.pa_dir[i].pa_te) { kfree(g_cash.pa_dir[i].pa_te); g_cash.cur_nr_pages--; @@ -338,24 +340,31 @@ return -EFAULT; } - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te; /* no this page_table */ if 
(!pa_te) return 0; - return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + return pa_te[ix % g_pa_table_entry_num].ref_cnt; } int pa_cash_init() { void *pa_dir; - pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + g_max_pages_supported = MAX_PAGES_SUPPORTED; + g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE; + g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num; + g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num; + g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num; + + pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL); + if (!pa_dir) return -ENOMEM; g_cash.pa_dir = pa_dir; - g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; + g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num; g_cash.free_list_hdr.Next = NULL; g_cash.cur_nr_pages = 0; g_cash.free_nr_pages = 0; @@ -363,4 +372,4 @@ mutex_init(&g_pa_mutex); return 0; } - +
_______________________________________________ ofw mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
