It was fixed starting with Windows 7/Server 2008 R2.

Would it make sense to check the target OS version at build time and 
conditionally build the cache?  I would think the simpler we can make the code, 
the better.  Then we can mark the older OS code with descriptive checks so it 
can be deleted as we drop support for older operating systems.

-Fab

From: Leonid Keller [mailto:[email protected]]
Sent: Monday, December 13, 2010 2:12 AM
To: Fab Tillier; '[email protected]'
Subject: RE: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on 
the system memory size from 256 GB to 8 TB

Good news, I didn't know.
Do you know in which version it was fixed?
We still support win2k3 and winxp ...

From: Fab Tillier [mailto:[email protected]]
Sent: Monday, December 13, 2010 7:14 AM
To: Leonid Keller; '[email protected]'
Subject: RE: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on 
the system memory size from 256 GB to 8 TB

Note that the whole cache can go away - the memory corruption when probing 
pages for write access has been fixed, and you can just call 
MmProbeAndLockPages.

-Fab

From: [email protected] 
[mailto:[email protected]] On Behalf Of Leonid Keller
Sent: Sunday, December 12, 2010 6:02 AM
To: '[email protected]'
Subject: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the 
system memory size from 256 GB to 8 TB

The bus driver memory registration mechanism uses an internal cache that 
imposes a limit on the system memory size.
Until now the limit was 256 GB, but one of our customers has machines with up 
to 1 TB of system memory...

Index: hw/mlx4/kernel/bus/core/pa_cash.c
===================================================================
--- hw/mlx4/kernel/bus/core/pa_cash.c               (revision 3023)
+++ hw/mlx4/kernel/bus/core/pa_cash.c            (working copy)
@@ -50,9 +50,10 @@
 ///////////////////////////////////////////////////////////////////////////

 #ifdef _WIN64
-#define MAX_PAGES_SUPPORTED        (64 * 1024 * 1024)                          
  // 256 GB
+// be careful with setting it >= 4G. Compiler puts it into an integer, so 
4*1024*1024*1024 = 0 !!!
+#define MAX_PAGES_SUPPORTED       ((u32)2 * 1024 * 1024 * 1024)                
                                                      // 8 TB
 #else
-#define MAX_PAGES_SUPPORTED        (16 * 1024 * 1024)                          
  // 64 GB
+#define MAX_PAGES_SUPPORTED       ((u32)16 * 1024 * 1024)                      
                                                                                
           // 64 GB
 #endif

 #define FREE_LIST_TRESHOLD                  256                         // max 
number of pages in free list
@@ -63,13 +64,9 @@
 //
 ///////////////////////////////////////////////////////////////////////////

-#define PA_TABLE_ENTRY_SIZE              sizeof(pa_table_entry_t)
-#define PA_TABLE_ENTRY_NUM           (PAGE_SIZE / PA_TABLE_ENTRY_SIZE)
-#define PA_TABLE_SIZE                                              
(PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM)
+#define PA_TABLE_ENTRY_SIZE                             
sizeof(pa_table_entry_t)                                                        
                     // 4

-#define PA_DIR_ENTRY_SIZE                   sizeof(pa_dir_entry_t)
-#define PA_DIR_ENTRY_NUM                 (MAX_PAGES_SUPPORTED 
/PA_TABLE_ENTRY_NUM)
-#define PA_DIR_SIZE                                                   
(PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM)
+#define PA_DIR_ENTRY_SIZE                  sizeof(pa_dir_entry_t)              
                                                                    // 16 for 
x64


 ///////////////////////////////////////////////////////////////////////////
@@ -107,6 +104,11 @@
 DEFINE_MUTEX(g_pa_mutex);
 u64 g_pa[1024];
 pa_cash_t g_cash;
+u32 g_max_pages_supported = 0;
+u32 g_pa_table_entry_num = 0;
+u32 g_pa_table_size = 0;
+u32 g_pa_dir_entry_num = 0;
+u32 g_pa_dir_size = 0;


 ///////////////////////////////////////////////////////////////////////////
@@ -133,7 +135,7 @@
                                g_cash.free_nr_pages--;
                }
                else  /* allocate new page */
-                              pa_te = (pa_table_entry_t *)kzalloc( 
PA_TABLE_SIZE, GFP_KERNEL );
+                             pa_te = (pa_table_entry_t *)kzalloc( 
g_pa_table_size, GFP_KERNEL );

                return pa_te;
 }
@@ -150,15 +152,15 @@

 static pa_table_entry_t * pa_get_page(uint32_t ix)
 {
-              pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / 
PA_TABLE_ENTRY_NUM].pa_te;
+             pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / 
g_pa_table_entry_num].pa_te;

                /* no this page_table - add a new one */
                if (!pa_te) {
                                pa_te = pa_alloc_page();
                                if (!pa_te)
                                                return NULL;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = 
pa_te;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = 
pa_te;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0;
                                g_cash.cur_nr_pages++;
                }

@@ -167,8 +169,8 @@

 static void pa_put_page(uint32_t ix)
 {
-              pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te);
-              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL;
+             pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te);
+             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL;
                g_cash.cur_nr_pages--;
 }

@@ -189,9 +191,9 @@
                                return -ENOMEM;

                /* register page address */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             ++g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             ++pa_te[ix % g_pa_table_entry_num].ref_cnt;

                return 0;
 }
@@ -208,7 +210,7 @@
                                return -EFAULT;
                }

-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;

                /* no this page_table - error*/
                if (!pa_te)  {
@@ -217,13 +219,13 @@
                }

                /* deregister page address */
-              --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
-              ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0);
+             --pa_te[ix % g_pa_table_entry_num].ref_cnt;
+             ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0);

                /* release the page on need */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used)
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             --g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used)
                                pa_put_page(ix);

                return 0;
@@ -301,7 +303,7 @@

 void pa_cash_release()
 {
-              int i;
+             u32 i;

                pa_cash_print();

@@ -309,7 +311,7 @@
                                return;

                /* free cash tables */
-              for (i=0; i<PA_DIR_ENTRY_NUM; ++i)
+             for (i=0; i<g_pa_dir_entry_num; ++i)
                                if (g_cash.pa_dir[i].pa_te) {
                                                kfree(g_cash.pa_dir[i].pa_te);
                                                g_cash.cur_nr_pages--;
@@ -338,24 +340,31 @@
                                return -EFAULT;
                }

-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;

                /* no this page_table */
                if (!pa_te)
                                return 0;

-              return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             return pa_te[ix % g_pa_table_entry_num].ref_cnt;
 }

 int pa_cash_init()
 {
                void *pa_dir;
-              pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL);

+             g_max_pages_supported = MAX_PAGES_SUPPORTED;
+             g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE;
+             g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num;
+             g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num;
+             g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num;
+
+             pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL);
+
                if (!pa_dir)
                                return -ENOMEM;
                g_cash.pa_dir = pa_dir;
-              g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM;
+             g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num;
                g_cash.free_list_hdr.Next = NULL;
                g_cash.cur_nr_pages = 0;
                g_cash.free_nr_pages = 0;
@@ -363,4 +372,4 @@
                mutex_init(&g_pa_mutex);
                return 0;
 }
-
+
_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to