Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-07 Thread Dhaval Giani
On Mon, Jan 07, 2008 at 12:04:06PM -0800, Christoph Lameter wrote:
> Here is the cleaned version of the patch. Dhaval is testing it.
> 
> 
> quicklists: Only consider memory that can be used with GFP_KERNEL
> 
> Quicklists calculates the size of the quicklists based on the number
> of free pages. This must be the number of free pages that can be
> allocated with GFP_KERNEL. node_page_state() includes the pages in
> ZONE_HIGHMEM and ZONE_MOVABLE which may lead the quicklists to
> become too large causing OOM.
> 
> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

Does the job here for me.

Tested-by: Dhaval Giani <[EMAIL PROTECTED]>

> 
> Index: linux-2.6/mm/quicklist.c
> ===
> --- linux-2.6.orig/mm/quicklist.c 2008-01-07 10:38:13.0 -0800
> +++ linux-2.6/mm/quicklist.c  2008-01-07 10:38:44.0 -0800
> @@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
>  static unsigned long max_pages(unsigned long min_pages)
>  {
>   unsigned long node_free_pages, max;
> + struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
> +
> + node_free_pages =
> +#ifdef CONFIG_ZONE_DMA
> + zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
> +#endif
> +#ifdef CONFIG_ZONE_DMA32
> + zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
> +#endif
> + zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
> 
> - node_free_pages = node_page_state(numa_node_id(),
> - NR_FREE_PAGES);
>   max = node_free_pages / FRACTION_OF_NODE_MEM;
>   return max(max, min_pages);
>  }

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-07 Thread Christoph Lameter
Here is the cleaned version of the patch. Dhaval is testing it.


quicklists: Only consider memory that can be used with GFP_KERNEL

Quicklists calculates the size of the quicklists based on the number
of free pages. This must be the number of free pages that can be
allocated with GFP_KERNEL. node_page_state() includes the pages in
ZONE_HIGHMEM and ZONE_MOVABLE which may lead the quicklists to
become too large causing OOM.

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-07 10:38:13.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-07 10:38:44.0 -0800
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
 static unsigned long max_pages(unsigned long min_pages)
 {
unsigned long node_free_pages, max;
+   struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+
+   node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+   zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+   zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+   zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
-   node_free_pages = node_page_state(numa_node_id(),
-   NR_FREE_PAGES);
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-07 Thread Christoph Lameter
Here is the cleaned version of the patch. Dhaval is testing it.


quicklists: Only consider memory that can be used with GFP_KERNEL

Quicklists calculates the size of the quicklists based on the number
of free pages. This must be the number of free pages that can be
allocated with GFP_KERNEL. node_page_state() includes the pages in
ZONE_HIGHMEM and ZONE_MOVABLE which may lead the quicklists to
become too large causing OOM.

Signed-off-by: Christoph Lameter [EMAIL PROTECTED]

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-07 10:38:13.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-07 10:38:44.0 -0800
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
 static unsigned long max_pages(unsigned long min_pages)
 {
unsigned long node_free_pages, max;
+   struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+
+   node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+   zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+   zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+   zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
-   node_free_pages = node_page_state(numa_node_id(),
-   NR_FREE_PAGES);
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-07 Thread Dhaval Giani
On Mon, Jan 07, 2008 at 12:04:06PM -0800, Christoph Lameter wrote:
> Here is the cleaned version of the patch. Dhaval is testing it.
> 
> 
> quicklists: Only consider memory that can be used with GFP_KERNEL
> 
> Quicklists calculates the size of the quicklists based on the number
> of free pages. This must be the number of free pages that can be
> allocated with GFP_KERNEL. node_page_state() includes the pages in
> ZONE_HIGHMEM and ZONE_MOVABLE which may lead the quicklists to
> become too large causing OOM.
> 
> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

Does the job here for me.

Tested-by: Dhaval Giani [EMAIL PROTECTED]

 
> Index: linux-2.6/mm/quicklist.c
> ===
> --- linux-2.6.orig/mm/quicklist.c 2008-01-07 10:38:13.0 -0800
> +++ linux-2.6/mm/quicklist.c  2008-01-07 10:38:44.0 -0800
> @@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
>  static unsigned long max_pages(unsigned long min_pages)
>  {
>   unsigned long node_free_pages, max;
> + struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
> +
> + node_free_pages =
> +#ifdef CONFIG_ZONE_DMA
> + zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
> +#endif
> +#ifdef CONFIG_ZONE_DMA32
> + zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
> +#endif
> + zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
> 
> - node_free_pages = node_page_state(numa_node_id(),
> - NR_FREE_PAGES);
>   max = node_free_pages / FRACTION_OF_NODE_MEM;
>   return max(max, min_pages);
>  }

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-03 Thread Christoph Lameter
On Thu, 3 Jan 2008, Dhaval Giani wrote:

> Yes, no oom even after 20 mins of running (which is double the normal
> time for the oom to occur), also no changes in free lowmem.

Ahhh.. Good, then let's redo the patchset the right way (the patch so far
does not address the ZONE_MOVABLE issues). Does this patch
also do the trick?



Quicklists: Only consider memory that can be allocated via GFP_KERNEL

Quicklists calculates the size of the quicklists based on the number
of free pages. This must be the number of free pages that can be
allocated with GFP_KERNEL. node_page_state() includes the pages in
ZONE_HIGHMEM and ZONE_MOVABLE. These should not be considered for the 
size calculation.

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-03 12:22:55.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-03 13:00:30.0 -0800
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
 static unsigned long max_pages(unsigned long min_pages)
 {
unsigned long node_free_pages, max;
+   struct zone *zones = NODE_DATA(node)->node_zones;
+
+   node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+   zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+   zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+   zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
-   node_free_pages = node_page_state(numa_node_id(),
-   NR_FREE_PAGES);
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-03 Thread Christoph Lameter
On Thu, 3 Jan 2008, Dhaval Giani wrote:

> Yes, no oom even after 20 mins of running (which is double the normal
> time for the oom to occur), also no changes in free lowmem.

Ahhh.. Good then lets redo the patchset the right way (the patch so far 
does not address the ZONE_MOVABLE issues) . Does this patch 
also do the trick?



Quicklists: Only consider memory that can be allocated via GFP_KERNEL

Quicklists calculates the size of the quicklists based on the number
of free pages. This must be the number of free pages that can be
allocated with GFP_KERNEL. node_page_state() includes the pages in
ZONE_HIGHMEM and ZONE_MOVABLE. These should not be considered for the 
size calculation.

Signed-off-by: Christoph Lameter [EMAIL PROTECTED]

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-03 12:22:55.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-03 13:00:30.0 -0800
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quickli
 static unsigned long max_pages(unsigned long min_pages)
 {
unsigned long node_free_pages, max;
+   struct zone *zones = NODE_DATA(node)->node_zones;
+
+   node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+   zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+   zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+   zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
-   node_free_pages = node_page_state(numa_node_id(),
-   NR_FREE_PAGES);
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
> Just traced it again on my system: It is okay for the number of pages on 
> the quicklist to reach the high count that we see (although the 16 bit 
> limits are weird. You have around 4GB of memory in the system?). Up to 
> 1/16th of free memory of a node can be allocated for quicklists (this 
> allows the effective shutting down and restarting of large amounts of 
> processes)
> 
> The problem may be that this is run on a HIGHMEM system and the 
> calculation of allowable pages on the quicklists does not take into 
> account that highmem pages are not usable for quicklists (not sure about 
> ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
> 
> Here is a patch that removes the HIGHMEM portion from the calculation. 
> Does this change anything:
> 

Yep. This one hits it. I don't see the obvious signs of the oom
happening in the 5 mins I have run the script. I will let it run for
some more time.

Thanks!
-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Thu, Jan 03, 2008 at 09:29:42AM +0530, Dhaval Giani wrote:
> On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
> > Just traced it again on my system: It is okay for the number of pages on 
> > the quicklist to reach the high count that we see (although the 16 bit 
> > limits are weird. You have around 4GB of memory in the system?). Up to 
> > 1/16th of free memory of a node can be allocated for quicklists (this 
> > allows the effective shutting down and restarting of large amounts of 
> > processes)
> > 
> > The problem may be that this is run on a HIGHMEM system and the 
> > calculation of allowable pages on the quicklists does not take into 
> > account that highmem pages are not usable for quicklists (not sure about 
> > ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
> > 
> > Here is a patch that removes the HIGHMEM portion from the calculation. 
> > Does this change anything:
> > 
> 
> Yep. This one hits it. I don't see the obvious signs of the oom
> happening in the 5 mins I have run the script. I will let it run for
> some more time.
> 

Yes, no oom even after 20 mins of running (which is double the normal
time for the oom to occur), also no changes in free lowmem.

Thanks for the fix. Feel free to add a 

Tested-by: Dhaval Giani <[EMAIL PROTECTED]>

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
Just traced it again on my system: It is okay for the number of pages on 
the quicklist to reach the high count that we see (although the 16 bit 
limits are weird. You have around 4GB of memory in the system?). Up to 
1/16th of free memory of a node can be allocated for quicklists (this 
allows the effective shutting down and restarting of large amounts of 
processes)

The problem may be that this is run on a HIGHMEM system and the 
calculation of allowable pages on the quicklists does not take into 
account that highmem pages are not usable for quicklists (not sure about 
ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)

Here is a patch that removes the HIGHMEM portion from the calculation. 
Does this change anything:

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-02 13:41:10.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-02 13:44:15.0 -0800
@@ -29,6 +29,12 @@ static unsigned long max_pages(unsigned 
 
node_free_pages = node_page_state(numa_node_id(),
NR_FREE_PAGES);
+#ifdef CONFIG_HIGHMEM
+   /* Take HIGHMEM pages out of consideration */
+   node_free_pages -= 
zone_page_state(&NODE_DATA(numa_node_id())->node_zones[ZONE_HIGHMEM],
+   NR_FREE_PAGES);
+#endif
+
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
On Sun, 30 Dec 2007, Ingo Molnar wrote:

> so we still dont seem to understand the failure mode well enough. This 
> also looks like a quite dangerous change so late in the v2.6.24 cycle. 
> Does it really fix the OOM? If yes, why exactly?

Not exactly sure. I suspect that there is some memory corruption. See my 
earlier post from today. I do not see this issue on my system. So it must 
be particular to a certain config.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
On Fri, 28 Dec 2007, Dhaval Giani wrote:

> we managed to get your required information. Last 10,000 lines are
> attached (The uncompressed file comes to 500 kb).
> 
> Hope it helps.

Somehow the nr_pages field is truncated to 16 bit and it 
seems that there are sign issues there? We are wrapping around

 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46266, min_pages is 25 > bash
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > bash
 q->nr_pages is 46265, min_pages is 25 > cat
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > cat
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 0, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > cat


An int is just a 16 bit field on i386? I thought it was 32 bits? Or is 
the result due to the way that systemtap works?

Could you post the neighboring per cpu variables to quicklist (look at the 
System.map). Maybe somehow we corrupt the nr_pages and page contents.

Also could you do another systemtap and also print out the current 
processor? Maybe nr_pages gets only corrupted on a specific processor. I 
see a zero there and sometimes other sane values.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
On Fri, 28 Dec 2007, Dhaval Giani wrote:

> we managed to get your required information. Last 10,000 lines are
> attached (The uncompressed file comes to 500 kb).
> 
> Hope it helps.

Somehow the nr_pages field is truncated to 16 bit and it 
seems that there are sign issues there? We are wrapping around

 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46266, min_pages is 25 > bash
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > bash
 q->nr_pages is 46265, min_pages is 25 > cat
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > cat
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 0, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 36877, min_pages is 25 > swapper
 q->nr_pages is 46265, min_pages is 25 > cat


An int is just a 16 bit field on i386? I thought it was 32 bits? Or is 
the result due to the way that systemtap works?

Could you post the neighboring per cpu variables to quicklist (look at the 
System.map). Maybe somehow we corrupt the nr_pages and page contents.

Also could you do another systemtap and also print out the current 
processor? Maybe nr_pages gets only corrupted on a specific processor. I 
see a zero there and sometimes other sane values.



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
On Sun, 30 Dec 2007, Ingo Molnar wrote:

> so we still dont seem to understand the failure mode well enough. This 
> also looks like a quite dangerous change so late in the v2.6.24 cycle. 
> Does it really fix the OOM? If yes, why exactly?

Not exactly sure. I suspect that there is some memory corruption. See my 
earlier post from today. I do not see this issue on my system. So it must 
be particular to a certain config.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Christoph Lameter
Just traced it again on my system: It is okay for the number of pages on 
the quicklist to reach the high count that we see (although the 16 bit 
limits are weird. You have around 4GB of memory in the system?). Up to 
1/16th of free memory of a node can be allocated for quicklists (this 
allows the effective shutting down and restarting of large amounts of 
processes)

The problem may be that this is run on a HIGHMEM system and the 
calculation of allowable pages on the quicklists does not take into 
account that highmem pages are not usable for quicklists (not sure about 
ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)

Here is a patch that removes the HIGHMEM portion from the calculation. 
Does this change anything:

Index: linux-2.6/mm/quicklist.c
===
--- linux-2.6.orig/mm/quicklist.c   2008-01-02 13:41:10.0 -0800
+++ linux-2.6/mm/quicklist.c2008-01-02 13:44:15.0 -0800
@@ -29,6 +29,12 @@ static unsigned long max_pages(unsigned 
 
node_free_pages = node_page_state(numa_node_id(),
NR_FREE_PAGES);
+#ifdef CONFIG_HIGHMEM
+   /* Take HIGHMEM pages out of consideration */
+   node_free_pages -= 
zone_page_state(&NODE_DATA(numa_node_id())->node_zones[ZONE_HIGHMEM],
+   NR_FREE_PAGES);
+#endif
+
max = node_free_pages / FRACTION_OF_NODE_MEM;
return max(max, min_pages);
 }
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Thu, Jan 03, 2008 at 09:29:42AM +0530, Dhaval Giani wrote:
 On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
  Just traced it again on my system: It is okay for the number of pages on 
  the quicklist to reach the high count that we see (although the 16 bit 
  limits are weird. You have around 4GB of memory in the system?). Up to 
  1/16th of free memory of a node can be allocated for quicklists (this 
  allows the effective shutting down and restarting of large amounts of 
  processes)
  
  The problem may be that this is run on a HIGHMEM system and the 
  calculation of allowable pages on the quicklists does not take into 
  account that highmem pages are not usable for quicklists (not sure about 
  ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
  
  Here is a patch that removes the HIGHMEM portion from the calculation. 
  Does this change anything:
  
 
 Yep. This one hits it. I don't see the obvious signs of the oom
 happening in the 5 mins I have run the script. I will let it run for
 some more time.
 

Yes, no oom even after 20 mins of running (which is double the normal
time for the oom to occur), also no changes in free lowmem.

Thanks for the fix. Feel free to add a 

Tested-by: Dhaval Giani [EMAIL PROTECTED]

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
 Just traced it again on my system: It is okay for the number of pages on 
 the quicklist to reach the high count that we see (although the 16 bit 
 limits are weird. You have around 4GB of memory in the system?). Up to 
 1/16th of free memory of a node can be allocated for quicklists (this 
 allows the effective shutting down and restarting of large amounts of 
 processes)
 
 The problem may be that this is run on a HIGHMEM system and the 
 calculation of allowable pages on the quicklists does not take into 
 account that highmem pages are not usable for quicklists (not sure about 
 ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
 
 Here is a patch that removes the HIGHMEM portion from the calculation. 
 Does this change anything:
 

Yep. This one hits it. I don't see the obvious signs of the oom
happening in the 5 mins I have run the script. I will let it run for
some more time.

Thanks!
-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-30 Thread Dhaval Giani
On Sun, Dec 30, 2007 at 03:01:16PM +0100, Ingo Molnar wrote:
> 
> * Christoph Lameter <[EMAIL PROTECTED]> wrote:
> 
> > Index: linux-2.6/arch/x86/mm/pgtable_32.c
> > ===
> > --- linux-2.6.orig/arch/x86/mm/pgtable_32.c 2007-12-26 12:55:10.0 
> > -0800
> > +++ linux-2.6/arch/x86/mm/pgtable_32.c  2007-12-26 12:55:54.0 
> > -0800
> > @@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
> > }
> > /* in the non-PAE case, free_pgtables() clears user pgd entries */
> > quicklist_free(0, pgd_dtor, pgd);
> > +
> > +   /*
> > +* We must call check_pgd_cache() here because the pgd is freed after
> > +* tlb flushing and the call to check_pgd_cache. In some cases the VM
> > +* may not call tlb_flush_mmu during process termination (??).
> 
> that's incorrect i think: during process termination exit_mmap() calls 
> tlb_finish_mmu() unconditionally which calls tlb_flush_mmu().
> 
> > +* If this is repeated then we may never call check_pgd_cache.
> > +* The quicklist will grow and grow. So call check_pgd_cache here.
> > +*/
> > +   check_pgt_cache();
> >  }
> 
> so we still dont seem to understand the failure mode well enough. This 
> also looks like a quite dangerous change so late in the v2.6.24 cycle. 
> Does it really fix the OOM? If yes, why exactly?
> 

No it does not. I've sent out some more information if it helps, will
send to you separately.

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-30 Thread Ingo Molnar

* Christoph Lameter <[EMAIL PROTECTED]> wrote:

> Index: linux-2.6/arch/x86/mm/pgtable_32.c
> ===
> --- linux-2.6.orig/arch/x86/mm/pgtable_32.c   2007-12-26 12:55:10.0 
> -0800
> +++ linux-2.6/arch/x86/mm/pgtable_32.c2007-12-26 12:55:54.0 
> -0800
> @@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
>   }
>   /* in the non-PAE case, free_pgtables() clears user pgd entries */
>   quicklist_free(0, pgd_dtor, pgd);
> +
> + /*
> +  * We must call check_pgd_cache() here because the pgd is freed after
> +  * tlb flushing and the call to check_pgd_cache. In some cases the VM
> +  * may not call tlb_flush_mmu during process termination (??).

that's incorrect i think: during process termination exit_mmap() calls 
tlb_finish_mmu() unconditionally which calls tlb_flush_mmu().

> +  * If this is repeated then we may never call check_pgd_cache.
> +  * The quicklist will grow and grow. So call check_pgd_cache here.
> +  */
> + check_pgt_cache();
>  }

so we still dont seem to understand the failure mode well enough. This 
also looks like a quite dangerous change so late in the v2.6.24 cycle. 
Does it really fix the OOM? If yes, why exactly?

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-30 Thread Ingo Molnar

* Christoph Lameter [EMAIL PROTECTED] wrote:

 Index: linux-2.6/arch/x86/mm/pgtable_32.c
 ===
 --- linux-2.6.orig/arch/x86/mm/pgtable_32.c   2007-12-26 12:55:10.0 
 -0800
 +++ linux-2.6/arch/x86/mm/pgtable_32.c2007-12-26 12:55:54.0 
 -0800
 @@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
   }
   /* in the non-PAE case, free_pgtables() clears user pgd entries */
   quicklist_free(0, pgd_dtor, pgd);
 +
 + /*
 +  * We must call check_pgd_cache() here because the pgd is freed after
 +  * tlb flushing and the call to check_pgd_cache. In some cases the VM
 +  * may not call tlb_flush_mmu during process termination (??).

that's incorrect i think: during process termination exit_mmap() calls 
tlb_finish_mmu() unconditionally which calls tlb_flush_mmu().

 +  * If this is repeated then we may never call check_pgd_cache.
 +  * The quicklist will grow and grow. So call check_pgd_cache here.
 +  */
 + check_pgt_cache();
  }

so we still dont seem to understand the failure mode well enough. This 
also looks like a quite dangerous change so late in the v2.6.24 cycle. 
Does it really fix the OOM? If yes, why exactly?

Ingo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-30 Thread Dhaval Giani
On Sun, Dec 30, 2007 at 03:01:16PM +0100, Ingo Molnar wrote:
 
 * Christoph Lameter [EMAIL PROTECTED] wrote:
 
  Index: linux-2.6/arch/x86/mm/pgtable_32.c
  ===
  --- linux-2.6.orig/arch/x86/mm/pgtable_32.c 2007-12-26 12:55:10.0 
  -0800
  +++ linux-2.6/arch/x86/mm/pgtable_32.c  2007-12-26 12:55:54.0 
  -0800
  @@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
  }
  /* in the non-PAE case, free_pgtables() clears user pgd entries */
  quicklist_free(0, pgd_dtor, pgd);
  +
  +   /*
  +* We must call check_pgd_cache() here because the pgd is freed after
  +* tlb flushing and the call to check_pgd_cache. In some cases the VM
  +* may not call tlb_flush_mmu during process termination (??).
 
 that's incorrect i think: during process termination exit_mmap() calls 
 tlb_finish_mmu() unconditionally which calls tlb_flush_mmu().
 
  +* If this is repeated then we may never call check_pgd_cache.
  +* The quicklist will grow and grow. So call check_pgd_cache here.
  +*/
  +   check_pgt_cache();
   }
 
 so we still dont seem to understand the failure mode well enough. This 
 also looks like a quite dangerous change so late in the v2.6.24 cycle. 
 Does it really fix the OOM? If yes, why exactly?
 

No it does not. I've sent out some more information if it helps, will
send to you separately.

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-28 Thread Dhaval Giani
On Thu, Dec 27, 2007 at 11:22:34AM -0800, Christoph Lameter wrote:
> On Thu, 27 Dec 2007, Dhaval Giani wrote:
> 
> > anything specific you are looking for? I still hit the oom.
> 
> Weird WTH is this? You run an unmodified upstream tree? Can you add a 
> printk in quicklist_trim that shows
> 

Hi,

I am running 2.6.24-rc5-mm1 here.

> A) that it is called
> 
> B) what the control values q->nr_pages and min_pages are?
> 

Trying to print these using printks renders the system unbootable. With
help from RAS folks around me, managed to get a systemtap script, 

probe kernel.statement("[EMAIL PROTECTED]/quicklist.c:56")
{
printf(" q->nr_pages is %d, min_pages is %d > %s\n",
$q->nr_pages, $$
min_pages, execname());
}

we managed to get your required information. Last 10,000 lines are
attached (The uncompressed file comes to 500 kb).

Hope it helps.

Thanks,
-- 
regards,
Dhaval


systp.out.1.bz2
Description: BZip2 compressed data


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-28 Thread Dhaval Giani
On Thu, Dec 27, 2007 at 11:22:34AM -0800, Christoph Lameter wrote:
 On Thu, 27 Dec 2007, Dhaval Giani wrote:
 
  anything specific you are looking for? I still hit the oom.
 
 Weird WTH is this? You run an unmodified upstream tree? Can you add a 
 printk in quicklist_trim that shows
 

Hi,

I am running 2.6.24-rc5-mm1 here.

> A) that it is called
> 
> B) what the control values q->nr_pages and min_pages are?
> 

Trying to print these using printks renders the system unbootable. With
help from RAS folks around me, managed to get a systemtap script, 

probe kernel.statement("[EMAIL PROTECTED]/quicklist.c:56")
{
printf(" q->nr_pages is %d, min_pages is %d > %s\n",
$q->nr_pages, $$
min_pages, execname());
}

we managed to get your required information. Last 10,000 lines are
attached (The uncompressed file comes to 500 kb).

Hope it helps.

Thanks,
-- 
regards,
Dhaval


systp.out.1.bz2
Description: BZip2 compressed data


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-26 Thread Christoph Lameter
On Fri, 21 Dec 2007, Dhaval Giani wrote:

> No, it does not stop the oom I am seeing here.

Duh. Disregard that patch. It looks like check_pgt_cache() is not called. 
This could happen if tlb_flush_mmu is never called during the 
fork/terminate sequences in your script. pgd_free is called *after* a 
possible tlb flush so the pgd page is on the quicklist (which is good for 
the next process which needs a pgd). The tlb_flush_mmu's during pte 
eviction should trim the quicklist. For some reason this is not happening 
on your box (it works here).

Could you try this patch that ensures that check_pgt_cache is called
after every pgd_free?

Index: linux-2.6/arch/x86/mm/pgtable_32.c
===
--- linux-2.6.orig/arch/x86/mm/pgtable_32.c 2007-12-26 12:55:10.0 
-0800
+++ linux-2.6/arch/x86/mm/pgtable_32.c  2007-12-26 12:55:54.0 -0800
@@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
}
/* in the non-PAE case, free_pgtables() clears user pgd entries */
quicklist_free(0, pgd_dtor, pgd);
+
+   /*
+* We must call check_pgd_cache() here because the pgd is freed after
+* tlb flushing and the call to check_pgd_cache. In some cases the VM
+* may not call tlb_flush_mmu during process termination (??).
+* If this is repeated then we may never call check_pgd_cache.
+* The quicklist will grow and grow. So call check_pgd_cache here.
+*/
+   check_pgt_cache();
 }
 
 void check_pgt_cache(void)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-26 Thread Christoph Lameter
On Fri, 21 Dec 2007, Dhaval Giani wrote:

> No, it does not stop the oom I am seeing here.

Duh. Disregard that patch. It looks like check_pgt_cache() is not called. 
This could happen if tlb_flush_mmu is never called during the 
fork/terminate sequences in your script. pgd_free is called *after* a 
possible tlb flush so the pgd page is on the quicklist (which is good for 
the next process which needs a pgd). The tlb_flush_mmu's during pte 
eviction should trim the quicklist. For some reason this is not happening 
on your box (it works here).

Could you try this script that insures that check_pgt_cache is called 
after every pgd_free?

Index: linux-2.6/arch/x86/mm/pgtable_32.c
===
--- linux-2.6.orig/arch/x86/mm/pgtable_32.c 2007-12-26 12:55:10.0 
-0800
+++ linux-2.6/arch/x86/mm/pgtable_32.c  2007-12-26 12:55:54.0 -0800
@@ -366,6 +366,15 @@ void pgd_free(pgd_t *pgd)
}
/* in the non-PAE case, free_pgtables() clears user pgd entries */
quicklist_free(0, pgd_dtor, pgd);
+
+   /*
+* We must call check_pgd_cache() here because the pgd is freed after
+* tlb flushing and the call to check_pgd_cache. In some cases the VM
+* may not call tlb_flush_mmu during process termination (??).
+* If this is repeated then we may never call check_pgd_cache.
+* The quicklist will grow and grow. So call check_pgd_cache here.
+*/
+   check_pgt_cache();
 }
 
 void check_pgt_cache(void)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-20 Thread Dhaval Giani
> > It was just
> > 
> > while echo ; do cat /sys/kernel/<some file> ; done
> > 
> > it's all in the email threads somewhere..
> 
> The patch that was posted in the thread that I mentioned earlier is here. 
> I ran the test for 15 minutes and things are still fine.
> 
> 
> 
> quicklist: Set tlb->need_flush if pages are remaining in quicklist 0
> 
> This ensures that the quicklists are drained. Otherwise draining may only 
> occur when the processor reaches an idle state.
> 

Hi Christoph,

No, it does not stop the oom I am seeing here.

Thanks,

> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>
> 
> Index: linux-2.6/include/asm-generic/tlb.h
> ===
> --- linux-2.6.orig/include/asm-generic/tlb.h  2007-12-13 14:45:38.0 
> -0800
> +++ linux-2.6/include/asm-generic/tlb.h   2007-12-13 14:51:07.0 
> -0800
> @@ -14,6 +14,7 @@
>  #define _ASM_GENERIC__TLB_H
> 
>  #include <linux/swap.h>
> +#include <linux/quicklist.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
> 
> @@ -85,6 +86,9 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
>  static inline void
>  tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long 
> end)
>  {
> +#ifdef CONFIG_QUICKLIST
> + tlb->need_flush += &__get_cpu_var(quicklist)[0].nr_pages != 0;
> +#endif
>   tlb_flush_mmu(tlb, start, end);
> 
>   /* keep the page table cache within bounds */

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-20 Thread Dhaval Giani
> > It was just
> > 
> > while echo ; do cat /sys/kernel/<some file> ; done
> > 
> > it's all in the email threads somewhere..
> 
> The patch that was posted in the thread that I mentioned earlier is here. 
> I ran the test for 15 minutes and things are still fine.
> 
> 
> 
> quicklist: Set tlb->need_flush if pages are remaining in quicklist 0
> 
> This ensures that the quicklists are drained. Otherwise draining may only 
> occur when the processor reaches an idle state.
> 

Hi Christoph,

No, it does not stop the oom I am seeing here.

Thanks,

> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>
> 
> Index: linux-2.6/include/asm-generic/tlb.h
> ===
> --- linux-2.6.orig/include/asm-generic/tlb.h  2007-12-13 14:45:38.0 
> -0800
> +++ linux-2.6/include/asm-generic/tlb.h   2007-12-13 14:51:07.0 
> -0800
> @@ -14,6 +14,7 @@
>  #define _ASM_GENERIC__TLB_H
> 
>  #include <linux/swap.h>
> +#include <linux/quicklist.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
> 
> @@ -85,6 +86,9 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
>  static inline void
>  tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long 
> end)
>  {
> +#ifdef CONFIG_QUICKLIST
> + tlb->need_flush += &__get_cpu_var(quicklist)[0].nr_pages != 0;
> +#endif
>   tlb_flush_mmu(tlb, start, end);
> 
>   /* keep the page table cache within bounds */

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-15 Thread Dhaval Giani
On Fri, Dec 14, 2007 at 10:00:30PM -0800, Andrew Morton wrote:
> On Sat, 15 Dec 2007 09:22:00 +0530 Dhaval Giani <[EMAIL PROTECTED]> wrote:
> 
> > > Is it really the case that the bug only turns up when you run tests like
> > > 
> > >   while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > > and
> > >   while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > > 
> > > or will any fork-intensive workload also do it?  Say,
> > > 
> > >   while echo ; do true ; done
> > > 
> > 
> > This does not leak, but having a simple text file and reading it in a
> > loop causes it.
> 
> hm.
> 
> > > ?
> > > 
> > > Another interesting factoid here is that after the oomkilling you 
> > > slabinfo has
> > > 
> > > mm_struct 38 9858471 : tunables   32   16
> > > 8 : slabdata 14 14  0 : globalstat278119649   31  
> > >   01000 : cpustat 368800  
> > > 11864 368920  11721
> > > 
> > > so we aren't leaking mm_structs.  In fact we aren't leaking anything from
> > > slab.   But we are leaking pgds.
> > > 
> > > iirc the most recent change we've made in the pgd_t area is the quicklist
> > > management which went into 2.6.22-rc1.  You say the bug was present in
> > > 2.6.22.  Can you test 2.6.21?  
> > 
> > Nope, leak is not present in 2.6.21.7
> 
> Could you try this debug patch please?
> 

Here is the dmesg with that patch,

use, ignoring.
PCI: Unable to reserve mem region #2:[EMAIL PROTECTED] for device :08:0a.1
aic7xxx: <Adaptec AIC-7899 Ultra 160/m SCSI host adapter> at PCI 8/10/1
aic7xxx: I/O ports already in use, ignoring.
megaraid cmm: 2.20.2.7 (Release Date: Sun Jul 16 00:01:03 EST 2006)
megaraid: 2.20.5.1 (Release Date: Thu Nov 16 15:32:35 EST 2006)
megasas: 00.00.03.16-rc1 Thu. Nov. 07 10:09:32 PDT 2007
st: Version 20070203, fixed bufsize 32768, s/g segs 256
osst :I: Tape driver with OnStream support version 0.99.4
osst :I: $Id: osst.c,v 1.73 2005/01/01 21:13:34 wriede Exp $
sd 1:0:0:0: [sda] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:0:0: [sda] Write Protect is off
sd 1:0:0:0: [sda] Mode Sense: cb 00 00 08
sd 1:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:0:0: [sda] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:0:0: [sda] Write Protect is off
sd 1:0:0:0: [sda] Mode Sense: cb 00 00 08
sd 1:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sda: sda1
sd 1:0:0:0: [sda] Attached SCSI disk
sd 1:0:1:0: [sdb] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:1:0: [sdb] Write Protect is off
sd 1:0:1:0: [sdb] Mode Sense: cb 00 00 08
sd 1:0:1:0: [sdb] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:1:0: [sdb] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:1:0: [sdb] Write Protect is off
sd 1:0:1:0: [sdb] Mode Sense: cb 00 00 08
sd 1:0:1:0: [sdb] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdb: sdb1 sdb2 sdb3 sdb4
sd 1:0:1:0: [sdb] Attached SCSI disk
sd 1:0:2:0: [sdc] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:2:0: [sdc] Write Protect is off
sd 1:0:2:0: [sdc] Mode Sense: cb 00 00 08
sd 1:0:2:0: [sdc] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:2:0: [sdc] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:2:0: [sdc] Write Protect is off
sd 1:0:2:0: [sdc] Mode Sense: cb 00 00 08
sd 1:0:2:0: [sdc] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdc: sdc1 sdc2
sd 1:0:2:0: [sdc] Attached SCSI disk
sd 1:0:3:0: [sdd] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:3:0: [sdd] Write Protect is off
sd 1:0:3:0: [sdd] Mode Sense: cb 00 00 08
sd 1:0:3:0: [sdd] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:3:0: [sdd] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:3:0: [sdd] Write Protect is off
sd 1:0:3:0: [sdd] Mode Sense: cb 00 00 08
sd 1:0:3:0: [sdd] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdd: sdd1 sdd2 sdd3
sd 1:0:3:0: [sdd] Attached SCSI disk
sd 1:0:4:0: [sde] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:4:0: [sde] Write Protect is off
sd 1:0:4:0: [sde] Mode Sense: cb 00 00 08
sd 1:0:4:0: [sde] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:4:0: [sde] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:4:0: [sde] Write Protect is off
sd 1:0:4:0: [sde] Mode Sense: cb 00 00 08
sd 1:0:4:0: [sde] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sde: sde1
sd 1:0:4:0: [sde] Attached SCSI disk
sd 1:0:5:0: [sdf] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:5:0: [sdf] Write Protect is off
sd 1:0:5:0: [sdf] Mode Sense: b3 00 10 08
sd 1:0:5:0: [sdf] Write cache: disabled, read cache: enabled, supports DPO and 
FUA
sd 1:0:5:0: [sdf] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:5:0: [sdf] Write Protect is off
sd 1:0:5:0: [sdf] Mode Sense: b3 00 10 08
sd 1:0:5:0: [sdf] Write cache: disabled, read 

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-15 Thread Dhaval Giani
On Fri, Dec 14, 2007 at 10:00:30PM -0800, Andrew Morton wrote:
> On Sat, 15 Dec 2007 09:22:00 +0530 Dhaval Giani <[EMAIL PROTECTED]> wrote:
> 
> > > Is it really the case that the bug only turns up when you run tests like
> > > 
> > >   while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > > and
> > >   while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > > 
> > > or will any fork-intensive workload also do it?  Say,
> > > 
> > >   while echo ; do true ; done
> > > 
> > 
> > This does not leak, but having a simple text file and reading it in a
> > loop causes it.
> 
> hm.
> 
> > > ?
> > > 
> > > Another interesting factoid here is that after the oomkilling you 
> > > slabinfo has
> > > 
> > > mm_struct 38 9858471 : tunables   32   16
> > > 8 : slabdata 14 14  0 : globalstat278119649   31  
> > >   01000 : cpustat 368800  
> > > 11864 368920  11721
> > > 
> > > so we aren't leaking mm_structs.  In fact we aren't leaking anything from
> > > slab.   But we are leaking pgds.
> > > 
> > > iirc the most recent change we've made in the pgd_t area is the quicklist
> > > management which went into 2.6.22-rc1.  You say the bug was present in
> > > 2.6.22.  Can you test 2.6.21?  
> > 
> > Nope, leak is not present in 2.6.21.7
> 
> Could you try this debug patch please?
> 

Here is the dmesg with that patch,

use, ignoring.
PCI: Unable to reserve mem region #2:[EMAIL PROTECTED] for device :08:0a.1
aic7xxx: Adaptec AIC-7899 Ultra 160/m SCSI host adapter at PCI 8/10/1
aic7xxx: I/O ports already in use, ignoring.
megaraid cmm: 2.20.2.7 (Release Date: Sun Jul 16 00:01:03 EST 2006)
megaraid: 2.20.5.1 (Release Date: Thu Nov 16 15:32:35 EST 2006)
megasas: 00.00.03.16-rc1 Thu. Nov. 07 10:09:32 PDT 2007
st: Version 20070203, fixed bufsize 32768, s/g segs 256
osst :I: Tape driver with OnStream support version 0.99.4
osst :I: $Id: osst.c,v 1.73 2005/01/01 21:13:34 wriede Exp $
sd 1:0:0:0: [sda] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:0:0: [sda] Write Protect is off
sd 1:0:0:0: [sda] Mode Sense: cb 00 00 08
sd 1:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:0:0: [sda] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:0:0: [sda] Write Protect is off
sd 1:0:0:0: [sda] Mode Sense: cb 00 00 08
sd 1:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sda: sda1
sd 1:0:0:0: [sda] Attached SCSI disk
sd 1:0:1:0: [sdb] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:1:0: [sdb] Write Protect is off
sd 1:0:1:0: [sdb] Mode Sense: cb 00 00 08
sd 1:0:1:0: [sdb] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:1:0: [sdb] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:1:0: [sdb] Write Protect is off
sd 1:0:1:0: [sdb] Mode Sense: cb 00 00 08
sd 1:0:1:0: [sdb] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdb: sdb1 sdb2 sdb3 sdb4
sd 1:0:1:0: [sdb] Attached SCSI disk
sd 1:0:2:0: [sdc] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:2:0: [sdc] Write Protect is off
sd 1:0:2:0: [sdc] Mode Sense: cb 00 00 08
sd 1:0:2:0: [sdc] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:2:0: [sdc] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:2:0: [sdc] Write Protect is off
sd 1:0:2:0: [sdc] Mode Sense: cb 00 00 08
sd 1:0:2:0: [sdc] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdc: sdc1 sdc2
sd 1:0:2:0: [sdc] Attached SCSI disk
sd 1:0:3:0: [sdd] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:3:0: [sdd] Write Protect is off
sd 1:0:3:0: [sdd] Mode Sense: cb 00 00 08
sd 1:0:3:0: [sdd] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:3:0: [sdd] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:3:0: [sdd] Write Protect is off
sd 1:0:3:0: [sdd] Mode Sense: cb 00 00 08
sd 1:0:3:0: [sdd] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sdd: sdd1 sdd2 sdd3
sd 1:0:3:0: [sdd] Attached SCSI disk
sd 1:0:4:0: [sde] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:4:0: [sde] Write Protect is off
sd 1:0:4:0: [sde] Mode Sense: cb 00 00 08
sd 1:0:4:0: [sde] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
sd 1:0:4:0: [sde] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:4:0: [sde] Write Protect is off
sd 1:0:4:0: [sde] Mode Sense: cb 00 00 08
sd 1:0:4:0: [sde] Write cache: disabled, read cache: enabled, doesn't support 
DPO or FUA
 sde: sde1
sd 1:0:4:0: [sde] Attached SCSI disk
sd 1:0:5:0: [sdf] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:5:0: [sdf] Write Protect is off
sd 1:0:5:0: [sdf] Mode Sense: b3 00 10 08
sd 1:0:5:0: [sdf] Write cache: disabled, read cache: enabled, supports DPO and 
FUA
sd 1:0:5:0: [sdf] 71096640 512-byte hardware sectors (36401 MB)
sd 1:0:5:0: [sdf] Write Protect is off
sd 1:0:5:0: [sdf] Mode Sense: b3 00 10 08
sd 1:0:5:0: [sdf] Write cache: disabled, read cache: enabled, supports DPO and 
FUA
 sdf: sdf1
sd 

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Sat, 15 Dec 2007 09:22:00 +0530 Dhaval Giani <[EMAIL PROTECTED]> wrote:

> > Is it really the case that the bug only turns up when you run tests like
> > 
> > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > and
> > while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > 
> > or will any fork-intensive workload also do it?  Say,
> > 
> > while echo ; do true ; done
> > 
> 
> This does not leak, but having a simple text file and reading it in a
> loop causes it.

hm.

> > ?
> > 
> > Another interesting factoid here is that after the oomkilling you slabinfo 
> > has
> > 
> > mm_struct 38 9858471 : tunables   32   168 
> > : slabdata 14 14  0 : globalstat278119649   31  
> > 01000 : cpustat 368800  11864 
> > 368920  11721
> > 
> > so we aren't leaking mm_structs.  In fact we aren't leaking anything from
> > slab.   But we are leaking pgds.
> > 
> > iirc the most recent change we've made in the pgd_t area is the quicklist
> > management which went into 2.6.22-rc1.  You say the bug was present in
> > 2.6.22.  Can you test 2.6.21?  
> 
> Nope, leak is not present in 2.6.21.7

Could you try this debug patch please?

It might need some fiddling to get useful output.  Basic idea is to see if
we are failing to empty the quicklists.

--- a/include/linux/quicklist.h~a
+++ a/include/linux/quicklist.h
@@ -69,6 +69,8 @@ static inline void __quicklist_free(int 
*(void **)p = q->page;
q->page = p;
q->nr_pages++;
+   if (q->nr_pages && !(q->nr_pages % 1000))
+   printk("eek: %d\n", q->nr_pages);
put_cpu_var(quicklist);
 }
 
_

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Dhaval Giani
> Is it really the case that the bug only turns up when you run tests like
> 
>   while echo; do cat /sys/kernel/kexec_crash_loaded; done
> and
>   while echo; do cat /sys/kernel/uevent_seqnum ; done;
> 
> or will any fork-intensive workload also do it?  Say,
> 
>   while echo ; do true ; done
> 

This does not leak, but having a simple text file and reading it in a
loop causes it.

> ?
> 
> Another interesting factoid here is that after the oomkilling you slabinfo has
> 
> mm_struct 38 9858471 : tunables   32   168 : 
> slabdata 14 14  0 : globalstat278119649   31  
>   01000 : cpustat 368800  11864 368920  
> 11721
> 
> so we aren't leaking mm_structs.  In fact we aren't leaking anything from
> slab.   But we are leaking pgds.
> 
> iirc the most recent change we've made in the pgd_t area is the quicklist
> management which went into 2.6.22-rc1.  You say the bug was present in
> 2.6.22.  Can you test 2.6.21?  

Nope, leak is not present in 2.6.21.7

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Fri, 14 Dec 2007 23:58:02 +0530
Dhaval Giani <[EMAIL PROTECTED]> wrote:

> On Fri, Dec 14, 2007 at 09:50:23AM -0800, Andrew Morton wrote:
> > On Fri, 14 Dec 2007 21:46:37 +0530 Dhaval Giani <[EMAIL PROTECTED]> wrote:
> > 
> > > On Sat, Dec 15, 2007 at 12:54:09AM +0900, Tejun Heo wrote:
> > > > Dhaval Giani wrote:
> > > > > XXX sysfs_page_cnt=1
> > > > 
> > > > Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
> > > > all your low memory.  Please do the following.
> > > > 
> > > > 1. Right after boot, record /proc/meminfo and slabinfo.
> > > > 
> > > > 2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
> > > > tricky but if your machine reliably OOMs after 10mins, run it for 9mins
> > > > and capturing the result should show enough.
> > > > 
> > > 
> > > Attached. The results are after oom, but i think about a min or so after
> > > that. I missed the oom point.
> > 
> > Looking back at your original oom-killer output: something has consumed all
> > your ZONE_NORMAL memory and we cannot tell what it is.
> > 
> > Please run 2.6.24-rc5-mm1 again (with CONFIG_PAGE_OWNER=y) and take a peek
> > at the changelog in
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc5/2.6.24-rc5-mm1/broken-out/page-owner-tracking-leak-detector.patch.
> > 
> > Build up Documentation/page_owner.c then cause the leak to happen then
> > execute page_owner.
> > 
> Hi Andrew
> 
> This is a peek during the leak.
> 
> ...
> 
> [sorted_page_owner.txt  text/plain (100.2KB)]
> 51957 times:
> Page allocated via order 0, mask 0x80d0
> [0xc015b9aa] __alloc_pages+706
> [0xc015b9f0] __get_free_pages+60
> [0xc011b7c9] pgd_alloc+60
> [0xc0122b9e] mm_init+196
> [0xc0122e06] dup_mm+101
> [0xc0122eda] copy_mm+104
> [0xc0123b8c] copy_process+1149
> [0xc0124229] do_fork+141
> 
> 12335 times:
> Page allocated via order 0, mask 0x84d0
> [0xc015b9aa] __alloc_pages+706
> [0xc011b6ca] pte_alloc_one+21
> [0xc01632ac] __pte_alloc+21
> [0xc01634bb] copy_pte_range+67
> [0xc0163827] copy_page_range+284
> [0xc0122a79] dup_mmap+427
> [0xc0122e22] dup_mm+129
> [0xc0122eda] copy_mm+104

OK, so you're leaking pgd's on a fork-intensive load.  It's a 4G i386
highmem system but I'm sure there are enough of those out there (still) for
this bug to have been promptly reported if it was generally occurring.

There's something special about either your setup or the test which you're
running.

Is it really the case that the bug only turns up when you run tests like

while echo; do cat /sys/kernel/kexec_crash_loaded; done
and
while echo; do cat /sys/kernel/uevent_seqnum ; done;

or will any fork-intensive workload also do it?  Say,

while echo ; do true ; done

?

Another interesting factoid here is that after the oomkilling you slabinfo has

mm_struct 38 9858471 : tunables   32   168 : 
slabdata 14 14  0 : globalstat278119649   31
  01000 : cpustat 368800  11864 368920  
11721

so we aren't leaking mm_structs.  In fact we aren't leaking anything from
slab.   But we are leaking pgds.

iirc the most recent change we've made in the pgd_t area is the quicklist
management which went into 2.6.22-rc1.  You say the bug was present in
2.6.22.  Can you test 2.6.21?  

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Fri, 14 Dec 2007 21:46:37 +0530 Dhaval Giani <[EMAIL PROTECTED]> wrote:

> On Sat, Dec 15, 2007 at 12:54:09AM +0900, Tejun Heo wrote:
> > Dhaval Giani wrote:
> > > XXX sysfs_page_cnt=1
> > 
> > Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
> > all your low memory.  Please do the following.
> > 
> > 1. Right after boot, record /proc/meminfo and slabinfo.
> > 
> > 2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
> > tricky but if your machine reliably OOMs after 10mins, run it for 9mins
> > and capturing the result should show enough.
> > 
> 
> Attached. The results are after oom, but i think about a min or so after
> that. I missed the oom point.

Looking back at your original oom-killer output: something has consumed all
your ZONE_NORMAL memory and we cannot tell what it is.

Please run 2.6.24-rc5-mm1 again (with CONFIG_PAGE_OWNER=y) and take a peek
at the changelog in
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc5/2.6.24-rc5-mm1/broken-out/page-owner-tracking-leak-detector.patch.

Build up Documentation/page_owner.c then cause the leak to happen then
execute page_owner.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Tejun Heo
Dhaval Giani wrote:
> XXX sysfs_page_cnt=1

Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
all your low memory.  Please do the following.

1. Right after boot, record /proc/meminfo and slabinfo.

2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
tricky but if your machine reliably OOMs after 10mins, run it for 9mins
and capturing the result should show enough.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Dhaval Giani
> > OK, so it ooms there as well. I am attaching its config and part of the
> > dmesg (whatever I could capture).
> 
> I can't reproduce it here either.  Please apply the attached patch and
> reproduce the problem.  It will report the number of allocated buffer
> pages every 10 sec.  After oom occurs, please report how this number
> changed and the result of 'cat /proc/slabinfo'.
> 

Here you go.

elay using timer specific routine.. 4976.59 BogoMIPS (lpj=9953199)
CPU: After generic identify, caps: bfebfbff    4400 
  
CPU: Trace cache: 12K uops, L1 D cache: 8K
CPU: L2 cache: 512K
CPU: L3 cache: 1024K
CPU: Physical Processor ID: 3
CPU: After all inits, caps: bfebfbff   b080 4400 
  
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#6.
CPU6: Intel P4/Xeon Extended MCE MSRs (12) available
CPU6: Thermal monitoring enabled
CPU6: Intel(R) Xeon(TM) MP CPU 2.50GHz stepping 05
lockdep: not fixing up alternatives.
Booting processor 7/7 ip 3000
Initializing CPU#7
Calibrating delay using timer specific routine.. 4976.45 BogoMIPS (lpj=9952905)
CPU: After generic identify, caps: bfebfbff    4400 
  
CPU: Trace cache: 12K uops, L1 D cache: 8K
CPU: L2 cache: 512K
CPU: L3 cache: 1024K
CPU: Physical Processor ID: 3
CPU: After all inits, caps: bfebfbff   b080 4400 
  
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#7.
CPU7: Intel P4/Xeon Extended MCE MSRs (12) available
CPU7: Thermal monitoring enabled
CPU7: Intel(R) Xeon(TM) MP CPU 2.50GHz stepping 05
Total of 8 processors activated (39819.96 BogoMIPS).
ENABLING IO-APIC IRQs
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
APIC timer registered as dummy, due to nmi_watchdog=1!
checking TSC synchronization [CPU#0 -> CPU#1]: passed.
checking TSC synchronization [CPU#0 -> CPU#2]: passed.
checking TSC synchronization [CPU#0 -> CPU#3]: passed.
checking TSC synchronization [CPU#0 -> CPU#4]: passed.
checking TSC synchronization [CPU#0 -> CPU#5]: passed.
checking TSC synchronization [CPU#0 -> CPU#6]: passed.
checking TSC synchronization [CPU#0 -> CPU#7]: passed.
Brought up 8 CPUs
net_namespace: 136 bytes
NET: Registered protocol family 16
ACPI: bus type pci registered
PCI: PCI BIOS revision 2.10 entry at 0xfd74c, last bus=12
PCI: Using configuration type 1
Setting up standard PCI resources
evgpeblk-0956 [00] ev_create_gpe_block   : GPE 00 to 1F [_GPE] 4 regs on int 0x7
evgpeblk-0956 [00] ev_create_gpe_block   : GPE 20 to 3F [_GPE] 4 regs on int 0x7
evgpeblk-1052 [00] ev_initialize_gpe_bloc: Found 1 Wake, Enabled 0 Runtime GPEs 
in this block
evgpeblk-1052 [00] ev_initialize_gpe_bloc: Found 0 Wake, Enabled 1 Runtime GPEs 
in this block
ACPI: EC: Look up EC in DSDT
Completing Region/Field/Buffer/Package 
initialization:.
Initialized 8/13 Regions 0/0 Fields 8/8 Buffers 17/18 Packages (999 nodes)
Initializing Device/Processor/Thermal objects by executing _INI methods:.
Executed 1 _INI methods requiring 0 _STA executions (examined 123 objects)
ACPI: Interpreter enabled
ACPI: (supports S0 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: PCI Root Bridge [PCI0] (:00)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Root Bridge [PCI1] (:01)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI1._PRT]
ACPI: PCI Root Bridge [PCI2] (:05)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI2._PRT]
ACPI: PCI Root Bridge [PCI3] (:08)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI3._PRT]
ACPI: PCI Root Bridge [PCI4] (:09)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI4._PRT]
ACPI: PCI Interrupt Link [LP00] (IRQs *10)
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP01] (IRQs) *0, disabled.
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP02] (IRQs) *0, disabled.
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP03] (IRQs) *0, disabled.
ACPI: PCI Interrupt Link [LP04] (IRQs *5)
ACPI: PCI Interrupt Link [LP05] (IRQs *5)
ACPI: PCI Interrupt Link [LP06] (IRQs *5)
ACPI: PCI Interrupt Link [LP07] (IRQs *5)
ACPI: PCI Interrupt Link [LP08] (IRQs *5)
ACPI: PCI Interrupt Link [LP09] (IRQs *5)
ACPI: PCI Interrupt Link [LP0A] (IRQs *5)
ACPI: PCI Interrupt Link [LP0B] (IRQs *5)
ACPI: PCI Interrupt Link [LP0C] (IRQs *5)
ACPI: PCI Interrupt Link [LP0D] (IRQs *5)
ACPI: PCI Interrupt Link [LP0E] (IRQs *5)
ACPI: PCI Interrupt Link [LP0F] (IRQs *5)
ACPI: PCI Interrupt Link [LP10] (IRQs *5)
ACPI: PCI Interrupt Link [LP11] (IRQs *5)
ACPI: PCI Interrupt Link [LP12] (IRQs *5)
ACPI: PCI Interrupt Link [LP13] (IRQs *5)
ACPI: PCI Interrupt Link [LP14] (IRQs *5)
ACPI: PCI Interrupt Link [LP15] (IRQs *5)
ACPI: PCI Interrupt Link [LP16] (IRQs *5)
ACPI: PCI Interrupt Link [LP17] (IRQs *5)

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Tejun Heo
Dhaval Giani wrote:
> On Thu, Dec 13, 2007 at 10:16:58PM +0530, Dhaval Giani wrote:
>> On Thu, Dec 13, 2007 at 08:29:36AM -0800, Greg KH wrote:
>>> On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
>>>> On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
>>>>> On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
>>>>>> Hi Greg, Tejun,
>>>>>>
>>>>>> The following script causes oomkiller to be invoked on my system here.
>>>>>>
>>>>>> while echo; do cat /sys/kernel/kexec_crash_loaded; done
>>>>>>
>>>>>  while echo; do cat /sys/kernel/uevent_seqnum ; done;
>>>>>
>>>>> causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
>>>>> 2.6.24-rc5 as well. It seems not be particularly related to any single
>>>>> file in sysfs.
>>>>>
>>>> And on 2.6.24-rc5-mm1 as well.
>>> How long do you have to run this?  I'm not seeing a problem here with
>>> 2.6.24-rc5 using SLUB, but I might not have run things long enough.
>>>
>> I hit it reliably under 10 mins. I've seen it with both SLUB and SLAB.
>>
>>> I ran slabinfo and don't see anything leaking either, do you?
>> Nor could I find anything, at least nothing directly apparent. (But, I
>> am not very good at ready slabinfo yet.)
>>
>> I've attached the .config if that helps in reproducing it. (Am testing
>> with SLUB again on latest -mm).
>>
> 
> OK, so it ooms there as well. I am attaching its config and part of the
> dmesg (whatever I could capture).

I can't reproduce it here either.  Please apply the attached patch and
reproduce the problem.  It will report the number of allocated buffer
pages every 10 sec.  After oom occurs, please report how this number
changed and the result of 'cat /proc/slabinfo'.

Thanks.

-- 
tejun
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4045bdc..6e7fa62 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -17,9 +17,30 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "sysfs.h"
 
+static unsigned long last_jiffies = INITIAL_JIFFIES;
+static atomic_t sysfs_page_cnt = ATOMIC_INIT(0);
+
+static void sysfs_page_inc(void)
+{
+	atomic_inc(&sysfs_page_cnt);
+
+	if (time_before(jiffies, last_jiffies + 10 * HZ))
+		return;
+
+	last_jiffies = jiffies;
+	printk("XXX sysfs_page_cnt=%d\n", atomic_read(&sysfs_page_cnt));
+}
+
+static void sysfs_page_dec(void)
+{
+	atomic_dec(&sysfs_page_cnt);
+}
+
 #define to_sattr(a) container_of(a,struct subsys_attribute, attr)
 
 /*
@@ -105,8 +126,10 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 	int ret = 0;
 	ssize_t count;
 
-	if (!buffer->page)
+	if (!buffer->page) {
 		buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
+		sysfs_page_inc();
+	}
 	if (!buffer->page)
 		return -ENOMEM;
 
@@ -188,8 +211,10 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 {
 	int error;
 
-	if (!buffer->page)
+	if (!buffer->page) {
 		buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+		sysfs_page_inc();
+	}
 	if (!buffer->page)
 		return -ENOMEM;
 
@@ -434,8 +459,10 @@ static int sysfs_release(struct inode *inode, struct file *filp)
 
 	sysfs_put_open_dirent(sd, buffer);
 
-	if (buffer->page)
+	if (buffer->page) {
 		free_page((unsigned long)buffer->page);
+		sysfs_page_dec();
+	}
 	kfree(buffer);
 
 	return 0;


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Tejun Heo
Dhaval Giani wrote:
 On Thu, Dec 13, 2007 at 10:16:58PM +0530, Dhaval Giani wrote:
 On Thu, Dec 13, 2007 at 08:29:36AM -0800, Greg KH wrote:
 On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
 On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
 On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
 Hi Greg, Tejun,

 The following script causes oomkiller to be invoked on my system here.

 while echo; do cat /sys/kernel/kexec_crash_loaded; done

  while echo; do cat /sys/kernel/uevent_seqnum ; done;

 causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
 2.6.24-rc5 as well. It seems not be particularly related to any single
 file in sysfs.

 And on 2.6.24-rc5-mm1 as well.
 How long do you have to run this?  I'm not seeing a problem here with
 2.6.24-rc5 using SLUB, but I might not have run things long enough.

 I hit it reliably under 10 mins. I've seen it with both SLUB and SLAB.

 I ran slabinfo and don't see anything leaking either, do you?
 Nor could I find anything, at least nothing directly apparent. (But, I
 am not very good at ready slabinfo yet.)

 I've attached the .config if that helps in reproducing it. (Am testing
 with SLUB again on latest -mm).

 
 OK, so it ooms there as well. I am attaching its config and part of the
 dmesg (whatever I could capture).

I can't reproduce it here either.  Please apply the attached patch and
reproduce the problem.  It will report the number of allocated buffer
pages every 10 sec.  After oom occurs, please report how this number
changed and the result of 'cat /proc/slabinfo'.

Thanks.

-- 
tejun
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4045bdc..6e7fa62 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -17,9 +17,30 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
+#include <linux/jiffies.h>
+#include <asm/atomic.h>
 
 #include "sysfs.h"
 
+static unsigned long last_jiffies = INITIAL_JIFFIES;
+static atomic_t sysfs_page_cnt = ATOMIC_INIT(0);
+
+static void sysfs_page_inc(void)
+{
+	atomic_inc(&sysfs_page_cnt);
+
+	if (time_before(jiffies, last_jiffies + 10 * HZ))
+		return;
+
+	last_jiffies = jiffies;
+	printk("XXX sysfs_page_cnt=%d\n", atomic_read(&sysfs_page_cnt));
+}
+
+static void sysfs_page_dec(void)
+{
+	atomic_dec(&sysfs_page_cnt);
+}
+
 #define to_sattr(a) container_of(a,struct subsys_attribute, attr)
 
 /*
@@ -105,8 +126,10 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 	int ret = 0;
 	ssize_t count;
 
-	if (!buffer->page)
+	if (!buffer->page) {
 		buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
+		sysfs_page_inc();
+	}
 	if (!buffer->page)
 		return -ENOMEM;
 
@@ -188,8 +211,10 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 {
 	int error;
 
-	if (!buffer->page)
+	if (!buffer->page) {
 		buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+		sysfs_page_inc();
+	}
 	if (!buffer->page)
 		return -ENOMEM;
 
@@ -434,8 +459,10 @@ static int sysfs_release(struct inode *inode, struct file *filp)
 
 	sysfs_put_open_dirent(sd, buffer);
 
-	if (buffer->page)
+	if (buffer->page) {
 		free_page((unsigned long)buffer->page);
+		sysfs_page_dec();
+	}
 	kfree(buffer);
 
 	return 0;


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Dhaval Giani
  OK, so it ooms there as well. I am attaching its config and part of the
  dmesg (whatever I could capture).
 
 I can't reproduce it here either.  Please apply the attached patch and
 reproduce the problem.  It will report the number of allocated buffer
 pages every 10 sec.  After oom occurs, please report how this number
 changed and the result of 'cat /proc/slabinfo'.
 

Here you go.

elay using timer specific routine.. 4976.59 BogoMIPS (lpj=9953199)
CPU: After generic identify, caps: bfebfbff    4400 
  
CPU: Trace cache: 12K uops, L1 D cache: 8K
CPU: L2 cache: 512K
CPU: L3 cache: 1024K
CPU: Physical Processor ID: 3
CPU: After all inits, caps: bfebfbff   b080 4400 
  
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#6.
CPU6: Intel P4/Xeon Extended MCE MSRs (12) available
CPU6: Thermal monitoring enabled
CPU6: Intel(R) Xeon(TM) MP CPU 2.50GHz stepping 05
lockdep: not fixing up alternatives.
Booting processor 7/7 ip 3000
Initializing CPU#7
Calibrating delay using timer specific routine.. 4976.45 BogoMIPS (lpj=9952905)
CPU: After generic identify, caps: bfebfbff    4400 
  
CPU: Trace cache: 12K uops, L1 D cache: 8K
CPU: L2 cache: 512K
CPU: L3 cache: 1024K
CPU: Physical Processor ID: 3
CPU: After all inits, caps: bfebfbff   b080 4400 
  
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#7.
CPU7: Intel P4/Xeon Extended MCE MSRs (12) available
CPU7: Thermal monitoring enabled
CPU7: Intel(R) Xeon(TM) MP CPU 2.50GHz stepping 05
Total of 8 processors activated (39819.96 BogoMIPS).
ENABLING IO-APIC IRQs
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
APIC timer registered as dummy, due to nmi_watchdog=1!
checking TSC synchronization [CPU#0 - CPU#1]: passed.
checking TSC synchronization [CPU#0 - CPU#2]: passed.
checking TSC synchronization [CPU#0 - CPU#3]: passed.
checking TSC synchronization [CPU#0 - CPU#4]: passed.
checking TSC synchronization [CPU#0 - CPU#5]: passed.
checking TSC synchronization [CPU#0 - CPU#6]: passed.
checking TSC synchronization [CPU#0 - CPU#7]: passed.
Brought up 8 CPUs
net_namespace: 136 bytes
NET: Registered protocol family 16
ACPI: bus type pci registered
PCI: PCI BIOS revision 2.10 entry at 0xfd74c, last bus=12
PCI: Using configuration type 1
Setting up standard PCI resources
evgpeblk-0956 [00] ev_create_gpe_block   : GPE 00 to 1F [_GPE] 4 regs on int 0x7
evgpeblk-0956 [00] ev_create_gpe_block   : GPE 20 to 3F [_GPE] 4 regs on int 0x7
evgpeblk-1052 [00] ev_initialize_gpe_bloc: Found 1 Wake, Enabled 0 Runtime GPEs 
in this block
evgpeblk-1052 [00] ev_initialize_gpe_bloc: Found 0 Wake, Enabled 1 Runtime GPEs 
in this block
ACPI: EC: Look up EC in DSDT
Completing Region/Field/Buffer/Package 
initialization:.
Initialized 8/13 Regions 0/0 Fields 8/8 Buffers 17/18 Packages (999 nodes)
Initializing Device/Processor/Thermal objects by executing _INI methods:.
Executed 1 _INI methods requiring 0 _STA executions (examined 123 objects)
ACPI: Interpreter enabled
ACPI: (supports S0 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: PCI Root Bridge [PCI0] (0000:00)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Root Bridge [PCI1] (0000:01)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI1._PRT]
ACPI: PCI Root Bridge [PCI2] (0000:05)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI2._PRT]
ACPI: PCI Root Bridge [PCI3] (0000:08)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI3._PRT]
ACPI: PCI Root Bridge [PCI4] (0000:09)
ACPI: PCI Interrupt Routing Table [\_SB_.PCI4._PRT]
ACPI: PCI Interrupt Link [LP00] (IRQs *10)
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP01] (IRQs) *0, disabled.
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP02] (IRQs) *0, disabled.
ACPI: Blank IRQ resource
ACPI: Resource is not an IRQ entry
ACPI: PCI Interrupt Link [LP03] (IRQs) *0, disabled.
ACPI: PCI Interrupt Link [LP04] (IRQs *5)
ACPI: PCI Interrupt Link [LP05] (IRQs *5)
ACPI: PCI Interrupt Link [LP06] (IRQs *5)
ACPI: PCI Interrupt Link [LP07] (IRQs *5)
ACPI: PCI Interrupt Link [LP08] (IRQs *5)
ACPI: PCI Interrupt Link [LP09] (IRQs *5)
ACPI: PCI Interrupt Link [LP0A] (IRQs *5)
ACPI: PCI Interrupt Link [LP0B] (IRQs *5)
ACPI: PCI Interrupt Link [LP0C] (IRQs *5)
ACPI: PCI Interrupt Link [LP0D] (IRQs *5)
ACPI: PCI Interrupt Link [LP0E] (IRQs *5)
ACPI: PCI Interrupt Link [LP0F] (IRQs *5)
ACPI: PCI Interrupt Link [LP10] (IRQs *5)
ACPI: PCI Interrupt Link [LP11] (IRQs *5)
ACPI: PCI Interrupt Link [LP12] (IRQs *5)
ACPI: PCI Interrupt Link [LP13] (IRQs *5)
ACPI: PCI Interrupt Link [LP14] (IRQs *5)
ACPI: PCI Interrupt Link [LP15] (IRQs *5)
ACPI: PCI Interrupt Link [LP16] (IRQs *5)
ACPI: PCI Interrupt Link [LP17] (IRQs *5)
ACPI: PCI 

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Tejun Heo
Dhaval Giani wrote:
 XXX sysfs_page_cnt=1

Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
all your low memory.  Please do the following.

1. Right after boot, record /proc/meminfo and slabinfo.

2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
tricky but if your machine reliably OOMs after 10mins, run it for 9mins
and capturing the result should show enough.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Fri, 14 Dec 2007 21:46:37 +0530 Dhaval Giani [EMAIL PROTECTED] wrote:

 On Sat, Dec 15, 2007 at 12:54:09AM +0900, Tejun Heo wrote:
  Dhaval Giani wrote:
   XXX sysfs_page_cnt=1
  
  Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
  all your low memory.  Please do the following.
  
  1. Right after boot, record /proc/meminfo and slabinfo.
  
  2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
  tricky but if your machine reliably OOMs after 10mins, run it for 9mins
  and capturing the result should show enough.
  
 
 Attached. The results are after oom, but i think about a min or so after
 that. I missed the oom point.

Looking back at your original oom-killer output: something has consumed all
your ZONE_NORMAL memory and we cannot tell what it is.

Please run 2.6.24-rc5-mm1 again (with CONFIG_PAGE_OWNER=y) and take a peek
at the changelog in
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc5/2.6.24-rc5-mm1/broken-out/page-owner-tracking-leak-detector.patch.

Build up Documentation/page_owner.c then cause the leak to happen then
execute page_owner.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Fri, 14 Dec 2007 23:58:02 +0530
Dhaval Giani [EMAIL PROTECTED] wrote:

 On Fri, Dec 14, 2007 at 09:50:23AM -0800, Andrew Morton wrote:
  On Fri, 14 Dec 2007 21:46:37 +0530 Dhaval Giani [EMAIL PROTECTED] wrote:
  
   On Sat, Dec 15, 2007 at 12:54:09AM +0900, Tejun Heo wrote:
Dhaval Giani wrote:
 XXX sysfs_page_cnt=1

Hmm.. so, sysfs r/w buffer wasn't the culprit.  I'm curious what eats up
all your low memory.  Please do the following.

1. Right after boot, record /proc/meminfo and slabinfo.

2. After or near OOM, record /proc/meminfo and slabinfo.  This can be
tricky but if your machine reliably OOMs after 10mins, run it for 9mins
and capturing the result should show enough.

   
   Attached. The results are after oom, but i think about a min or so after
   that. I missed the oom point.
  
  Looking back at your original oom-killer output: something has consumed all
  your ZONE_NORMAL memory and we cannot tell what it is.
  
  Please run 2.6.24-rc5-mm1 again (with CONFIG_PAGE_OWNER=y) and take a peek
  at the changelog in
  ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc5/2.6.24-rc5-mm1/broken-out/page-owner-tracking-leak-detector.patch.
  
  Build up Documentation/page_owner.c then cause the leak to happen then
  execute page_owner.
  
 Hi Andrew
 
 This is a peek during the leak.
 
 ...
 
 [sorted_page_owner.txt  text/plain (100.2KB)]
 51957 times:
 Page allocated via order 0, mask 0x80d0
 [0xc015b9aa] __alloc_pages+706
 [0xc015b9f0] __get_free_pages+60
 [0xc011b7c9] pgd_alloc+60
 [0xc0122b9e] mm_init+196
 [0xc0122e06] dup_mm+101
 [0xc0122eda] copy_mm+104
 [0xc0123b8c] copy_process+1149
 [0xc0124229] do_fork+141
 
 12335 times:
 Page allocated via order 0, mask 0x84d0
 [0xc015b9aa] __alloc_pages+706
 [0xc011b6ca] pte_alloc_one+21
 [0xc01632ac] __pte_alloc+21
 [0xc01634bb] copy_pte_range+67
 [0xc0163827] copy_page_range+284
 [0xc0122a79] dup_mmap+427
 [0xc0122e22] dup_mm+129
 [0xc0122eda] copy_mm+104

OK, so you're leaking pgd's on a fork-intensive load.  It's a 4G i386
highmem system but I'm sure there are enough of those out there (still) for
this bug to have been promptly reported if it was generally occurring.

There's something special about either your setup or the test which you're
running.

Is it really the case that the bug only turns up when you run tests like

while echo; do cat /sys/kernel/kexec_crash_loaded; done
and
while echo; do cat /sys/kernel/uevent_seqnum ; done;

or will any fork-intensive workload also do it?  Say,

while echo ; do true ; done

?

Another interesting factoid here is that after the oomkilling your slabinfo has

mm_struct 38 9858471 : tunables   32   168 : 
slabdata 14 14  0 : globalstat278119649   31
  01000 : cpustat 368800  11864 368920  
11721

so we aren't leaking mm_structs.  In fact we aren't leaking anything from
slab.   But we are leaking pgds.

iirc the most recent change we've made in the pgd_t area is the quicklist
management which went into 2.6.22-rc1.  You say the bug was present in
2.6.22.  Can you test 2.6.21?  

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Dhaval Giani
 Is it really the case that the bug only turns up when you run tests like
 
   while echo; do cat /sys/kernel/kexec_crash_loaded; done
 and
   while echo; do cat /sys/kernel/uevent_seqnum ; done;
 
 or will any fork-intensive workload also do it?  Say,
 
   while echo ; do true ; done
 

This does not leak, but having a simple text file and reading it in a
loop causes it.

 ?
 
 Another interesting factoid here is that after the oomkilling you slabinfo has
 
 mm_struct 38 9858471 : tunables   32   168 : 
 slabdata 14 14  0 : globalstat278119649   31  
   01000 : cpustat 368800  11864 368920  
 11721
 
 so we aren't leaking mm_structs.  In fact we aren't leaking anything from
 slab.   But we are leaking pgds.
 
 iirc the most recent change we've made in the pgd_t area is the quicklist
 management which went into 2.6.22-rc1.  You say the bug was present in
 2.6.22.  Can you test 2.6.21?  

Nope, leak is not present in 2.6.21.7

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-14 Thread Andrew Morton
On Sat, 15 Dec 2007 09:22:00 +0530 Dhaval Giani [EMAIL PROTECTED] wrote:

  Is it really the case that the bug only turns up when you run tests like
  
  while echo; do cat /sys/kernel/kexec_crash_loaded; done
  and
  while echo; do cat /sys/kernel/uevent_seqnum ; done;
  
  or will any fork-intensive workload also do it?  Say,
  
  while echo ; do true ; done
  
 
 This does not leak, but having a simple text file and reading it in a
 loop causes it.

hm.

  ?
  
  Another interesting factoid here is that after the oomkilling you slabinfo 
  has
  
  mm_struct 38 9858471 : tunables   32   168 
  : slabdata 14 14  0 : globalstat278119649   31  
  01000 : cpustat 368800  11864 
  368920  11721
  
  so we aren't leaking mm_structs.  In fact we aren't leaking anything from
  slab.   But we are leaking pgds.
  
  iirc the most recent change we've made in the pgd_t area is the quicklist
  management which went into 2.6.22-rc1.  You say the bug was present in
  2.6.22.  Can you test 2.6.21?  
 
 Nope, leak is not present in 2.6.21.7

Could you try this debug patch please?

It might need some fiddling to get useful output.  Basic idea is to see if
we are failing to empty the quicklists.

--- a/include/linux/quicklist.h~a
+++ a/include/linux/quicklist.h
@@ -69,6 +69,8 @@ static inline void __quicklist_free(int 
*(void **)p = q->page;
q->page = p;
q->nr_pages++;
+   if (q->nr_pages && !(q->nr_pages % 1000))
+   printk("eek: %d\n", q->nr_pages);
put_cpu_var(quicklist);
 }
 
_

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 08:29:36AM -0800, Greg KH wrote:
> On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
> > On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> > > On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > > > Hi Greg, Tejun,
> > > > 
> > > > The following script causes oomkiller to be invoked on my system here.
> > > > 
> > > > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > > > 
> > > 
> > >  while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > > 
> > > causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> > > 2.6.24-rc5 as well. It seems not be particularly related to any single
> > > file in sysfs.
> > > 
> > 
> > And on 2.6.24-rc5-mm1 as well.
> 
> How long do you have to run this?  I'm not seeing a problem here with
> 2.6.24-rc5 using SLUB, but I might not have run things long enough.
> 

I hit it reliably under 10 mins. I've seen it with both SLUB and SLAB.

> I ran slabinfo and don't see anything leaking either, do you?

Nor could I find anything, at least nothing directly apparent. (But, I
am not very good at reading slabinfo yet.)

I've attached the .config if that helps in reproducing it. (Am testing
with SLUB again on latest -mm).

thanks,

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.23.9
# Thu Dec 13 18:30:09 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_QUICKLIST=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
# CONFIG_USER_NS is not set
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=15
CONFIG_CPUSETS=y
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLAB=y
# CONFIG_SLUB is not set
# CONFIG_SLOB is not set
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_KMOD is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
# CONFIG_BLK_DEV_BSG is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"

#
# Processor type and features
#
# CONFIG_TICK_ONESHOT is not set
# CONFIG_NO_HZ is not set
# CONFIG_HIGH_RES_TIMERS is not set
CONFIG_SMP=y
# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
CONFIG_X86_GENERICARCH=y
# CONFIG_X86_ES7000 is not set
# CONFIG_PARAVIRT is not set
CONFIG_X86_CYCLONE_TIMER=y
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MCORE2 is not set
CONFIG_MPENTIUM4=y
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_X86_XADD=y
CONFIG_RWSEM_XCHGADD_ALGORITHM

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Greg KH
On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
> On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> > On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > > Hi Greg, Tejun,
> > > 
> > > The following script causes oomkiller to be invoked on my system here.
> > > 
> > > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > > 
> > 
> >  while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > 
> > causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> > 2.6.24-rc5 as well. It seems not be particularly related to any single
> > file in sysfs.
> > 
> 
> And on 2.6.24-rc5-mm1 as well.

How long do you have to run this?  I'm not seeing a problem here with
2.6.24-rc5 using SLUB, but I might not have run things long enough.

I ran slabinfo and don't see anything leaking either, do you?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > Hi Greg, Tejun,
> > 
> > The following script causes oomkiller to be invoked on my system here.
> > 
> > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > 
> 
>  while echo; do cat /sys/kernel/uevent_seqnum ; done;
> 
> causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> 2.6.24-rc5 as well. It seems not be particularly related to any single
> file in sysfs.
> 

And on 2.6.24-rc5-mm1 as well.

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> Hi Greg, Tejun,
> 
> The following script causes oomkiller to be invoked on my system here.
> 
> while echo; do cat /sys/kernel/kexec_crash_loaded; done
> 

 while echo; do cat /sys/kernel/uevent_seqnum ; done;

causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
2.6.24-rc5 as well. It seems not to be particularly related to any single
file in sysfs.

Thanks,
-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
Hi Greg, Tejun,

The following script causes oomkiller to be invoked on my system here.

while echo; do cat /sys/kernel/kexec_crash_loaded; done

It gets invoked within 10 mins.

[EMAIL PROTECTED] ~]# cat /proc/cpuinfo 
processor   : 0
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 0
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4981.41
clflush size: 64

processor   : 1
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 0
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.80
clflush size: 64

processor   : 2
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 1
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.07
clflush size: 64

processor   : 3
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 1
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.20
clflush size: 64

processor   : 4
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 2
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.94
clflush size: 64

processor   : 5
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 2
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.08
clflush size: 64

processor   : 6
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 3
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.88
clflush size: 64

processor   : 7
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping 

2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
Hi Greg, Tejun,

The following script causes oomkiller to be invoked on my system here.

while echo; do cat /sys/kernel/kexec_crash_loaded; done

It gets invoked within 10 mins.

[EMAIL PROTECTED] ~]# cat /proc/cpuinfo 
processor   : 0
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 0
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4981.41
clflush size: 64

processor   : 1
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 0
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.80
clflush size: 64

processor   : 2
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 1
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.07
clflush size: 64

processor   : 3
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 1
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.20
clflush size: 64

processor   : 4
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 2
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.94
clflush size: 64

processor   : 5
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 2
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4977.08
clflush size: 64

processor   : 6
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping: 5
cpu MHz : 2500.000
cache size  : 1024 KB
physical id : 3
siblings: 2
core id : 0
cpu cores   : 1
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid
xtpr
bogomips: 4976.88
clflush size: 64

processor   : 7
vendor_id   : GenuineIntel
cpu family  : 15
model   : 2
model name  : Intel(R) Xeon(TM) MP CPU 2.50GHz
stepping 

Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> Hi Greg, Tejun,
>
> The following script causes oomkiller to be invoked on my system here.
>
> while echo; do cat /sys/kernel/kexec_crash_loaded; done
>

 while echo; do cat /sys/kernel/uevent_seqnum ; done;

causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
2.6.24-rc5 as well. It seems not to be particularly related to any single
file in sysfs.

Thanks,
-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > Hi Greg, Tejun,
> >
> > The following script causes oomkiller to be invoked on my system here.
> >
> > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> >
>
>  while echo; do cat /sys/kernel/uevent_seqnum ; done;
>
> causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> 2.6.24-rc5 as well. It seems not be particularly related to any single
> file in sysfs.
>

And on 2.6.24-rc5-mm1 as well.

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Greg KH
On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
> On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> > On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > > Hi Greg, Tejun,
> > >
> > > The following script causes oomkiller to be invoked on my system here.
> > >
> > > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > >
> >
> >  while echo; do cat /sys/kernel/uevent_seqnum ; done;
> >
> > causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> > 2.6.24-rc5 as well. It seems not be particularly related to any single
> > file in sysfs.
> >
>
> And on 2.6.24-rc5-mm1 as well.

How long do you have to run this?  I'm not seeing a problem here with
2.6.24-rc5 using SLUB, but I might not have run things long enough.

I ran slabinfo and don't see anything leaking either, do you?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2007-12-13 Thread Dhaval Giani
On Thu, Dec 13, 2007 at 08:29:36AM -0800, Greg KH wrote:
> On Thu, Dec 13, 2007 at 08:48:47PM +0530, Dhaval Giani wrote:
> > On Thu, Dec 13, 2007 at 06:53:26PM +0530, Dhaval Giani wrote:
> > > On Thu, Dec 13, 2007 at 06:03:33PM +0530, Dhaval Giani wrote:
> > > > Hi Greg, Tejun,
> > > >
> > > > The following script causes oomkiller to be invoked on my system here.
> > > >
> > > > while echo; do cat /sys/kernel/kexec_crash_loaded; done
> > > >
> > >
> > >  while echo; do cat /sys/kernel/uevent_seqnum ; done;
> > >
> > > causes oomkiller to be invoked on 2.6.22-stable, 2.6.23-stable and
> > > 2.6.24-rc5 as well. It seems not be particularly related to any single
> > > file in sysfs.
> > >
> >
> > And on 2.6.24-rc5-mm1 as well.
>
> How long do you have to run this?  I'm not seeing a problem here with
> 2.6.24-rc5 using SLUB, but I might not have run things long enough.
>

I hit it reliably under 10 mins. I've seen it with both SLUB and SLAB.

> I ran slabinfo and don't see anything leaking either, do you?

Nor could I find anything, at least nothing directly apparent. (But, I
am not very good at reading slabinfo yet.)

I've attached the .config if that helps in reproducing it. (Am testing
with SLUB again on latest -mm).

thanks,

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.23.9
# Thu Dec 13 18:30:09 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_QUICKLIST=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST=/lib/modules/$UNAME_RELEASE/.config

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
# CONFIG_USER_NS is not set
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=15
CONFIG_CPUSETS=y
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLAB=y
# CONFIG_SLUB is not set
# CONFIG_SLOB is not set
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_KMOD is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
# CONFIG_BLK_DEV_BSG is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED=anticipatory

#
# Processor type and features
#
# CONFIG_TICK_ONESHOT is not set
# CONFIG_NO_HZ is not set
# CONFIG_HIGH_RES_TIMERS is not set
CONFIG_SMP=y
# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
CONFIG_X86_GENERICARCH=y
# CONFIG_X86_ES7000 is not set
# CONFIG_PARAVIRT is not set
CONFIG_X86_CYCLONE_TIMER=y
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MCORE2 is not set
CONFIG_MPENTIUM4=y
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_X86_XADD=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y