On 2010-Jul-12 19:38:18 +1000, Peter Jeremy <[email protected]> wrote: >I have been using the attached arc.patch1 based on a patch written by >Artem Belevich <[email protected]> (see http://pastebin.com/ZCkzkWcs ) >for about a month. I have had reasonable success with it (and junked >my cronjob) but have managed to wedge my system a couple of times >whilst doing zfs send|recv. Whilst looking at that diff, I just >noticed a nasty signed/unsigned bug that could bite in low memory >conditions and have revised it to arc.patch2 (untested as yet).
Let me try actually attaching those patches... Sorry. -- Peter Jeremy
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22.2.6
diff -u -r1.22.2.6 arc.c
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010
20:09:40 -0000 1.22.2.6
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jun 2010
21:04:13 -0000
@@ -183,10 +183,15 @@
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
+uint64_t zfs_arc_bp_active;
+uint64_t zfs_arc_bp_inactive;
+
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active);
+TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive);
SYSCTL_DECL(_vfs_zfs);
SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
"Maximum ARC size");
@@ -195,6 +200,11 @@
SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
&zfs_mdcomp_disable, 0, "Disable metadata compression");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN,
&zfs_arc_bp_active, 0,
+ "Start ARC backpressure if active memory is below this limit");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN,
&zfs_arc_bp_inactive, 0,
+ "Start ARC backpressure if inactive memory is below this limit");
+
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
@@ -2103,7 +2113,6 @@
}
static int needfree = 0;
-
static int
arc_reclaim_needed(void)
{
@@ -2112,20 +2121,58 @@
#endif
#ifdef _KERNEL
- if (needfree)
- return (1);
+ /* We've grown too much, */
if (arc_size > arc_c_max)
return (1);
+
+ /* Pagedaemon is stuck, let's free something right away */
+ if (vm_pageout_pages_needed)
+ return 1;
+
+ /* Check if inactive list have grown too much */
+ if ( zfs_arc_bp_inactive
+ && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) {
+ /* tell pager to reap 1/2th of inactive queue*/
+ atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2);
+ pagedaemon_wakeup();
+ return needfree;
+ }
+
+ /* Same for active list... */
+ if ( zfs_arc_bp_active
+ && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) {
+ atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2);
+ pagedaemon_wakeup();
+ return needfree;
+ }
+
+
+ /* Old style behavior -- ARC gives up memory whenever page daemon
asks.. */
+ if (needfree)
+ return 1;
+
+ /*
+ We got here either because active/inactive lists are
+ getting short or because we've been called during voluntary
+ ARC size checks. Kind of gray area...
+ */
+
+ /* If we didn't reach our minimum yet, don't rush to give memory up..*/
if (arc_size <= arc_c_min)
return (0);
+ /* If we're really short on memory now, give it up. */
+ if (vm_page_count_min()) {
+ return (1);
+ }
+
/*
- * If pages are needed or we're within 2048 pages
- * of needing to page need to reclaim
+ * If we're within 2048 pages of pagedaemon start, reclaim...
*/
- if (vm_pages_needed || (vm_paging_target() > -2048))
+ if (vm_pages_needed && (vm_paging_target() > -2048))
return (1);
+
#if 0
/*
* take 'desfree' extra pages, so we reclaim sooner, rather than later
@@ -2169,8 +2216,6 @@
return (1);
#endif
#else
- if (kmem_used() > (kmem_size() * 3) / 4)
- return (1);
#endif
#else
@@ -2279,7 +2324,7 @@
if (arc_eviction_list != NULL)
arc_do_user_evicts();
- if (arc_reclaim_needed()) {
+ if (needfree) {
needfree = 0;
#ifdef _KERNEL
wakeup(&needfree);
@@ -3611,10 +3656,15 @@
{
#ifdef _KERNEL
uint64_t inflight_data = arc_anon->arcs_size;
- uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count);
+ uint64_t available_memory;
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
+ /* How much memory is potentially available */
+ available_memory = ptoa((uintmax_t)cnt.v_free_count);
+ available_memory += ptoa((uintmax_t)cnt.v_cache_count);
+ available_memory -= ptoa((uintmax_t)cnt.v_free_min);
+
#if 0
#if defined(__i386)
available_memory =
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22.2.6
diff -u -r1.22.2.6 arc.c
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010
20:09:40 -0000 1.22.2.6
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jul 2010
09:21:31 -0000
@@ -183,10 +183,15 @@
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
+uint64_t zfs_arc_bp_active;
+uint64_t zfs_arc_bp_inactive;
+
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active);
+TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive);
SYSCTL_DECL(_vfs_zfs);
SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
"Maximum ARC size");
@@ -195,6 +200,11 @@
SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
&zfs_mdcomp_disable, 0, "Disable metadata compression");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN,
&zfs_arc_bp_active, 0,
+ "Start ARC backpressure if active memory is below this limit");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN,
&zfs_arc_bp_inactive, 0,
+ "Start ARC backpressure if inactive memory is below this limit");
+
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
@@ -2103,7 +2113,6 @@
}
static int needfree = 0;
-
static int
arc_reclaim_needed(void)
{
@@ -2112,20 +2121,58 @@
#endif
#ifdef _KERNEL
- if (needfree)
- return (1);
+ /* We've grown too much, */
if (arc_size > arc_c_max)
return (1);
+
+ /* Pagedaemon is stuck, let's free something right away */
+ if (vm_pageout_pages_needed)
+ return 1;
+
+ /* Check if inactive list have grown too much */
+ if ( zfs_arc_bp_inactive
+ && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) {
+ /* tell pager to reap 1/2th of inactive queue*/
+ atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2);
+ pagedaemon_wakeup();
+ return needfree;
+ }
+
+ /* Same for active list... */
+ if ( zfs_arc_bp_active
+ && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) {
+ atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2);
+ pagedaemon_wakeup();
+ return needfree;
+ }
+
+
+ /* Old style behavior -- ARC gives up memory whenever page daemon
asks.. */
+ if (needfree)
+ return 1;
+
+ /*
+ We got here either because active/inactive lists are
+ getting short or because we've been called during voluntary
+ ARC size checks. Kind of gray area...
+ */
+
+ /* If we didn't reach our minimum yet, don't rush to give memory up..*/
if (arc_size <= arc_c_min)
return (0);
+ /* If we're really short on memory now, give it up. */
+ if (vm_page_count_min()) {
+ return (1);
+ }
+
/*
- * If pages are needed or we're within 2048 pages
- * of needing to page need to reclaim
+ * If we're within 2048 pages of pagedaemon start, reclaim...
*/
- if (vm_pages_needed || (vm_paging_target() > -2048))
+ if (vm_pages_needed && (vm_paging_target() > -2048))
return (1);
+
#if 0
/*
* take 'desfree' extra pages, so we reclaim sooner, rather than later
@@ -2169,8 +2216,6 @@
return (1);
#endif
#else
- if (kmem_used() > (kmem_size() * 3) / 4)
- return (1);
#endif
#else
@@ -2279,7 +2324,7 @@
if (arc_eviction_list != NULL)
arc_do_user_evicts();
- if (arc_reclaim_needed()) {
+ if (needfree) {
needfree = 0;
#ifdef _KERNEL
wakeup(&needfree);
@@ -3611,10 +3656,17 @@
{
#ifdef _KERNEL
uint64_t inflight_data = arc_anon->arcs_size;
- uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count);
+ uint64_t available_memory;
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
+ /* How much memory is potentially available */
+ available_memory = (uint64_t)cnt.v_free_count + cnt.v_cache_count;
+ if (available_memory > cnt.v_free_min)
+ available_memory = ptoa(available_memory - cnt.v_free_min);
+ else
+ available_memory = 0;
+
#if 0
#if defined(__i386)
available_memory =
pgp8gmNzycqYQ.pgp
Description: PGP signature
