Module Name:    src
Committed By:   sborrill
Date:           Fri Feb 24 17:58:45 UTC 2012

Modified Files:
        src/sys/dev/raidframe [netbsd-5]: rf_reconmap.c rf_reconstruct.c

Log Message:
Pull up the following revision(s) (requested by oster in ticket #1728):
        sys/dev/raidframe/rf_reconmap.c:        revision 1.34
        sys/dev/raidframe/rf_reconstruct.c:     revision 1.118

Remove a DIAGNOSTIC check that is invalid for RAID5_RS.
Add logic to the main reconstruction loop to handle RAID5 with rotated
spares. Correct issue where we were doing one more stripe than necessary.


To generate a diff of this commit:
cvs rdiff -u -r1.31 -r1.31.8.1 src/sys/dev/raidframe/rf_reconmap.c
cvs rdiff -u -r1.105.4.4 -r1.105.4.5 src/sys/dev/raidframe/rf_reconstruct.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/raidframe/rf_reconmap.c
diff -u src/sys/dev/raidframe/rf_reconmap.c:1.31 src/sys/dev/raidframe/rf_reconmap.c:1.31.8.1
--- src/sys/dev/raidframe/rf_reconmap.c:1.31	Mon May 19 19:49:54 2008
+++ src/sys/dev/raidframe/rf_reconmap.c	Fri Feb 24 17:58:44 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconmap.c,v 1.31 2008/05/19 19:49:54 oster Exp $	*/
+/*	$NetBSD: rf_reconmap.c,v 1.31.8.1 2012/02/24 17:58:44 sborrill Exp $	*/
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -34,7 +34,7 @@
  *************************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.31 2008/05/19 19:49:54 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.31.8.1 2012/02/24 17:58:44 sborrill Exp $");
 
 #include "rf_raid.h"
 #include <sys/time.h>
@@ -156,7 +156,14 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF
 
 		/* do we need to move the queue? */
 		while (i > mapPtr->high_ru) {
+#if 0
 #ifdef DIAGNOSTIC
+			/* XXX: The check below is not valid for
+			 * RAID5_RS.  It is valid for RAID 1 and RAID 5.
+			 * The issue is that we can easily have
+			 * RU_NOTHING entries here too, and those are
+			 * quite correct.
+			 */
 			if (mapPtr->status[mapPtr->head]!=RU_ALL) {
 				printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64 "\n",
 				       raidPtr->raidid, i);
@@ -169,6 +176,7 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF
 				panic("reconmap incorrect");
 			} 
 #endif
+#endif
 			mapPtr->low_ru++;
 			mapPtr->high_ru++;
 			/* initialize "highest" RU status entry, which

Index: src/sys/dev/raidframe/rf_reconstruct.c
diff -u src/sys/dev/raidframe/rf_reconstruct.c:1.105.4.4 src/sys/dev/raidframe/rf_reconstruct.c:1.105.4.5
--- src/sys/dev/raidframe/rf_reconstruct.c:1.105.4.4	Sun Nov 21 22:06:53 2010
+++ src/sys/dev/raidframe/rf_reconstruct.c	Fri Feb 24 17:58:44 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconstruct.c,v 1.105.4.4 2010/11/21 22:06:53 riz Exp $	*/
+/*	$NetBSD: rf_reconstruct.c,v 1.105.4.5 2012/02/24 17:58:44 sborrill Exp $	*/
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -33,7 +33,7 @@
  ************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.105.4.4 2010/11/21 22:06:53 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.105.4.5 2012/02/24 17:58:44 sborrill Exp $");
 
 #include <sys/param.h>
 #include <sys/time.h>
@@ -557,6 +557,9 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 	RF_ReconCtrl_t *tmp_reconctrl;
 	RF_ReconEvent_t *event;
 	RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev;
+#if RF_INCLUDE_RAID5_RS > 0
+	RF_StripeCount_t startPSID,endPSID,aPSID,bPSID,offPSID;
+#endif
 	RF_ReconUnitCount_t RUsPerPU;
 	struct timeval etime, elpsd;
 	unsigned long xor_s, xor_resid_us;
@@ -609,7 +612,17 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 	recon_error = 0;
 	write_error = 0;
 	pending_writes = incPSID;
-	raidPtr->reconControl->lastPSID = incPSID;
+	raidPtr->reconControl->lastPSID = incPSID - 1;
+
+	/* bounds check raidPtr->reconControl->lastPSID and
+	   pending_writes so that we don't attempt to wait for more IO
+	   than can possibly happen */
+
+	if (raidPtr->reconControl->lastPSID > lastPSID)
+		raidPtr->reconControl->lastPSID = lastPSID;
+
+	if (pending_writes > lastPSID)
+		pending_writes = lastPSID;
 
 	/* start the actual reconstruction */
 
@@ -623,6 +636,49 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 		}
 
 		num_writes = 0;
+
+#if RF_INCLUDE_RAID5_RS > 0
+		/* For RAID5 with Rotated Spares we will be 'short'
+		   some number of writes since no writes will get
+		   issued for stripes where the spare is on the
+		   component being rebuilt.  Account for the shortage
+		   here so that we don't hang indefinitely below
+		   waiting for writes to complete that were never
+		   scheduled.
+
+		   XXX: Should be fixed for PARITY_DECLUSTERING and
+		   others too! 
+
+		*/
+
+		if (raidPtr->Layout.numDataCol < 
+		    raidPtr->numCol - raidPtr->Layout.numParityCol) {
+			/* numDataCol is at least 2 less than numCol, so
+			   should be RAID 5 with Rotated Spares */
+
+			/* XXX need to update for RAID 6 */
+			
+			startPSID = raidPtr->reconControl->lastPSID - pending_writes + 1;
+			endPSID = raidPtr->reconControl->lastPSID;
+			
+			offPSID = raidPtr->numCol - col - 1;
+			
+			aPSID = startPSID - startPSID % raidPtr->numCol + offPSID;
+			if (aPSID < startPSID) {
+				aPSID += raidPtr->numCol;
+			}
+			
+			bPSID = endPSID - ((endPSID - offPSID) % raidPtr->numCol);
+			
+			if (aPSID < endPSID) {
+				num_writes = ((bPSID - aPSID) / raidPtr->numCol) + 1;
+			}
+			
+			if ((aPSID == endPSID) && (bPSID == endPSID)) {
+				num_writes++;
+			}
+		}
+#endif
 		
 		/* issue a read for each surviving disk */
 		
@@ -701,7 +757,7 @@ rf_ContinueReconstructFailedDisk(RF_Raid
 #endif
 		}
 
-		/* reads done, wakup any waiters, and then wait for writes */
+		/* reads done, wakeup any waiters, and then wait for writes */
 
 		rf_WakeupHeadSepCBWaiters(raidPtr);
 
@@ -1121,7 +1177,7 @@ IssueNextReadRequest(RF_Raid_t *raidPtr,
 			ctrl->ru_count = 0;
 			/* code left over from when head-sep was based on
 			 * parity stripe id */
-			if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) {
+			if (ctrl->curPSID > raidPtr->reconControl->lastPSID) {
 				CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter));
 				return (RF_RECON_DONE_READS);	/* finito! */
 			}

Reply via email to