So my dead drive has died further and will now read() nothing.
It is an ex drive. Unable to do any manual XOR sanity checking,
I can only read out the remaining drives, and pray that in the scratch
space where they get copied to, I can find a partition that will
make sense to the debugfs utility.
Here's the C program I wrote to that end. Read the source
and make it suck less before you try to use it.
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
/* --------------
A user-space RAID test facility for testing
RAID 5 configurations.
*/
/* getopt state; also declared by <getopt.h>/<unistd.h>. */
extern char *optarg;
extern int optind, opterr, optopt;

/* Identifiers for the RAID 5 parity layouts.  They must be distinct so that
   a value stored in parity_algorithm names exactly one layout. */
#define LEFT_SYMMETRIC 1
#define LEFT_ASYMMETRIC 2

/* Value of the -t option as parsed by atosize(); nothing visible in this
   file reads it back. */
long total_data=0;
/* Chunk size.  Holds the -c value (default 64) until main() converts it
   into a count of longs, which is what every buffer loop uses. */
long chunk_size = 64;
/* Incremented once per -v on the command line. */
int verbose=0;
/* Number of device paths left on the command line after option parsing. */
int ndevices=0;
/* Selected parity layout.  NOTE(review): nothing visible in this file
   consults it; read_devices() always uses the left-symmetric writer. */
int parity_algorithm=LEFT_SYMMETRIC;
/* Non-zero (set by -w) makes read_devices() dump the data to stdout. */
int write_out=0;
/* NOTE(review): not read by any code visible in this file. */
int raid_level=5;
/* Index of the device whose contents are not read; -1 means none (-b). */
int bad_disk=-1;
/* Non-zero enables the per-stripe parity check (-P sets 1, -p sets 0). */
int check_parity_p=1;
/* chunks[i] is the chunk buffer for device i; allocated by main(). */
unsigned long ** chunks;
/* devices[i] is the stream for device i; opened by read_devices(). */
FILE ** devices;
/* NOTE(review): not used by any code visible in this file. */
struct stat * devfs;
/* Write the program name and version number to standard output. */
void print_version(){
    fputs("RaidTest 0.0\n", stdout);
}
/* Write a one-line usage summary to standard error.
   The line ends with a newline so that whatever is printed next (another
   diagnostic, or the shell prompt) starts on its own line, and it lists
   -w, which the option parser accepts. */
void print_usage(){
    fprintf(stderr,
            "Usage: raidtest [-vpPhVw] [-b NUMBER]"
            " [-c CHUNKSIZE] [DEVICES]\n");
}
/*
 * Check that every path in devs[start] .. devs[stop-1] can be stat()ed and
 * is not a directory.
 *
 * devs   array of path names (main() passes argv)
 * start  index of the first path to check
 * stop   one past the index of the last path to check
 *
 * Progress is reported on stderr according to the global verbose level.
 * On the first path that cannot be stat()ed, or that is a directory, a
 * message is printed and the process exits with status -1.
 */
void stat_devices(char * devs[],int start,int stop){
    int n;
    struct stat devstat;

    for (n = start; n < stop; n++) {
        if (stat(devs[n], &devstat) != 0) {
            /* fprintf may overwrite errno, so keep stat()'s value for
               the perror() call that follows. */
            int saved_errno = errno;

            fprintf(stderr,"Could not stat %s.\n",devs[n]);
            errno = saved_errno;
            perror(NULL);
            exit(-1);
        }

        if (verbose)
            fprintf(stderr,
                    "%s stats ok.\n",
                    devs[n]);

        if (verbose > 1 && S_ISBLK(devstat.st_mode))
            fprintf(stderr,
                    "Block Device\n");

        if (S_ISDIR(devstat.st_mode)) {
            fprintf(stderr,
                    "%s is a directory. Confused. Exiting.\n",
                    devs[n]);
            exit(-1);
        }
    }
}
/* Scratch buffer of chunk_size longs, allocated by main().  read_devices()
   hands it to check_parity_simple(), which overwrites it with the XOR of
   all device chunks for the current stripe. */
unsigned long * parity;
/*
 * Write the data chunks of one stripe to stdout in device order
 * (0 .. ndevices-1), skipping the parity chunk.
 *
 * buf         buf[d] points at the cs longs read from device d
 * cs          chunk length, counted in longs
 * ndevices    number of devices in the array
 * pdevice     index of the device holding parity for this stripe
 * bad_device  index of a device whose chunk is missing, or -1 for none
 *
 * If bad_device names a data device of this stripe, its chunk is first
 * rebuilt in place: XOR-ing the chunks of every other device (the parity
 * chunk included) yields the missing data.  A bad_device outside
 * 0..ndevices-1 is ignored rather than used as an index.
 *
 * This may be wrapped to serve any RAID 5 parity layout: the caller only
 * has to pick pdevice per stripe.  read_devices() in this file uses
 * ndevices - n % ndevices - 1 for stripe number n.
 *
 * Returns 0 on success, -1 if stdout accepted fewer than cs longs.
 */
int write_out_5_left_asymmetric(unsigned long ** buf,
                                long cs,
                                int ndevices,
                                int pdevice,
                                int bad_device){
    int dev;
    long w;
    size_t written;

    /* use xor to recover data for bad_device */
    if (bad_device > -1 && bad_device < ndevices && bad_device != pdevice) {
        for (w = 0; w < cs; w++)
            buf[bad_device][w] = 0UL;
        for (dev = 0; dev < ndevices; dev++) {
            if (dev == bad_device)
                continue;
            for (w = 0; w < cs; w++)
                buf[bad_device][w] ^= buf[dev][w];
        }
    }

    for (dev = 0; dev < ndevices; dev++) {
        if (dev == pdevice)
            continue;
        written = fwrite(buf[dev],
                         sizeof(long),
                         cs,
                         stdout);
        if (written < (size_t)cs) {
            /* perror() appends ": <reason>\n" itself, so the prefix
               must not carry its own newline. */
            perror("Error dumping to stdout?");
            return -1;
        }
    }
    return 0;
}
/*
 * Write the data chunks of one stripe to stdout, starting with the device
 * that follows pdevice and wrapping around past the last device, so that
 * the ndevices-1 data chunks are emitted and the parity chunk is skipped.
 *
 * buf         buf[d] points at the cs longs read from device d
 * cs          chunk length, counted in longs
 * ndevices    number of devices in the array
 * pdevice     index of the device holding parity for this stripe
 * bad_device  index of a device whose chunk is missing, or -1 for none
 *
 * If bad_device names a data device of this stripe, its chunk is first
 * rebuilt in place: XOR-ing the chunks of every other device (the parity
 * chunk included) yields the missing data.  A bad_device outside
 * 0..ndevices-1 is ignored rather than used as an index.
 *
 * Returns 0 on success, -1 if stdout accepted fewer than cs longs.
 */
int write_out_5_left_symmetric(unsigned long ** buf,
                               long cs,
                               int ndevices,
                               int pdevice,
                               int bad_device){
    int dev, step;
    long w;
    size_t written;

    /* use xor to recover data for bad_device */
    if (bad_device > -1 && bad_device < ndevices && bad_device != pdevice) {
        for (w = 0; w < cs; w++)
            buf[bad_device][w] = 0UL;
        for (dev = 0; dev < ndevices; dev++) {
            if (dev == bad_device)
                continue;
            for (w = 0; w < cs; w++)
                buf[bad_device][w] ^= buf[dev][w];
        }
    }

    /* step counts how far past the parity device we are; step 0 would be
       the parity chunk itself, so it is never visited. */
    for (step = 1; step < ndevices; step++) {
        dev = (pdevice + step) % ndevices;
        written = fwrite(buf[dev],
                         sizeof(long),
                         cs,
                         stdout);
        if (written < (size_t)cs) {
            /* perror() appends ": <reason>\n" itself, so the prefix
               must not carry its own newline. */
            perror("Error dumping to stdout?");
            return -1;
        }
    }
    return 0;
}
/*
 * RAID 4 variant of the stripe writer: the parity device is always the
 * last one (index ndevices-1), so the work is delegated to
 * write_out_5_left_asymmetric() with that fixed parity index.
 * Returns whatever that function returns.
 */
int write_out_4_simple (unsigned long ** buf,
                        long cs,
                        int ndevices,
                        int bad_device){
    const int last_device = ndevices - 1;

    return write_out_5_left_asymmetric(buf, cs, ndevices,
                                       last_device, bad_device);
}
/*
 * XOR the current chunk of every device together and report whether the
 * result is all zero.
 *
 * buf   buf[d] points at chunk_size longs for device d (d < ndevices)
 * pbuf  scratch buffer of chunk_size longs; overwritten with the XOR
 *
 * Returns 0 when every word of the XOR is zero.  Otherwise returns a
 * negative number: the index of the first non-zero word minus chunk_size.
 */
int check_parity_simple(unsigned long ** buf, unsigned long *pbuf){
    size_t dev, word;

    /* start from an all-zero accumulator */
    word = 0;
    while (word < chunk_size) {
        pbuf[word] = 0;
        word++;
    }

    /* fold every device's chunk into the accumulator */
    for (dev = 0; dev < ndevices; dev++) {
        unsigned long *src = buf[dev];

        for (word = 0; word < chunk_size; word++)
            pbuf[word] ^= src[word];
    }

    /* advance to the first word that is not zero, if there is one */
    word = 0;
    while (word < chunk_size && pbuf[word] == 0)
        word++;

    /* word == chunk_size means nothing was found, giving 0 */
    return word - chunk_size;
}
/*
 * Open the ndevices paths devs[optind] .. devs[optind+ndevices-1], seek
 * each to byte OFFSET when OFFSET > 0, and then walk the array one stripe
 * at a time:
 *
 *   - read one chunk (chunk_size longs) from every device except bad_disk;
 *   - if check_parity_p is set and no device is marked bad, verify that
 *     the stripe XORs to zero and stop at the first stripe that does not;
 *   - if write_out is set, emit the stripe's data on stdout with
 *     write_out_5_left_symmetric(), using parity device
 *     ndevices - n % ndevices - 1 for stripe number n.
 *
 * The walk ends when any device reaches end-of-file; a stripe that could
 * not be read in full because of end-of-file is not processed.  A short
 * read that is not end-of-file is reported and the unread remainder of
 * the chunk is skipped so the device stays aligned with the others.
 *
 * Returns 0 when the walk ended at end-of-file, -1 when it was stopped by
 * a parity mismatch, an output error, or a failed skip.  Exits the process
 * with status -1 when a device cannot be opened or positioned.
 */
int read_devices (char * devs[],long offset) {
    int i;
    int n = 0;        /* stripes fully processed so far */
    int cp = 0;       /* last parity result; negative means mismatch */
    int status = 0;
    int done = 0;
    /* The chunk of a bad disk is never read, so a parity check over the
       whole stripe would only be testing leftover buffer contents. */
    int have_bad = (bad_disk >= 0 && bad_disk < ndevices);

    for (i = 0; i < ndevices; i++) {
        devices[i] = fopen(devs[i + optind], "rb");
        if (devices[i] == NULL) {
            int saved_errno = errno;

            fprintf(stderr,"Could not open %s.\n",devs[i+optind]);
            errno = saved_errno;
            perror(NULL);
            exit(-1);
        }
    }

    /* go to starting point */
    if (offset > 0) {
        for (i = 0; i < ndevices; i++) {
            if (fseek(devices[i], offset, SEEK_SET) < 0) {
                int saved_errno = errno;

                fprintf(stderr,"Error seek() %s to %ld.\n",devs[i+optind],offset);
                errno = saved_errno;
                perror(NULL);
                exit(-1);
            }
        }
    }

    while (!done) {
        /* read each device */
        for (i = 0; i < ndevices; i++) {
            size_t got;

            if (i == bad_disk)
                continue;

            got = fread(chunks[i],
                        sizeof(long),
                        chunk_size,
                        devices[i]);
            if (got >= (size_t)chunk_size)
                continue;

            /* Stay quiet only for a clean end-of-file (nothing read). */
            if (got > 0 || !feof(devices[i]))
                fprintf(stderr,
                        "Could not do a whole read() on %s at %ld. Read only %zu\n",
                        devs[i+optind],
                        ftell(devices[i]),
                        got);

            if (feof(devices[i])) {
                done = 1;
                break;
            }

            /* Read error: step over the part of the chunk that could not
               be read and carry on with the next device. */
            clearerr(devices[i]);
            if (fseek(devices[i],
                      (long)(sizeof(long) * ((size_t)chunk_size - got)),
                      SEEK_CUR) < 0) {
                fprintf(stderr,
                        "Could not do a skip on %s at %ld.\n",
                        devs[i+optind],
                        ftell(devices[i]));
                /* Without the skip this device is out of step with the
                   others, so later stripes would be meaningless. */
                status = -1;
                done = 1;
                break;
            }
        }
        if (done)
            break;

        /* get the parity checking done */
        if (check_parity_p && !have_bad) {
            cp = check_parity_simple(chunks, parity);
            if (cp < 0) {
                status = -1;
                break;
            }
        }

        /* spew out to stdout if wanted */
        if (write_out) {
            // Left Symmetric: parity = ndevices-n%ndevices-1;
            if (write_out_5_left_symmetric(chunks,
                                           chunk_size,
                                           ndevices,
                                           ndevices - n % ndevices - 1,
                                           bad_disk) < 0) {
                status = -1;
                break;
            }
        }
        n++;
    }

    if (cp < 0)
        fprintf(stderr,
                "Stopping after %d iterations. Parity broke at %d \n",
                n,cp);
    else
        fprintf(stderr,
                "Stopping after %d iterations.\n",
                n);

    for (i = 0; i < ndevices; i++) {
        fclose(devices[i]);
        devices[i] = NULL;
    }
    return status;
}
/*
 * Convert a size such as "20", "300m" or "4G" to kilobytes.
 *
 * The leading decimal number is parsed the way atol() would parse it.
 * The first 'G'/'g' or 'M'/'m' found after the number selects the unit
 * (gigabytes or megabytes); with neither present the number is taken to
 * be kilobytes already.  Text with no leading number yields 0.
 *
 * main() uses this for the argument of the -t option.
 */
long atosize(const char * a){
    char *rest;
    long value = strtol(a, &rest, 10);

    for (; *rest != '\0'; rest++) {
        switch (*rest) {
        case 'G':
        case 'g':
            return value * 1048576L;   /* kilobytes per gigabyte */
        case 'M':
        case 'm':
            return value * 1024L;      /* kilobytes per megabyte */
        default:
            break;
        }
    }
    return value;
}
/* Parse TEXT as a complete base-10 integer.
   Returns 0 and stores the result in *OUT on success; returns -1 when
   TEXT has no digits, has trailing characters, or does not fit a long. */
static int raidtest_parse_long(const char *text, long *out){
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
        return -1;
    *out = value;
    return 0;
}

/*
 * Entry point: parse the options, size and allocate the buffers, check
 * the device paths with stat_devices() and hand over to read_devices().
 *
 *   -v        increase verbosity (repeatable)
 *   -P / -p   enable / disable the parity check
 *   -b N      treat device number N (counted from 0) as unreadable;
 *             a negative N means "no bad device"
 *   -w        write the recovered data to stdout
 *   -c K      chunk size in kilobytes
 *   -t SIZE   total data size, parsed by atosize() into total_data
 *   -V, -h    print version / usage and exit with status 0
 *
 * All remaining arguments are device paths.  Invalid options or values,
 * a missing device list and allocation failures exit with status -1.
 */
int main(int argc, char *argv[]) {
    int c,oi;
    long value;
    long bad_arg = -1;
    struct option long_options[] =
        {
            {"verbose",0,0,'v'},
            {"parity",0,0,'P'},
            {"noparity",0,0,'p'},
            {"bad",1,0,'b'},
            {"writeout",0,0,'w'},
            {"chunksize",1,0,'c'},
            {"version",0,0,'V'},
            {"help",0,0,'h'},
            {"usage",0,0,'h'},
            {0,0,0,0}
        };

    while(1){
        /* "t:" is listed so that the 't' case below can be reached. */
        c = getopt_long (argc, argv,"hpPvVwb:c:t:",
                         long_options, &oi);
        if (c == -1)
            break ;
        switch(c) {
        case 'c':
            /* Must be positive, and small enough that the conversion
               to bytes further down cannot overflow a long. */
            if (raidtest_parse_long(optarg, &value) < 0 ||
                value <= 0 ||
                (unsigned long)value > ((unsigned long)-1 / 2) / 1024) {
                fprintf(stderr,"Invalid chunk size %s. Exiting.\n",optarg);
                exit(-1);
            }
            chunk_size = value;
            break;
        case 't':
            total_data=atosize(optarg);
            break;
        case 'b':
            /* Range-checked once the number of devices is known. */
            if (raidtest_parse_long(optarg, &bad_arg) < 0) {
                fprintf(stderr,"Invalid bad disk number %s. Exiting.\n",optarg);
                exit(-1);
            }
            break;
        case 'h':
            print_usage();
            exit(0);
            break;
        case 'P':
            check_parity_p=1;
            break;
        case 'p':
            check_parity_p=0;
            break;
        case 'V':
            print_version();
            exit(0);
        case 'v':
            verbose++;
            break;
        case 'w':
            write_out++;
            break;
        default:
            print_usage();
            /* optarg is not set for a rejected option; optopt holds the
               offending short option character when there is one. */
            if (optopt)
                fprintf(stderr,
                        "Unrecognized or incomplete option -%c. Exiting.\n",
                        optopt);
            else
                fprintf(stderr,"Unrecognized option. Exiting.\n");
            exit(-1);
            break;
        }
    }

    /* argc and optind should know how many devices we have*/
    ndevices = argc-optind;
    if (ndevices < 1){
        fprintf(stderr,"No Devices Listed.\n");
        exit(-1);
    }

    /* A bad disk index past the device list would be used as an array
       index by the write-out routines, so refuse it here. */
    if (bad_arg >= ndevices){
        fprintf(stderr,
                "Bad disk %ld out of range: only %d devices listed.\n",
                bad_arg,
                ndevices);
        exit(-1);
    }
    bad_disk = (bad_arg < 0) ? -1 : (int)bad_arg;

    if (verbose>2){
        for (c=optind;c<argc;c++)
            fprintf(stderr,
                    "%s ",
                    argv[c]);
        fprintf(stderr,
                "argc %d optind %d ndevices %d\n",
                argc,
                optind,
                ndevices);
    }

    /* chunk_size is entered in k's, but the buffers are handled as
       arrays of long: that is 128 longs per k with 8-byte longs and
       256 with 4-byte longs. */
    chunk_size = chunk_size * 1024 / (long)sizeof(long);

    /* allocate file handles and memory */
    /* first the ludicrous task of allocating ndevices worth of pointers. */
    chunks = malloc((size_t)ndevices * sizeof *chunks);
    if (!chunks){
        fprintf(stderr,"Could not allocate memory for chunk buffer.\n");
        exit(-1);
    }
    /* now a chunk for each device, in one place; zero-filled so that a
       chunk which is never read (the bad disk) has defined contents */
    chunks[0] = calloc((size_t)ndevices,
                       (size_t)chunk_size * sizeof **chunks);
    if (!chunks[0]){
        fprintf(stderr,"Could not allocate memory for chunk buffer.\n");
        exit(-1);
    }
    /* spreading the love. */
    for (c=1;c<ndevices;c++){
        chunks[c]=chunks[c-1]+chunk_size;
    }

    devices = malloc((size_t)ndevices * sizeof *devices);
    if (!devices){
        fprintf(stderr,"Could not allocate memory for FILE objects\n");
        exit(-1);
    }

    parity = malloc((size_t)chunk_size * sizeof *parity);
    if (!parity){
        fprintf(stderr,"Could not allocate memory for parity buffer.\n");
        exit(-1);
    }

    if (verbose){
        fprintf(stderr,"Stat()ing devices.\n");
    }
    stat_devices(argv,optind,argc);

    if (verbose){
        fprintf(stderr,"Scanning devices.\n");
    }
    read_devices(argv,0);

    return 0;
}