This allows us to pass an arbitrary data/parity strip combination, making different erasure coding schemes possible.
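
For illustration only (not part of the patch): a minimal sketch of how the parameterized helpers are meant to be called. The 4+2 layout, the encode_one_stripe() wrapper and its stripe/parity buffers are hypothetical; ec_init(), ec_encode() and ec_destroy() are the helpers touched below.

#include "fec.h"

/* Hypothetical wrapper: encode one data stripe with a 4+2 layout. */
static void encode_one_stripe(const uint8_t *stripe, uint8_t *parity)
{
	int d = 4, p = 2;	/* any d/p combination, at most 7 parity strips */
	int strip_size = SD_EC_DATA_STRIPE_SIZE / d;
	struct fec *ctx = ec_init(d, d + p);
	const uint8_t *ds[d];	/* data strips of one stripe */
	uint8_t *ps[p];		/* parity strips to be filled */
	int i;

	for (i = 0; i < d; i++)
		ds[i] = stripe + i * strip_size;
	for (i = 0; i < p; i++)
		ps[i] = parity + i * strip_size;

	ec_encode(ctx, ds, ps);	/* compute parity strips from the data strips */
	ec_destroy(ctx);
}

Here 'stripe' is assumed to hold one full data stripe (SD_EC_DATA_STRIPE_SIZE bytes) and 'parity' p * strip_size bytes; the same ctx can of course be reused across stripes.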
At most we support 7 parity strip, which means we can stand with 7 nodes failure at the same time. Signed-off-by: Liu Yuan <[email protected]> --- include/fec.h | 41 +++++----- lib/fec.c | 230 ++++++++++++++++++++++++++++-------------------------- sheep/gateway.c | 61 ++++++++------- sheep/recovery.c | 9 ++- 4 files changed, 179 insertions(+), 162 deletions(-) diff --git a/include/fec.h b/include/fec.h index ff79edb..55e0bdb 100644 --- a/include/fec.h +++ b/include/fec.h @@ -61,16 +61,16 @@ struct fec { unsigned long magic; - unsigned short k, n; /* parameters of the code */ + unsigned short d, dp; /* parameters of the code */ uint8_t *enc_matrix; }; void init_fec(void); /* - * param k the number of blocks required to reconstruct - * param m the total number of blocks created + * param d the number of blocks required to reconstruct + * param dp the total number of blocks created */ -struct fec *fec_new(unsigned short k, unsigned short m); +struct fec *fec_new(unsigned short d, unsigned short dp); void fec_free(struct fec *p); /* @@ -104,7 +104,6 @@ void fec_decode(const struct fec *code, #define SD_EC_D 4 /* No. of data strips */ #define SD_EC_P 2 /* No. of parity strips */ #define SD_EC_DP (SD_EC_D + SD_EC_P) -#define SD_EC_STRIP_SIZE (256) /* * SD_EC_D_SIZE <= 1K is the safe value to run VM after some experimentations. @@ -115,10 +114,9 @@ void fec_decode(const struct fec *code, * failed (grub got screwed) and 1K is probably the biggest value if we want * VM to run on erasure coded volume. */ -#define SD_EC_D_SIZE (SD_EC_STRIP_SIZE * SD_EC_D) +#define SD_EC_DATA_STRIPE_SIZE (1024) /* 1K */ #define SD_EC_OBJECT_SIZE (SD_DATA_OBJ_SIZE / SD_EC_D) -#define SD_EC_STRIPE (SD_EC_STRIP_SIZE * SD_EC_DP) -#define SD_EC_NR_STRIPE_PER_OBJECT (SD_EC_OBJECT_SIZE / SD_EC_STRIP_SIZE) +#define SD_EC_NR_STRIPE_PER_OBJECT (SD_DATA_OBJ_SIZE / SD_EC_DATA_STRIPE_SIZE) /* * Stripe: data strips + parity strips, spread on all replica @@ -127,19 +125,19 @@ void fec_decode(const struct fec *code, * R: Replica * * +--------------------stripe ----------------------+ - * v v - * +----+----------------------------------------------+ + * v data stripe parity stripe v + * +----+----+----+----+----+-----+----+----+-----+----+ * | ds | ds | ds | ds | ds | ... | ps | ps | ... | ps | - * +----+----------------------------------------------+ + * +----+----+----+----+----+-----+----+----+-----+----+ * | .. | .. | .. | .. | .. | ... | .. | .. | ... | .. | * +----+----+----+----+----+ ... +----+----+-----+----+ * R1 R2 R3 R4 R5 ... 
Rn Rn+1 Rn+2 Rn+3 */ /* Return the erasure code context to encode|decode */ -static inline struct fec *ec_init(void) +static inline struct fec *ec_init(int d, int dp) { - return fec_new(SD_EC_D, SD_EC_DP); + return fec_new(d, dp); } /* @@ -148,13 +146,16 @@ static inline struct fec *ec_init(void) * @ds: data strips to generate parity strips * @ps: parity strips to return */ -static inline void ec_encode(struct fec *ctx, const uint8_t *ds[SD_EC_D], - uint8_t *ps[SD_EC_P]) +static inline void ec_encode(struct fec *ctx, const uint8_t *ds[], + uint8_t *ps[]) { - int total = SD_EC_D + SD_EC_P; - const int pidx[SD_EC_P] = { total - 2, total - 1 }; + int p = ctx->dp - ctx->d; + int pidx[p]; - fec_encode(ctx, ds, ps, pidx, SD_EC_P, SD_EC_STRIP_SIZE); + for (int i = 0; i < p; i++) + pidx[i] = ctx->d + i; + + fec_encode(ctx, ds, ps, pidx, p, SD_EC_DATA_STRIPE_SIZE / ctx->d); } /* @@ -166,8 +167,8 @@ static inline void ec_encode(struct fec *ctx, const uint8_t *ds[SD_EC_D], * @output: the lost ds or ps to return * @idx: index of output which is lost */ -void ec_decode(struct fec *ctx, const uint8_t *input[SD_EC_D], - const int inidx[SD_EC_D], +void ec_decode(struct fec *ctx, const uint8_t *input[], + const int inidx[], uint8_t output[], int idx); /* Destroy the erasure code context */ diff --git a/lib/fec.c b/lib/fec.c index 6deafe4..05dffa0 100644 --- a/lib/fec.c +++ b/lib/fec.c @@ -251,18 +251,18 @@ static void _addmul1(register uint8_t *dst, GF_ADDMULC(*dst, *src); } -/* computes C = AB where A is n*k, B is k*m, C is n*m */ -static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned n, unsigned k, +/* computes C = AB where A is dp*d, B is d*m, C is dp*m */ +static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned dp, unsigned d, unsigned m) { unsigned row, col, i; - for (row = 0; row < n; row++) { + for (row = 0; row < dp; row++) { for (col = 0; col < m; col++) { - uint8_t *pa = &a[row * k]; + uint8_t *pa = &a[row * d]; uint8_t *pb = &b[col]; uint8_t acc = 0; - for (i = 0; i < k; i++, pa++, pb += m) + for (i = 0; i < d; i++, pa++, pb += m) acc ^= gf_mul(*pa, *pb); c[row * m + col] = acc; } @@ -271,43 +271,43 @@ static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned n, unsigned k, /* * _invert_mat() takes a matrix and produces its inverse - * k is the size of the matrix. + * d is the size of the matrix. * (Gauss-Jordan, adapted from Numerical Recipes in C) * Return non-zero if singular. */ -static void _invert_mat(uint8_t *src, unsigned k) +static void _invert_mat(uint8_t *src, unsigned d) { uint8_t c, *p; unsigned irow = 0; unsigned icol = 0; unsigned row, col, i, ix; - unsigned *indxc = (unsigned *)xmalloc(k * sizeof(unsigned)); - unsigned *indxr = (unsigned *)xmalloc(k * sizeof(unsigned)); - unsigned *ipiv = (unsigned *)xmalloc(k * sizeof(unsigned)); - uint8_t *id_row = NEW_GF_MATRIX(1, k); + unsigned *indxc = (unsigned *)xmalloc(d * sizeof(unsigned)); + unsigned *indxr = (unsigned *)xmalloc(d * sizeof(unsigned)); + unsigned *ipiv = (unsigned *)xmalloc(d * sizeof(unsigned)); + uint8_t *id_row = NEW_GF_MATRIX(1, d); - memset(id_row, '\0', k * sizeof(uint8_t)); + memset(id_row, '\0', d * sizeof(uint8_t)); /* ipiv marks elements already used as pivots. */ - for (i = 0; i < k; i++) + for (i = 0; i < d; i++) ipiv[i] = 0; - for (col = 0; col < k; col++) { + for (col = 0; col < d; col++) { uint8_t *pivot_row; /* * Zeroing column 'col', look for a non-zero element. * First try on the diagonal, if it fails, look elsewhere. 
*/ - if (ipiv[col] != 1 && src[col * k + col] != 0) { + if (ipiv[col] != 1 && src[col * d + col] != 0) { irow = col; icol = col; goto found_piv; } - for (row = 0; row < k; row++) { + for (row = 0; row < d; row++) { if (ipiv[row] != 1) { - for (ix = 0; ix < k; ix++) { + for (ix = 0; ix < d; ix++) { if (ipiv[ix] == 0) { - if (src[row * k + ix] != 0) { + if (src[row * d + ix] != 0) { irow = row; icol = ix; goto found_piv; @@ -325,11 +325,11 @@ found_piv: * optimizing. */ if (irow != icol) - for (ix = 0; ix < k; ix++) - SWAP(src[irow*k + ix], src[icol*k + ix]); + for (ix = 0; ix < d; ix++) + SWAP(src[irow*d + ix], src[icol*d + ix]); indxr[col] = irow; indxc[col] = icol; - pivot_row = &src[icol * k]; + pivot_row = &src[icol * d]; c = pivot_row[icol]; assert(c != 0); if (c != 1) { /* otherwhise this is a NOP */ @@ -339,7 +339,7 @@ found_piv: */ c = inverse[c]; pivot_row[icol] = 1; - for (ix = 0; ix < k; ix++) + for (ix = 0; ix < d; ix++) pivot_row[ix] = gf_mul(c, pivot_row[ix]); } /* @@ -350,22 +350,22 @@ found_piv: * we can optimize the addmul). */ id_row[icol] = 1; - if (memcmp(pivot_row, id_row, k * sizeof(uint8_t)) != 0) { - for (p = src, ix = 0; ix < k; ix++, p += k) { + if (memcmp(pivot_row, id_row, d * sizeof(uint8_t)) != 0) { + for (p = src, ix = 0; ix < d; ix++, p += d) { if (ix != icol) { c = p[icol]; p[icol] = 0; - addmul(p, pivot_row, c, k); + addmul(p, pivot_row, c, d); } } } id_row[icol] = 0; } /* done all columns */ - for (col = k; col > 0; col--) + for (col = d; col > 0; col--) if (indxr[col-1] != indxc[col-1]) - for (row = 0; row < k; row++) - SWAP(src[row * k + indxr[col-1]], - src[row * k + indxc[col-1]]); + for (row = 0; row < d; row++) + SWAP(src[row * d + indxr[col-1]], + src[row * d + indxc[col-1]]); } /* @@ -379,51 +379,51 @@ found_piv: * p = coefficients of the matrix (p_i) * q = values of the polynomial (known) */ -static void _invert_vdm(uint8_t *src, unsigned k) +static void _invert_vdm(uint8_t *src, unsigned d) { unsigned i, j, row, col; uint8_t *b, *c, *p; uint8_t t, xx; - if (k == 1) /* degenerate case, matrix must be p^0 = 1 */ + if (d == 1) /* degenerate case, matrix must be p^0 = 1 */ return; /* - * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1 + * c holds the coefficient of P(x) = Prod (x - p_i), i=0..d-1 * b holds the coefficient for the matrix inversion */ - c = NEW_GF_MATRIX(1, k); - b = NEW_GF_MATRIX(1, k); - p = NEW_GF_MATRIX(1, k); + c = NEW_GF_MATRIX(1, d); + b = NEW_GF_MATRIX(1, d); + p = NEW_GF_MATRIX(1, d); - for (j = 1, i = 0; i < k; i++, j += k) { + for (j = 1, i = 0; i < d; i++, j += d) { c[i] = 0; p[i] = src[j]; /* p[i] */ } /* - * construct coeffs. recursively. We know c[k] = 1 (implicit) + * construct coeffs. recursively. We know c[d] = 1 (implicit) * and start P_0 = x - p_0, then at each stage multiply by * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1} - * After k steps we are done. + * After d steps we are done. */ - c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */ - for (i = 1; i < k; i++) { + c[d - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */ + for (i = 1; i < d; i++) { uint8_t p_i = p[i]; /* see above comment */ - for (j = k - 1 - (i - 1); j < k - 1; j++) + for (j = d - 1 - (i - 1); j < d - 1; j++) c[j] ^= gf_mul(p_i, c[j + 1]); - c[k - 1] ^= p_i; + c[d - 1] ^= p_i; } - for (row = 0; row < k; row++) { + for (row = 0; row < d; row++) { /* synthetic division etc. 
*/ xx = p[row]; t = 1; - b[k - 1] = 1; /* this is in fact c[k] */ - for (i = k - 1; i > 0; i--) { + b[d - 1] = 1; /* this is in fact c[d] */ + for (i = d - 1; i > 0; i--) { b[i-1] = c[i] ^ gf_mul(xx, b[i]); t = gf_mul(xx, t) ^ b[i-1]; } - for (col = 0; col < k; col++) - src[col * k + row] = gf_mul(inverse[t], b[col]); + for (col = 0; col < d; col++) + src[col * d + row] = gf_mul(inverse[t], b[col]); } free(c); free(b); @@ -447,13 +447,13 @@ void init_fec(void) void fec_free(struct fec *p) { - assert(p != NULL && p->magic == (((FEC_MAGIC ^ p->k) ^ p->n) ^ + assert(p != NULL && p->magic == (((FEC_MAGIC ^ p->d) ^ p->dp) ^ (unsigned long) (p->enc_matrix))); free(p->enc_matrix); free(p); } -struct fec *fec_new(unsigned short k, unsigned short n) +struct fec *fec_new(unsigned short d, unsigned short dp) { unsigned row, col; uint8_t *p, *tmp_m; @@ -461,32 +461,32 @@ struct fec *fec_new(unsigned short k, unsigned short n) struct fec *retval; retval = (struct fec *)xmalloc(sizeof(struct fec)); - retval->k = k; - retval->n = n; - retval->enc_matrix = NEW_GF_MATRIX(n, k); - retval->magic = ((FEC_MAGIC^k)^n)^(unsigned long)(retval->enc_matrix); - tmp_m = NEW_GF_MATRIX(n, k); + retval->d = d; + retval->dp = dp; + retval->enc_matrix = NEW_GF_MATRIX(dp, d); + retval->magic = ((FEC_MAGIC^d)^dp)^(unsigned long)(retval->enc_matrix); + tmp_m = NEW_GF_MATRIX(dp, d); /* * fill the matrix with powers of field elements, starting from 0. * The first row is special, cannot be computed with exp. table. */ tmp_m[0] = 1; - for (col = 1; col < k; col++) + for (col = 1; col < d; col++) tmp_m[col] = 0; - for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) - for (col = 0; col < k; col++) + for (p = tmp_m + d, row = 0; row < dp - 1; row++, p += d) + for (col = 0; col < d; col++) p[col] = gf_exp[modnn(row * col)]; /* * quick code to build systematic matrix: invert the top - * k*k vandermonde matrix, multiply right the bottom n-k rows + * d*d vandermonde matrix, multiply right the bottom dp-d rows * by the inverse, and construct the identity matrix at the top. */ - _invert_vdm(tmp_m, k); /* much faster than _invert_mat */ - _matmul(tmp_m + k * k, tmp_m, retval->enc_matrix + k * k, n - k, k, k); + _invert_vdm(tmp_m, d); /* much faster than _invert_mat */ + _matmul(tmp_m + d * d, tmp_m, retval->enc_matrix + d * d, dp - d, d, d); /* the upper matrix is I so do not bother with a slow multiply */ - memset(retval->enc_matrix, '\0', k * k * sizeof(uint8_t)); - for (p = retval->enc_matrix, col = 0; col < k; col++, p += k + 1) + memset(retval->enc_matrix, '\0', d * d * sizeof(uint8_t)); + for (p = retval->enc_matrix, col = 0; col < d; col++, p += d + 1) *p = 1; free(tmp_m); @@ -508,19 +508,19 @@ void fec_encode(const struct fec *code, size_t num_block_nums, size_t sz) { unsigned char i, j; - size_t k; + size_t d; unsigned fecnum; const uint8_t *p; - for (k = 0; k < sz; k += STRIDE) { - size_t stride = ((sz-k) < STRIDE) ? (sz-k) : STRIDE; + for (d = 0; d < sz; d += STRIDE) { + size_t stride = ((sz-d) < STRIDE) ? 
(sz-d) : STRIDE; for (i = 0; i < num_block_nums; i++) { fecnum = block_nums[i]; - assert(fecnum >= code->k); - memset(fecs[i]+k, 0, stride); - p = &(code->enc_matrix[fecnum * code->k]); - for (j = 0; j < code->k; j++) - addmul(fecs[i]+k, src[j]+k, p[j], stride); + assert(fecnum >= code->d); + memset(fecs[i]+d, 0, stride); + p = &(code->enc_matrix[fecnum * code->d]); + for (j = 0; j < code->d; j++) + addmul(fecs[i]+d, src[j]+d, p[j], stride); } } } @@ -528,24 +528,24 @@ void fec_encode(const struct fec *code, /* * Build decode matrix into some memory space. * - * @param matrix a space allocated for a k by k matrix + * @param matrix a space allocated for a d by d matrix */ static void build_decode_matrix_into_space(const struct fec *const code, const int *const idx, - const unsigned k, uint8_t *const matrix) + const unsigned d, uint8_t *const matrix) { unsigned char i; uint8_t *p; - for (i = 0, p = matrix; i < k; i++, p += k) { - if (idx[i] < k) { - memset(p, 0, k); + for (i = 0, p = matrix; i < d; i++, p += d) { + if (idx[i] < d) { + memset(p, 0, d); p[i] = 1; } else { - memcpy(p, &(code->enc_matrix[idx[i] * code->k]), k); + memcpy(p, &(code->enc_matrix[idx[i] * code->d]), d); } } - _invert_mat(matrix, k); + _invert_mat(matrix, d); } void fec_decode(const struct fec *code, @@ -553,25 +553,25 @@ void fec_decode(const struct fec *code, uint8_t *const *const outpkts, const int *const idx, size_t sz) { - uint8_t m_dec[code->k * code->k]; + uint8_t m_dec[code->d * code->d]; unsigned char outix = 0; unsigned char row = 0; unsigned char col = 0; - assert(code->k * code->k < 8 * 1024 * 1024); - build_decode_matrix_into_space(code, idx, code->k, m_dec); + assert(code->d * code->d < 8 * 1024 * 1024); + build_decode_matrix_into_space(code, idx, code->d, m_dec); - for (row = 0; row < code->k; row++) { + for (row = 0; row < code->d; row++) { /* * If the block whose number is i is present, then it is * required to be in the i'th element. */ - assert((idx[row] >= code->k) || (idx[row] == row)); - if (idx[row] >= code->k) { + assert((idx[row] >= code->d) || (idx[row] == row)); + if (idx[row] >= code->d) { memset(outpkts[outix], 0, sz); - for (col = 0; col < code->k; col++) + for (col = 0; col < code->d; col++) addmul(outpkts[outix], inpkts[col], - m_dec[row * code->k + col], sz); + m_dec[row * code->d + col], sz); outix++; } } @@ -584,19 +584,20 @@ void fec_decode(const struct fec *code, * * Return out and outidx as fec_decode requested. 
*/ -static inline void decode_prepare(const uint8_t *dp[], const uint8_t *out[], +static inline void decode_prepare(struct fec *ctx, const uint8_t *dp[], + const uint8_t *out[], int outidx[]) { int i, p = 0; - for (i = SD_EC_D; i < SD_EC_DP; i++) { + for (i = ctx->d; i < ctx->dp; i++) { if (dp[i]) { p = i; break; } } - for (i = 0; i < SD_EC_D; i++) { + for (i = 0; i < ctx->d; i++) { if (dp[i]) { out[i] = dp[i]; outidx[i] = i; @@ -608,9 +609,9 @@ static inline void decode_prepare(const uint8_t *dp[], const uint8_t *out[], } } -static inline bool data_is_missing(const uint8_t *dp[]) +static inline bool data_is_missing(const uint8_t *dp[], int d) { - for (int i = 0; i < SD_EC_D; i++) + for (int i = 0; i < d; i++) if (!dp[i]) return true; return false; @@ -625,39 +626,50 @@ static inline bool data_is_missing(const uint8_t *dp[]) * @output: the lost ds or ps to return * @idx: index of output which is lost */ -void ec_decode(struct fec *ctx, const uint8_t *input[SD_EC_D], - const int inidx[SD_EC_D], +void ec_decode(struct fec *ctx, const uint8_t *input[], const int inidx[], uint8_t output[], int idx) { - const uint8_t *dp[SD_EC_DP] = { NULL }; - const uint8_t *oin[SD_EC_D] = { NULL }; - int oidx[SD_EC_D] = { 0 }, i; - uint8_t m0[SD_EC_STRIP_SIZE], m1[SD_EC_STRIP_SIZE], - p0[SD_EC_STRIP_SIZE], p1[SD_EC_STRIP_SIZE]; - uint8_t *missing[SD_EC_P] = { m0, m1 }; - uint8_t *p[SD_EC_P] = { p0, p1 }; - - for (i = 0; i < SD_EC_D; i++) + int edp = ctx->dp, ep = ctx->dp - ctx->d, ed = ctx->d; + const uint8_t *dp[edp]; + const uint8_t *oin[ed]; + int oidx[ed], i; + int strip_size = SD_EC_DATA_STRIPE_SIZE / ed; + uint8_t m0[strip_size], m1[strip_size], m2[strip_size], m3[strip_size], + m4[strip_size], m5[strip_size], m6[strip_size], m7[strip_size], + p0[strip_size], p1[strip_size], p2[strip_size], p3[strip_size], + p4[strip_size], p5[strip_size], p6[strip_size], p7[strip_size]; +#define SD_EC_MAX_PARITY 8 + uint8_t *missing[SD_EC_MAX_PARITY] = { m0, m1, m2, m3, m4, m5, m6, m7 }; + uint8_t *p[SD_EC_MAX_PARITY] = { p0, p1, p2, p3, p4, p5, p6, p7 }; + + for (i = 0; i < edp; i++) + dp[i] = NULL; + for (i = 0; i < ed; i++) + oin[i] = NULL; + for (i = 0; i < ed; i++) + oidx[i] = 0; + + for (i = 0; i < ed; i++) dp[inidx[i]] = input[i]; - decode_prepare(dp, oin, oidx); + decode_prepare(ctx, dp, oin, oidx); /* Fill the data strip if missing */ - if (data_is_missing(dp)) { + if (data_is_missing(dp, ed)) { int m = 0; - fec_decode(ctx, oin, missing, oidx, SD_EC_STRIP_SIZE); - for (i = 0; i < SD_EC_D; i++) + fec_decode(ctx, oin, missing, oidx, strip_size); + for (i = 0; i < ed; i++) if (!dp[i]) dp[i] = missing[m++]; } - if (idx < SD_EC_D) + if (idx < ed) goto out; /* Fill the parity strip */ ec_encode(ctx, dp, p); - for (i = 0; i < SD_EC_P; i++) - dp[SD_EC_D + i] = p[i]; + for (i = 0; i < ep; i++) + dp[ed + i] = p[i]; out: - memcpy(output, dp[idx], SD_EC_STRIP_SIZE); + memcpy(output, dp[idx], strip_size); } diff --git a/sheep/gateway.c b/sheep/gateway.c index 08b9c1e..4d7e0e2 100644 --- a/sheep/gateway.c +++ b/sheep/gateway.c @@ -48,11 +48,12 @@ static struct req_iter *prepare_replication_requests(struct request *req, } /* - * Make sure we don't overwrite the existing data for unaligned write + * Make sure we don't overwrite the existing data for misaligned write * - * If either offset or length of request isn't aligned to SD_EC_D_SIZE, we have - * to read the unaligned blocks before write. This kind of write amplification - * indeed slow down the write operation with extra read overhead. 
+ * If either offset or length of request isn't aligned to + * SD_EC_DATA_STRIPE_SIZE, we have to read the unaligned blocks before write. + * This kind of write amplification indeed slow down the write operation with + * extra read overhead. */ static void *init_erasure_buffer(struct request *req, int buf_len) { @@ -62,18 +63,18 @@ static void *init_erasure_buffer(struct request *req, int buf_len) uint64_t oid = req->rq.obj.oid; int opcode = req->rq.opcode; struct sd_req hdr; - uint64_t head = round_down(off, SD_EC_D_SIZE); - uint64_t tail = round_down(off + len, SD_EC_D_SIZE); + uint64_t head = round_down(off, SD_EC_DATA_STRIPE_SIZE); + uint64_t tail = round_down(off + len, SD_EC_DATA_STRIPE_SIZE); int ret; if (opcode != SD_OP_WRITE_OBJ) goto out; - if (off % SD_EC_D_SIZE) { + if (off % SD_EC_DATA_STRIPE_SIZE) { /* Read head */ sd_init_req(&hdr, SD_OP_READ_OBJ); hdr.obj.oid = oid; - hdr.data_length = SD_EC_D_SIZE; + hdr.data_length = SD_EC_DATA_STRIPE_SIZE; hdr.obj.offset = head; ret = exec_local_req(&hdr, buf); if (ret != SD_RES_SUCCESS) { @@ -82,11 +83,11 @@ static void *init_erasure_buffer(struct request *req, int buf_len) } } - if ((len + off) % SD_EC_D_SIZE && tail - head > 0) { + if ((len + off) % SD_EC_DATA_STRIPE_SIZE && tail - head > 0) { /* Read tail */ sd_init_req(&hdr, SD_OP_READ_OBJ); hdr.obj.oid = oid; - hdr.data_length = SD_EC_D_SIZE; + hdr.data_length = SD_EC_DATA_STRIPE_SIZE; hdr.obj.offset = tail; ret = exec_local_req(&hdr, buf + tail - head); if (ret != SD_RES_SUCCESS) { @@ -95,7 +96,7 @@ static void *init_erasure_buffer(struct request *req, int buf_len) } } out: - memcpy(buf + off % SD_EC_D_SIZE, req->data, len); + memcpy(buf + off % SD_EC_DATA_STRIPE_SIZE, req->data, len); return buf; } @@ -108,11 +109,12 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr) uint32_t len = req->rq.data_length; uint64_t off = req->rq.obj.offset; int opcode = req->rq.opcode; - int start = off / SD_EC_D_SIZE; - int end = DIV_ROUND_UP(off + len, SD_EC_D_SIZE), i, j; + int start = off / SD_EC_DATA_STRIPE_SIZE; + int end = DIV_ROUND_UP(off + len, SD_EC_DATA_STRIPE_SIZE), i, j; int nr_stripe = end - start; - struct fec *ctx = ec_init(); + struct fec *ctx = ec_init(SD_EC_D, SD_EC_DP); int nr_to_send = (opcode == SD_OP_READ_OBJ) ? 
SD_EC_D : SD_EC_DP; + int strip_size = SD_EC_DATA_STRIPE_SIZE / SD_EC_D; struct req_iter *reqs = xzalloc(sizeof(*reqs) * nr_to_send); char *p, *buf = NULL; @@ -121,11 +123,11 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr) *nr = nr_to_send; for (i = 0; i < nr_to_send; i++) { - int l = SD_EC_STRIP_SIZE * nr_stripe; + int l = strip_size * nr_stripe; reqs[i].buf = xmalloc(l); reqs[i].dlen = l; - reqs[i].off = start * SD_EC_STRIP_SIZE; + reqs[i].off = start * strip_size; switch (opcode) { case SD_OP_CREATE_AND_WRITE_OBJ: case SD_OP_WRITE_OBJ: @@ -139,7 +141,7 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr) if (opcode != SD_OP_WRITE_OBJ && opcode != SD_OP_CREATE_AND_WRITE_OBJ) goto out; /* Read and remove operation */ - p = buf = init_erasure_buffer(req, SD_EC_D_SIZE * nr_stripe); + p = buf = init_erasure_buffer(req, SD_EC_DATA_STRIPE_SIZE * nr_stripe); if (!buf) { sd_err("failed to init erasure buffer %"PRIx64, req->rq.obj.oid); @@ -152,16 +154,16 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr) uint8_t *ps[SD_EC_P]; for (j = 0; j < SD_EC_D; j++) - ds[j] = reqs[j].buf + SD_EC_STRIP_SIZE * i; + ds[j] = reqs[j].buf + strip_size * i; for (j = 0; j < SD_EC_P; j++) - ps[j] = reqs[SD_EC_D + j].buf + SD_EC_STRIP_SIZE * i; + ps[j] = reqs[SD_EC_D + j].buf + strip_size * i; for (j = 0; j < SD_EC_D; j++) - memcpy((uint8_t *)ds[j], p + j * SD_EC_STRIP_SIZE, - SD_EC_STRIP_SIZE); + memcpy((uint8_t *)ds[j], p + j * strip_size, + strip_size); ec_encode(ctx, ds, ps); - p += SD_EC_D_SIZE; + p += SD_EC_DATA_STRIPE_SIZE; } out: ec_destroy(ctx); @@ -212,8 +214,8 @@ static void finish_requests(struct request *req, struct req_iter *reqs, uint32_t len = req->rq.data_length; uint64_t off = req->rq.obj.offset; int opcode = req->rq.opcode; - int start = off / SD_EC_D_SIZE; - int end = DIV_ROUND_UP(off + len, SD_EC_D_SIZE), i, j; + int start = off / SD_EC_DATA_STRIPE_SIZE; + int end = DIV_ROUND_UP(off + len, SD_EC_DATA_STRIPE_SIZE), i, j; int nr_stripe = end - start; if (!is_erasure_oid(oid)) @@ -224,17 +226,18 @@ static void finish_requests(struct request *req, struct req_iter *reqs, /* We need to assemble the data strips into the req buffer for read */ if (opcode == SD_OP_READ_OBJ) { - char *p, *buf = xmalloc(SD_EC_D_SIZE * nr_stripe); + char *p, *buf = xmalloc(SD_EC_DATA_STRIPE_SIZE * nr_stripe); + int strip_size = SD_EC_DATA_STRIPE_SIZE / SD_EC_D; p = buf; for (i = 0; i < nr_stripe; i++) { for (j = 0; j < nr_to_send; j++) { - memcpy(p, reqs[j].buf + SD_EC_STRIP_SIZE * i, - SD_EC_STRIP_SIZE); - p += SD_EC_STRIP_SIZE; + memcpy(p, reqs[j].buf + strip_size * i, + strip_size); + p += strip_size; } } - memcpy(req->data, buf + off % SD_EC_D_SIZE, len); + memcpy(req->data, buf + off % SD_EC_DATA_STRIPE_SIZE, len); req->rp.data_length = req->rq.data_length; free(buf); } diff --git a/sheep/recovery.c b/sheep/recovery.c index e63e3d6..037d2c3 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -392,7 +392,7 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx, { uint8_t *bufs[SD_EC_D] = { 0 }; int idxs[SD_EC_D], len = get_store_objsize(oid); - struct fec *ctx = ec_init(); + struct fec *ctx = ec_init(SD_EC_D, SD_EC_DP); char *lost = xvalloc(len); int i, j; @@ -414,12 +414,13 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx, /* Rebuild the lost replica */ for (i = 0; i < SD_EC_NR_STRIPE_PER_OBJECT; i++) { const uint8_t *in[SD_EC_D]; - uint8_t out[SD_EC_STRIP_SIZE]; + int strip_size = 
SD_EC_DATA_STRIPE_SIZE / SD_EC_D; + uint8_t out[strip_size]; for (j = 0; j < SD_EC_D; j++) - in[j] = bufs[j] + SD_EC_STRIP_SIZE * i; + in[j] = bufs[j] + strip_size * i; ec_decode(ctx, in, idxs, out, idx); - memcpy(lost + SD_EC_STRIP_SIZE * i, out, SD_EC_STRIP_SIZE); + memcpy(lost + strip_size * i, out, strip_size); } out: ec_destroy(ctx); -- 1.7.9.5 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
