Ok, here's a patch that does the very very special case of: p=P=1, s > p, DMDA_BOUNDARY_PERIODIC in the z-direction.
Note this is really only possible in the z-direction... when in the z-direction, we can just check if the global index < 0 or > x*y*z and adjust appropriately. So this can't generalize to x- or y- directions (though it could do the y-direction in the 2D case, allowing one to do 1D problems in a 2D algorithm?) Compared to the rest of the cruft in da3.c and how non-general the global index generation is overall, it's not actually that ugly... I've tested with s=2 and 3, BOX and STAR, and the indices look right (i.e. they are identical in the z-dimension). But it's ugly, so please test with your stuff too Jed. Ethan On Tue, 2011-04-19 at 23:28 +0200, Jed Brown wrote: > On Tue, Apr 19, 2011 at 23:21, Ethan Coon <ecoon at lanl.gov> wrote: > The attached patch fixes the errors to error in the correct > cases. > > Pushed, thanks. -- ------------------------------------ Ethan Coon Post-Doctoral Researcher Applied Mathematics - T-5 Los Alamos National Laboratory 505-665-8289 http://www.ldeo.columbia.edu/~ecoon/ ------------------------------------ -------------- next part -------------- cruft to handle the special case of solving a 2D problem in a 3D algorithm with stencil size > 1. This only works in the 3D case, z-dimension diff -r 3ad7a426c22b src/dm/impls/da/da3.c --- a/src/dm/impls/da/da3.c Tue Apr 19 15:18:30 2011 -0600 +++ b/src/dm/impls/da/da3.c Tue Apr 19 16:25:22 2011 -0600 @@ -331,7 +331,10 @@ } z = lz[rank/(m*n)]; - if ((z < s) && ((p > 1) || bz == DMDA_BOUNDARY_PERIODIC)) { + /* note this is different than x- and y-, as we will handle as an important special + case when p=P=1 and DMDA_BOUNDARY_PERIODIC and s > z. This is to deal with 2D problems + in a 3D code. Additional code for this case is noted with "2d case" comments */ + if ((z < s) && ((p > 1) || ((P > 1) && bz == DMDA_BOUNDARY_PERIODIC))) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local z-width of domain z %D is smaller than stencil width s %D",z,s); } zs = 0; @@ -739,6 +742,7 @@ y_t = ly[(n0 % (m*n))/m]; z_t = lz[n0 / (m*n)]; s_t = bases[n0] + x_t*y_t*z_t - (s_y-i)*x_t - s_x - (s_z-k-1)*x_t*y_t; + if (s_t < 0) {s_t = bases[n0] + x_t*y_t*z_t - (s_y-i)*x_t - s_x;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } if (n1 >= 0) { /* directly below */ @@ -746,6 +750,7 @@ y_t = ly[(n1 % (m*n))/m]; z_t = lz[n1 / (m*n)]; s_t = bases[n1] + x_t*y_t*z_t - (s_y+1-i)*x_t - (s_z-k-1)*x_t*y_t; + if (s_t < 0) {s_t = bases[n1] + x_t*y_t*z_t - (s_y+1-i)*x_t;} /* 2D case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } if (n2 >= 0) { /* right below */ @@ -753,6 +758,7 @@ y_t = ly[(n2 % (m*n))/m]; z_t = lz[n2 / (m*n)]; s_t = bases[n2] + x_t*y_t*z_t - (s_y+1-i)*x_t - (s_z-k-1)*x_t*y_t; + if (s_t < 0) {s_t = bases[n2] + x_t*y_t*z_t - (s_y+1-i)*x_t;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } } @@ -763,6 +769,7 @@ y_t = y; z_t = lz[n3 / (m*n)]; s_t = bases[n3] + (i+1)*x_t - s_x + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n3] + (i+1)*x_t - s_x + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } @@ -771,6 +778,7 @@ y_t = y; z_t = lz[n4 / (m*n)]; s_t = bases[n4] + i*x_t + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n4] + i*x_t + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } @@ -779,6 +787,7 @@ y_t = y; z_t = lz[n5 / (m*n)]; s_t = bases[n5] + i*x_t + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n5] + i*x_t + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } } @@ -789,6 +798,7 @@ y_t = ly[(n6 % (m*n))/m]; z_t = lz[n6 / (m*n)]; s_t = bases[n6] + i*x_t - s_x + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n6] + i*x_t - s_x + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } if (n7 >= 0) { /* directly above */ @@ -796,6 +806,7 @@ y_t = ly[(n7 % (m*n))/m]; z_t = lz[n7 / (m*n)]; s_t = bases[n7] + (i-1)*x_t + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n7] + (i-1)*x_t + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } if (n8 >= 0) { /* right above */ @@ -803,6 +814,7 @@ y_t = ly[(n8 % (m*n))/m]; z_t = lz[n8 / (m*n)]; s_t = bases[n8] + (i-1)*x_t + x_t*y_t*z_t - (s_z-k)*x_t*y_t; + if (s_t < 0) {s_t = bases[n8] + (i-1)*x_t + x_t*y_t*z_t - x_t*y_t;} /* 2D case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } } @@ -889,6 +901,7 @@ y_t = ly[(n18 % (m*n))/m]; /* z_t = lz[n18 / (m*n)]; */ s_t = bases[n18] - (s_y-i)*x_t -s_x + (k+1)*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n18] - (s_y-i)*x_t -s_x + x_t*y_t;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } if (n19 >= 0) { /* directly below */ @@ -896,6 +909,7 @@ y_t = ly[(n19 % (m*n))/m]; /* z_t = lz[n19 / (m*n)]; */ s_t = bases[n19] - (s_y+1-i)*x_t + (k+1)*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n19] - (s_y+1-i)*x_t + x_t*y_t;} /* 2d case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } if (n20 >= 0) { /* right below */ @@ -903,6 +917,7 @@ y_t = ly[(n20 % (m*n))/m]; /* z_t = lz[n20 / (m*n)]; */ s_t = bases[n20] - (s_y+1-i)*x_t + (k+1)*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n20] - (s_y+1-i)*x_t + x_t*y_t;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } } @@ -913,6 +928,7 @@ y_t = y; /* z_t = lz[n21 / (m*n)]; */ s_t = bases[n21] + (i+1)*x_t - s_x + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n21] + (i+1)*x_t - s_x;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } @@ -921,6 +937,7 @@ y_t = y; /* z_t = lz[n22 / (m*n)]; */ s_t = bases[n22] + i*x_t + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n22] + i*x_t;} /* 2d case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } @@ -929,6 +946,7 @@ y_t = y; /* z_t = lz[n23 / (m*n)]; */ s_t = bases[n23] + i*x_t + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n23] + i*x_t;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } } @@ -939,6 +957,7 @@ y_t = ly[(n24 % (m*n))/m]; /* z_t = lz[n24 / (m*n)]; */ s_t = bases[n24] + i*x_t - s_x + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n24] + i*x_t - s_x;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } if (n25 >= 0) { /* directly above */ @@ -946,6 +965,7 @@ y_t = ly[(n25 % (m*n))/m]; /* z_t = lz[n25 / (m*n)]; */ s_t = bases[n25] + (i-1)*x_t + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n25] + (i-1)*x_t;} /* 2d case */ for (j=0; j<x_t; j++) { idx[nn++] = s_t++;} } if (n26 >= 0) { /* right above */ @@ -953,6 +973,7 @@ y_t = ly[(n26 % (m*n))/m]; /* z_t = lz[n26 / (m*n)]; */ s_t = bases[n26] + (i-1)*x_t + k*x_t*y_t; + if (s_t >= x*y*z) {s_t = bases[n26] + (i-1)*x_t;} /* 2d case */ for (j=0; j<s_x; j++) { idx[nn++] = s_t++;} } }
