Re: [opendx-users] De-NaNifying data

Nils Smeds Mon, 26 Aug 2002 14:02:03 -0700 (PDT)

Sorry for the long reply...

The main reason why Include does not work is that it makes a short-cut
for 1D fields and/or arrays. If the statistics module indicates that
min(field) > min and max(field) < max then the complete set is passed on
as is.


For 1D fields, if the bounds are such that at least one of the regular
numbers is excluded by the bounds, and if there is furthermore an invalid
components field in the data already, the module should do the right thing
in "include" mode.

For the fields where the bounds are 2D or greater the logic in the code is
negated and the module will include all NaNs in "include" mode, but exclude
them if in "exclude" mode. So for a field with multidimensional components
an "exclude" with incompatible bounds (i.e. min>max) _could_ work. 

If the shortcuts were removed I _think_ (after having taken a quick look
at the code) that it would work with an Include that was used in the
"exclude" mode with a very high min value and a very low max value such 
that no regular points would match the criterion. At least for fields.
There is separate code for arrays.

I also enclose a patch that could modify the behavior of Include to have
it always exclude NaN. I.e., if you pass a field through two separate Include
modules, one in include mode and one in exclude mode, and then merge the
results, the NaN would still not be present. This patch has not been tested,
but is merely meant as a basis for future discussions. I like my modified
interpretation of Include (of course), but there might be strong reasons
not to modify it for backwards compatibility.

A different approach would be to add isnan() to the known functions
of compute. Would that involve anything other than adding the
appropriate definitions in src/exec/dxmods/_compoper.c and
src/exec/dxmods/_compoper.h? It still gets rather tedious using it though.
See the enclosed suggested example (using a==a which is equivalent to 
!isnan(a))

This visual program _should_ mark all NaN as invalid positions. Try it
and tell me if it does. I just had this kind of code but with a<1.0e6 to
exclude outliers. I didn't at the time know Include would have done the
job for me, now I know better :)

/Nils


    The input Field
          |
          +----------------------------------------------------+
          |                                                    |
          V                                                    |
extract(field,name="data") -+                                  |
                            |                                  |
         +------------------+                                  |
         V                                                     |
compute(array,expression="byte ( a == a ? 0 : 1 )")-+          |
                                                    |          |
          +-----------------------------------------+          |
          |          +-----------------------------------------+
          V          V
replace(srcfield,dstfield,srcname="data",destname="invalid positions")-+
                                                                       |
          +------------------------------------------------------------+
          |
          V   /* This updates the statistics */
compute(field,expression="1.0*a") --+  
                                    |
                                    V
                             The modified field

Be warned, this patch has not been tested for correctness, nor even that
it compiles.....
/Nils 
*** dx-4.2.0/src/exec/dxmods/include.c.BAK      2000-08-24 22:04:38.000000000 
+0200
--- dx-4.2.0/src/exec/dxmods/include.c  2002-08-26 22:16:55.000000000 +0200
***************
*** 12,17 ****
--- 12,18 ----
  #include <dx/dx.h>
  #include <math.h>
  
+ #define XOR(a,b) (((a) || (b)) && !((a) && (b)))
  
  struct argblk {
      int justcull;     /* if set, don't take stats - just cull */
*************** Field_Include(Field f, int justcull, int
*** 465,470 ****
--- 466,472 ----
            return NULL;
  
        
+ #ifndef NO_INCLUDE_SHORTCUT
        /* include and all points in range, or exclude & all out of range.
         *  keep all data.  still call cull to remove unreferenced points.
         */
*************** Field_Include(Field f, int justcull, int
*** 507,512 ****
--- 509,515 ----
                goto cullonly;
            }
        }
+ #endif /* NO_INCLUDE_SHORTCUT */
  
        /* convert the data to scalar float if it is anything else.
         *  this is the same routine the statistics code uses.
*************** Field_Include(Field f, int justcull, int
*** 576,582 ****
         */
        if (shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (DXIsElementInvalid(icHandle, i) && (*fp >= *min && *fp <= 
*max))
                    DXSetElementValid(icHandle, i);
            }
        } else {
--- 579,586 ----
         */
        if (shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (DXIsElementInvalid(icHandle, i) && (*fp >= *min && *fp <= 
*max) && 
!                               !XOR(isnan(*fp),exclude))
                    DXSetElementValid(icHandle, i);
            }
        } else {
*************** Field_Include(Field f, int justcull, int
*** 584,590 ****
                if (DXIsElementValid(icHandle, i))
                    continue;
                for (j=0; j<shape; j++) {
!                   if (*(fp+j) <  *(min+j) || *(fp+j) >  *(max+j))
                        continue;
                }
                if (j == shape)
--- 588,594 ----
                if (DXIsElementValid(icHandle, i))
                    continue;
                for (j=0; j<shape; j++) {
!                   if (*(fp+j) <  *(min+j) || *(fp+j) >  *(max+j) || 
XOR(isnan(*fp+j),exclude))
                        continue;
                }
                if (j == shape)
*************** Field_Include(Field f, int justcull, int
*** 600,612 ****
         */
        if (shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (*fp <  *min || *fp >  *max)
                    DXSetElementInvalid(icHandle, i);
            }
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += shape) {
                for (j=0; j<shape; j++) {
!                   if (*(fp+j) <  *(min+j) || *(fp+j) >  *(max+j)) {
                        DXSetElementInvalid(icHandle, i);
                        break;
                    }
--- 604,617 ----
         */
        if (shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (*fp <  *min || *fp >  *max || XOR(isnan(*fp),exclude))
                    DXSetElementInvalid(icHandle, i);
            }
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += shape) {
                for (j=0; j<shape; j++) {
!                   if (*(fp+j) <  *(min+j) || *(fp+j) >  *(max+j) || 
!                                   XOR(isnan(*(fp+j)),exclude)) {
                        DXSetElementInvalid(icHandle, i);
                        break;
                    }
*************** Include_Array(Array a, struct argblk b)
*** 698,704 ****
        if (!DXStatistics((Object)a, "data", &tmin, &tmax, NULL, NULL)) 
            return NULL;
  
!       
        /* include and all points in range, or exclude & all out of range.
         *  keep all data, which means returning the input array is fine.
         */
--- 703,709 ----
        if (!DXStatistics((Object)a, "data", &tmin, &tmax, NULL, NULL)) 
            return NULL;
  
! #ifndef NO_INCLUDE_SHORTCUT   
        /* include and all points in range, or exclude & all out of range.
         *  keep all data, which means returning the input array is fine.
         */
*************** Include_Array(Array a, struct argblk b)
*** 712,718 ****
        if ((!b.exclude && (tmax <  *b.min || tmin >  *b.max))
            || (b.exclude && (tmin >= *b.min && tmax <= *b.max)))
            return DXNewArrayV(t, c, rank, shape);
!           
  
        /* convert the data to scalar float if it is anything else.
         *  this is the same routine the statistics code uses.
--- 717,723 ----
        if ((!b.exclude && (tmax <  *b.min || tmin >  *b.max))
            || (b.exclude && (tmin >= *b.min && tmax <= *b.max)))
            return DXNewArrayV(t, c, rank, shape);
! #endif /* NO_INCLUDE_SHORTCUT */          
  
        /* convert the data to scalar float if it is anything else.
         *  this is the same routine the statistics code uses.
*************** Include_Array(Array a, struct argblk b)
*** 768,774 ****
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += b.shape) {
                for (j=0; j<b.shape; j++) {
!                   if (*(fp+j) < *(b.min+j) || *(fp+j) > *(b.max+j))
                        break;
                }
                if (j == b.shape)
--- 773,779 ----
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += b.shape) {
                for (j=0; j<b.shape; j++) {
!                   if (*(fp+j) < *(b.min+j) || *(fp+j) > *(b.max+j) || 
isnan(*(fp+j)))
                        break;
                }
                if (j == b.shape)
*************** Include_Array(Array a, struct argblk b)
*** 779,792 ****
      } else {  /* exclude */
        if (b.shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (*fp < *b.min || *fp > *b.max)
                    DXAddArrayData(na, id++, 1, 
                                   (Pointer)((char *)odp + i*bytes));
            }
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += b.shape) {
!               for (j=0; j<b.shape; j++) {
!                   if (*(fp+j) >= *(b.min+j) && *(fp+j) <= *(b.max+j))
                        break;
                }
                if (j == b.shape)
--- 784,797 ----
      } else {  /* exclude */
        if (b.shape == 1) {
            for (i=0, fp=(float *)dp; i<items; i++, fp++) {
!               if (*fp < *b.min || *fp > *b.max)  /* NaNs will not be put into 
exclusion array */
                    DXAddArrayData(na, id++, 1, 
                                   (Pointer)((char *)odp + i*bytes));
            }
        } else {
            for (i=0, fp=(float *)dp; i<items; i++, fp += b.shape) {
!               for (j=0; j<b.shape; j++) { /* NaNs will not be put into 
exclusion array */
!                   if ((*(fp+j) >= *(b.min+j) && *(fp+j) <= *(b.max+j)) || 
isnan(*(fp+j)))
                        break;
                }
                if (j == b.shape)

Re: [opendx-users] De-NaNifying data

Reply via email to