Replacing

    w_tot=w.sum();

by

    w_tot=w.head(n).sum();

in the whimed function did the trick.
(~a classic)


On 06/24/2012 11:05 PM, Andreas Alfons wrote:
I only had a quick look at the code and did not have time to try it
myself, but you made many more changes to the code besides swapping
the C arrays to C++ Eigen data types.

I would suggest one of the following approaches to go further:

1) The original C code has been used for a long time and is proved to
be reliable. If you need to use the EIgen data types for your
application, you could simply write a wrapper function.

2) If you really need to re-implement it, I would start by changing
only as little as necessary to make it work with the Eigen data types.
If you first have something that works, you can then incrementally make
more changes to take advantage of the Eigen library.

- Andreas


On Sat, Jun 23, 2012 at 9:27 PM, Kaveh Vakili
<kaveh.vak...@wis.kuleuven.be> wrote:
Ok, "for make benefit" of future generations, i've attached to this
mail the ,stand alone' version of qn_sn.c (i've removed sn and 200
loc of bell and whistles to make it shorter).

I guess now i will feed both this and my implementation to a
debugger and see what is causing the strange divergence....

#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <inttypes.h>
#include <R.h>
#include <Rmath.h> /* -> <math.h> and much more */

double qn0(double *x, int n);
double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
*w_cand);
double pull(double *a_in, int n, int k);
int cmp (const void *a, const void *b);
int main();


double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
*w_cand){
/*
  Algorithm to compute the weighted high median in O(n) time.

  The whimed is defined as the smallest a[j] such that the sum
  of the weights of all a[i] <= a[j] is strictly greater than
  half of the total weight.

  Arguments:

  a: double array containing the observations
  n: number of observations
  w: array of (int/double) weights of the observations.
*/

    int n2, i, kcand,mm=0,vv1=1;
    /* sum of weights: `int' do overflow when  n ~>= 1e5 */
    int wleft, wmid, wright, w_tot, wrest;

    double trial,s_apsrt;

    w_tot = 0;
    for (i = 0; i < n; ++i)
        w_tot += w[i];
    wrest = 0;

/* REPEAT : */
    do {
        for (i = 0; i < n; ++i){
            a_psrt[i] = a[i];
        }
        n2 = n/2;/* =^= n/2 +1 with 0-indexing */
        trial = pull(a_psrt, n, n2);;
        wleft = 0;    wmid  = 0;    wright= 0;
        for (i = 0; i < n; ++i) {
            if (a[i] < trial)   wleft += w[i];
            else if (a[i] > trial) wright += w[i];
            else wmid += w[i];
        }
        /* wleft = sum_{i; a[i]  < trial}  w[i]
         * wmid  = sum_{i; a[i] == trial}  w[i] at least one 'i' since trial is
one a[]!
         * wright= sum_{i; a[i]  > trial}  w[i]
         */
        kcand = 0;
        if (2 * (wrest + wleft) > w_tot) {
            for (i = 0; i < n; ++i) {
                if (a[i] < trial) {
                    a_cand[kcand] = a[i];
                    w_cand[kcand] = w[i];
                    ++kcand;
                }
            }
        }
        else if (2 * (wrest + wleft + wmid) <= w_tot) {
            for (i = 0; i < n; ++i) {
                if (a[i] > trial) {
                    a_cand[kcand] = a[i];
                    w_cand[kcand] = w[i];
                    ++kcand;
                }
            }
            wrest += wleft + wmid;
        }
        else {
            return trial;
        }
        mm++;
        n = kcand;
        s_apsrt=0;
        for (i = 0;i<n;++i) {
            a[i] = a_cand[i];
            w[i] = w_cand[i];
            s_apsrt += a[i];
        }

    } while(1);

} /* _WHIMED_ */

/* Main routines for C API */
//double qn(double *x, int n, int finite_corr);
/* these have no extra factors (no consistency factor & finite_corr): */


/* ----------- Implementations -----------------------------------*/

//void Qn0(double *x, Sint *n, double *res) { *res = qn0(x, (int)*n); }

int cmp (const void *pa, const void *pb){
        double a = *(const double*)pa;
   double b = *(const double*)pb;
  return (int) (a - b);
}

double qn0(double *x, int n){
/*--------------------------------------------------------------------

   Efficient algorithm for the scale estimator:

       Q*_n = { |x_i - x_j|; i<j }_(k)  [= Qn without scaling ]

                i.e. the k-th order statistic of the |x_i - x_j|

   Parameters of the function Qn :
       x  : double array containing the observations
       n  : number of observations (n >=2)
  */

    double *y     = (double *)calloc(n, sizeof(double));
    double *work  = (double *)calloc(n, sizeof(double));
    double *a_psrt = (double *)calloc(n, sizeof(double));
    double *a_cand = (double *)calloc(n, sizeof(double));

    int *left     = (int *)calloc(n, sizeof(int));
    int *right    = (int *)calloc(n, sizeof(int));
    int *p        = (int *)calloc(n, sizeof(int));
    int *q        = (int *)calloc(n, sizeof(int));
    int *weight   = (int *)calloc(n, sizeof(int));

    double trial;
    bool found;

    double w_tot=0.0;

    int h, i, j,jj,jh,vv2=3,ll=0;
    /* Following should be `long long int' : they can be of order n^2 */
    int64_t k, knew, nl,nr, sump,sumq;

    h = n / 2 + 1;
    k = (int64_t)h * (h - 1) / 2;
    for (i = 0; i < n; ++i) {
        y[i] = x[i];
        left [i] = n - i + 1;
        right[i] = (i <= h) ? n : n - (i - h);
        /* the n - (i-h) is from the paper; original code had `n' */
    }
  //  R_qsort(y, 1, n); /* y := sort(x) */
    qsort(y,n,sizeof(double),cmp);
    nl = (int64_t)n * (n + 1) / 2;
    nr = (int64_t)n * n;
    knew = k + nl;/* = k + (n+1 \over 2) */
    found = FALSE;
#ifdef DEBUG_qn
    REprintf("qn0(): h,k= %2d,%2d;  nl,nr= %d,%d\n", h,k, nl,nr);
#endif
/* L200: */
    while(!found && nr - nl > n) {
        j = 0;
        /* Truncation to float :
           try to make sure that the same values are got later (guard bits !) */
        for (i = 1; i < n; ++i) {
            if (left[i] <= right[i]) {
                weight[j] = right[i] - left[i] + 1;
                jh = left[i] + weight[j] / 2;
                work[j] = (float)(y[i] - y[n - jh]);
                ++j;
            }
        }
        trial = whimed_i(work, weight, j, a_cand, a_psrt, /*iw_cand*/ p);
#ifdef DEBUG_qn
        REprintf(" ..!found: whimed(");
#  ifdef DEBUG_long
        REprintf("wrk=c(");
        for(i=0; i < j; i++) REprintf("%s%g", (i>0)? ", " : "", work[i]);
        REprintf("),\n     wgt=c(");
        for(i=0; i < j; i++) REprintf("%s%d", (i>0)? ", " : "", weight[i]);
        REprintf("), j= %3d) -> trial= %7g\n", j, trial);
#  else
        REprintf("j=%3d) -> trial= %g:", j, trial);
#  endif
#endif
        j = 0;
        for (i = n - 1; i >= 0; --i) {
            while (j < n && ((float)(y[i] - y[n - j - 1])) < trial)
                ++j;
            p[i] = j;
        }
#ifdef DEBUG_qn
        REprintf(" f_1: j=%2d", j);
#endif
        j = n + 1;
        for (i = 0; i < n; ++i) {
            while ((float)(y[i] - y[n - j + 1]) > trial)
                --j;
            q[i] = j;
        }
        sump = 0;
        sumq = 0;
        for (i = 0; i < n; ++i) {
            sump += p[i];
            sumq += q[i] - 1;
        }
#ifdef DEBUG_qn
        REprintf(" f_2 -> j=%2d, sump|q= %lld,%lld", j, sump,sumq);
#endif
        if (knew <= sump) {
            for (i = 0; i < n; ++i)
                right[i] = p[i];
            nr = sump;
#ifdef DEBUG_qn
            REprintf("knew <= sump =: nr , new right[]\n");
#endif
        } else if (knew > sumq) {
            for (i = 0; i < n; ++i)
                left[i] = q[i];
            nl = sumq;
#ifdef DEBUG_qn
            REprintf("knew > sumq =: nl , new left[]\n");
#endif
        } else { /* sump < knew <= sumq */
            found = TRUE;
#ifdef DEBUG_qn
            REprintf("sump < knew <= sumq ---> FOUND\n");
#endif
        }
        ll++;
        if(ll>vv2)      found=TRUE;
//      printf("%lf\n",trial);

    } /* while */

    if (found)
        return trial;
    else {
#ifdef DEBUG_qn
        REprintf(".. not fnd -> new work[]");
#endif
        j = 0;
        for (i = 1; i < n; ++i) {
            for (jj = left[i]; jj <= right[i]; ++jj) {
                work[j] = y[i] - y[n - jj];
                j++;
            }/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+}  */
        }
#ifdef DEBUG_qn
        REprintf(" of length %d; knew-nl=%d\n", j, knew-nl);
#endif

        return (pull(work, j - 1, knew - nl));
//      knew -= (nl + 1); /* -1: 0-indexing */
//      rPsort(work, j, knew);
//      return(work[knew]);
    }
} /* qn0 */


double pull(double *a_in, int n, int k){
    int j;
    double *a, ax;
  //  char* vmax = vmaxget();
    a = (double *)calloc(n, sizeof(double));
    /* Copy a[] and use copy since it will be re-shuffled: */
    for (j = 0; j < n; j++)
        a[j] = a_in[j];

    k--; /* 0-indexing */
    qsort(a,n,sizeof(double),cmp);
    ax = a[k];

//    vmaxset(vmax);
    return ax;
} /* pull */
int main(){
        FILE *inFile;
        double x[100],resu;
        int i,n=100,numRead=0;
        inFile=fopen("test.txt","r");
        if (!inFile)            return 1;
        for(i=0;i<n;i++) numRead=fscanf(inFile,"%lf",&x[i]);
        printf("%lf\n",x[0]);
        fclose(inFile);
        resu=qn0(x,n);
        printf("%lf\n",resu);
        return 0;
}

_______________________________________________
R-SIG-Robust@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-sig-robust



_______________________________________________
R-SIG-Robust@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-sig-robust

Reply via email to