Dear R developers,

this e-mail concerns a code suggestion to slightly change approxfun, so that it is more efficient when called several times.

We are using approxfun (not approx) to update external variables (time-series) to current time points while integrating our ODE models.

It is not uncommon for these models to take in the order of 10^4 time steps, and sometimes we run a model > 1000 times, so approxfun is called many-many times. For such applications approxfun is a serious performance bottleneck, so we first tried to develop own functions with less overhead.

Then, one of us (Karline Soetart) noticed, that in the C-code of R_approx, each time approxfun is called, it checks the input data for NAs in the x and y, as well as the validity of the method and f.

For common data sets this takes about 40% of the computation time.

While testing is of course necessary for "approx", we think that for "approxfun", testing could be done only once, before the function is created.

Testing validity of the input only once, makes approxfun about 40-45% faster, e.g. for:

x <- seq(0, 10000)
y <- x*2

F1<- approxfun(x, y)

system.time(
 for ( i in 1:10000)
  F1(i)
)

5.50 sec for the original approxfun
2.97 sec for the patched version


This is of course just a suggestion, but we think that a patch for package "stats" may be of general interest and therefore more appropriate than providing a modified approxfun in our own package.

The code suggestion was tested with R-devel rev. 49803 and several contributed packages and we found no negative side effects. Please find the suggested patch below and please excuse if we overlooked something.

Thanks for your consideration

Best wishes,

Karline Soetaert & Thomas Petzoldt


Index: R/approx.R
===================================================================
--- R/approx.R  (revision 49817)
+++ R/approx.R  (working copy)
@@ -114,10 +114,19 @@
     force(f)
     stopifnot(length(yleft) == 1, length(yright) == 1, length(f) == 1)
     rm(o, rule, ties)
-    function(v) .C("R_approx", as.double(x), as.double(y), as.integer(n),
-                  xout = as.double(v), as.integer(length(v)),
-                  as.integer(method), as.double(yleft), as.double(yright),
-                  as.double(f), NAOK = TRUE, PACKAGE = "stats")$xout
+
+## Changed here:
+## suggestion:
+    # 1. Test input consistency once
+    .C("R_approxtest",as.double(x), as.double(y), as.integer(n),
+        as.integer(method), as.double(f), NAOK = TRUE,
+        PACKAGE = "stats")
+
+    # 2. Create and return function that does not test input validity...
+    function(v) .C("R_approxfun", as.double(x), as.double(y), as.integer(n),
+        xout = as.double(v), as.integer(length(v)), as.integer(method),
+        as.double(yleft), as.double(yright), as.double(f), NAOK = TRUE,
+        PACKAGE = "stats")$xout
 }
 
 ### This is a `variant' of  approx( method = "constant" ) :
Index: src/approx.c
===================================================================
--- src/approx.c        (revision 49789)
+++ src/approx.c        (working copy)
@@ -128,3 +128,44 @@
        if(!ISNA(xout[i]))
            xout[i] = approx1(xout[i], x, y, *nxy, &M);
 }
+
+/* Testing done only once - in a separate function */
+void R_approxtest(double *x, double *y, int *nxy,
+             int *method, double *f)
+{
+    int i;
+
+    switch(*method) {
+     case 1: /* linear */
+       break;
+     case 2: /* constant */
+             if(!R_FINITE(*f) || *f < 0 || *f > 1)
+             error(_("approx(): invalid f value"));
+             break;
+     default:
+             error(_("approx(): invalid interpolation method"));
+             break;
+   }
+    /* check interpolation method */
+      for(i=0 ; i<*nxy ; i++)
+         if(ISNA(x[i]) || ISNA(y[i]))
+             error(_("approx(): attempted to interpolate NA values"));
+}
+
+/* R Frontend for Linear and Constant Interpolation, no testing */
+
+void R_approxfun(double *x, double *y, int *nxy, double *xout, int *nout,
+             int *method, double *yleft, double *yright, double *f)
+{
+    int i;
+    appr_meth M = {0.0, 0.0, 0.0, 0.0, 0}; /* -Wall */
+
+    M.f2 = *f;
+    M.f1 = 1 - *f;
+    M.kind = *method;
+    M.ylow = *yleft;
+    M.yhigh = *yright;
+    for(i=0 ; i < *nout; i++)
+         if(!ISNA(xout[i]))
+            xout[i] = approx1(xout[i], x, y, *nxy, &M);
+}
Index: src/init.c
===================================================================
--- src/init.c  (revision 49789)
+++ src/init.c  (working copy)
@@ -67,6 +67,8 @@
 static R_NativePrimitiveArgType band_den_bin_t[] = {INTSXP, INTSXP, REALSXP, 
REALSXP, INTSXP};
 
 static R_NativePrimitiveArgType R_approx_t[] = {REALSXP, REALSXP, INTSXP, 
REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP};
+static R_NativePrimitiveArgType R_approxtest_t[] = {REALSXP, REALSXP, INTSXP, 
INTSXP, REALSXP};
+static R_NativePrimitiveArgType R_approxfun_t[] = {REALSXP, REALSXP, INTSXP, 
REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP};
 
 static R_NativePrimitiveArgType loglin_t[] = {INTSXP, INTSXP, INTSXP, INTSXP, 
INTSXP,
                                              REALSXP, REALSXP, INTSXP, INTSXP, 
REALSXP,
@@ -135,6 +137,8 @@
     {"kmeans_Lloyd", (DL_FUNC) &kmeans_Lloyd, 9},
     {"kmeans_MacQueen", (DL_FUNC) &kmeans_MacQueen, 9},
     CDEF(R_approx),
+    CDEF(R_approxfun),
+    CDEF(R_approxtest),
     CDEF(band_ucv_bin),
     CDEF(band_bcv_bin),
     CDEF(band_phi4_bin),
Index: src/stats.h
===================================================================
--- src/stats.h (revision 49789)
+++ src/stats.h (working copy)
@@ -29,6 +29,9 @@
 
 void R_approx(double *, double *, int *, double *, int *,
              int *, double *, double *, double *);
+void R_approxfun(double *, double *, int *, double *, int *,
+             int *, double *, double *, double *);
+void R_approxtest(double *, double *, int *, int *, double *);
 void band_ucv_bin(int *, int *, double *, int *, double *, double *);
 void band_bcv_bin(int *, int *, double *, int *, double *, double *);
 void band_phi4_bin(int *, int *, double *, int *, double *, double *);
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to