Re: [Rd] constrOptim( ): conflict between help page and code

Ravi Varadhan Tue, 22 Jun 2010 15:20:32 -0700

Dear R-devel,

I have re-worked the `constrOptim' function extensively to fix some bugs, and 
also to provide some extended functionality.  Please find attached the patch 
(with comments on the bug fixes and improvements).


I am also attaching a "test" file that shows the difference between the "old" 
and "new" versions of `constrOptim'.

Best,
Ravi. 

____________________________________________________________________

Ravi Varadhan, Ph.D.
Assistant Professor,
Division of Geriatric Medicine and Gerontology
School of Medicine
Johns Hopkins University

Ph. (410) 502-2619
email: rvarad...@jhmi.edu


----- Original Message -----
From: Martin Maechler <maech...@stat.math.ethz.ch>
Date: Tuesday, June 22, 2010 3:33 am
Subject: Re: [Rd] constrOptim( ): conflict between help page and code
To: Ravi Varadhan <rvarad...@jhmi.edu>
Cc: maech...@stat.math.ethz.ch


> Dear Ravi,
>  What we (R-core) really want with such postings is a "diff" aka "patch"
>  against the R-devel (!) sources,
>  which you can get from 
>  
>    
>  
>  You produce a patch with e.g.
>  
>  diff -ubBw  <previousversion>constrOptim.R <myversion>constrOptim.R
>  
>  send that (possibly as attached file  e.g.  constrOptim.patch)
>  to R-devel 
>  
>  Best regards,
>  Martin

--- constrOptim.R       2010-06-22 15:44:28.352204298 -0400
+++ constrOptim_NEW.R   2010-06-22 16:00:38.223490505 -0400
@@ -15,22 +15,28 @@
 #  http://www.r-project.org/Licenses/
 
 
-constrOptim <-
-    function(theta, f, grad, ui, ci, mu = 0.0001, control = list(),
-             method = if(is.null(grad)) "Nelder-Mead" else "BFGS",
-             outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE)
-{
 
+constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = 
list(), 
+    method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 
1e-07, grad.eps = 1.e-07, trace.outer=TRUE, ...) 
+{
+# Bugs fixed and improvements made by:  Ravi Varadhan, Johns Hopkins 
University, June 22, 2010.  
+# 
+# The user can call either "Nelder-Mead" or "BFGS"
+# "BFGS" can be called even when the gradient function is not specified
+# in which case, a finite-difference approximation is computed
+#   
     if (!is.null(control$fnscale) && control$fnscale < 0)
-        mu <-  -mu ##maximizing
+        mu <- -mu
 
     R <- function(theta, theta.old, ...) {
         ui.theta <- ui%*%theta
         gi <-  ui.theta-ci
-        if (any(gi<0)) return(NaN)
+        if (any(gi < 0)) 
+            return(NaN)
         gi.old <- ui%*%theta.old-ci
         bar <- sum(gi.old*log(gi)-ui.theta)
-        if (!is.finite(bar)) bar <-  -Inf
+        if (!is.finite(bar)) 
+            bar <- -Inf
         f(theta, ...)-mu*bar
     }
 
@@ -42,44 +48,84 @@
         grad(theta, ...) - mu*dbar
     }
 
+       method <- match.arg(method, choices=c("BFGS", "Nelder-Mead"))
+
+    if (is.null(grad) & method == "BFGS") {
+        grad <- function(par, ...) {
+            df <- rep(NA, length(par))
+               fval <- f(par, ...)
+            for (i in 1:length(par)) {
+                dx <- par
+                dx[i] <- dx[i] + grad.eps
+                df[i] <- (f(dx, ...) - fval)/grad.eps
+            }
+            df
+        }
+}
+
     if (any(ui%*%theta-ci <= 0))
-        stop("initial value is not in the interior of the feasible region")
+        stop("initial value not feasible")
     obj <- f(theta, ...)
     r <- R(theta, theta, ...)
-    fun <- function(theta, ...) R(theta, theta.old, ...)
-    gradient <- if(method == "SANN") {
-        if(missing(grad)) NULL else grad
-    } else function(theta, ...) dR(theta, theta.old, ...)
+       feval <- 0
+       geval <- 0 
 
-    for(i in seq_len(outer.iterations)) {
+    for (i in 1:outer.iterations) {
+       if (trace.outer) {
+               cat("par: ", theta, "\n")
+               cat("fval: ", obj, "\n")
+       }
         obj.old <- obj
         r.old <- r
         theta.old <- theta
-
+        fun <- function(theta, ...) {
+            R(theta, theta.old, ...)
+        }
+        gradient <- function(theta, ...) {
+            dR(theta, theta.old, ...)
+        }
         a <- optim(theta.old, fun, gradient, control = control,
-                   method = method, hessian = hessian, ...)
+            method = method, ...)
         r <- a$value
-        if (is.finite(r) && is.finite(r.old) &&
-            abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break
+# Following "relative" convergence criterion is flawed
+#        if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(outer.eps + 
+#            abs(r - r.old) ) < outer.eps) 
+#            break
+#  Here is a correct way to do it:
+#        if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(1 + 
+#            abs(r + r.old)/2 ) < outer.eps) 
+#            break
+#  But we use the simpler "absolute" convergence criterion:
+        if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps) 
+            break
         theta <- a$par
+# We need to accumulate function and gradient evaluation counts
+       feval <- feval + a$counts[1]
+       geval <- geval + a$counts[2]
         obj <- f(theta, ...)
-        if (obj > obj.old) break
+# The following line is a bug when control$fnscale = -1
+#        if (obj > obj.old) break 
+# This is the correct version
+        if (obj > obj.old * sign(mu)) break
     }
     if (i == outer.iterations) {
         a$convergence <- 7
-        a$message <- gettext("Barrier algorithm ran out of iterations and did 
not converge")
+        a$message <- "Barrier algorithm ran out of iterations and did not 
converge"
     }
     if (mu > 0 && obj > obj.old) {
         a$convergence <- 11
-        a$message <- gettextf("Objective function increased at outer iteration 
%d", i)
+        a$message <- paste("Objective function increased at outer iteration", 
+            i)
     }
     if (mu < 0 && obj < obj.old) {
         a$convergence <- 11
-        a$message <- gettextf("Objective function decreased at outer iteration 
%d", i)
+        a$message <- paste("Objective function decreased at outer iteration", 
+            i)
     }
     a$outer.iterations <- i
     a$barrier.value <- a$value
     a$value <- f(a$par, ...)
     a$barrier.value <- a$barrier.value - a$value
+    a$counts <- c(feval, geval)  
     a
 }
--- constrOptim.R       2010-06-22 15:44:28.352204298 -0400
+++ constrOptim_NEW_NC.R        2010-06-22 16:05:03.736204242 -0400
@@ -15,22 +15,23 @@
 #  http://www.r-project.org/Licenses/
 
 
-constrOptim <-
-    function(theta, f, grad, ui, ci, mu = 0.0001, control = list(),
-             method = if(is.null(grad)) "Nelder-Mead" else "BFGS",
-             outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE)
+
+constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = 
list(), 
+    method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 
1e-07, grad.eps = 1.e-07, trace.outer=TRUE, ...) 
 {
 
     if (!is.null(control$fnscale) && control$fnscale < 0)
-        mu <-  -mu ##maximizing
+        mu <- -mu
 
     R <- function(theta, theta.old, ...) {
         ui.theta <- ui%*%theta
         gi <-  ui.theta-ci
-        if (any(gi<0)) return(NaN)
+        if (any(gi < 0)) 
+            return(NaN)
         gi.old <- ui%*%theta.old-ci
         bar <- sum(gi.old*log(gi)-ui.theta)
-        if (!is.finite(bar)) bar <-  -Inf
+        if (!is.finite(bar)) 
+            bar <- -Inf
         f(theta, ...)-mu*bar
     }
 
@@ -42,44 +43,71 @@
         grad(theta, ...) - mu*dbar
     }
 
+       method <- match.arg(method, choices=c("BFGS", "Nelder-Mead"))
+
+    if (is.null(grad) & method == "BFGS") {
+        grad <- function(par, ...) {
+            df <- rep(NA, length(par))
+               fval <- f(par, ...)
+            for (i in 1:length(par)) {
+                dx <- par
+                dx[i] <- dx[i] + grad.eps
+                df[i] <- (f(dx, ...) - fval)/grad.eps
+            }
+            df
+        }
+}
+
     if (any(ui%*%theta-ci <= 0))
-        stop("initial value is not in the interior of the feasible region")
+        stop("initial value not feasible")
     obj <- f(theta, ...)
     r <- R(theta, theta, ...)
-    fun <- function(theta, ...) R(theta, theta.old, ...)
-    gradient <- if(method == "SANN") {
-        if(missing(grad)) NULL else grad
-    } else function(theta, ...) dR(theta, theta.old, ...)
+       feval <- 0
+       geval <- 0 
 
-    for(i in seq_len(outer.iterations)) {
+    for (i in 1:outer.iterations) {
+       if (trace.outer) {
+               cat("par: ", theta, "\n")
+               cat("fval: ", obj, "\n")
+       }
         obj.old <- obj
         r.old <- r
         theta.old <- theta
-
+        fun <- function(theta, ...) {
+            R(theta, theta.old, ...)
+        }
+        gradient <- function(theta, ...) {
+            dR(theta, theta.old, ...)
+        }
         a <- optim(theta.old, fun, gradient, control = control,
-                   method = method, hessian = hessian, ...)
+            method = method, ...)
         r <- a$value
-        if (is.finite(r) && is.finite(r.old) &&
-            abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break
+       if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps) 
+            break
         theta <- a$par
+       feval <- feval + a$counts[1]
+       geval <- geval + a$counts[2]
         obj <- f(theta, ...)
-        if (obj > obj.old) break
+        if (obj > obj.old * sign(mu)) break
     }
     if (i == outer.iterations) {
         a$convergence <- 7
-        a$message <- gettext("Barrier algorithm ran out of iterations and did 
not converge")
+        a$message <- "Barrier algorithm ran out of iterations and did not 
converge"
     }
     if (mu > 0 && obj > obj.old) {
         a$convergence <- 11
-        a$message <- gettextf("Objective function increased at outer iteration 
%d", i)
+        a$message <- paste("Objective function increased at outer iteration", 
+            i)
     }
     if (mu < 0 && obj < obj.old) {
         a$convergence <- 11
-        a$message <- gettextf("Objective function decreased at outer iteration 
%d", i)
+        a$message <- paste("Objective function decreased at outer iteration", 
+            i)
     }
     a$outer.iterations <- i
     a$barrier.value <- a$value
     a$value <- f(a$par, ...)
     a$barrier.value <- a$barrier.value - a$value
+    a$counts <- c(feval, geval)  
     a
 }

# source the function "constrOptim.nl"
source("g:/computing/constrOptim2.txt")
################################
fr <- function(x) { ## Rosenbrock Banana function 
x1 <- x[1] 
x2 <- x[2] 
100 * (x2 - x1 * x1)^2 + (1 - x1)^2 
} 
grr <- function(x) { ## Gradient of 'fr' 
x1 <- x[1] 
x2 <- x[2] 
c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1), 200 * (x2 - x1 * x1)) 
} 

constrOptim(c(.5,0), fr, grr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1)) 

# Note the difference in function and gradient counts 
# Also, the number of outer iterations are smaller because of 
# a different (and better) termination criterion
constrOptim2(c(.5,0), fr, grr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1)) 

################################
# Now we demonstrate the "bug" in constrOptim when maximizing, i.e. when 
control$fnscale = -1

fr.neg <- function(x) { ## negative Rosenbrock Banana function 
x1 <- x[1] 
x2 <- x[2] 
-(100 * (x2 - x1 * x1)^2 + (1 - x1)^2 )
} 
grr.neg <- function(x) { ## Gradient of 'fr.neg' 
x1 <- x[1] 
x2 <- x[2] 
-c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1), 200 * (x2 - x1 * x1)) 
} 

# This performs only one outer iteration 
constrOptim(c(.5,0), fr.neg, grr.neg, ui=rbind(c(-1,0),c(1,-1)), 
ci=c(-0.9,0.1), control=list(fnscale=-1)) 

# This correctly performs many outer iterations until convergence 
constrOptim2(c(.5,0), fr.neg, grr.neg, ui=rbind(c(-1,0),c(1,-1)), 
ci=c(-0.9,0.1), control=list(fnscale=-1)) 

################################
# Now we demonstrate the use of numerical gradient
fr <- function(x) { ## Rosenbrock Banana function 
x1 <- x[1] 
x2 <- x[2] 
100 * (x2 - x1 * x1)^2 + (1 - x1)^2 
} 
grr <- function(x) { ## Gradient of 'fr' 
x1 <- x[1] 
x2 <- x[2] 
c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1), 200 * (x2 - x1 * x1)) 
} 

# Nelder-Mead is used when gradient is not specified 
constrOptim(c(.5,0), fr, gr=NULL, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1)) 

# BFGS algorithm is used even though gradient is not specified
constrOptim2(c(.5,0), fr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1))

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Re: [Rd] constrOptim( ): conflict between help page and code

Reply via email to