Hi Greg,

For quasi families I've used extended quasi-likelihood (see Mccullagh and Nelder, Generalized Linear Models 2nd ed, section 9.6) in place of the likelihood/quasi-likelihood in the expression for the (RE)ML score. I hadn't realised that this was possible before the paper was published.

best,
Simon

ps. sorry for slow reply, the original message slipped through my filter for mgcv related stuff

hi

I'm puzzled as to the relation between the REML score computed by gam and
the formula (4) on p.4 here:
http://opus.bath.ac.uk/22707/1/Wood_JRSSB_2011_73_1_3.pdf

I'm ok with this for poisson, or for quasipoisson when phi=1.

However, when phi differs from 1, I'm stuck.

#simulate some data
library(mgcv)
set.seed(1)
x1<-runif(500)
x2<-rnorm(500)
linp<--0.5+x1+exp(-x2^2/2)*sin(4*x2)
y<-rpois(500,exp(linp))

##poisson
#phi=1
m1<-gam(y~s(x1)+s(x2),family="poisson",method="REML")
phi<-m1$scale

#formula
#1st term
S1<-m1$smooth[[1]]$S[[1]]*m1$sp[1]
S2<-m1$smooth[[2]]$S[[1]]*m1$sp[2]
S<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
S[i,j]=S1[i-1,j-1]
S[i+9,j+9]=S2[i-1,j-1]
}
}
beta<-m1$coef
#penalised deviance
Dp<-m1$dev+t(beta)%*%S%*%beta
F1<-Dp/(2*phi)

#2nd term
F2<-sum(ifelse(y==0,0,y*log(y)-y-log(factorial(y))))

#3rd term
X<-predict(m1,type="lpmatrix")
W<-diag(fitted(m1))
H<-t(X)%*%W%*%X
ldhs<-determinant(H+S,log=TRUE)$modulus[1]
eigS<-eigen(S,only.values=TRUE)$val
lds<-sum(log(eigS[1:16]))
F3<-(ldhs-lds)/2

#4th term
Mp=3
F4<-Mp/2*log(2*pi*phi)

F1-F2+F3-F4
m1$gcv
#reml score = formula

##quasipoisson with scale = 1
#fitting is identical to the poisson case
#F1, F3, and F4 unchanged but F2 is now undefined

m2<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=1)
F1+F3-F4
m2$gcv
#reml score = formula with F2 omitted

##quasipoisson with unknown scale
m3<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=-1)
phiq<-m3$scale

#1st term
S1q<-m3$smooth[[1]]$S[[1]]*m3$sp[1]
S2q<-m3$smooth[[2]]$S[[1]]*m3$sp[2]
Sq<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
Sq[i,j]=S1q[i-1,j-1]
Sq[i+9,j+9]=S2q[i-1,j-1]
}
}
betaq<-m3$coef
#penalised deviance
Dpq<-m3$dev+t(betaq)%*%Sq%*%betaq
F1q<-Dpq/(2*phiq)

#2nd term undefined

#3rd term
Xq<-predict(m3,type="lpmatrix")
Wq<-diag(fitted(m3))
Hq<-t(Xq)%*%Wq%*%Xq
ldhsq<-determinant(Hq+Sq,log=TRUE)$modulus[1]
eigSq<-eigen(Sq,only.values=TRUE)$val
ldsq<-sum(log(eigSq[1:16]))
F3q<-(ldhsq-ldsq)/2

#4th term
Mp=3
F4q<-Mp/2*log(2*pi*phiq)

F1q+F3q-F4q
m3$gcv

#quite different

#but if phiq is replaced by the Pearson estimate of the scale
P<-sum((y-fitted(m3))^2/fitted(m3))
phip<-P/(500-sum(m3$edf))
F1p<-Dpq/(2*phip)
F3p<-F3q
#third term independent of scale
F4p<-Mp/2*log(2*pi*phip)
F1p+F3p-F4p
m3$gcv

#closer but still wrong

#is there a value of phi which makes this work?
f<-function(t) (Dpq/(2*t) + F3q + Mp/2*log(2*pi*t) - m3$gcv)^2
optimise(f,interval=c(0.5,1.5))$min->phix

Dpq/(2*phix) + F3q + Mp/2*log(2*pi*phix)
m3$gcv
#but what is phix - not the Pearson estimate or the scale returned by gam?

thanks

Greg Dropkin





--
Simon Wood, Mathematical Science, University of Bath BA2 7AY UK
+44 (0)1225 386603               http://people.bath.ac.uk/sw283

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to