Hi Greg,
For quasi families I've used extended quasi-likelihood (see Mccullagh
and Nelder, Generalized Linear Models 2nd ed, section 9.6) in place of
the likelihood/quasi-likelihood in the expression for the (RE)ML score.
I hadn't realised that this was possible before the paper was published.
best,
Simon
ps. sorry for slow reply, the original message slipped through my filter
for mgcv related stuff
hi
I'm puzzled as to the relation between the REML score computed by gam and
the formula (4) on p.4 here:
http://opus.bath.ac.uk/22707/1/Wood_JRSSB_2011_73_1_3.pdf
I'm ok with this for poisson, or for quasipoisson when phi=1.
However, when phi differs from 1, I'm stuck.
#simulate some data
library(mgcv)
set.seed(1)
x1<-runif(500)
x2<-rnorm(500)
linp<--0.5+x1+exp(-x2^2/2)*sin(4*x2)
y<-rpois(500,exp(linp))
##poisson
#phi=1
m1<-gam(y~s(x1)+s(x2),family="poisson",method="REML")
phi<-m1$scale
#formula
#1st term
S1<-m1$smooth[[1]]$S[[1]]*m1$sp[1]
S2<-m1$smooth[[2]]$S[[1]]*m1$sp[2]
S<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
S[i,j]=S1[i-1,j-1]
S[i+9,j+9]=S2[i-1,j-1]
}
}
beta<-m1$coef
#penalised deviance
Dp<-m1$dev+t(beta)%*%S%*%beta
F1<-Dp/(2*phi)
#2nd term
F2<-sum(ifelse(y==0,0,y*log(y)-y-log(factorial(y))))
#3rd term
X<-predict(m1,type="lpmatrix")
W<-diag(fitted(m1))
H<-t(X)%*%W%*%X
ldhs<-determinant(H+S,log=TRUE)$modulus[1]
eigS<-eigen(S,only.values=TRUE)$val
lds<-sum(log(eigS[1:16]))
F3<-(ldhs-lds)/2
#4th term
Mp=3
F4<-Mp/2*log(2*pi*phi)
F1-F2+F3-F4
m1$gcv
#reml score = formula
##quasipoisson with scale = 1
#fitting is identical to the poisson case
#F1, F3, and F4 unchanged but F2 is now undefined
m2<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=1)
F1+F3-F4
m2$gcv
#reml score = formula with F2 omitted
##quasipoisson with unknown scale
m3<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=-1)
phiq<-m3$scale
#1st term
S1q<-m3$smooth[[1]]$S[[1]]*m3$sp[1]
S2q<-m3$smooth[[2]]$S[[1]]*m3$sp[2]
Sq<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
Sq[i,j]=S1q[i-1,j-1]
Sq[i+9,j+9]=S2q[i-1,j-1]
}
}
betaq<-m3$coef
#penalised deviance
Dpq<-m3$dev+t(betaq)%*%Sq%*%betaq
F1q<-Dpq/(2*phiq)
#2nd term undefined
#3rd term
Xq<-predict(m3,type="lpmatrix")
Wq<-diag(fitted(m3))
Hq<-t(Xq)%*%Wq%*%Xq
ldhsq<-determinant(Hq+Sq,log=TRUE)$modulus[1]
eigSq<-eigen(Sq,only.values=TRUE)$val
ldsq<-sum(log(eigSq[1:16]))
F3q<-(ldhsq-ldsq)/2
#4th term
Mp=3
F4q<-Mp/2*log(2*pi*phiq)
F1q+F3q-F4q
m3$gcv
#quite different
#but if phiq is replaced by the Pearson estimate of the scale
P<-sum((y-fitted(m3))^2/fitted(m3))
phip<-P/(500-sum(m3$edf))
F1p<-Dpq/(2*phip)
F3p<-F3q
#third term independent of scale
F4p<-Mp/2*log(2*pi*phip)
F1p+F3p-F4p
m3$gcv
#closer but still wrong
#is there a value of phi which makes this work?
f<-function(t) (Dpq/(2*t) + F3q + Mp/2*log(2*pi*t) - m3$gcv)^2
optimise(f,interval=c(0.5,1.5))$min->phix
Dpq/(2*phix) + F3q + Mp/2*log(2*pi*phix)
m3$gcv
#but what is phix - not the Pearson estimate or the scale returned by gam?
thanks
Greg Dropkin
--
Simon Wood, Mathematical Science, University of Bath BA2 7AY UK
+44 (0)1225 386603 http://people.bath.ac.uk/sw283
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.