[R] EM algorithm mixture of multivariate gaussian

daniele riggi Fri, 22 May 2009 07:31:10 -0700

Hi, i would to know, if someone have ever write the code to estimate the
parameter (mixing proportion, mean, a var/cov matrix) of a mixture of two
multivariate normal distribution. I wrote it and it works (it could find
mean and mixing proportion, if I fix the var/cov matrix), while if I fix
anything, it doesn't work. My suspect is that when the algorithm iterates
the var/cov matrix, something wrong happens, but i don't know how solve this
problem.
I enclose my script, if someone would give it a look, I would appreciate so
much.
thank you
daniele


#########################
#Start Script
#########################

library(mvtnorm)
libray(scatterplot3d)
library(MASS)
n=100

m1=c(5,-5)
m2=c(-3,3)
s1=matrix(c(2,1,1,3), 2,2)
s2=matrix(c(4,1,1,6), 2,2)
alpha=0.3

c1 <- mvrnorm(round(n*alpha),m1,s1)
c2 <- mvrnorm(round(n*(1-alpha)),m2,s2)
allval <- rbind(c1,c2)
x <- allval[sample(n,n),]

mixm<-
function(x,m1,m2,s1,s2, alpha)

{
f1<-dmvnorm(x, m1, s1, log=FALSE)
     f2<-dmvnorm(x, m2, s2, log=FALSE)
     f=alpha*f1+(1-alpha)*f2
f
}



plot(x)

den<-mixm(x,m1,m2,s1,s2,alpha)
scatterplot3d(x[,1],x[,2],den, highlight.3d=TRUE, col.axis="blue",
      col.grid="lightblue",angle=120, pch=20)


em2mn<- function(y)
{
n<-length(y[,1])
p<-matrix(0,n,1)
f1<-matrix(0,n,1)
f2<-matrix(0,n,1)
tau<-matrix(0,n,2)
eps<-0.0001
mu01<-c(0,0)
mu02<-c(0,0)
sd01<-matrix(0,2,2)
sd02<-matrix(0,2,2)
cov1<-matrix(0,2,2)
cov2<-matrix(0,2,2)
 # 1 inizializzare i valori
alpha0= runif(1,0,1)
for (j in 1:2) {
mu01[j] <- runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
mu02[j] <- runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
}
 sd01<- var(y[1:round(n/2),])
sd02<- var(y[round(n/2):n,])
#sd01<-s1
#sd02<-s2
#prima iterazione

iter<-0
 f1<-dmvnorm(y, mu01, sd01, log=FALSE)
     f2<-dmvnorm(y, mu02, sd02, log=FALSE)
     p=alpha0*f1+(1-alpha0)*f2


scatterplot3d(y[,1], y[,2], p , highlight.3d=TRUE, col.axis="blue",
      col.grid="lightblue",main="val iniz mistura normali multivariata",
angle=120, pch=20)

#verosimiglianza iniziale

l0<-sum(log(p))
l1<-l0
alpha<-alpha0
mu1<-mu01
mu2<-mu02
sd1<-sd01
sd2<-sd02

for (iter in 1:itermax)
{

#passo E
 for (i in 1:n) {
tau[i,1]<-(alpha*f1[i])/p[i]
tau[i,2]<-((1-alpha)*f2[i])/p[i]
}


#passo M
 alpha= mean(tau[,1])
 mu1=colSums(tau[,1]*y)/sum(tau[,1])
mu2=colSums(tau[,2]*y)/sum(tau[,2])
 ycen1<-(y-mu1)
ycen2<-(y-mu2)

cov1<-matrix(0,2,2)
cov2<-matrix(0,2,2)

for (i in 1:n){
cov1<-cov1+ (tau[i,1]*(ycen1[i,])%*%t(ycen1[i,]))
cov2<-cov2+ (tau[i,2]*(ycen2[i,])%*%t(ycen2[i,]))
}
# w1<-sqrt(tau[,1])
# w2<-sqrt(tau[,2])
# ywei1<-w1*ycen1
# ywei2<-w2*ycen2
 sd1<-cov1/sum(tau[,1])
sd2<-cov2/sum(tau[,2])
    f1<-dmvnorm(y,mu1,sd1,log=FALSE)
    f2<-dmvnorm(y,mu2,sd2,log=FALSE)
    p<-alpha*f1+(1-alpha)*f2

L<-sum(log(p))
 cat(iter,L,"\t",alpha,"\t",mu1,"\t",mu2,"\n")

if (abs(L1-L)<eps) break
L1<-L
}

denfin<-mixm(x,mu1,mu2,sd1,sd2,alpha)
scatterplot3d(x[,1],x[,2],denfin, highlight.3d=TRUE, col.axis="blue",
      col.grid="lightblue",main="densità valori finali",angle=120, pch=20)

return(list(alpha0=alpha0,alpha=alpha,mu1=mu1,mu2=mu2,sd1=sd1,sd2=sd2))

}

em2mn(x)

##############################
end script
##############################



-- 
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.ri...@gmail.com

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

[R] EM algorithm mixture of multivariate gaussian

Reply via email to