Hi, i would to know, if someone have ever write the code to estimate the parameter (mixing proportion, mean, a var/cov matrix) of a mixture of two multivariate normal distribution. I wrote it and it works (it could find mean and mixing proportion, if I fix the var/cov matrix), while if I fix anything, it doesn't work. My suspect is that when the algorithm iterates the var/cov matrix, something wrong happens, but i don't know how solve this problem. I enclose my script, if someone would give it a look, I would appreciate so much. thank you daniele
######################### #Start Script ######################### library(mvtnorm) libray(scatterplot3d) library(MASS) n=100 m1=c(5,-5) m2=c(-3,3) s1=matrix(c(2,1,1,3), 2,2) s2=matrix(c(4,1,1,6), 2,2) alpha=0.3 c1 <- mvrnorm(round(n*alpha),m1,s1) c2 <- mvrnorm(round(n*(1-alpha)),m2,s2) allval <- rbind(c1,c2) x <- allval[sample(n,n),] mixm<- function(x,m1,m2,s1,s2, alpha) { f1<-dmvnorm(x, m1, s1, log=FALSE) f2<-dmvnorm(x, m2, s2, log=FALSE) f=alpha*f1+(1-alpha)*f2 f } plot(x) den<-mixm(x,m1,m2,s1,s2,alpha) scatterplot3d(x[,1],x[,2],den, highlight.3d=TRUE, col.axis="blue", col.grid="lightblue",angle=120, pch=20) em2mn<- function(y) { n<-length(y[,1]) p<-matrix(0,n,1) f1<-matrix(0,n,1) f2<-matrix(0,n,1) tau<-matrix(0,n,2) eps<-0.0001 mu01<-c(0,0) mu02<-c(0,0) sd01<-matrix(0,2,2) sd02<-matrix(0,2,2) cov1<-matrix(0,2,2) cov2<-matrix(0,2,2) # 1 inizializzare i valori alpha0= runif(1,0,1) for (j in 1:2) { mu01[j] <- runif(1,min=quantile(y[,j], probs =0.25), max=quantile(y[,j], probs =0.75)) mu02[j] <- runif(1,min=quantile(y[,j], probs =0.25), max=quantile(y[,j], probs =0.75)) } sd01<- var(y[1:round(n/2),]) sd02<- var(y[round(n/2):n,]) #sd01<-s1 #sd02<-s2 #prima iterazione iter<-0 f1<-dmvnorm(y, mu01, sd01, log=FALSE) f2<-dmvnorm(y, mu02, sd02, log=FALSE) p=alpha0*f1+(1-alpha0)*f2 scatterplot3d(y[,1], y[,2], p , highlight.3d=TRUE, col.axis="blue", col.grid="lightblue",main="val iniz mistura normali multivariata", angle=120, pch=20) #verosimiglianza iniziale l0<-sum(log(p)) l1<-l0 alpha<-alpha0 mu1<-mu01 mu2<-mu02 sd1<-sd01 sd2<-sd02 for (iter in 1:itermax) { #passo E for (i in 1:n) { tau[i,1]<-(alpha*f1[i])/p[i] tau[i,2]<-((1-alpha)*f2[i])/p[i] } #passo M alpha= mean(tau[,1]) mu1=colSums(tau[,1]*y)/sum(tau[,1]) mu2=colSums(tau[,2]*y)/sum(tau[,2]) ycen1<-(y-mu1) ycen2<-(y-mu2) cov1<-matrix(0,2,2) cov2<-matrix(0,2,2) for (i in 1:n){ cov1<-cov1+ (tau[i,1]*(ycen1[i,])%*%t(ycen1[i,])) cov2<-cov2+ (tau[i,2]*(ycen2[i,])%*%t(ycen2[i,])) } # w1<-sqrt(tau[,1]) # w2<-sqrt(tau[,2]) # ywei1<-w1*ycen1 # ywei2<-w2*ycen2 sd1<-cov1/sum(tau[,1]) sd2<-cov2/sum(tau[,2]) f1<-dmvnorm(y,mu1,sd1,log=FALSE) f2<-dmvnorm(y,mu2,sd2,log=FALSE) p<-alpha*f1+(1-alpha)*f2 L<-sum(log(p)) cat(iter,L,"\t",alpha,"\t",mu1,"\t",mu2,"\n") if (abs(L1-L)<eps) break L1<-L } denfin<-mixm(x,mu1,mu2,sd1,sd2,alpha) scatterplot3d(x[,1],x[,2],denfin, highlight.3d=TRUE, col.axis="blue", col.grid="lightblue",main="densità valori finali",angle=120, pch=20) return(list(alpha0=alpha0,alpha=alpha,mu1=mu1,mu2=mu2,sd1=sd1,sd2=sd2)) } em2mn(x) ############################## end script ############################## -- Dr. Daniele Riggi, PhD student University of Milano-Bicocca Department of Statistics Building U7, Via Bicocca degli Arcimboldi, 8 20126 Milano, Italy cell. +39 328 3380690 mailto: daniele.ri...@gmail.com [[alternative HTML version deleted]]
______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.