[R] EM algorithm mixture of multivariate gaussian

2009-05-22 Thread daniele riggi
Hi, i would to know, if someone have ever write the code to estimate the
parameter (mixing proportion, mean, a var/cov matrix) of a mixture of two
multivariate normal distribution. I wrote it and it works (it could find
mean and mixing proportion, if I fix the var/cov matrix), while if I fix
anything, it doesn't work. My suspect is that when the algorithm iterates
the var/cov matrix, something wrong happens, but i don't know how solve this
problem.
I enclose my script, if someone would give it a look, I would appreciate so
much.
thank you
daniele

#
#Start Script
#

library(mvtnorm)
libray(scatterplot3d)
library(MASS)
n=100

m1=c(5,-5)
m2=c(-3,3)
s1=matrix(c(2,1,1,3), 2,2)
s2=matrix(c(4,1,1,6), 2,2)
alpha=0.3

c1 - mvrnorm(round(n*alpha),m1,s1)
c2 - mvrnorm(round(n*(1-alpha)),m2,s2)
allval - rbind(c1,c2)
x - allval[sample(n,n),]

mixm-
function(x,m1,m2,s1,s2, alpha)

{
f1-dmvnorm(x, m1, s1, log=FALSE)
 f2-dmvnorm(x, m2, s2, log=FALSE)
 f=alpha*f1+(1-alpha)*f2
f
}



plot(x)

den-mixm(x,m1,m2,s1,s2,alpha)
scatterplot3d(x[,1],x[,2],den, highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,angle=120, pch=20)


em2mn- function(y)
{
n-length(y[,1])
p-matrix(0,n,1)
f1-matrix(0,n,1)
f2-matrix(0,n,1)
tau-matrix(0,n,2)
eps-0.0001
mu01-c(0,0)
mu02-c(0,0)
sd01-matrix(0,2,2)
sd02-matrix(0,2,2)
cov1-matrix(0,2,2)
cov2-matrix(0,2,2)
 # 1 inizializzare i valori
alpha0= runif(1,0,1)
for (j in 1:2) {
mu01[j] - runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
mu02[j] - runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
}
 sd01- var(y[1:round(n/2),])
sd02- var(y[round(n/2):n,])
#sd01-s1
#sd02-s2
#prima iterazione

iter-0
 f1-dmvnorm(y, mu01, sd01, log=FALSE)
 f2-dmvnorm(y, mu02, sd02, log=FALSE)
 p=alpha0*f1+(1-alpha0)*f2


scatterplot3d(y[,1], y[,2], p , highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,main=val iniz mistura normali multivariata,
angle=120, pch=20)

#verosimiglianza iniziale

l0-sum(log(p))
l1-l0
alpha-alpha0
mu1-mu01
mu2-mu02
sd1-sd01
sd2-sd02

for (iter in 1:itermax)
{

#passo E
 for (i in 1:n) {
tau[i,1]-(alpha*f1[i])/p[i]
tau[i,2]-((1-alpha)*f2[i])/p[i]
}


#passo M
 alpha= mean(tau[,1])
 mu1=colSums(tau[,1]*y)/sum(tau[,1])
mu2=colSums(tau[,2]*y)/sum(tau[,2])
 ycen1-(y-mu1)
ycen2-(y-mu2)

cov1-matrix(0,2,2)
cov2-matrix(0,2,2)

for (i in 1:n){
cov1-cov1+ (tau[i,1]*(ycen1[i,])%*%t(ycen1[i,]))
cov2-cov2+ (tau[i,2]*(ycen2[i,])%*%t(ycen2[i,]))
}
# w1-sqrt(tau[,1])
# w2-sqrt(tau[,2])
# ywei1-w1*ycen1
# ywei2-w2*ycen2
 sd1-cov1/sum(tau[,1])
sd2-cov2/sum(tau[,2])
f1-dmvnorm(y,mu1,sd1,log=FALSE)
f2-dmvnorm(y,mu2,sd2,log=FALSE)
p-alpha*f1+(1-alpha)*f2

L-sum(log(p))
 cat(iter,L,\t,alpha,\t,mu1,\t,mu2,\n)

if (abs(L1-L)eps) break
L1-L
}

denfin-mixm(x,mu1,mu2,sd1,sd2,alpha)
scatterplot3d(x[,1],x[,2],denfin, highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,main=densità valori finali,angle=120, pch=20)

return(list(alpha0=alpha0,alpha=alpha,mu1=mu1,mu2=mu2,sd1=sd1,sd2=sd2))

}

em2mn(x)

##
end script
##



-- 
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.ri...@gmail.com

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] EM algorithm mixture of multivariate

2009-05-22 Thread daniele riggi
Hi, i would to know, if someone have ever write the code to estimate the
parameter (mixing proportion, mean, a var/cov matrix) of a mixture of two
multivariate normal distribution. I wrote it and it works (it could find
mean and mixing proportion, if I fix the var/cov matrix), while if I fix
anything, it doesn't work. My suspect is that when the algorithm iterates
the var/cov matrix, something wrong happens, but i don't know how solve this
problem.
I enclose my script, if someone would give it a look, I would appreciate so
much.
thank you
daniele

#
#Start Script
#

library(mvtnorm)
libray(scatterplot3d)
library(MASS)
n=100

m1=c(5,-5)
m2=c(-3,3)
s1=matrix(c(2,1,1,3), 2,2)
s2=matrix(c(4,1,1,6), 2,2)
alpha=0.3

c1 - mvrnorm(round(n*alpha),m1,s1)
c2 - mvrnorm(round(n*(1-alpha)),m2,s2)
allval - rbind(c1,c2)
x - allval[sample(n,n),]

mixm-
function(x,m1,m2,s1,s2, alpha)

{
f1-dmvnorm(x, m1, s1, log=FALSE)
 f2-dmvnorm(x, m2, s2, log=FALSE)
 f=alpha*f1+(1-alpha)*f2
f
}



plot(x)

den-mixm(x,m1,m2,s1,s2,alpha)
scatterplot3d(x[,1],x[,2],den, highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,angle=120, pch=20)


em2mn- function(y)
{
n-length(y[,1])
p-matrix(0,n,1)
f1-matrix(0,n,1)
f2-matrix(0,n,1)
tau-matrix(0,n,2)
eps-0.0001
mu01-c(0,0)
mu02-c(0,0)
sd01-matrix(0,2,2)
sd02-matrix(0,2,2)
cov1-matrix(0,2,2)
cov2-matrix(0,2,2)
 # 1 inizializzare i valori
alpha0= runif(1,0,1)
for (j in 1:2) {
mu01[j] - runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
mu02[j] - runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
}
 sd01- var(y[1:round(n/2),])
sd02- var(y[round(n/2):n,])
#sd01-s1
#sd02-s2
#prima iterazione

iter-0
 f1-dmvnorm(y, mu01, sd01, log=FALSE)
 f2-dmvnorm(y, mu02, sd02, log=FALSE)
 p=alpha0*f1+(1-alpha0)*f2


scatterplot3d(y[,1], y[,2], p , highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,main=val iniz mistura normali multivariata,
angle=120, pch=20)

#verosimiglianza iniziale

l0-sum(log(p))
l1-l0
alpha-alpha0
mu1-mu01
mu2-mu02
sd1-sd01
sd2-sd02

for (iter in 1:itermax)
{

#passo E
 for (i in 1:n) {
tau[i,1]-(alpha*f1[i])/p[i]
tau[i,2]-((1-alpha)*f2[i])/p[i]
}


#passo M
 alpha= mean(tau[,1])
 mu1=colSums(tau[,1]*y)/sum(tau[,1])
mu2=colSums(tau[,2]*y)/sum(tau[,2])
 ycen1-(y-mu1)
ycen2-(y-mu2)

cov1-matrix(0,2,2)
cov2-matrix(0,2,2)

for  (i in 1:n){
cov1-cov1+ (tau[i,1]*(ycen1[i,])%*%t(ycen1[i,]))
cov2-cov2+ (tau[i,2]*(ycen2[i,])%*%t(ycen2[i,]))
}
# w1-sqrt(tau[,1])
# w2-sqrt(tau[,2])
# ywei1-w1*ycen1
# ywei2-w2*ycen2
 sd1-cov1/sum(tau[,1])
sd2-cov2/sum(tau[,2])
 f1-dmvnorm(y,mu1,sd1,log=FALSE)
f2-dmvnorm(y,mu2,sd2,log=FALSE)
p-alpha*f1+(1-alpha)*f2

L-sum(log(p))
 cat(iter,L,\t,alpha,\t,mu1,\t,mu2,\n)

if (abs(L1-L)eps) break
L1-L
}

denfin-mixm(x,mu1,mu2,sd1,sd2,alpha)
scatterplot3d(x[,1],x[,2],denfin, highlight.3d=TRUE, col.axis=blue,
  col.grid=lightblue,main=densità valori finali,angle=120, pch=20)

return(list(alpha0=alpha0,alpha=alpha,mu1=mu1,mu2=mu2,sd1=sd1,sd2=sd2))

}

em2mn(x)

##
end script
##

-- 
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.ri...@gmail.com

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] em algorithm mixture of multivariate normals

2009-05-21 Thread daniele riggi
Hi,
I would like to know if it is possible to have a R code to estimate the
parameters of a mixture of bivariate (or multivariate) normals via EM
Algorithm. I tried to write it, but in the estimation of the matrix of
variance and covariance, i have some problems. I generate two bidimensional
vectors both from different distribution with their own vector means and
variance and covariance structure. When I create a unique vector, the
structure of covariance changes, and so the implementation of the EM
algorithm doesn't work.

Maybe someone knows the reason. If I fix the starting initial value of the
covariance matrix and I don't update the estimate of this matrix, the
algorithm works and finds the estimate of the vector means, so I wrote it in
the correct way. However if someone could help me I will be very grateful to
him for kindness.

Best RegardsDaniele

-- 
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.ri...@gmail.com

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] em algorithm mixture of multivariate normals

2009-05-21 Thread Christian Hennig

Look up packages flexmix and mclust!

Christian

On Thu, 21 May 2009, daniele riggi wrote:


Hi,
I would like to know if it is possible to have a R code to estimate the
parameters of a mixture of bivariate (or multivariate) normals via EM
Algorithm. I tried to write it, but in the estimation of the matrix of
variance and covariance, i have some problems. I generate two bidimensional
vectors both from different distribution with their own vector means and
variance and covariance structure. When I create a unique vector, the
structure of covariance changes, and so the implementation of the EM
algorithm doesn't work.

Maybe someone knows the reason. If I fix the starting initial value of the
covariance matrix and I don't update the estimate of this matrix, the
algorithm works and finds the estimate of the vector means, so I wrote it in
the correct way. However if someone could help me I will be very grateful to
him for kindness.

Best RegardsDaniele

--
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.ri...@gmail.com

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.



*** --- ***
Christian Hennig
University College London, Department of Statistical Science
Gower St., London WC1E 6BT, phone +44 207 679 1698
chr...@stats.ucl.ac.uk, www.homepages.ucl.ac.uk/~ucakche

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.