Hello,

I would like to perform a partial least squares discriminant analysis (PLSDA) in 
R.

To do this I use the package mixOmics.

I was able to perform the PLSDA in R. However, I would also like to perform a 
leave-one-out cross-validation in order to assess the performance of my model. 
My supervisor told me that I should focus on the R2/Q2 ratios.

However, when I read the instructions for running the "perf" function 
(mixomics.org/wp-content/uploads/2014/08/Running_perf_function4.pdf) I found no 
test showing the R2/Q2 ratios for a PLSDA.

Following the instructions, I ended up with an estimation of 3 different error 
rates (max.dist / centroids.dist / mahalanobis.dist) (page 9 of the PDF I 
mentioned above).

1. Are these 3 error rates different variations of R2/Q2 ratios?
2. Is there a rule telling me what values my error rates should have in order to 
have a good model performance?
3. Is there a way to calculate R2/Q2 ratios for PLSDA using the mixOmics package?

Thank you


Below I provide a simplified example data set and my code:

DATA:
> dput(dta)
structure(list(Treatment = structure(c(2L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L), .Label = c("C", "CAT"), class = "factor"),
    comp1 = c(0, 0.5677, 0.4486, 0.1772, 0.2145, 0.0302, 0.216,
    0.0938, 0.1143, 0.6414, 0.2461, 0.0498, 0.144, 0.0953, 0.3208,
    0.296, 0.418, 0.2247, 0.1921, 0.3792, 0.1394, 0.3069, 0.1211,
    0.0355, 0.8968, 0.1981, 0.1187, 0.418, 0.4313, 0.0835), comp2 = c(1.8378,
    2.3565, 4.6184, 2.3739, 1.3595, 1.9645, 1.2066, 0.9758, 2.259,
    1.9429, 1.9797, 2.3005, 2.2246, 1.5881, 1.3051, 1.5218, 1.8931,
    1.4476, 1.2672, 1.5634, 1.9313, 1.2859, 3.9039, 2.8956, 3.7026,
    2.1356, 1.4473, 1.8477, 2.1495, 1.2323), comp3 = c(5.6652,
    4.3214, 1.8763, 1.7093, 3.6592, 1.6457, 3.4825, 2.7332, 5.1582,
    2.7374, 5.0283, 4.7604, 2.0357, 4.0205, 3.5946, 4.1626, 2.3342,
    3.5049, 3.1272, 3.328, 3.5106, 3.7209, 1.8475, 5.4776, 2.4554,
    5.1995, 3.9241, 4.5022, 4.1593, 4.3931), comp4 = c(3.7994,
    4.2763, 3.7141, 1.166, 1.8907, 4.6145, 1.8988, 1.459, 3.2,
    3.4403, 3.8283, 2.8549, 4.7747, 2.1849, 1.1687, 2.5519, 4.021,
    1.2343, 1.4335, 1.8305, 4.5704, 0.2238, 3.6566, 4.0569, 2.1626,
    3.2887, 1.4183, 2.1783, 2.6233, 3.2128), comp5 = c(1.0424,
    2.2589, 0, 1.2217, 0, 0, 0, 0, 0, 0, 1.6675, 1.7548, 0, 1.0983,
    1.2258, 1.314, 2.9437, 0, 0.9749, 0.8959, 0, 0.9189, 1.5026,
    0, 1.0831, 2.2251, 0.8419, 1.1912, 2.2912, 0), comp6 = c(4.0781,
    7.2073, 6.0885, 4.9657, 4.0133, 7.6783, 4.2064, 1.6421, 6.6831,
    6.8437, 6.5152, 1.4712, 7.048, 4.9872, 4.4658, 1.3119, 10.2047,
    4.7551, 3.7564, 4.829, 8.5836, 3.508, 6.0251, 5.1122, 2.2058,
    6.8343, 3.9664, 2.005, 6.6678, 2.8081), comp7 = c(0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.9795, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Treatment", "comp1",
"comp2", "comp3", "comp4", "comp5", "comp6", "comp7"), class = "data.frame", 
row.names = c(NA,
-30L))


CODE
library(mixOmics)       # plsda(), perf(), plotIndiv(), plotArrow(), plotVar()
library(MetabolAnalyze) # scaling()

# Read the data. Column 1 holds the class label ("Treatment"); the remaining
# columns (comp1 ... comp7) are the numeric predictors.
dta <- read.csv("test.csv", sep = ";", header = TRUE)
head(dta)

# Drop the "Treatment" column, Pareto-scale the predictors and keep them as a
# matrix. The result is called `dta.matrix` (not `matrix`) so that the name of
# base::matrix() is not shadowed, and it is the object passed to plsda() below.
dta.red <- dta[, -1]
dta.scale <- scaling(dta.red, type = "pareto")
dta.matrix <- as.matrix(dta.scale)

# Class labels ("Treatment") as a factor
dta.treatments <- dta[, 1]
dta.factor <- as.factor(dta.treatments)
dta.factor


# PLSDA

# Performance / leave-one-out cross-validation.
# NOTE(review): this model is fitted with plsda()'s default `scale` setting and
# ncomp = 5, whereas the model that is plotted further down uses scale = FALSE
# and the default ncomp. The error rates reported here therefore describe a
# different model from the one shown in the plots -- confirm which settings are
# intended and use the same ones in both calls.
res.plsda2 <- plsda(dta.matrix, dta.factor, ncomp = 5)
tune.plsda2 <- perf(res.plsda2, dist = "all", validation = "loo",
                    progressBar = FALSE)
tune.plsda2$error.rate

# Model used for the plots (no additional scaling on top of the Pareto scaling)
dta.plsda2 <- plsda(dta.matrix, dta.factor, scale = FALSE, mode = "classic")
dta.plsda2

# Sample plot, arrow plot and correlation circle for the fitted model
plotIndiv(dta.plsda2, ind.names = dta.factor, ellipse = TRUE, legend = TRUE)
plotArrow(dta.plsda2, ind.names = dta.factor, legend = TRUE)
plotVar(dta.plsda2, cex = 2)

# NOTE(review): the script originally ended with
#   plot(dta.plsda, typeVc = "x-score", parAsColFcVn = dta.factor,
#        parEllipsesL = TRUE)
# `dta.plsda` is never created in this script (only `dta.plsda2` is), so that
# call stops with "object 'dta.plsda' not found". Its arguments (typeVc,
# parAsColFcVn, parEllipsesL) presumably belong to the plot method of the
# ropls package rather than to mixOmics -- confirm. The score plot with
# confidence ellipses is already produced by plotIndiv() above.


Eisenring Michael, Dr.

Federal Department of Economic Affairs, Education and Research
EAER
Agroecology and Environment
Biosafety

Reckenholzstrasse 191, CH-8046 Zürich
Tel. +41 58 468 7181
Fax +41 58 468 7201
michael.eisenr...@agroscope.admin.ch<mailto:michael.eisenr...@agroscope.admin.ch>
www.agroscope.ch<http://www.agroscope.ch/>


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to