Thanks Peter.
Perhaps you can repeat this comparison after the next release (this will be very soon!) and split the aroma.affymetrix comparison into: - aroma.affy.oligo -- with RmaPlm(csN,flavor="oligo") - aroma.affy.affyPLM -- with flavor="affyPLM" (as you've done already) Perhaps the best way to look at all of this at once is with a single pairs() plot. Cheers, Mark On 05/12/2008, at 9:01 AM, [EMAIL PROTECTED] wrote: > Dear Mark and Henrik, > > I wanted to confirm that your summary was correct regarding the > different flavors for probeset summarization. I downloaded the MAQC > HG_U133_Plus_2 array data from the MAQC website: > > http://edkb.fda.gov/MAQC/MainStudy/upload/MAQC_AFX_123456_120CELs.zip > > I then ran the analysis of the arrays from site 1, using just the A > and B samples, with aroma.affymetrix, affy, affyPLM and oligo (see > below for the complete code I used to do this). Basically the > aroma.affymetrix and affyPLM data was essentially identical. The > affy and oligo data was also essentially identical. As observed with > the Gene ST array data there were significant differences between > aroma.affymetrix and affy or oligo. Plots are attached. > > The Gene ST arrays do not have any MM probes - as we are using RMA > rather than GCRMA this should not have affected anything. > > Thanks, > > Peter > > #OLIGO ANALYSIS > > library(pd.hg.u133.plus.2) > library(pdInfoBuilder) > fn <- dir("G:\\BGC_EXPERIMENTS\\MAQC_Data\\HG- > U133_Plus_2","CEL",full=T)[1:10] > raw.oligo<-read.celfiles(filenames=fn,pkgname="pd.hg.u133.plus.2") > eset.oligo<-rma(raw.oligo) > data.oligo<-exprs(eset.oligo) > > > #AFFY ANALYSIS > > library(affy) > fn <- dir("G:\\BGC_EXPERIMENTS\\MAQC_Data\\HG- > U133_Plus_2","CEL",full=T)[1:10] > raw.affy <- ReadAffy(filenames=fn) > eset.affy <- rma(raw.affy) > data.affy <- exprs(eset.affy) > > > #AFFY PLM ANALYSIS > > library(affyPLM) > fn <- dir("G:\\BGC_EXPERIMENTS\\MAQC_Data\\HG- > U133_Plus_2","CEL",full=T)[1:10] > raw.affyPLM <- ReadAffy(filenames=fn) > fit.affyPLM <- fitPLM(raw.affyPLM, verbos=9) > data.affyPLM <- coefs(fit.affyPLM) > #Analysis of MAQC on Human U113 Plus 2 > > setwd("G:\\BGC_EXPERIMENTS\\MAQC_Analysis") > library(aroma.affymetrix) > prefixName <- "MAQC_Data" > chip1 <- "HG-U133_Plus_2" > cdf <- AffymetrixCdfFile$fromChipType("HG-U133_Plus_2") > cs <- AffymetrixCelSet$byName(prefixName, cdf=cdf, chipType=chip1) > pattern <- "AFX_1_[AB]" > idxs <- grep(pattern, getNames(cs)) > cs <- extract(cs, idxs) > bc <- RmaBackgroundCorrection(cs) > csBC <- process(bc) > qn <- QuantileNormalization(csBC, typesToUpdate="pm") > csN <- process(qn) > plm <- RmaPlm(csN, flavor="affyPLM") #flavor="oligo", must > library(oligo) > fit(plm) > ces <- getChipEffectSet(plm) > getExprs <- function(ces, ...) { > cdf <- getCdf(ces) > theta <- extractMatrix(ces, ..., returnUgcMap=TRUE) > ugcMap <- attr(theta, "unitGroupCellMap") > un<-getUnitNames(cdf, ugcMap[,"unit"]) > rownames(theta) <- un > log2(theta) > } > data.aroma <- getExprs(ces) > > > #COMPARING THE DATASETS > > > dim(data.affy) > [1] 54675 10 > > dim(data.affyPLM) > [1] 54675 10 > > dim(data.oligo) > [1] 54613 10 > > dim(data.aroma) > [1] 54675 10 > > #Unlike in the Gene ST analysis the packages do not return the > probes in the same order so be careful to reorder them. Also not > that Oligo removes the control probes (AFFX*). > > sum(rownames(data.affyPLM)==rownames(data.affy)) > # [1] 54675 > o <- match(rownames(data.oligo), rownames(data.affy)) > data.affy <- data.affy[o,] > data.affyPLM <- data.affyPLM[o,] > sum(rownames(data.affy)==rownames(data.oligo)) > # [1] 54613 > o <- match(rownames(data.affy), rownames(data.aroma)) > data.aroma <- data.aroma[o,] > sum(rownames(data.affy)==rownames(data.aroma)) > # [1] 54613 > > e<- (data.aroma - data.affy) > mean(as.vector(e^2), na.rm=T) > # [1] 0.2119428 > sd(as.vector(e^2), na.rm=T) > # [1] 0.3704433 > > e <- (data.aroma - data.oligo) > mean(as.vector(e^2), na.rm=T) > # [1] 0.2104522 > sd(as.vector(e^2), na.rm=T) > # [1] 0.3688539 > > e<- (data.aroma - data.affyPLM) > mean(as.vector(e^2), na.rm=T) > # [1] 1.160118e-05 > sd(as.vector(e^2), na.rm=T) > # [1] 2.125207e-05 > > e<- (data.affy - data.oligo) > mean(as.vector(e^2), na.rm=T) > # [1] 1.345037e-05 > sd(as.vector(e^2), na.rm=T) > # [1] 4.111692e-05 > > plot(data.aroma[,1],data.affyPLM[,1],main="Comparison of Aroma and > AffyPLM Data", > col="red",cex=0.5) > abline(0,1, lwd=2) > savePlot(file="HGU133Plus2_Aroma_vs_AffyPLM", type="png") > > plot(data.affy[,1],data.oligo[,1],main="Comparison of Affy and Oligo > Data", > col="red",cex=0.5) > abline(0,1, lwd=2) > savePlot(file="HGU133Plus2_Affy_vs_Oligo", type="png") > > plot(data.aroma[,1],data.affy[,1],main="Comparison of Aroma and Affy > Data", > col="red",cex=0.5) > abline(0,1, lwd=2) > savePlot(file="HGU133Plus2_Aroma_vs_Affy", type="png") > > plot(data.aroma[,1],data.oligo[,1],main="Comparison of Aroma and > Oligo Data", > col="red",cex=0.5) > abline(0,1, lwd=2) > savePlot(file="HGU133Plus2_Aroma_vs_Oligo", type="png") > > plot(data.affy[,1],data.affyPLM[,1],main="Comparison of Affy and > AffyPLM Data", > col="red",cex=0.5) > abline(0,1, lwd=2) > savePlot(file="HGU133Plus2_Affy_vs_AffyPLM", type="png") > > > # FYI CREATING HG_U133_PLUS_2 Oligo Annotation LIbrary > > setwd("P:\\ANNOTATION\\AffyAnnotation\\Human\\HG-U133_Plus_2") > library(pdInfoBuilder) > cdfFile <- "HG-U133_Plus_2.cdf" > csvAnnoFile <- "HG-U133_Plus_2.na27.annot.csv" > tabSeqFile <- "HG-U133_Plus_2.probe_tab" > pkg <- new("AffyExpressionPDInfoPkgSeed", author="Peter White", > email="[EMAIL PROTECTED]", version="0.2.0", > genomebuild="UCSC hg18, June 2006", chipName="hgu133plus2", > manufacturer="affymetrix", biocViews="AnnotationData", > cdfFile=cdfFile, csvAnnoFile=csvAnnoFile, tabSeqFile=tabSeqFile) > makePdInfoPackage(pkg, destDir=".") > > > On Thu, Dec 4, 2008 at 4:38 PM, pwhiteusa <[EMAIL PROTECTED]> wrote: > > > > > On Dec 3, 8:45 pm, Mark Robinson <[EMAIL PROTECTED]> wrote: > > On 04/12/2008, at 10:17 AM, Henrik Bengtsson wrote: > > > > > So, it all has to do with *how* the log-additive probe-level > model is > > > *fitted*, correct? > > > > Correct. Identical linear model, different procedure for fitting. > > > > (as a bit of an aside ... I think of these things in terms of > > influence functions -- median polish will have a different IF than > the > > defaults in affyPLM's robust fit) > > > > M. > > > > > Thus, the model is the same but the algorithms > > > differ. That gives us some sense of how much variance we get from > > > using different algorithms regardless of model. Simulation > studies > > > (under the model) could then show if for instance one of the > > > algorithms is more biased than others. > > > > > Thanks for fixing the flavor="oligo". It will be part of the next > > > release. > > > > > Cheers > > > > > Henrik > > > > > On Wed, Dec 3, 2008 at 1:36 PM, Mark Robinson > > > <[EMAIL PROTECTED]> wrote: > > > > >> Hi all. > > > > >> First of all, thanks Peter for 1) doing this testing and 2) for > > >> spelling everything out. I expect to refer people to this > thread in > > >> the future, so thanks for that. > > > > >> Just wanted to add 3 more tidbits of hopefully useful > information. > > > > >> 1. I dug a bit into why flavor="oligo" doesn't work within > > >> aroma.affymetrix. It turns out it was a simple fix. Since I > don't > > >> use it regularly (it doesn't give probe affinities!) and the > > >> underlying 'oligo' functions had changed, it stopped working. > Its > > >> corrected now. I've checked in the fix, so flavor='oligo' will > be > > >> available in the next release. In my tests, it appears VERY > close to > > >> 'affy' ... and since its based on 'oligo' code, it should be VERY > > >> VERY > > >> similar. > > > > >> ... > > >> plm1 <- RmaPlm(csN,flavor="oligo") > > >> fit(plm1,verbose=verbose) > > >> ces <- getChipEffectSet(plm1) > > >> data.aroma.oligo <- getExprs(ces) > > >> ... > > > > >>> mean( (data.affy[,1]-data.aroma.oligo[,1])^2 ) > > >> [1] 0.0003193267 > > > > >> 2. I dug a bit into the unsupported CDF and the 'platformDesign' > > >> objects from oligo and from what I can tell, the probes used in > the > > >> 33252 units (I'm looking at Human) within aroma.affymetrix are > > >> identical to the probes used within oligo (as built with > > >> pdInfoBuilder) ... not a single probe no accounted for. In > case you > > >> haven't dug into pdInfoBuilder before and the SQLite db behind, > here > > >> are some commands you may find useful ... > > > > >> ------- > > >> library(pd.hugene.1.0.st.v1) > > >> library(pdInfoBuilder) > > >> fn <- dir("rawData/tissues/HuGene-1_0-st-v1","CEL",full=TRUE) > [1:3] > > >> x <- read.celfiles(filenames=fn,pkgname="pd.hugene.1.0.st.v1") > > >> pd <-getPlatformDesign(x) > > >> ff <- dbGetQuery(db(pd), "select * from pmfeature") > > > > >> # three 3 lines speed up the splitting ... > > >> ffs <- split(ff, substr(ff$fsetid,1,4)) > > >> ffs <- unlist( lapply(ffs, FUN=function(u) split(u,u$fsetid)), > > >> recursive=FALSE) > > >> names(ffs) <- substr(names(ffs), 6,nchar(names(ffs))) > > > > >> cs <- AffymetrixCelSet$fromName(name, chipType=chip) > > >> cdf <- getCdf(cs) > > >> cdfCells <- readCdfUnits(getPathname(cdf), units=NULL, > readXY=FALSE, > > >> readBases=FALSE, readExpos=FALSE, > > >> readType=FALSE, > > >> readDirection=FALSE,stratifyBy=c("pm"), readIndices=TRUE, > verbose=0) > > > > >> un <- unique(ff$fsetid) > > >> ids <- intersect(un,names(cdfCells)) > > > > >> mf <- match(ids,names(ffs)) > > >> mc <- match(ids,names(cdfCells)) > > > > >> matches <- matrix(NA,nr=length(ids),nc=3) > > >> rownames(matches) <- ids > > >> colnames(matches) <- c("pdInfoBuilder","unsupportedCDF","union") > > > > >> for(i in 1:nrow(matches)) { > > >> a <- cdfCells[[ ids[i] ]]$groups[[1]]$indices > > >> b <- ffs[[ ids[i] ]]$fid > > >> matches[i,1] <- length(b) > > >> matches[i,2] <- length(a) > > >> matches[i,3] <- length(union(a,b)) > > >> cat(ids[i],"\n") > > >> } > > >> ------- > > > > >> ... this gives .. > > >>> matches[1:5,] > > >> pdInfoBuilder unsupportedCDF union > > >> 7981326 27 27 27 > > >> 8095005 42 42 42 > > >> 8100310 10 10 10 > > >> 7948117 15 15 15 > > >> 8155877 25 25 25 > > >>> table(matches[,3]-matches[,1]) > > >> 0 > > >> 33252 > > >>> table(matches[,3]-matches[,2]) > > >> 0 > > >> 33252 > > > > >> 3. This doesn't address the problem of the missing probesets. > I'm > > >> happy to go and collect these if people want them, but based on > the > > >> reply from Affymetrix (thanks to Mark Cowley for the leg work > here), > > >> they are probably 'low-coverage transcript clusters' that can be > > >> 'safely ignored'. See: > > > > >>http://article.gmane.org/gmane.science.biology.informatics.conductor/ > ... > > > > >> SUMMARY: > > > > >> [aroma.affymetrix flavor='oligo'] = rma in affy, rma in oligo > > > > >> [aroma.affymetrix flavor='affyPLM'] = fitPLM in affyPLM > > > > >> ... where '=' means 'for all intents and purposes equivalent', > not > > >> strictly equal. > > > > >> Cheers, > > >> Mark > > > > >> On 04/12/2008, at 7:43 AM, Henrik Bengtsson wrote: > > > > >>> Hi, > > > > >>> thanks for sharing all this. > > > > >>> On Tue, Dec 2, 2008 at 11:54 AM, pwhiteusa <[EMAIL PROTECTED]> > > >>> wrote: > > > > >>>> Hi All, > > > > >>>> Here is the exact code I used to analyze Gene ST data for an > > >>>> experiment performed with the MoGene-1_0-st-v1 array. > > > > >>>> AROMA.AFFYMETRIX > > > > >>>> library(aroma.affymetrix) > > >>>> cdf <- AffymetrixCdfFile$fromChipType("MoGene-1_0-st- > v1",tags="r3") > > >>>> prefixName <- "Pierson" > > >>>> cs <- AffymetrixCelSet$byName(prefixName, cdf=cdf) > > >>>> bc <- RmaBackgroundCorrection(cs) > > >>>> csBC <- process(bc) > > >>>> qn <- QuantileNormalization(csBC, typesToUpdate="pm") > > >>>> csN <- process(qn) > > >>>> plm <- RmaPlm(csN, flavor="affyPLM") #flavor="oligo", must > library > > >>>> (oligo) > > >>>> fit(plm) > > >>>> ces <- getChipEffectSet(plm) > > >>>> getExprs <- function(ces, ...) { > > >>>> cdf <- getCdf(ces) > > >>>> theta <- extractMatrix(ces, ..., returnUgcMap=TRUE) > > >>>> ugcMap <- attr(theta, "unitGroupCellMap") > > >>>> un<-getUnitNames(cdf, ugcMap[,"unit"]) > > >>>> rownames(theta) <- un > > >>>> log2(theta) > > >>>> } > > >>>> data.aroma <- getExprs(ces) > > > > >>> I think that getExprs() call can be replaced by: > > > > >>> data.aroma <- extractDataFrame(ces, addNames=TRUE) > > > > >>> The difference is that the unit names will be in a separate > column > > >>> and > > >>> not as row names. You will also get group names and more, but > those > > >>> you can drop if you want to. > > > > >>> Mark, is it correct that we can "deprecate" the suggestion to > use > > >>> getExprs()? Btw, is this documented somewhere online, or is > this > > >>> knowledge only from the mailing list? > > > > >>>> Easy! Now to get the same data using the Affy packages: > > > > >>>> BIOCONDUCTOR AFFY > > > > >>>> You first need to create or download your mogene10stv1cdf > library > > >>>> from > > >>>> the Affy unsupported CDF file (https://stat.ethz.ch/pipermail/bioc- > > >>>> devel/2007-October/001403.html has some detail on how to do > this). > > >>>> However, as Mark Robinson pointed out there are potential > issues > > >>>> with > > >>>> using the Affy unsupported CDF files. See the following for > some > > >>>> details: > > > > >>>>https://stat.ethz.ch/pipermail/bioconductor/2007-November/020188.html > > > > >>>> library(affy) > > >>>> AffyRaw <- ReadAffy() > > >>>> AffyEset <- rma(AffyRaw) > > >>>> data.affy <- exprs(AffyEset) > > > > >>>> BIOCONDUCTOR OLIGO > > > > >>>> Download all the required Affy annotation files to your Mouse > > >>>> Gene v1 > > >>>> ST array directory: > > > > >>>>http://www.affymetrix.com/support/technical/byproduct.affx?product=mo > ... > > > > >>>> setwd("P:\\ANNOTATION\\AffyAnnotation\\Mouse\\MoGene-1_0-st- > v1") > > >>>> library(pdInfoBuilder) > > >>>> pgfFile <- "MoGene-1_0-st-v1.r3.pgf" > > >>>> clfFile <- "MoGene-1_0-st-v1.r3.clf" > > >>>> transFile <- "MoGene-1_0-st-v1.na26.mm9.transcript.csv" > > >>>> probeFile <- "MoGene-1_0-st-v1.probe.tab" > > >>>> pkg <- new("AffyGenePDInfoPkgSeed", author="Peter White", > > >>>> email="[EMAIL PROTECTED]", version="0.1.3", > > >>>> genomebuild="UCSC mm9, July 2007", chipName="MoGene10stv1", > > >>>> manufacturer="affymetrix", biocViews="AnnotationData", > > >>>> pgfFile=pgfFile, clfFile=clfFile, transFile=transFile, > > >>>> probeFile=probeFile) > > >>>> makePdInfoPackage(pkg, destDir=".") > > > > >>>> #This takes a little while to make the Package. Once created > you > > >>>> will > > >>>> need to install the package from the Windows DOS prompt > (navigate > > >>>> to > > >>>> the annotation directory with the newly created pd package to > be > > >>>> installed): > > > > >>>> R CMD INSTALL pd.mogene.1.0.st.v1\ > > > > >>>> Note for this to work you need RTools and you Path variable > set up > > >>>> correctly as described at: > > > > >>>>http://cran.r-project.org/doc/manuals/R-admin.html#The-Windows-toolset) > > > > >>>> Now return to R, set the working directory to your CEL file > > >>>> directory: > > > > >>>> library(pd.mogene.1.0.st.v1) > > >>>> library(oligo) > > >>>> OligoRaw<-read.celfiles(filenames=list.celfiles()) > > >>>> OligoEset<-rma(OligoRaw) > > >>>> data.oligo<-exprs(OligoEset) > > > > >>>> COMPARING THE TWO DATASETS > > > > >>>> Here is what I did to compare the data generate by affy, > oligo and > > >>>> aroma.affymetrix: > > > > >>>> dim(data.aroma) > > >>>> [1] 35512 16 > > >>>> dim(data.affy) > > >>>> [1] 35512 16 > > >>>> length(grep(TRUE, rownames(data.affy)==rownames(data.aroma))) > > >>>> [1] 35512 > > > > >>> FYI, sum(rownames(data.affy)==rownames(data.aroma)) gives you > the > > >>> same. Replacing sum() with summary() will also work. > > > > >>>> The output from both the affy rma and aroma.affymetrix methods > > >>>> retains > > >>>> the same order of probes and cel files so the two files can be > > >>>> compared directly. > > > > >>> That is probably because they work of the same CDF, but you > should > > >>> never rely on this/assume that this is always the case. If > you do, > > >>> you should at least verify that the unit names (and group names) > > >>> match. > > > > >>>> However, > > > > >>>> dim(data.oligo) > > >>>> [1] 35557 16 > > > > ... > > > > read more ยป > > > > > > ------------------------------ Mark Robinson Epigenetics Laboratory, Garvan Bioinformatics Division, WEHI e: [EMAIL PROTECTED] e: [EMAIL PROTECTED] p: +61 (0)3 9345 2628 f: +61 (0)3 9347 0852 ------------------------------ --~--~---------~--~----~------------~-------~--~----~ When reporting problems on aroma.affymetrix, make sure 1) to run the latest version of the package, 2) to report the output of sessionInfo() and traceback(), and 3) to post a complete code example. You received this message because you are subscribed to the Google Groups "aroma.affymetrix" group. To post to this group, send email to aroma-affymetrix@googlegroups.com To unsubscribe from this group, send email to [EMAIL PROTECTED] For more options, visit this group at http://groups.google.com/group/aroma-affymetrix?hl=en -~----------~----~----~----~------~----~------~--~---