Dear R-users,
 
I'm having a small problem while bootstrapping data.
What I would like to do is resample the data and calculate a function on 
the resamples, so I can estimate a measure of reproducibility for the data.
 
The function I wrote works fine, even while bootstrapping.
The only problem is the bootstrap resampling itself.
 
The dataset consists of 10 trials, each divided into 3 groups: high (3), 
medium (2) and low (1).
A bootstrap sample (trial) should always consist of 5 observations taken from 
each group's population, so that it is representative.
 
example:
original data:
trial 1 : group(1) = (0,0,1,0,0);group(2) = (0,1,1,0,1);group(3) = 
(1,1,1,1,1)
...
bootstrapped data:
trial 1 : group(1) = (0,0,0,0,1);group(2) = (1,1,0,0,1);group(3) = 
(1,0,1,1,1)
 
NOT
bootstrapped data:
trial 1 : group(1) = (0,0,0,0,1,1,0);group(2) = (1,0,1);group(3) = 
(1,0,1,1,1,1,1,0,1,1)
 
Now, I am familiar with how to use the function "bootstrap" (pkg "bootstrap"), 
but I read about a function called "boot" (pkg "boot"); however, I can't seem 
to master it.
The explanation (help('boot')) isn't making me any wiser.
I know I can always split the data up (which is what I am doing now), but I was 
wondering whether this would have an effect on the bootstrap; maybe it is 
better to keep all the groups together?
 
 
Here is a (this time WORKING) code example of what I did.
## proc
## generate data
# Test data: 10 trials of 15 binary "protection" outcomes each (150 rows).
# Only seven distinct 15-value patterns occur; the index vector after the list
# says which pattern each of the 10 trials uses.
# NOTE(review): the group labels cycle 1, 2, 3 down the rows instead of running
# in blocks of 5 within a trial -- confirm this matches how the outcomes were
# recorded.
datas <- data.frame(
  "protection" = unlist(list(
    c(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1),
    c(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1),
    c(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1),
    c(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    c(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1),
    c(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1),
    c(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1)
  )[c(1, 1, 2, 3, 3, 4, 5, 5, 6, 7)]),
  "group" = rep(1:3, times = 50),
  # as.numeric() keeps the trial column a double, as in the rep(1, 15), ... form
  "trial" = rep(as.numeric(1:10), each = 15)
)
## describe Function
# Summarise binary outcomes that are cross-classified by trial and group.
#
# Arguments (three parallel vectors of equal length):
#   dataset1   outcomes; only values equal to 1 or 0 are counted.
#   trialdata  trial label of each observation.
#   groupdata  group label of each observation.
#
# Value: a list with
#   all         data frame with one row per trial x group cell, row names
#               "<trial>_<group>", and columns
#                 Trial, Group  the cell's labels,
#                 n0, n1, n     number of 1s, number of 0s, and their sum,
#                 p0, p1        n0 / n and n1 / n,
#                 Vacc          p0^2 + p1^2.
#   N           number of distinct groups.
#   P0, P1      per group: sum of the cell p0 (p1) values divided by the
#               number of distinct trials.
#   Vcon        100 * mean over groups of P0^2 + P1^2.
#   Vacc.total  100 * mean over groups of the per-group mean Vacc.
#
# A cell that contains no 0/1 observations has n = 0, so its p0, p1 and Vacc
# are NaN (0 / 0), and so is every summary averaged from them. This can happen
# when the function is applied to an unstratified resample.
# NOTE(review): written for atomic (numeric/character) labels; behaviour with
# factor labels has not been checked.
Vacc.Vcon <- function(dataset1, trialdata, groupdata) {
  stopifnot(
    length(dataset1) > 0,
    length(trialdata) == length(dataset1),
    length(groupdata) == length(dataset1)
  )

  groups <- unique(groupdata)
  trials <- unique(trialdata)
  Tr <- length(trials)
  G <- length(groups)

  # One cell per (trial, group) pair: trials cycle fastest, groups slowest.
  cell.trial <- rep(trials, G)
  cell.group <- rep(as.vector(groups), each = Tr)

  # Count the 1s and 0s of every cell in a single pass (row 1: 1s, row 2: 0s).
  counts <- vapply(
    seq_len(G * Tr),
    function(i) {
      in.cell <- trialdata == cell.trial[i] & groupdata == cell.group[i]
      c(
        as.numeric(sum(dataset1 == 1 & in.cell)),
        as.numeric(sum(dataset1 == 0 & in.cell))
      )
    },
    numeric(2)
  )
  ones <- counts[1, ]
  zeros <- counts[2, ]
  total <- ones + zeros

  VACC <- data.frame(
    Trial = cell.trial,
    Group = cell.group,
    Vacc = (ones / total)^2 + (zeros / total)^2,
    p0 = ones / total,
    p1 = zeros / total,
    n0 = ones,
    n1 = zeros,
    n = total,
    row.names = paste(cell.trial, cell.group, sep = "_")
  )

  # Every group has exactly Tr cells, so this is the per-group average over
  # trials.
  Pcalc <- function(x) {
    (1 / Tr) * sum(x)
  }
  P0 <- tapply(VACC$p0, VACC$Group, Pcalc)
  P1 <- tapply(VACC$p1, VACC$Group, Pcalc)
  Vcon <- mean(P0^2 + P1^2)

  Vacc.total <- mean(tapply(VACC$Vacc, VACC$Group, mean))

  list(
    "all" = VACC,
    "N" = G,
    "P0" = P0,
    "P1" = P1,
    "Vcon" = Vcon * 100,
    "Vacc.total" = Vacc.total * 100
  )
}
## end describe Function 
# Example of how the function works: outcome, trial and group columns of datas.
Vacc.Vcon(datas[, 1], datas[, 3], datas[, 2])

## data needs to be in matrix form for bootstrap function
# Columns: 1 = protection, 2 = group, 3 = trial.
xdata <- unname(cbind(datas$protection, datas$group, datas$trial))

## function for bootstrap
# x: row indices of one resample; xdata: the three-column matrix built above.
# Returns the full Vacc.Vcon() result list for the resampled rows.
vacc.boot <- function(x, xdata) {
  Vacc.Vcon(xdata[x, 1], xdata[x, 3], xdata[x, 2])
}

# bootstrap() comes from the "bootstrap" package, so load it before the call.
library(bootstrap)
bootk <- 10  # number of bootstrap replicates
# Row indices are resampled over the whole data set at once, i.e. NOT within
# trial x group cells, so cell sizes vary between replicates.
results <- bootstrap(seq_len(nrow(xdata)), bootk, vacc.boot, xdata)
 
# Collect the bootstrap replicates and average them.
# The earlier loop overwrote its summaries on every pass, so "Booted.means"
# only described the last replicate; here every replicate contributes.

# One Vacc.Vcon() result list per replicate.
taccs <- lapply(seq_len(bootk), function(i) results$thetastar[[i]])

# Per-replicate totals (Vacc.Vcon() already reports them multiplied by 100).
Vaccs <- vapply(taccs, function(r) r$Vacc.total, numeric(1))
Vcons <- vapply(taccs, function(r) r$Vcon, numeric(1))

# Pool the per-cell Vacc values of all replicates and average them by cell
# label ("<trial>_<group>").
cell.vacc <- unlist(lapply(taccs, function(r) r$all$Vacc))
cell.id <- unlist(lapply(taccs, function(r) rownames(r$all)))

# na.rm = TRUE: a replicate in which some cell received no observations yields
# NaN for that cell and for its totals; such values are left out of the means.
G.Vacc <- round(
  tapply(cell.vacc, cell.id, mean, na.rm = TRUE) * 100,
  digits = 3
)

tacc <- list(
  "data" = taccs,
  "Booted.means" = list(
    "Vacc.grouped" = G.Vacc,
    "Vacc.Total" = round(mean(Vaccs, na.rm = TRUE), digits = 3),
    "Vcon.Total" = round(mean(Vcons, na.rm = TRUE), digits = 3)
  )
)
# A for loop always evaluates to NULL, so assigning the loop itself stored
# nothing; keep the name but point it at the collected result.
boot.amp.vac2 <- tacc

# Spell the element name out in full instead of relying on partial matching.
Rep.table <- tacc$Booted.means
Rep.table
## problem area => n should always be 5 in each group, as in the original data
# values calculated from the original data; last column: n = 5
Vacc.Vcon(datas[, 1], datas[, 3], datas[, 2])$all[1:5, ]
# values calculated from the bootstrapped data; n is not 5!
tacc$data[[2]]$all[1:5, ]
tacc$data[[bootk]]$all[1:5, ]  # last replicate
## stratified bootstrap with boot(): resample within each trial x group cell
# Why the earlier attempts failed:
#  * with stype = "f" boot() hands the statistic a vector of frequencies, not
#    row indices; the index-based form needs the default stype = "i";
#  * boot() calls statistic(data, indices) and needs a numeric vector back,
#    whereas Vacc.Vcon() takes three vectors and returns a list, and
#    vacc.boot() expects its arguments in the opposite order;
#  * strata = xdata[, 3] stratifies by trial only; to keep 5 observations in
#    every group of every trial the strata must be the trial x group cells.
library(boot)

# statistic for boot(): data is the three-column matrix (protection, group,
# trial), indices the resampled row numbers; returns the two overall measures.
vacc.stat <- function(data, indices) {
  res <- Vacc.Vcon(data[indices, 1], data[indices, 3], data[indices, 2])
  c(Vcon = res$Vcon, Vacc.total = res$Vacc.total)
}

# One stratum per trial x group combination; boot() resamples with replacement
# inside each stratum, so every cell keeps its original size.
cell <- interaction(xdata[, 3], xdata[, 2], drop = TRUE)
boot(xdata, vacc.stat, R = bootk, strata = cell)
 
## end proc
 
Thanks in advance,
Tom.


Disclaimer: click here
        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to