Re: [R] _: how to replace values in x by means in subgroups created in ...(not loops)
Re:_: how to replace values in x by means in subgroups created in ...(not loops) Thanks, below some code and reply: #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_# #--- # my slow function with loops: # replace_x_by_locallyMean_x_4_0s_in_y.f(x,y) #--- #--- # replace_x_by_locallyMean_x_4_0s_in_y.f # Arguments: function takes vectors x, y #these vector have the same length: length(x)==length(y) # function replaces in x # values that have in indexes in y # at least one zero in the same position in y # by locally compted mean of values in x #from all local x that also have continous 000 in their positions close to this x # in y vector #x # 0 1 2 3 4 5 6 7 8 9 #y # 0 0 0 1 0 0 0 1 1 1 # Value expected: #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0 #--- # author: me # replace_x_by_locallyMean_x_4_0s_in_y.f(x,y) #--- #initialisation Dff-NULL index_Dff.start_0-NULL index_Dff.stop_0-NULL x-NULL y-NULL replace_x_by_locallyMean_x_4_0s_in_y.f-NULL replace_x_by_locallyMean_x_4_0s_in_y.f-function(x=Df$x, y=Df$y){ Dff-as.data.frame(cbind(as.vector(x),as.vector(y))) colnames(Dff)-c('x','y') index_Dff.start_0-which( ( (Dff$y==0) rev(rev(c(0,Dff$y))[-1]) !=0) ) if (Dff$y[1]==0) { index_Dff.start_0=c(1,index_Dff.start_0) } index_Dff.stop_0-which( ( (Dff$y==0) c(Dff$y[-1],rev(Dff$y)[1]) !=0) ) if (rev(Dff$y)[1]==0) { index_Dff.stop_0=c(index_Dff.stop_0,length(x)) } Dff$x_m-Dff$x for (i in ( 1:length(index_Dff.start_0) ) ) { Dff$x_m[index_Dff.start_0[i]:index_Dff.stop_0[i]]-mean( Dff$x[index_Dff.start_0[i]:index_Dff.stop_0[i]],na.rm=T) } for (i in ( 1:length(index_Dff.start_0) ) ) { Dff$x_m[index_Dff.start_0[i]:index_Dff.stop_0[i]]-mean( Dff$x[index_Dff.start_0[i]:index_Dff.stop_0[i]],na.rm=T) } Dff$x_m } #replace_x_by_locallyMean_x_4_0s_in_y.f() #--- # replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry # Arguments: function takes vectors x, y #these vector have the same length: length(x)==length(y) # function replaces in x # values that have in indexes in y # at least one zero in the same position in y # by locally compted mean of values in x #from all local x that also have continous 000 in their positions close to this x # in y vector #x # 0 1 2 3 4 5 6 7 8 9 #y # 0 0 0 1 0 0 0 1 1 1 # Value expected: #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0 #--- # author: Thierry # replace_x_by_Avgx_for000iny_Thierry.f(x,y) #--- #initialisation dataset - NULL tmp - NULL result - NULL replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry-NULL library(plyr) replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry-function(x=x, y=y){ dataset - data.frame(x = x, y = y) dataset$Group - cumsum(c(0, diff(!is.na(dataset$y) dataset$y == 0)) == 1) #library(plyr) tmp - ddply(subset(dataset, y == 0), .(Group), function(z){c(Mean = mean(z$x, na.rm = TRUE))}) result - merge(dataset, tmp) result$Mean[is.na(result$y) | result$y != 0] - result$x[is.na(result$y) | result$y != 0] #result result$Mean } #replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x,y) # Let's test it for longer vector x-1:(50*1000) y-sample(c(0,1), length(x), replace = TRUE) # for Thierry we must start by 1 if it start by 0, y-c(0,1,sample(c(0,1), length(x)-2, replace = TRUE) ) paste(length(x),length(y)) # let's comapre times: # slow solution with loops, my function x_by_locallyMean_x_4_0s_in_y__loop() # system.time( # x_by_locallyMean_x_4_0s_in_y__loop- #replace_x_by_locallyMean_x_4_0s_in_y.f(x,y) # ) # _for_ x-1:(50*1000) # # user system elapsed # 760.340.86 769.16 #--- # time of Thierry's function : system.time( x_by_locallyMean_x_4_0s_in_y__Thierry- replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x,y) ) #Df.x_m_Thierry (10x faster) then my loop # _for_ x-1:50 # user system elapsed # 11.610.00 11.68 # good news: # 769.16 sec / 11.68 sec = Thierry's solution is 65 (x) times faster than loops, # great! length(x_by_locallyMean_x_4_0s_in_y__Thierry) cbind(x,y,x_by_locallyMean_x_4_0s_in_y__Thierry) [1:15,] paste(' good, faster than my loops ') #1. Thierry, your function is fast, ()
[R] how to replace values in x by means in subgroups created in ... (not loops)
# Dear all, # how to replace values in x by means in subgroups created in ... # replace only these values where y=0 in continous sequence # replace by mean calculated locally for each subgroup created by # continous sequence of 0,0,0 in parallel y vector, i.e. # where there is continous sequence of 0 in data frame vector y #but we do not replace values in x[i], if y[i]!=0 # we do not want use loops we do not use apply (not very fast) x -c(0 ,1,2,3,4,5,6,7,8,NA,NA,1 ,1 ,NA,2 ,2) y -c(0 ,0,0,1,0,0,0,1,1,1 ,NA,0 ,0 ,0 ,0 ,1) Must_be-c(1 ,1,1,3,5,5,5,7,8,NA,NA,1.5,1 ,NA,1.5,2) (df-as.data.frame(cbind(x,y)) ) # I have traied many bad colusions based on cumsum, pmin, pmax, ... (mean_dfx_if_yIs0-y*cumsum(x*y)/(cumsum(y)*y) ) # how to do this? # thans for any advice # E [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] how to replace values in x by means in subgroups created in ... (not loops)
Something like this? dataset - data.frame(x = x, y = y) dataset$Group - cumsum(c(0, diff(!is.na(dataset$y) dataset$y == 0)) == 1) library(plyr) tmp - ddply(subset(dataset, y == 0), .(Group), function(z){c(Mean = mean(z$x, na.rm = TRUE))}) result - merge(dataset, tmp) result$Mean[is.na(result$y) | result$y != 0] - result$x[is.na(result$y) | result$y != 0] Best regards, Thierry -Oorspronkelijk bericht- Van: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] Namens Eugeniusz Kaluza Verzonden: woensdag 27 juli 2011 11:21 Aan: r-help@r-project.org Onderwerp: [R] how to replace values in x by means in subgroups created in ... (not loops) # Dear all, # how to replace values in x by means in subgroups created in ... # replace only these values where y=0 in continous sequence # replace by mean calculated locally for each subgroup created by # continous sequence of 0,0,0 in parallel y vector, i.e. # where there is continous sequence of 0 in data frame vector y #but we do not replace values in x[i], if y[i]!=0 # we do not want use loops we do not use apply (not very fast) x -c(0 ,1,2,3,4,5,6,7,8,NA,NA,1 ,1 ,NA,2 ,2) y -c(0 ,0,0,1,0,0,0,1,1,1 ,NA,0 ,0 ,0 ,0 ,1) Must_be-c(1 ,1,1,3,5,5,5,7,8,NA,NA,1.5,1 ,NA,1.5,2) (df-as.data.frame(cbind(x,y)) ) # I have traied many bad colusions based on cumsum, pmin, pmax, ... (mean_dfx_if_yIs0-y*cumsum(x*y)/(cumsum(y)*y) ) # how to do this? # thans for any advice # E [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.