Re: [R] _: how to replace values in x by means in subgroups created in ...(not loops)

2011-07-28 Thread Eugeniusz Kałuża

Re:_: how to replace values in x by means in subgroups created in ...(not loops)


Thanks, below some code and reply:
#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_#
#---
# my slow function with loops:
# replace_x_by_locallyMean_x_4_0s_in_y.f(x,y)
#---
#---
# replace_x_by_locallyMean_x_4_0s_in_y.f
  # Arguments: function takes vectors x, y
  #these vector have the same length: length(x)==length(y)
  # function replaces in x
  # values that have in indexes in y
  # at least one zero in the same position in y
  # by locally compted mean of values in x
  #from all local x that also have continous 000 in their positions 
close to this x
  # in y vector
  #x   # 0   1   2   3   4   5   6   7   8   9
  #y   # 0   0   0   1   0   0   0   1   1   1
  # Value expected:
  #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0
  #---
  # author: me
  # replace_x_by_locallyMean_x_4_0s_in_y.f(x,y)
  #---
 #initialisation

Dff-NULL
index_Dff.start_0-NULL
index_Dff.stop_0-NULL
x-NULL
y-NULL
replace_x_by_locallyMean_x_4_0s_in_y.f-NULL
replace_x_by_locallyMean_x_4_0s_in_y.f-function(x=Df$x,
y=Df$y){
  Dff-as.data.frame(cbind(as.vector(x),as.vector(y)))
  colnames(Dff)-c('x','y')
  index_Dff.start_0-which( ( (Dff$y==0) 
 rev(rev(c(0,Dff$y))[-1]) !=0) )
  if (Dff$y[1]==0) {
index_Dff.start_0=c(1,index_Dff.start_0)
  }
  index_Dff.stop_0-which( ( (Dff$y==0) 
 c(Dff$y[-1],rev(Dff$y)[1]) !=0) )
  if (rev(Dff$y)[1]==0) {
 index_Dff.stop_0=c(index_Dff.stop_0,length(x))
  }
  Dff$x_m-Dff$x
  for (i in ( 1:length(index_Dff.start_0) ) ) {
 Dff$x_m[index_Dff.start_0[i]:index_Dff.stop_0[i]]-mean(
  Dff$x[index_Dff.start_0[i]:index_Dff.stop_0[i]],na.rm=T)
 }
  for (i in ( 1:length(index_Dff.start_0) ) ) {
 Dff$x_m[index_Dff.start_0[i]:index_Dff.stop_0[i]]-mean(
  Dff$x[index_Dff.start_0[i]:index_Dff.stop_0[i]],na.rm=T)
 }
  Dff$x_m
  }
#replace_x_by_locallyMean_x_4_0s_in_y.f()


#---
# replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry
  # Arguments: function takes vectors x, y
  #these vector have the same length: length(x)==length(y)
  # function replaces in x
  # values that have in indexes in y
  # at least one zero in the same position in y
  # by locally compted mean of values in x
  #from all local x that also have continous 000 in their positions 
close to this x
  # in y vector
  #x   # 0   1   2   3   4   5   6   7   8   9
  #y   # 0   0   0   1   0   0   0   1   1   1
  # Value expected:
  #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0
  #---
  # author: Thierry
  # replace_x_by_Avgx_for000iny_Thierry.f(x,y)
  #---
 #initialisation
  dataset - NULL
  tmp - NULL
  result - NULL
  replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry-NULL
library(plyr)

replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry-function(x=x,
y=y){
  dataset - data.frame(x = x, y = y)
  dataset$Group - cumsum(c(0, diff(!is.na(dataset$y)  dataset$y == 0)) == 1)
  #library(plyr)
  tmp - ddply(subset(dataset, y == 0), .(Group), function(z){c(Mean = 
mean(z$x, na.rm = TRUE))})
  result - merge(dataset, tmp)
  result$Mean[is.na(result$y) | result$y != 0] - result$x[is.na(result$y) | 
result$y != 0]
  #result
  result$Mean
} #replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x,y)


# Let's test it for longer vector
x-1:(50*1000)
 y-sample(c(0,1), length(x), replace = TRUE)
 # for Thierry we must start by 1 if it start by 0,
y-c(0,1,sample(c(0,1), length(x)-2, replace = TRUE)  )
paste(length(x),length(y))
# let's comapre times:
# slow solution with loops, my function x_by_locallyMean_x_4_0s_in_y__loop()
#  system.time(
# x_by_locallyMean_x_4_0s_in_y__loop-
#replace_x_by_locallyMean_x_4_0s_in_y.f(x,y)
# )
# _for_ x-1:(50*1000) #
#  user  system elapsed
# 760.340.86  769.16
#---
# time of Thierry's function :
system.time(
x_by_locallyMean_x_4_0s_in_y__Thierry-
   replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x,y)
   )
#Df.x_m_Thierry (10x faster) then my loop
# _for_ x-1:50
# user  system elapsed
#  11.610.00   11.68

# good news:
# 769.16 sec / 11.68 sec = Thierry's solution is 65 (x) times faster than 
loops,
# great!

length(x_by_locallyMean_x_4_0s_in_y__Thierry)
cbind(x,y,x_by_locallyMean_x_4_0s_in_y__Thierry) [1:15,]
paste(' good, faster than my loops ')

#1. Thierry, your function is fast, ()

[R] how to replace values in x by means in subgroups created in ... (not loops)

2011-07-27 Thread Eugeniusz Kałuża

# Dear all,
# how to replace values in x by means in subgroups created in ...
# replace only these values where y=0 in continous sequence
# replace by mean calculated locally for each subgroup created by 
# continous sequence of 0,0,0 in parallel y vector, i.e. 
# where there is continous sequence of 0 in data frame vector y 
#but we do not replace values in x[i], if y[i]!=0 
# we do not want use loops we do not use apply (not very fast) 


x  -c(0 ,1,2,3,4,5,6,7,8,NA,NA,1  ,1 ,NA,2  ,2)
y  -c(0 ,0,0,1,0,0,0,1,1,1 ,NA,0  ,0 ,0 ,0  ,1)
Must_be-c(1 ,1,1,3,5,5,5,7,8,NA,NA,1.5,1 ,NA,1.5,2)

(df-as.data.frame(cbind(x,y))  )

# I have traied many bad colusions based on cumsum, pmin, pmax, ... 
(mean_dfx_if_yIs0-y*cumsum(x*y)/(cumsum(y)*y) )

# how to do this?
# thans for any advice 
# E

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] how to replace values in x by means in subgroups created in ... (not loops)

2011-07-27 Thread ONKELINX, Thierry
Something like this?

dataset - data.frame(x = x, y = y)
dataset$Group - cumsum(c(0, diff(!is.na(dataset$y)  dataset$y == 0)) == 1)
library(plyr)
tmp - ddply(subset(dataset, y == 0), .(Group), function(z){c(Mean = mean(z$x, 
na.rm = TRUE))})
result - merge(dataset, tmp)
result$Mean[is.na(result$y) | result$y != 0] - result$x[is.na(result$y) | 
result$y != 0]

Best regards,

Thierry

 -Oorspronkelijk bericht-
 Van: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org]
 Namens Eugeniusz Kaluza
 Verzonden: woensdag 27 juli 2011 11:21
 Aan: r-help@r-project.org
 Onderwerp: [R] how to replace values in x by means in subgroups created in ...
 (not loops)
 
 
 # Dear all,
 # how to replace values in x by means in subgroups created in ...
 # replace only these values where y=0 in continous sequence # replace by mean
 calculated locally for each subgroup created by # continous sequence of 0,0,0 
 in
 parallel y vector, i.e.
 # where there is continous sequence of 0 in data frame vector y
 #but we do not replace values in x[i], if y[i]!=0
 # we do not want use loops we do not use apply (not very fast)
 
 
 x  -c(0 ,1,2,3,4,5,6,7,8,NA,NA,1  ,1 ,NA,2  ,2)
 y  -c(0 ,0,0,1,0,0,0,1,1,1 ,NA,0  ,0 ,0 ,0  ,1)
 Must_be-c(1 ,1,1,3,5,5,5,7,8,NA,NA,1.5,1 ,NA,1.5,2)
 
 (df-as.data.frame(cbind(x,y))  )
 
 # I have traied many bad colusions based on cumsum, pmin, pmax, ...
 (mean_dfx_if_yIs0-y*cumsum(x*y)/(cumsum(y)*y) )
 
 # how to do this?
 # thans for any advice
 # E
 
   [[alternative HTML version deleted]]
 
 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.