[R] filter and merge 2 big Df's by=t_chr in sec.

2012-03-26 Thread Eugeniusz Kałuża
 
Please help to solve that problem:
 
filter and merge 2 big Df's  by=t_chr in sec.
 
 
 require(zoo)
  # Build a demo data frame keyed by a per-second timestamp string.
  # The k-th second is repeated k times (the last one only once), so most keys
  # occur on several rows and there is something to aggregate per second.
  t1 <- "2012-03-25 17:00:00.0"
  t2 <- "2012-03-25 17:00:05.0"
  t1_POSIXlt <- strptime(t1, "%Y-%m-%d %H:%M:%OS")
  t2_POSIXlt <- strptime(t2, "%Y-%m-%d %H:%M:%OS")
  # Alternative grid with half-second steps:
  # ts_t1t2_POSIXlt <- seq(t1_POSIXlt, t2_POSIXlt,
  #   length.out = -1 + 2 * length(seq(t1_POSIXlt, t2_POSIXlt, "sec")))
  ts_t1t2_POSIXlt <- seq(
    t1_POSIXlt, t2_POSIXlt,
    length.out = length(seq(t1_POSIXlt, t2_POSIXlt, "sec"))
  )
  # format() already returns character, no as.character() needed
  t_ts_t1t2_chr <- format(ts_t1t2_POSIXlt, "%Y-%m-%d %H:%M:%OS")
  tail(t_ts_t1t2_chr)
  n_stamps <- length(t_ts_t1t2_chr)
  t_chr <- rep(t_ts_t1t2_chr, c(seq_len(n_stamps - 1L), 1L))
  str(t_chr)
  # system.time()
  # data.frame() keeps a, b and c numeric; as.data.frame(cbind(...)) would turn
  # every column into character because t_chr is a character vector.
  Df.a <- data.frame(
    t_chr = t_chr,
    a = seq_along(t_chr),
    b = 2 * seq_along(t_chr),
    c = 0.1 * rev(seq_along(t_chr)),
    stringsAsFactors = FALSE
  )
  Df.a
 
 
#
#We wants to obtain for each single second (time as a key) a) mean filtered
 t_chr                a     b    c
  2012-03-25 17:00:00  1     2    1.6
  2012-03-25 17:00:01  2.5   5    1.45
  2012-03-25 17:00:02  5     10   1.2
  2012-03-25 17:00:03  8.5   17   0.85
  2012-03-25 17:00:04  13    26   0.4
  2012-03-25 17:00:05  16    32   0.1

Df.a.filtered.mean=??? 
#--
#We wants to obtain for each single second (time as a key) 
b) max filtered
#key = time, each second: 1 max from these same seconds
 t_chr  a  b   c
1  2012-03-25 17:00:00  1  2 1.6
3  2012-03-25 17:00:01  3  6 1.5
6  2012-03-25 17:00:02  6 12 1.3
10 2012-03-25 17:00:03 10 20 1.0
10 2012-03-25 17:00:04 15 30 1.0
16 2012-03-25 17:00:05 16 32 0.1

Df.a.filtered.max =??? 
 
 
#--
 
The problem is that we have a huge matrix, so the solution should be fast.
 
next we want to merge it with another Data frame
#--
  # Second demo table: the same time span sampled on a grid with
  # 2 * n - 1 points (half-second steps), then formatted to whole seconds, so
  # the formatted keys repeat.
  t1 <- "2012-03-25 17:00:00.0"
  t2 <- "2012-03-25 17:00:05.0"
  t1_POSIXlt <- strptime(t1, "%Y-%m-%d %H:%M:%OS")
  t2_POSIXlt <- strptime(t2, "%Y-%m-%d %H:%M:%OS")
  ts_t1t2_POSIXlt <- seq(
    t1_POSIXlt, t2_POSIXlt,
    length.out = -1 + 2 * length(seq(t1_POSIXlt, t2_POSIXlt, "sec"))
  )
  # Alternative grid with one point per second:
  # ts_t1t2_POSIXlt <- seq(t1_POSIXlt, t2_POSIXlt,
  #   length.out = length(seq(t1_POSIXlt, t2_POSIXlt, "sec")))
  t_chr2 <- t_ts_t1t2_chr <- format(ts_t1t2_POSIXlt, "%Y-%m-%d %H:%M:%OS")
  tail(t_ts_t1t2_chr)
  # data.frame() keeps the numeric columns numeric; as.data.frame(cbind(...))
  # would coerce them to character because t_chr2 is character.
  Df.b <- data.frame(
    t_chr2 = t_chr2,
    dfb.b1 = seq_along(t_chr2),
    dfb.b = 2 * seq_along(t_chr2),
    dfb.c = 0.1 * rev(seq_along(t_chr2)),
    stringsAsFactors = FALSE
  )
  Df.b
#--
 
# Full outer join on the timestamp key: all.x / all.y keep the seconds that
# are present in only one of the two tables (filled with NA on the other side).
merge(Df.a, Df.b, by.x = "t_chr", by.y = "t_chr2", all.x = TRUE, all.y = TRUE)
 
And what if Df.a does not contain measurements for every second?
And what if Df.a contains many NAs?
How can we handle that in R?
 
Thanks
 
KaluzaEA.
 
 

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] _: how to replace values in x by means in subgroups created in ...(not loops)

2011-07-28 Thread Eugeniusz Kałuża

Re:_: how to replace values in x by means in subgroups created in ...(not loops)


Thanks, below some code and reply:
#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_#
#---
# my slow function with loops:
# replace_x_by_locallyMean_x_4_0s_in_y.f(x,y)
#---
#---
# replace_x_by_locallyMean_x_4_0s_in_y.f
  # Arguments: function takes vectors x, y
  #these vector have the same length: length(x)==length(y)
  # function replaces in x
  # values that have in indexes in y
  # at least one zero in the same position in y
  # by locally compted mean of values in x
  #from all local x that also have continous 000 in their positions 
close to this x
  # in y vector
  #x   # 0   1   2   3   4   5   6   7   8   9
  #y   # 0   0   0   1   0   0   0   1   1   1
  # Value expected:
  #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0
  #---
  # author: me
  # replace_x_by_locallyMean_x_4_0s_in_y.f(x,y)
  #---
 #initialisation

Dff-NULL
index_Dff.start_0-NULL
index_Dff.stop_0-NULL
x-NULL
y-NULL
replace_x_by_locallyMean_x_4_0s_in_y.f-NULL
# Replace values of x by the mean of their local run of zeros in y
# (explicit-loop version).
#
# A "run" is a maximal stretch of consecutive positions where y == 0. Every x
# inside a run is replaced by the mean of the x values of that run (NAs in x
# are ignored; a run whose x values are all NA yields NaN). Values of x at
# positions where y is non-zero or NA are returned unchanged.
#
# Arguments:
#   x  numeric vector of values
#   y  vector of the same length as x; zeros mark the positions to average
# Value:
#   vector of the same length as x
# Example:
#   x   # 0 1 2 3 4 5 6 7 8 9
#   y   # 0 0 0 1 0 0 0 1 1 1
#   x_m # 1 1 1 3 5 5 5 7 8 9
replace_x_by_locallyMean_x_4_0s_in_y.f <- function(x = Df$x,
                                                   y = Df$y) {
  x <- as.vector(x)
  y <- as.vector(y)
  stopifnot(length(x) == length(y))

  x_m <- x
  n <- length(y)
  if (n == 0L) {
    return(x_m)
  }

  # NA in y counts as "not zero", so it ends a run instead of raising an error
  is_zero <- !is.na(y) & y == 0
  prev_zero <- c(FALSE, is_zero[-n])
  next_zero <- c(is_zero[-1L], FALSE)
  run_start <- which(is_zero & !prev_zero) # first index of every zero run
  run_stop <- which(is_zero & !next_zero)  # last index of every zero run

  # seq_along() makes the loop a no-op when y contains no zero at all
  for (i in seq_along(run_start)) {
    idx <- run_start[i]:run_stop[i]
    x_m[idx] <- mean(x[idx], na.rm = TRUE)
  }
  x_m
}
#replace_x_by_locallyMean_x_4_0s_in_y.f()


#---
# replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry
  # Arguments: function takes vectors x, y
  #these vector have the same length: length(x)==length(y)
  # function replaces in x
  # values that have in indexes in y
  # at least one zero in the same position in y
  # by locally compted mean of values in x
  #from all local x that also have continous 000 in their positions 
close to this x
  # in y vector
  #x   # 0   1   2   3   4   5   6   7   8   9
  #y   # 0   0   0   1   0   0   0   1   1   1
  # Value expected:
  #x_m # 1.5 1.5 1.5 3.0 5.0 5.0 5.0 7.0 8.0 9.0
  #---
  # author: Thierry
  # replace_x_by_Avgx_for000iny_Thierry.f(x,y)
  #---
 #initialisation
  dataset - NULL
  tmp - NULL
  result - NULL
  replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry-NULL
library(plyr)

# Replace values of x by the mean of their local run of zeros in y
# (vectorised version, based on the cumsum(diff(...)) grouping idea).
#
# Every maximal stretch of consecutive y == 0 forms one group; x inside a group
# is replaced by the group mean of x (NAs in x ignored). Where y is non-zero or
# NA, x is returned unchanged. The result has the same length and order as x.
#
# Base R only: ave() replaces the former ddply() + merge() step, which dropped
# leading rows when y did not start with 0 and did not guarantee row order.
#
# Arguments (both must be supplied; the self-referential defaults are kept
# only so the signature is unchanged and cannot be evaluated):
#   x  numeric vector of values
#   y  vector of the same length as x; zeros mark the positions to average
# Value:
#   vector of the same length as x
replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry <- function(x = x,
                                                            y = y) {
  stopifnot(length(x) == length(y))
  if (length(x) == 0L) {
    return(x)
  }

  is_zero <- !is.na(y) & y == 0
  # The counter increases at every FALSE -> TRUE step, i.e. at each run start
  group <- cumsum(c(0, diff(is_zero)) == 1)

  result <- x
  if (any(is_zero)) {
    result[is_zero] <- ave(
      as.numeric(x[is_zero]), group[is_zero],
      FUN = function(z) mean(z, na.rm = TRUE)
    )
  }
  result
} #replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x,y)


# Let's test it for a longer vector
x <- 1:(50 * 1000)
# for Thierry's function y is forced to begin with 0, 1
# (an earlier, immediately overwritten sample() of y was removed)
y <- c(0, 1, sample(c(0, 1), length(x) - 2, replace = TRUE))
paste(length(x), length(y))
# let's compare times:
# slow solution with loops, my function x_by_locallyMean_x_4_0s_in_y__loop()
#  system.time(
#    x_by_locallyMean_x_4_0s_in_y__loop <-
#      replace_x_by_locallyMean_x_4_0s_in_y.f(x, y)
#  )
# _for_ x <- 1:(50*1000) #
#    user  system elapsed
#  760.34    0.86  769.16
#---
# time of Thierry's function :
system.time(
  x_by_locallyMean_x_4_0s_in_y__Thierry <-
    replace_x_by_locallyMean_x_4_0s_in_y.f__Thierry(x, y)
)
# Df.x_m_Thierry (10x faster) than my loop
# _for_ x <- 1:50
#    user  system elapsed
#   11.61    0.00   11.68

# good news:
# 769.16 sec / 11.68 sec = Thierry's solution is 65 (x) times faster than
# loops,
# great!

length(x_by_locallyMean_x_4_0s_in_y__Thierry)
cbind(x, y, x_by_locallyMean_x_4_0s_in_y__Thierry)[1:15, ]
paste(' good, faster than my loops ')

#1. Thierry, your function is fast, ()

[R] how to replace values in x by means in subgroups created in ... (not loops)

2011-07-27 Thread Eugeniusz Kałuża

# Dear all,
# how to replace values in x by means in subgroups created in ...
# replace only these values where y=0 in continous sequence
# replace by mean calculated locally for each subgroup created by 
# continous sequence of 0,0,0 in parallel y vector, i.e. 
# where there is continous sequence of 0 in data frame vector y 
#but we do not replace values in x[i], if y[i]!=0 
# we do not want use loops we do not use apply (not very fast) 


# Example input (x, y) and the result the author expects (Must_be)
x       <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, NA, NA, 1,   1, NA, 2,   2)
y       <- c(0, 0, 0, 1, 0, 0, 0, 1, 1, 1,  NA, 0,   0, 0,  0,   1)
Must_be <- c(1, 1, 1, 3, 5, 5, 5, 7, 8, NA, NA, 1.5, 1, NA, 1.5, 2)

(df <- data.frame(x = x, y = y))

# I have tried many bad solutions based on cumsum, pmin, pmax, ...
(mean_dfx_if_yIs0 <- y * cumsum(x * y) / (cumsum(y) * y))

# how to do this?
# thans for any advice 
# E

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] How to filter XY pairs of inacurate gps position along track, taking into account the time index to not mix track from different days in one average track

2011-07-01 Thread Eugeniusz Kałuża
Dear R users,


subject:  How to filter XY pairs of inacurate gps position along track, taking 
into account the time index to not mix track from different days in one average 
track


or subject as: How to filter XY pairs of inacurate gps position: (latitude, 
longtitude)
to get estimated correct position before calculating track

i.e.

How to estimate (time series? )?  
How to filter XY pairs of inacurate gps position along track: 
Lon-c(18.13015167,18.13022667,18.13028500,18.13035500,18.13041333,18.13047000,18.13054667,18.13060333,18.13067500,18.13073167,18.13079000,18.13085167,18.13091000,18.13096667,18.13103000,18.13109500,18.13114500,18.13120167,18.13124667,18.13130500,18.13136167,18.13140667,18.13146333,18.13150833,18.13156500,18.13161000,18.13167167,18.13171667,18.13177000,18.13181333,18.13187167,18.13191667,18.13197333,18.13203167,18.13208833,18.13214667,18.13220333,18.13225333,18.13231000,18.13236833,18.13243000,18.13249167,18.13254167,18.13259833,18.13265667,18.13270500,18.13276333,18.13282000,18.13287000,18.13292667,18.13298500,18.13304667,18.13310500,18.13315333,18.13321500,18.13327833,18.13332667)
Lat-c(48.95961500,48.95961667,48.95961500,48.95961000,48.95960667,48.95959833,48.95960167,48.95959833,48.95959333,48.95959167,48.95959000,48.95959333,48.95959167,48.95958833,48.95959333,48.95958167,48.95958833,48.95958500,48.95958500,48.95958333,48.95958000,48.95957500,48.95957167,48.95957167,48.95956500,48.95955833,48.95956833,48.95956833,48.95955833,48.95955333,48.95955167,48.95955000,48.95954333,48.95954000,48.95953833,48.95953500,48.9595,48.95954000,48.95953667,48.95954000,48.95954500,48.95954833,48.95955500,48.95955833,48.95955667,48.95956167,48.95956000,48.95955833,48.95957000,48.95956667,48.95956000,48.95956333,48.95956167,48.95956833,48.95957167,48.95958167,48.95958833)
# Index of each GPS fix in recording order (Lon and Lat are defined above)
t_index <- seq_along(Lat)

Raw <- data.frame(t_index = t_index, Lon = Lon, Lat = Lat)


plot(Raw$Lon, Raw$Lat, type = "l", main = "lowess(cars)")
# should the data be filtered, or ...?
# expected answer:
# ver 1 ----
# Raw$Lon_v2 <- f_x(x = Raw$Lon, y = Raw$Lat)
# Raw$Lat_v2 <- f_y(x = Raw$Lon, y = Raw$Lat)
# lines(Raw$Lon_v2, Raw$Lat_v2, col = 2)


# ver 2 ----
# Smooth Lat as a function of Lon with lowess() for two smoother spans and
# overlay both fits on the raw track.
op <- par(mfrow = c(2, 1), mgp = c(2, .8, 0), mar = .1 + c(3, 3, 3, 1))
plot(Raw$Lon, Raw$Lat, type = "l", main = "lowess()")
f_k2 <- 0.3
fit_k2 <- lowess(x = Raw$Lon, y = Raw$Lat, f = f_k2) # computed once, used twice
Raw$Lon_v2 <- fit_k2$x
Raw$Lat_v2 <- fit_k2$y
points(Raw$Lon_v2, Raw$Lat_v2, col = 2, pch = "+")
lines(Raw$Lon_v2, Raw$Lat_v2, col = 2)
f_k3 <- 0.6
fit_k3 <- lowess(x = Raw$Lon, y = Raw$Lat, f = f_k3)
Raw$Lon_v2 <- fit_k3$x
Raw$Lat_v2 <- fit_k3$y
points(Raw$Lon_v2, Raw$Lat_v2, col = 3, pch = "x")
lines(Raw$Lon_v2, Raw$Lat_v2, col = 3, lwd = 2)
legend(
  Raw$Lon_v2[1], Raw$Lat_v2[1],
  paste("f = ", c(round(f_k2, 2), round(f_k3, 2))),
  lty = 1, col = 2:3
)

# restore the graphics parameters changed above
par(op)

How to approximate these positions along some possible track (line, curve)
for all localizations along the track, that can change directions.

The methods above are not good because, if there are many tracks in the vicinity 
of the first track, they 

will average the points between tracks that lie close to one another.

We need to approximate along one time-ordered track only,

i.e. the car (Lat, Lon position) at a given moment must be placed on one route 
(lane) (today's track) 

and not on the route that it covered 2 days ago, because that is 
another day, with a different point index.   

Yours sincerely,
Eugen


 

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] create group markers in original data frame ie.countinued... ? to calculate sth for groups defined betweenpoints in one variable (string), /separating/ spliting variable into groups by i.e. be

2010-06-26 Thread Eugeniusz Kałuża

Dear useRs, and expeRts,
thanks.
I have found an idea how to add to the original data a column with markers, to know 
for all data in which period in c2 they are; simply, in the code I could add: 

  # stack() flattens the named list of row indices into a data frame with the
  # index in column "values" and the interval name in column "ind"; merging on
  # c0 attaches that interval name to every row of the original data.
  stacked_idx <- stack(idx)
  merge(stacked_idx, C.df, by.x = "values", by.y = "c0", all = TRUE)
 
thanks for suggestions,
Kaluza 

-Wiadomość oryginalna-
Od: r-help-boun...@r-project.org w imieniu Eugeniusz Kaluza
Wysłano: Pt 2010-06-25 14:48
Do: c
Temat: Re: [R] create group markers in original data frame ie.countinued... ? 
to calculate sth for groups defined betweenpoints in one variable 
(string),/separating/ spliting variable into groups by i.e. between A-B,B-C, 
C-D, from: A, NA, NA, B, NA, NA, C, NA, NA, NA, D
 

Dear useRs,

at the beginning, 
Joris Meys, thank you for explaining how to obtain calculation result possible 
for groups between string marks in one variable in data frame, like in this 
example below (between START and STOP), wchich I would like to complete at the 
end by asking about... how is possible to mark each observations presented in 
oryginal data set
 

# so firstly, below 
# START...working example of solution proposed by: Joris Meys 
[jorism...@gmail.com] 
# Same trick :
  # Example data: c2 carries a marker ("A", "B", ...) on some rows, NA elsewhere
  c0 <- rbind(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
              12, 13, 14, 15, 16, 17)
  c0
  c1 <- rbind(10, 20, 30, 40, 50, 10, 60, 20, 30, 40, 50, 30, 10,
              0, NA, 20, 10.3444)
  c1
  c2 <- rbind(NA, "A", NA, NA, "B", NA, NA, NA, NA, NA, NA, "C", NA, NA, NA, NA, "D")
  c2
  # the data frame has to exist before it is indexed below
  C.df <- data.frame(c0 = c0, c1 = c1, c2 = c2)

  # rows that carry a marker
  pos <- which(!is.na(C.df$c2))
  n_intervals <- max(length(pos) - 1L, 0L)
  # one index vector per interval: from a marker up to the row before the next
  # marker; lapply() always returns a list (sapply() may collapse to a matrix)
  idx <- lapply(seq_len(n_intervals), function(k) pos[k]:(pos[k + 1L] - 1L))
  names(idx) <- vapply(
    seq_len(n_intervals),
    function(k) paste(C.df$c2[pos[k]], "-", C.df$c2[pos[k + 1L]]),
    character(1)
  )
  out <- lapply(idx, function(i) summary(C.df[i, 1:2]))
  out
#STOP ... below from: Sent: Thu 2010-06-24 18:02:  Joris Meys 
[jorism...@gmail.com]


#Thank you, it is done and works very well

# - - - - - - - -- - - - - - -- - -
# Now, I try to finish my question to add gruping sybol to the whole set, 
making 
# each observation marked by the name of the interval in which that observation 
is placed.
# to tell the observator, that this observation is between ...A and B, to 
enable sorting, to eneable simple acess using match
# Hand-written illustration of the wanted result: for every row, the marker
# that opens its interval, the marker that closes it, and both combined.
in_sub_starting_from <- c(NA, "A", "A", "A", "B", "B", "B", "B", "B", "B", "B",
                          "C", "C", "C", "C", "C", "C")
in_sub_finished_by <- c(NA, "B", "B", "B", "C", "C", "C", "C", "C", "C", "C",
                        "D", "D", "D", "D", "D", "D")
in_sub_limited_by <- c(NA, "A-B", "A-B", "A-B",
                       "B-C", "B-C", "B-C", "B-C", "B-C", "B-C", "B-C",
                       "C-D", "C-D", "C-D", "C-D", "C-D", "C-D")
C.df <- data.frame(c0, c1, c2, in_sub_starting_from, in_sub_finished_by, in_sub_limited_by)
C.df
#

# Therefore my one more question: 
How is possible to create these vectors automaticly, having  C.df$c2 (and of 
course having also: C.df$c0,C.df$c1), :
C.df$in_sub_starting_from
C.df$in_sub_finished_by
C.df$in_sub_limited_by
#to tell the observator, that this observation is between ...A and B, to enable 
sorting, to eneable simple acess using match


#for example, to make possible this access to data:
#to take the 7th observation from any row of the data frame,
C.df$c0[7]
# index with the data frame's own column, not a global c0
C.df$c1[C.df$c0 == 7]
#and could
#find in this same row in_sub_starting_from: that observation is preceded by ...
C.df$in_sub_starting_from[C.df$c0 == 7]
#find in this same row in_sub_finished_by: that observation is before ...
C.df$in_sub_finished_by[C.df$c0 == 7]
#find in this same row in_sub_limited_by: that this observation is between ...
C.df$in_sub_limited_by[C.df$c0 == 7]
#

?





#Thanks for advices, and maybe and this answer, 
#looking impatiently for time with possible access to internet... 

#

Sincerely,
Kaluza


and the beginnig of this story;







-Original Message-
From: Eugeniusz Kaluza
Sent: Thu 2010-06-24 17:12
To: r-help@r-project.org
Subject: PD: [R] ?to calculate sth for groups defined between points in one 
variable (string), / value separating/ spliting variable into groups by i.e. 
between start, NA, NA, stop1, start2, NA, stop2

Dear useRs,

Thanks for advice from Joris Meys, 
Now will try to think how to make it working for less specyfic case, 
to make the problem more general.
Then the result should be displayed for every group between non empty string in 
c2 
i.e. not only result for:
 #mean:
  c1 c3c4   c5
  20  Start1 Stop1 Start1-Stop1
25.48585  Start2 Stop2 Start2-Stop2 

but also for every one group created by space between two closest strings in 
c2, that contains only seriess of Na, NA, NA, separated from time to time by 
one string i.e.:
 #mean:
  c1 c3c4   c5
  20 Start1 Stop1 Start1-Stop1
  .. Stop1 Start2 Stop1-Start2
25.48585  Start2 Stop2 Start2-Stop2 

i.e.
to rewrite this maybe for another simpler version of command

but also for every one group created by space between two closest strings in 
c2, that contains only seriess of Na, NA, NA, separated from time to time by 
one 

[R] PD: ?to calculate sth for groups defined between points in one variable (string), / value separating/ spliting variable into groups by i.e. between start, NA, NA, stop1, start2, NA, stop2

2010-06-24 Thread Eugeniusz Kałuża
Dear useRs,

Thanks for advice from Joris Meys, 
Now will try to think how to make it working for less specyfic case, 
to make the problem more general.
Then the result should be displayed for every group between non empty string in 
c2 
i.e. not only result for:
 #mean:
  c1 c3c4   c5
  20  Start1 Stop1 Start1-Stop1
25.48585  Start2 Stop2 Start2-Stop2 

but also for every one group created by space between two closest strings in 
c2, that contains only seriess of Na, NA, NA, separated from time to time by 
one string i.e.:
 #mean:
  c1 c3c4   c5
  20 Start1 Stop1 Start1-Stop1
  .. Stop1 Start2 Stop1-Start2
25.48585  Start2 Stop2 Start2-Stop2 

i.e.
to rewrite this maybe for another simpler version of command

but also for every one group created by space between two closest strings in 
c2, that contains only seriess of Na, NA, NA, separated from time to time by 
one string A, NA, NA, NA, NA, B, NA, NA, NA, C, NA,NA,NA,NA,D, NA,NA
i.e.:
 #mean:
  c1 c3c4   c5
  20  A B  A-B
  ..  B C  B-C
25.48585  C D  C-D 
...


Looking for more general method (function), grouping between these letters in 
c2,
I will now try to study solution proposed by Joris Meys
Thanks for immediate aswer  
Kaluza




-Wiadomość oryginalna-
Od: Joris Meys [mailto:jorism...@gmail.com]
Wysłano: Cz 2010-06-24 15:14
Do: Eugeniusz Kałuża
DW: r-help@r-project.org
Temat: Re: [R] ?to calculate sth for groups defined between points in one 
variable (string), / value separating/ spliting variable into groups by i.e. 
between start, NA, NA, stop1, start2, NA, stop2
 
On Thu, Jun 24, 2010 at 1:18 PM, Eugeniusz Kaluza
eugeniusz.kal...@polsl.pl wrote:

 Dear useRs,

 Thanks for any advices

 # I do not know where are the examples how to mark groups
 #  based on signal occurence in the additional variable: cf. variable c2,
 # How to calculate different calculations for groups defined by (split by 
 occurence of c2 characteristic data)


 #First example of simple data
 #mexample: positions 1 .. 17
 c0 <- rbind(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)
 c0
 c1 <- rbind(10, 20, 30, 40, 50, 10, 60, 20, 30, 40, 50, 30, 10,
             0, NA, 20, 10.3444)
 c1
 c2 <- rbind(NA, "Start1", NA, NA, "Stop1", NA, NA, NA, NA, NA, NA, "Start2", NA, NA, NA, NA, "Stop2")
 c2
 # NOTE: cbind() of numbers and strings yields a character matrix, so c0 and
 # c1 are no longer numeric in C.df; kept as posted because the reply below
 # discusses exactly this.
 C.df <- data.frame(cbind(c0, c1, c2))
 colnames(C.df) <- c("c0", "c1", "c2")
 C.df

 # preparation of a form for explaining the needed result (the next 3 lines
 # are not really needed, they only explain how to obtain the final result)
  c3 <- rbind(NA, "Start1", "Start1", "Start1", "Start1",
              "Start2", "Start2", "Start2", "Start2", "Start2", "Start2",
              "Start2", "Start2", "Start2", "Start2", "Start2", "Start2")
  c4 <- rbind(NA, "Stop1", "Stop1", "Stop1", "Stop1",
              "Stop2", "Stop2", "Stop2", "Stop2", "Stop2", "Stop2",
              "Stop2", "Stop2", "Stop2", "Stop2", "Stop2", "Stop2")
  C.df <- data.frame(cbind(c0, c1, c2, c3, c4))
  colnames(C.df) <- c("c0", "c1", "c2", "c3", "c4")
  C.df$c5 <- paste(C.df$c3, C.df$c4, sep = "-")
  C.df

Now this is something I don't get. The list Start2-Stop2 starts way
before Start2, actually at Stop1. Sure that's what you want?

I took the liberty of showing how to get the data between start and
stop for every entry, and how to apply functions to it. If you don't
get the code, look at
?lapply
?apply
?grep

I also adjusted your example, as you caused all variables to be
factors by using the cbind in the data.frame function. Never do this
unless you're really sure you have to. But I can't think of a case
where that would be beneficial...

...
# Build the data frame directly from the columns so c0 and c1 stay numeric
C.df <- data.frame(c0, c1, c2)
C.df

# find positions
Start <- grep("Start", C.df$c2)
Stop <- grep("Stop", C.df$c2)

# create indices: one row range per Start/Stop pair. Map() always returns a
# list, whereas apply() returns a matrix when all ranges have the same length.
idx <- Map(function(from, to) from:to, Start, Stop)
names(idx) <- paste0("Start", seq_along(Start), "-Stop", seq_along(Start))

# Apply the function summary and get a list back named by the interval.
out <- lapply(idx, function(i) summary(C.df[i, 1:2]))
out

If you really need to start Start2 right after Stop1, you can use a
similar approach.

Cheers
Joris

 # NEEDED RESULTS
  # needed result
 # for Stat1-Stop1: mean(20,30,40,50)
 # for Stat2-Stop2: mean(c(10,60,20,30,40,50,30,10,0,NA,20,10.3444), na.rm=T)
 #mean:
         c1     c3    c4           c5
         20  Start1 Stop1 Start1-Stop1
   25.48585  Start2 Stop2 Start2-Stop2

 #sum
 # for Stat1-Stop1: sum(20,30,40,50)
 # for Stat2-Stop2: sum(c(10,60,20,30,40,50,30,10,0,NA,20,10.3444), na.rm=T)
 #sum:
         c1     c3    c4           c5
        140  Start1 Stop1 Start1-Stop1
   280.3444  Start2 Stop2 Start2-Stop2

 # for Stat1-Stop1: max(20,30,40,50)
 # for Stat2-Stop2: max(c(10,60,20,30,40,50,30,10,0,NA,20,10.3444), na.rm=T)
 #max:
         c1     c3    c4           c5
        50  Start1 Stop1 Start1-Stop1
        60  Start2 Stop2 Start2-Stop2

 # place of max  (in Start1-Stop1: 4 th element in gruop Start1-Stop1
 # place of max  (in Start1-Stop1: 2 nd element in 

[R] GPS finding and mark interesting POI within table with large GPS points collection

2010-05-21 Thread Eugeniusz Kałuża
Dear R users,
#
#There is a table containing 1000 (lat, lon, time) GPS positions, which should 
be recognized and labeled, in every row with a recognized position, by the label 
from the defined POI vector

 # GPS fixes: one row per (Lat, Lon, time) observation. The table is created
 # in one step; assigning GPS$Lat on a non-existent object fails.
 GPS <- data.frame(
   Lat  = c(9, 2.2, 2, 3, 4, 5, 6, 7, 2, 9, 2.1, 10, 3.3, 0, 6, 8, 9),
   Lon  = c(0, 1, 1, 4, 4, 5, 6, 7, 2, 9, 1.1, 10, 4.2, 0, 6, 1, 9),
   time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
 )

 # and definition of points of interest:
 POIdef <- data.frame(
   Lat = c(2, 3.2),
   Lon = c(1.1, 4),
   Lab = c("A", "B"),
   stringsAsFactors = FALSE
 )


# but if there are two recognitized points in the small time difference,
# (within time window of tw=3)
# there should be choosen only one point from POIdef  

#Result expected:

#like in these vectors: 
# Expected findings, as given in the post: the matched GPS rows together with
# the label and coordinates of the point of interest. Outer parentheses print
# the table.
(GPSfindings <- data.frame(
  Lat     = c(2.2, 2.1, 3.3),
  Lon     = c(1, 1.1, 4.2),
  time    = c(2, 10, 12),
  Lab     = c("A", "A", "B"),
  POI_lat = c(2, 2, 3.2),
  POI_lon = c(1.1, 1.1, 4.1),
  stringsAsFactors = FALSE
))

#and as result obtain marked vector:  
(GPS$Lab=c(NA,A,NA,NA,NA,NA,NA,NA,NA,B,NA,NA,NA,NA,NA,NA))


#1) Is that possible without using specialized package?
#2) Any specialized package function realize that?

#
Thanks for suggestions.
Sincerely, Eugen

[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.