Hi I have a simple question:
the following data.frame
   id iwv type
1   1   1    a
2   1   2    b
3   1  11    b
4   1   5    a
5   1   6    c
6   2   4    c
7   2   3    c
8   2  10    a
9   3   6    b
10  3   9    a
11  3   8    b
12  3   7    c
shall be aggregated into the form:
  id t.a t.b t.c
1  1   6  13   6
6  2  10   0   7
9  3   9  14   7
That is, for each 'type' (a, b, c) a new column is introduced which holds the sum of iwv over the observations with the respective 'id'.
Of course I can do this transformation/aggregation in a loop (see below), but is there a way to do it more efficiently, e.g. by using tapply (or something similar), since I have a great many rows?
thanks for a hint
christoph
#------------------------------------------------------------------------------
# the loop-way
# Build the example data with explicit column types.
# Going through cbind() first would coerce all three columns to character
# (turned into factors by data.frame() before R 4.0), and as.numeric() on a
# factor returns the level codes rather than the values. The tables shown in
# the post list those level codes (1, 2, 11, ...) instead of the iwv values
# entered here.
t <- data.frame(
  id = c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3),
  iwv = c(10, 12, 8, 33, 34, 3, 27, 77, 34, 45, 4, 39),
  type = factor(c("a", "b", "b", "a", "c", "c", "c", "a", "b", "a", "b", "c"))
)
t

# define the additional columns (t.a, t.b, t.c): one per type, all zero
type.levels <- levels(t$type)
tt <- data.frame(matrix(0, nrow = nrow(t), ncol = length(type.levels)))
names(tt) <- paste0("t.", type.levels)
t <- cbind(t, tt)

# rows that get folded into the first row of their id and are dropped later
row.elim <- rep(FALSE, nrow(t))
# number of the column which codes the first type
ta <- match(names(tt)[1], names(t))
# distance from the current row back to the first row of the current id
r.ctr <- 0

# NOTE: the loop relies on rows with the same id being adjacent.
for (i in seq_len(nrow(t))) {
  # the integer code of the factor level selects the matching t.* column
  type.col <- ta - 1 + as.integer(t$type[i])
  # compare with the previous row directly, so no sentinel id value is needed
  same.id <- i > 1 && t$id[i] == t$id[i - 1]
  if (same.id) {
    row.elim[i] <- TRUE
    r.ctr <- r.ctr + 1 # increment
    # accumulate into the first row of this id
    t[i - r.ctr, type.col] <- t[i - r.ctr, type.col] + t$iwv[i]
  } else {
    r.ctr <- 0 # record counter
    t[i, type.col] <- t$iwv[i]
  }
}

# keep one row per id and drop the columns that have been aggregated away
t <- t[!row.elim, ]
t <- subset(t, select = -c(iwv, type))
t
#------------------------------------------------------------------------------
______________________________________________ [email protected] mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
