Dear R-mates,

# Here's what I am trying to do. I have a dataset like this:

id = c(rep(1,8), rep(2,8))
dur1 <- c( 17,18,19,18,24,19,24,24 )
est1 <- c( rep(1,5), rep(2,3) )
dur2 <- c(1,1,3,4,8,12,13,14)
est2 <- rep(1,8)

mydata = data.frame(id,
                    estat=c(est1, est2),
                    durat=c(dur1, dur2))


# I want to one have this:

id = c(rep(1,8), rep(2,8))
dur1 <- c( 17,18,19,20,28,1,2,3 )
est1 <- c( rep(1,5), rep(2,3) )
dur2 <- c(1,2,3,4,12,13,14,15)
est2 <- rep(1,8)

mydata2 = = data.frame(id,
                    estat=c(est1, est2),
                    durat=c(dur1, dur2))


# What is happening here? I have a longitudinal dataset.
# Individuals are observed 8 times, and each time each of them are in a
certain state J (here, J={1,2}).
# Each observation is one unit of time away from the following one, except
observations 4 and 5, which are 8 units of time away from each other.
# So here we have individual 1 migrating from state 1 to state 2 at
observation #6,
# while individual 2 stays in state 1 as long as we can observe her.
# I am interested in the spell (duration) of each state.
# However, the durations are clearly mismesuared, and now I am trying to
give some consistency to the data.
# I am assuming that the first duration is correct. Departing from this, I
wrote the following function:

    d <- function(dur,est)
{
    if ( sum( diff(est) )==0 ) # for those who didn't change state
        {
        for( i in c(2:4))
            dur[i] <- dur[i-1] + 1

        dur[5] <- dur[4] + 8

        for( i in c(6:8) )
            dur[i] <- dur[i-1] + 1
        }
    if ( sum( diff(est) )!=0 ) # for those who changed state
        {
             j = which(diff(est)!=0) + 1    # j is when the change occured
             dur[j] = 1

             k0      = which( c(1:8) < j )[-c(1)]
             k1      = which( c(1:8) > j )
             if(length(j) > 1)
                {
                for( i in 1:(length(j)-1) )
                    k2 = c(1:8)[c(1:8)> j[i] & c(1:8)< j[i+1]]
                k = unique( c(k0,k1,k2) )
                }
             k = unique( c(k0,k1) )
             k = k[!k%in%j]
             if(5%in%k)
                {
                 k = k[k != 5]
                 for(i in k[k<5])
                    dur[i] = dur[i-1] + 1

                 dur[5] = dur[4] + 8

                 for(i in k[k>5])
                    dur[i] = dur[i-1] + 1
                } else
                      {
                        for(i in k)
                        dur[i] = dur[i-1] + 1
                      }
            }
dur

}

# Now, if a do

    d(dur1, est1)
# and
    d(dur2,est2)
# I get what I want, except from the fact that I couldn't do this for a
large dataset.
# So I decide to use tapply. But this gives me

    new.durat <- tapply(mydata$durat, IND=mydata$id, FUN=d,
est=mydata$estat)
    mydata$new.durat <- unlist(new.durat)

>    mydata
    id   estat durat new.durat
1   1     1    17        17
2   1     1    18        18
3   1     1    19        19
4   1     1    18        20
5   1     1    24        28
6   1     2    19        29
7   1     2    24        30
8   1     2    24        31
9   2     1     1         1
10  2     1     1         2
11  2     1     3         3
12  2     1     4         4
13  2     1     8        12
14  2     1    12        13
15  2     1    13        14
16  2     1    14        15

# what is not what I want. I can't figure it out why, but when I use tapply,
# the logical expression "sum( diff(est) )==0" turns out to be true for both
individuals
# (whereas we know this is true only for individual #2).
# I am sorry for the long message. I will be very grateful for any help with
this problem.

        [[alternative HTML version deleted]]

______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html

Reply via email to