Hi, of course. A mini-version of my data-set is below, stored in d2. The code I'm working on follows it. library(reshape2) #Create d2 structure(list(row = 1:50, rank1 = structure(c(3L, 3L, 3L, 4L, 3L, 3L, NA, NA, 3L, NA, 3L, 3L, 1L, NA, 2L, NA, 3L, NA, 2L, 1L, 1L, 3L, NA, 6L, NA, 1L, NA, 3L, 1L, NA, 1L, NA, NA, 6L, 3L, NA, 1L, 3L, 3L, 4L, 1L, NA, 3L, 3L, 3L, NA, 3L, 3L, NA, 1L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank2 = structure(c(6L, 1L, 1L, 2L, 4L, 6L, NA, NA, 6L, NA, 6L, 4L, 2L, NA, 4L, NA, 6L, NA, 1L, 6L, 3L, 2L, NA, 3L, NA, 6L, NA, 6L, 6L, NA, 3L, NA, NA, 3L, 6L, NA, 6L, 6L, 6L, 7L, 3L, NA, 1L, 6L, 6L, NA, 2L, 6L, NA, 2L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank3 = structure(c(1L, 6L, 4L, 3L, 2L, 4L, NA, NA, 4L, NA, 1L, 1L, 6L, NA, 1L, NA, 1L, NA, 7L, 3L, 6L, 1L, NA, 2L, NA, 4L, NA, 1L, 3L, NA, 6L, NA, NA, 4L, 2L, NA, 7L, 1L, 1L, 6L, 7L, NA, 6L, 1L, 1L, NA, 4L, 1L, NA, 3L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank4 = structure(c(7L, 4L, 2L, 1L, 1L, 7L, NA, NA, 1L, NA, 7L, 2L, 7L, NA, 3L, NA, 2L, NA, 3L, 4L, 5L, 6L, NA, 4L, NA, 3L, NA, 4L, 4L, NA, 4L, NA, NA, 2L, 7L, NA, 2L, 2L, 2L, 3L, 6L, NA, 2L, 5L, 4L, NA, 1L, 2L, NA, 4L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank5 = structure(c(2L, 7L, 6L, 7L, 7L, 2L, NA, NA, 2L, NA, 2L, 7L, 3L, NA, 6L, NA, 7L, NA, 6L, 7L, 4L, 7L, NA, 7L, NA, 7L, NA, 2L, 2L, NA, 2L, NA, NA, 7L, 1L, NA, 3L, 7L, 4L, 2L, 2L, NA, 4L, 2L, 2L, NA, 6L, 4L, NA, 5L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank6 = structure(c(4L, 2L, 7L, 6L, 6L, 1L, NA, NA, 7L, NA, 4L, 5L, 4L, NA, 7L, NA, 4L, NA, 4L, 2L, 2L, 4L, NA, 1L, NA, 
2L, NA, 7L, 7L, NA, 7L, NA, NA, 1L, 4L, NA, 4L, 4L, 7L, 1L, 4L, NA, 7L, 7L, 7L, NA, 7L, 7L, NA, 7L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor"), rank7 = structure(c(5L, 5L, 5L, 5L, 5L, 5L, NA, NA, 5L, NA, 5L, 6L, 5L, NA, 5L, NA, 5L, NA, 5L, 5L, 7L, 5L, NA, 5L, NA, 5L, NA, 5L, 5L, NA, 5L, NA, NA, 5L, 5L, NA, 5L, NA, 5L, 5L, 5L, NA, 5L, 4L, 5L, NA, 5L, 5L, NA, 6L), .Label = c("accessible", "alternatives", "information", "responsive", "social", "technical", "trade"), class = "factor")), .Names = c("row", "rank1", "rank2", "rank3", "rank4", "rank5", "rank6", "rank7"), row.names = c(NA, 50L), class = "data.frame")
#This code is a replication of David Carlson's code (below) which works splendidly, but does not work on my data-set #Melt d2: Note, I've used value.name='color' to maximize comparability with David's suggestion d3 <- melt(d2, id.vars=1, measure.vars=2:8, variable.name="rank",value.name="color") #Make Rank Variable Numeric d3$rank<-as.numeric(d3$rank) #Recast d3 into d4 d4<- dcast(d3, row~color,value.var="rank", fill=0) #Note that d4 appears to provide a binary variable for one if a respondent checked the option, but does not provide information as to which rank they assigned each option, but also seems to summarize the number of missing values #David Carlson's Code mydf <- data.frame(t(replicate(100, sample(c("red", "blue", "green", "yellow", NA), 4)))) mydf <- data.frame(rows=1:100, mydf) colnames(mydf) <- c("row", "rank1", "rank2", "rank3", "rank4") mymelt <- melt(mydf, id.vars=1, measure.vars=2:5, variable.name="rank", value.name="color") mymelt$rank <- as.numeric(mymelt$rank) mycast <- dcast(mymelt, row~color, value.var="rank", fill=0) #Compare str(mydf) str(d2) head(mycast) head(d4) Again, I'm grateful for assistance. I can't understand how my data-set differs from David's sample data-set. Simon Kiss On Sep 4, 2014, at 2:35 PM, David L Carlson <dcarl...@tamu.edu> wrote: > I think we would need enough of the data you are using to figure out how to > modify the process. Can you use dput() to send a small data set that fails to > work? > > David C > > -----Original Message----- > From: Simon Kiss [mailto:sjk...@gmail.com] > Sent: Thursday, September 4, 2014 1:28 PM > To: David L Carlson > Cc: r-help@r-project.org > Subject: Re: [R] Turn Rank Ordering Into Numerical Scores By Transposing A > Data Frame > > Hi David and list: > This is working, except at this command > mycast <- dcast(mymelt, row~color, value.var="rank", fill=0) > > dcast is using "length" as the default aggregating function. This results in > not accurate results. 
It tells me, for example how many choices were missing > values and it tells me if a person selected any given option (value is > reported as 1). > When I try to run your reproducible research, it works great, but something > with the aggregating function is not working properly with mine. > Any other thoughts? > Simon > On Aug 18, 2014, at 10:44 AM, David L Carlson <dcarl...@tamu.edu> wrote: > >> Another approach using reshape2: >> >>> library(reshape2) >>> # Construct data/ add column of row numbers >>> set.seed(42) >>> mydf <- data.frame(t(replicate(100, sample(c("red", "blue", >> + "green", "yellow", NA), 4)))) >>> mydf <- data.frame(rows=1:100, mydf) >>> colnames(mydf) <- c("row", "rank1", "rank2", "rank3", "rank4") >>> head(mydf) >> row rank1 rank2 rank3 rank4 >> 1 1 <NA> yellow red blue >> 2 2 yellow green <NA> red >> 3 3 yellow green blue <NA> >> 4 4 <NA> blue yellow green >> 5 5 <NA> red blue green >> 6 6 <NA> red green blue >>> # Reshape >>> mymelt <- melt(mydf, id.vars=1, measure.vars=2:5, >> + variable.name="rank", value.name="color") >>> # Convert rank to numeric >>> mymelt$rank <- as.numeric(mymelt$rank) >>> mycast <- dcast(mymelt, row~color, value.var="rank", fill=0) >>> head(mycast) >> row blue green red yellow NA >> 1 1 4 0 3 2 1 >> 2 2 0 2 4 1 3 >> 3 3 3 2 0 1 4 >> 4 4 2 4 0 3 1 >> 5 5 3 4 2 0 1 >> 6 6 4 3 2 0 1 >> >> David C >> >> -----Original Message----- >> From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On >> Behalf Of David L Carlson >> Sent: Sunday, August 17, 2014 6:32 PM >> To: Simon Kiss; r-help@r-project.org >> Subject: Re: [R] Turn Rank Ordering Into Numerical Scores By Transposing A >> Data Frame >> >> There is probably an easier way to do this, but >> >>> set.seed(42) >>> mydf <- data.frame(t(replicate(100, sample(c("red", "blue", >> + "green", "yellow", NA), 4)))) >>> colnames(mydf) <- c("rank1", "rank2", "rank3", "rank4") >>> head(mydf) >> rank1 rank2 rank3 rank4 >> 1 <NA> yellow red blue >> 2 yellow green 
<NA> red >> 3 yellow green blue <NA> >> 4 <NA> blue yellow green >> 5 <NA> red blue green >> 6 <NA> red green blue >>> lvls <- levels(mydf$rank1) >>> # convert color factors to numeric >>> for (i in seq_along(mydf)) mydf[,i] <- as.numeric(mydf[,i]) >>> # stack the columns >>> mydf2 <- stack(mydf) >>> # convert rank factor to numeric >>> mydf2$ind <- as.numeric(mydf2$ind) >>> # add row numbers >>> mydf2 <- data.frame(rows=1:100, mydf2) >>> # Create table >>> mytbl <- xtabs(ind~rows+values, mydf2) >>> # convert to data frame >>> mydf3 <- data.frame(unclass(mytbl)) >>> colnames(mydf3) <- lvls >>> head(mydf3) >> blue green red yellow >> 1 4 0 3 2 >> 2 0 2 4 1 >> 3 3 2 0 1 >> 4 2 4 0 3 >> 5 3 4 2 0 >> 6 4 3 2 0 >> >> David C >> >> -----Original Message----- >> From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On >> Behalf Of Simon Kiss >> Sent: Friday, August 15, 2014 3:58 PM >> To: r-help@r-project.org >> Subject: Re: [R] Turn Rank Ordering Into Numerical Scores By Transposing A >> Data Frame >> >> >> Both the suggestions I got work very well, but what I didn't realize is that >> NA values would cause serious problems. Where there is a missing value, >> using the argument na.last=NA to order just returns the order of the >> factor levels, but excludes the missing values, but I have no idea where >> those occur, or rather which of those variables were actually missing. >> >> Have I explained this problem sufficiently? >> I didn't think it would cause such a problem so I didn't include it in the >> original problem definition. >> Yours, Simon >> On Jul 25, 2014, at 4:58 PM, David L Carlson <dcarl...@tamu.edu> wrote: >> >>> I think this gets what you want. But your data are not reproducible since >>> they are randomly drawn without setting a seed and the two data sets have >>> no relationship to one another. 
>>> >>>> set.seed(42) >>>> mydf <- data.frame(t(replicate(100, sample(c("red", "blue", >>> + "green", "yellow"))))) >>>> colnames(mydf) <- c("rank1", "rank2", "rank3", "rank4") >>>> mydf2 <- data.frame(t(apply(mydf, 1, order))) >>>> colnames(mydf2) <- levels(mydf$rank1) >>>> head(mydf) >>> rank1 rank2 rank3 rank4 >>> 1 yellow green red blue >>> 2 green blue yellow red >>> 3 green yellow red blue >>> 4 yellow red green blue >>> 5 yellow red green blue >>> 6 yellow red blue green >>>> head(mydf2) >>> blue green red yellow >>> 1 4 2 3 1 >>> 2 2 1 4 3 >>> 3 4 1 3 2 >>> 4 4 3 2 1 >>> 5 4 3 2 1 >>> 6 3 4 2 1 >>> >>> ------------------------------------- >>> David L Carlson >>> Department of Anthropology >>> Texas A&M University >>> College Station, TX 77840-4352 >>> >>> -----Original Message----- >>> From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On >>> Behalf Of Simon Kiss >>> Sent: Friday, July 25, 2014 2:34 PM >>> To: r-help@r-project.org >>> Subject: [R] Turn Rank Ordering Into Numerical Scores By Transposing A Data >>> Frame >>> >>> Hello: >>> I have data that looks like mydf, below. It is the results of a survey >>> where participants were to put a number of statements (in this case >>> colours) in their order of preference. In this case, the rank number is the >>> variable, and the factor level for each respondent is which colour they >>> assigned to that rank. I would like to find a way to effectively transpose >>> the data frame so that it looks like mydf2, also below, where the colours >>> the participants were able to choose are the variables and the variable >>> score is what that person ranked that variable. >>> >>> Ultimately what I would like to do is a factor analysis on these items, so >>> I'd like to be able to see if people ranked red and yellow higher together >>> but ranked green and blue together lower, that sort of thing. 
>>> I have played around with different variations of t(), melt(), ifelse() and >>> if() but can't find a solution. >>> Thank you >>> Simon >>> #Reproducible code >>> mydf<-data.frame(rank1=sample(c('red', 'blue', 'green', 'yellow'), >>> replace=TRUE, size=100), rank2=sample(c('red', 'blue', 'green', 'yellow'), >>> replace=TRUE, size=100), rank3=sample(c('red', 'blue', 'green', 'yellow'), >>> replace=TRUE, size=100), rank4=sample(c('red', 'blue', 'green', 'yellow'), >>> replace=TRUE, size=100)) >>> >>> mydf2<-data.frame(red=sample(c(1,2,3,4), >>> replace=TRUE,size=100),blue=sample(c(1,2,3,4), >>> replace=TRUE,size=100),green=sample(c(1,2,3,4), replace=TRUE,size=100) >>> ,yellow=sample(c(1,2,3,4), replace=TRUE,size=100)) >>> ********************************* >>> Simon J. Kiss, PhD >>> Assistant Professor, Wilfrid Laurier University >>> 73 George Street >>> Brantford, Ontario, Canada >>> N3T 2C9 >>> >>> ______________________________________________ >>> R-help@r-project.org mailing list >>> https://stat.ethz.ch/mailman/listinfo/r-help >>> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html >>> and provide commented, minimal, self-contained, reproducible code. >> >> ********************************* >> Simon J. Kiss, PhD >> Assistant Professor, Wilfrid Laurier University >> 73 George Street >> Brantford, Ontario, Canada >> N3T 2C9 >> Cell: +1 905 746 7606 >> >> ______________________________________________ >> R-help@r-project.org mailing list >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. >> >> ______________________________________________ >> R-help@r-project.org mailing list >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. 
> > ********************************* > Simon J. Kiss, PhD > Assistant Professor, Wilfrid Laurier University > 73 George Street > Brantford, Ontario, Canada > N3T 2C9 > Cell: +1 905 746 7606 > > > ********************************* Simon J. Kiss, PhD Assistant Professor, Wilfrid Laurier University 73 George Street Brantford, Ontario, Canada N3T 2C9 Cell: +1 905 746 7606 ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.