Re: [R] Contingency tables from data.frames
The final version with the help of Gabor Grotendieck (thanks Gabor, very much!) ### # EasieR - Package # ### # Common function er.make.table - function(x, start, end, h, right) { # Absolut frequency f - table(cut(x, br=seq(start, end, h), right=right)) # Relative frequency fr - f/length(x) # Relative frequency, % frP - 100*(f/length(x)) # Cumulative frequency fac - cumsum(f) # Cumulative frequency, % facP - 100*(cumsum(f/length(x))) fi - round(f, 2) fr - round(as.numeric(fr), 2) frP - round(as.numeric(frP), 2) fac - round(as.numeric(fac), 2) facP - round(as.numeric(facP),2) # Make final table res - data.frame(fi, fr, frP, fac, facP) names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') return(res) } #With Gabor Grotendieck suggestions (thanks Gabor, very much!) er.table - function(x, ...) UseMethod(er.table) er.table.default - function(x, k, start, end, h, breaks=c('Sturges', 'Scott', 'FD'), right=FALSE) { #User define nothing or not 'x' isn't numeric - stop stopifnot(is.numeric(x)) #User define only 'x' #(x, {k, start, end, h}, [breaks, right]) if (missing(k) missing(start) missing(end) missing(h) ){ x - na.omit(x) brk - match.arg(breaks) switch(brk, Sturges = k - nclass.Sturges(x), Scott = k - nclass.scott(x), FD = k - nclass.FD(x)) tmp - range(x) start - tmp[1] - abs(tmp[2])/100 end - tmp[2] + abs(tmp[2])/100 R - end-start h - R/k } #User define 'x' and 'k' #(x, k, {start, end, h}, [breaks, right]) else if (missing(start) missing(end) missing(h)) { stopifnot(length(k) = 1) x - na.omit(x) tmp - range(x) start - tmp[1] - abs(tmp[2])/100 end - tmp[2] + abs(tmp[2])/100 R - end-start h - R/abs(k) } #User define 'x', 'start' and 'end' #(x, {k,} start, end, {h,} [breaks, right]) else if (missing(k) missing(h)) { stopifnot(length(start) = 1, length(end) =1) x - na.omit(x) tmp - range(x) R - end-start k - sqrt(abs(R)) if (k 5) k - 5 #min value of k h - R/k } #User define 'x', 'start', 'end' and 'h' #(x, {k,} start, end, h, [breaks, right]) else if (missing(k)) { stopifnot(length(start) = 1, length(end) = 1, length(h) = 1) x - na.omit(x) } else stop('Error, please, see the function sintax!') tbl - er.make.table(x, start, end, h, right) return(tbl) } er.table.data.frame - function(df, k, breaks=c('Sturges', 'Scott', 'FD'), right=FALSE) { stopifnot(is.data.frame(df)) tmpList - list() logCol - sapply(df, is.numeric) for (i in 1:ncol(df)) { if (logCol[i]) { x - as.matrix(df[ ,i]) x - na.omit(x) #User define only x and/or 'breaks' #(x, {k,}[breaks, right]) if (missing(k)) { brk - match.arg(breaks) switch(brk, Sturges = k - nclass.Sturges(x), Scott = k - nclass.scott(x), FD = k - nclass.FD(x)) tmp - range(x) start - tmp[1] - abs(tmp[2])/100 end - tmp[2] + abs(tmp[2])/100 R - end-start h - R/k } #User define 'x' and 'k' #(x, k,[breaks, right]) else { tmp - range(x) start - tmp[1] - abs(tmp[2])/100 end - tmp[2] + abs(tmp[2])/100 R - end-start h - R/abs(k) } tbl - er.make.table(x, start, end, h, right) tmpList - c(tmpList, list(tbl)) } } valCol - logCol[logCol] names(tmpList) - names(valCol) return(tmpList) } Best, -- Jose Claudio Faria Brasil/Bahia/UESC/DCET Estatistica Experimental/Prof. Adjunto mails: [EMAIL PROTECTED] [EMAIL PROTECTED] [EMAIL PROTECTED] tel: 73-3634.2779 __ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
Re: [R] Contingency tables from data.frames
Gabor Grothendieck wrote: On 5/24/05, Jose Claudio Faria [EMAIL PROTECTED] wrote: Dear list, I'm trying to do a set of generic functions do make contingency tables from data.frames. It is just running nice (I'm learning R), but I think it can be better. I would like to filter the data.frame, i.e, eliminate all not numeric variables. And I don't know how to make it: please, help me. Below one of the my functions ('er' is a mention to EasieR, because I'm trying to do a package for myself and the my students): #2. Tables from data.frames #2.1---er.table.df.br (User define breaks and right) er.table.df.br - function(df, breaks = c('Sturges', 'Scott', 'FD'), right = FALSE) { if (is.data.frame(df) != 'TRUE') stop('need data.frame data') dim_df - dim(df) tmpList - list() for (i in 1:dim_df[2]) { x - as.matrix(df[ ,i]) x - na.omit(x) k - switch(breaks[1], 'Sturges' = nclass.Sturges(x), 'Scott' = nclass.scott(x), 'FD' = nclass.FD(x), stop('breaks' must be 'Sturges', 'Scott' or 'FD')) tmp - range(x) classIni - tmp[1] - tmp[2]/100 classEnd - tmp[2] + tmp[2]/100 R- classEnd-classIni h- R/k # Absolut frequency f - table(cut(x, br = seq(classIni, classEnd, h), right = right)) # Relative frequency fr - f/length(x) # Relative frequency, % frP - 100*(f/length(x)) # Cumulative frequency fac - cumsum(f) # Cumulative frequency, % facP - 100*(cumsum(f/length(x))) fi - round(f, 2) fr - round(as.numeric(fr), 2) frP - round(as.numeric(frP), 2) fac - round(as.numeric(fac), 2) facP - round(as.numeric(facP),2) # Table res - data.frame(fi, fr, frP, fac, facP) names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') tmpList - c(tmpList, list(res)) } names(tmpList) - names(df) return(tmpList) } To try the function: #a) runing nice y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) mydf=data.frame(y1, y2, y3) #tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F) #tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) tbdf=er.table.df.br (mydf, breaks = 'FD', right=F) print(tbdf) #b) One of the problems y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) y4=rep(letters[1:10], 10) mydf=data.frame(y1, y2, y3, y4) tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) print(tbdf) Try this: sapply(my.data.frame, is.numeric) Also you might want to look up: ?match.arg ?stopifnot ?ncol ?sapply ?lapply Thanks Gabor, you suggestion solve my basic problem. I'm working is same basic (but I think useful) functions for begginiers. Below you can see the set of functions: ### # EasyeR - Package # ### # Common function--- er.make.table - function(x, classIni, classEnd, h, right) { # Absolut frequency f - table(cut(x, br = seq(classIni, classEnd, h), right = right)) # Relative frequency fr - f/length(x) # Relative frequency, % frP - 100*(f/length(x)) # Cumulative frequency fac - cumsum(f) # Cumulative frequency, % facP - 100*(cumsum(f/length(x))) fi - round(f, 2) fr - round(as.numeric(fr), 2) frP - round(as.numeric(frP), 2) fac - round(as.numeric(fac), 2) facP - round(as.numeric(facP),2) # Table res - data.frame(fi, fr, frP, fac, facP) names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') return(res) } #1. Tables from vectors #1.1---er.table.br (User define breaks and right)--- er.table.br - function(x, breaks = c('Sturges', 'Scott', 'FD'), right = FALSE) { if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data') x - na.omit(x) k - switch(breaks[1], 'Sturges' = nclass.Sturges(x), 'Scott' = nclass.scott(x), 'FD' = nclass.FD(x), stop('breaks' must be 'Sturges', 'Scott' or 'FD')) tmp - range(x) classIni - tmp[1] - abs(tmp[2])/100 classEnd - tmp[2] + abs(tmp[2])/100 R- classEnd-classIni h- R/k tbl - er.make.table(x, classIni, classEnd, h, right) return(tbl) } #1.2---er.table.kr (User define the class number (k) and right)- er.table.kr - function(x, k, right = FALSE) { if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data') if ((k == '') || (k == ' ')) stop('k not defined') x - na.omit(x) tmp - range(x) classIni - tmp[1] - abs(tmp[2])/100 classEnd - tmp[2] + abs(tmp[2])/100 R- classEnd-classIni h- R/k tbl - er.make.table(x, classIni, classEnd, h, right)
[R] Contingency tables from data.frames
Dear list, I'm trying to do a set of generic functions do make contingency tables from data.frames. It is just running nice (I'm learning R), but I think it can be better. I would like to filter the data.frame, i.e, eliminate all not numeric variables. And I don't know how to make it: please, help me. Below one of the my functions ('er' is a mention to EasieR, because I'm trying to do a package for myself and the my students): #2. Tables from data.frames #2.1---er.table.df.br (User define breaks and right) er.table.df.br - function(df, breaks = c('Sturges', 'Scott', 'FD'), right = FALSE) { if (is.data.frame(df) != 'TRUE') stop('need data.frame data') dim_df - dim(df) tmpList - list() for (i in 1:dim_df[2]) { x - as.matrix(df[ ,i]) x - na.omit(x) k - switch(breaks[1], 'Sturges' = nclass.Sturges(x), 'Scott' = nclass.scott(x), 'FD' = nclass.FD(x), stop('breaks' must be 'Sturges', 'Scott' or 'FD')) tmp - range(x) classIni - tmp[1] - tmp[2]/100 classEnd - tmp[2] + tmp[2]/100 R- classEnd-classIni h- R/k # Absolut frequency f - table(cut(x, br = seq(classIni, classEnd, h), right = right)) # Relative frequency fr - f/length(x) # Relative frequency, % frP - 100*(f/length(x)) # Cumulative frequency fac - cumsum(f) # Cumulative frequency, % facP - 100*(cumsum(f/length(x))) fi - round(f, 2) fr - round(as.numeric(fr), 2) frP - round(as.numeric(frP), 2) fac - round(as.numeric(fac), 2) facP - round(as.numeric(facP),2) # Table res - data.frame(fi, fr, frP, fac, facP) names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') tmpList - c(tmpList, list(res)) } names(tmpList) - names(df) return(tmpList) } To try the function: #a) runing nice y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) mydf=data.frame(y1, y2, y3) #tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F) #tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) tbdf=er.table.df.br (mydf, breaks = 'FD', right=F) print(tbdf) #b) One of the problems y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) y4=rep(letters[1:10], 10) mydf=data.frame(y1, y2, y3, y4) tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) print(tbdf) Could anyone give me a hint how to work around this? PS: Excuse my bad English ;-) -- Jose Claudio Faria Brasil/Bahia/UESC/DCET Estatistica Experimental/Prof. Adjunto mails: [EMAIL PROTECTED] [EMAIL PROTECTED] [EMAIL PROTECTED] __ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
Re: [R] Contingency tables from data.frames
On 5/24/05, Jose Claudio Faria [EMAIL PROTECTED] wrote: Dear list, I'm trying to do a set of generic functions do make contingency tables from data.frames. It is just running nice (I'm learning R), but I think it can be better. I would like to filter the data.frame, i.e, eliminate all not numeric variables. And I don't know how to make it: please, help me. Below one of the my functions ('er' is a mention to EasieR, because I'm trying to do a package for myself and the my students): #2. Tables from data.frames #2.1---er.table.df.br (User define breaks and right) er.table.df.br - function(df, breaks = c('Sturges', 'Scott', 'FD'), right = FALSE) { if (is.data.frame(df) != 'TRUE') stop('need data.frame data') dim_df - dim(df) tmpList - list() for (i in 1:dim_df[2]) { x - as.matrix(df[ ,i]) x - na.omit(x) k - switch(breaks[1], 'Sturges' = nclass.Sturges(x), 'Scott' = nclass.scott(x), 'FD' = nclass.FD(x), stop('breaks' must be 'Sturges', 'Scott' or 'FD')) tmp - range(x) classIni - tmp[1] - tmp[2]/100 classEnd - tmp[2] + tmp[2]/100 R- classEnd-classIni h- R/k # Absolut frequency f - table(cut(x, br = seq(classIni, classEnd, h), right = right)) # Relative frequency fr - f/length(x) # Relative frequency, % frP - 100*(f/length(x)) # Cumulative frequency fac - cumsum(f) # Cumulative frequency, % facP - 100*(cumsum(f/length(x))) fi - round(f, 2) fr - round(as.numeric(fr), 2) frP - round(as.numeric(frP), 2) fac - round(as.numeric(fac), 2) facP - round(as.numeric(facP),2) # Table res - data.frame(fi, fr, frP, fac, facP) names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') tmpList - c(tmpList, list(res)) } names(tmpList) - names(df) return(tmpList) } To try the function: #a) runing nice y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) mydf=data.frame(y1, y2, y3) #tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F) #tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) tbdf=er.table.df.br (mydf, breaks = 'FD', right=F) print(tbdf) #b) One of the problems y1=rnorm(100, 10, 1) y2=rnorm(100, 58, 4) y3=rnorm(100, 500, 10) y4=rep(letters[1:10], 10) mydf=data.frame(y1, y2, y3, y4) tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F) print(tbdf) Try this: sapply(my.data.frame, is.numeric) Also you might want to look up: ?match.arg ?stopifnot ?ncol ?sapply ?lapply __ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html