Re: [R] Contingency tables from data.frames

2005-05-27 Thread Jose Claudio Faria

The final version with the help of Gabor Grotendieck (thanks Gabor, very much!)

###
#   EasieR - Package  #
###

# Common function
er.make.table - function(x,
  start,
  end,
  h,
  right) {
  # Absolut frequency
  f - table(cut(x, br=seq(start, end, h), right=right))

  # Relative frequency
  fr - f/length(x)

  # Relative frequency, %
  frP - 100*(f/length(x))

  # Cumulative frequency
  fac - cumsum(f)

  # Cumulative frequency, %
  facP - 100*(cumsum(f/length(x)))

  fi   - round(f, 2)
  fr   - round(as.numeric(fr), 2)
  frP  - round(as.numeric(frP), 2)
  fac  - round(as.numeric(fac), 2)
  facP - round(as.numeric(facP),2)

  # Make final table
  res - data.frame(fi, fr, frP, fac, facP)
  names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
  return(res)

}

#With Gabor Grotendieck suggestions (thanks Gabor, very much!)
er.table - function(x, ...) UseMethod(er.table)

er.table.default - function(x,
 k,
 start,
 end,
 h,
 breaks=c('Sturges', 'Scott', 'FD'),
 right=FALSE) {

  #User define nothing or not 'x' isn't numeric - stop
  stopifnot(is.numeric(x))

  #User define only 'x'
  #(x, {k, start, end, h}, [breaks, right])
  if (missing(k)  missing(start)  missing(end)  missing(h) ){

x - na.omit(x)

brk - match.arg(breaks)
switch(brk,
   Sturges = k - nclass.Sturges(x),
   Scott   = k - nclass.scott(x),
   FD  = k - nclass.FD(x))

tmp   - range(x)
start - tmp[1] - abs(tmp[2])/100
end   - tmp[2] + abs(tmp[2])/100
R - end-start
h - R/k

  }

  #User define 'x' and 'k'
  #(x, k, {start, end, h}, [breaks, right])
  else if (missing(start)  missing(end)  missing(h)) {

stopifnot(length(k) = 1)

x - na.omit(x)

tmp   - range(x)
start - tmp[1] - abs(tmp[2])/100
end   - tmp[2] + abs(tmp[2])/100
R - end-start
h - R/abs(k)

  }

  #User define 'x', 'start' and 'end'
  #(x, {k,} start, end, {h,} [breaks, right])
  else if (missing(k)  missing(h)) {

stopifnot(length(start) = 1, length(end) =1)

x - na.omit(x)

tmp - range(x)
R   - end-start
k   - sqrt(abs(R))
if (k  5)  k - 5 #min value of k
h   - R/k

  }

  #User define 'x', 'start', 'end' and 'h'
  #(x, {k,} start, end, h, [breaks, right])
  else if (missing(k)) {

stopifnot(length(start) = 1, length(end) = 1, length(h) = 1)
x - na.omit(x)

  }

  else stop('Error, please, see the function sintax!')

  tbl - er.make.table(x, start, end, h, right)
  return(tbl)

}

er.table.data.frame - function(df,
k,
breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE) {

  stopifnot(is.data.frame(df))

  tmpList - list()
  logCol  - sapply(df, is.numeric)

  for (i in 1:ncol(df)) {

if (logCol[i]) {

  x - as.matrix(df[ ,i])
  x - na.omit(x)

  #User define only x and/or 'breaks'
  #(x, {k,}[breaks, right])
  if (missing(k)) {

brk - match.arg(breaks)
switch(brk,
   Sturges = k - nclass.Sturges(x),
   Scott   = k - nclass.scott(x),
   FD  = k - nclass.FD(x))

tmp   - range(x)
start - tmp[1] - abs(tmp[2])/100
end   - tmp[2] + abs(tmp[2])/100
R - end-start
h - R/k

  }

  #User define 'x' and 'k'
  #(x, k,[breaks, right])
  else {

tmp   - range(x)
start - tmp[1] - abs(tmp[2])/100
end   - tmp[2] + abs(tmp[2])/100
R - end-start
h - R/abs(k)

  }

  tbl - er.make.table(x, start, end, h, right)
  tmpList - c(tmpList, list(tbl))

}

  }

  valCol - logCol[logCol]
  names(tmpList) - names(valCol)
  return(tmpList)

}

Best,
--
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
 [EMAIL PROTECTED]
 [EMAIL PROTECTED]
 [EMAIL PROTECTED]
tel: 73-3634.2779

__
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html


Re: [R] Contingency tables from data.frames

2005-05-25 Thread Jose Claudio Faria

Gabor Grothendieck wrote:

On 5/24/05, Jose Claudio Faria [EMAIL PROTECTED] wrote:


Dear list,

I'm trying to do a set of generic functions do make contingency tables from
data.frames. It is just running nice (I'm learning R), but I think it can be
better.

I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.

Below one of the my functions ('er' is a mention to EasieR, because I'm trying
to do a package for myself and the my students):

#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)
er.table.df.br - function(df,
  breaks = c('Sturges', 'Scott', 'FD'),
  right = FALSE) {

 if (is.data.frame(df) != 'TRUE')
   stop('need data.frame data')

 dim_df - dim(df)

 tmpList - list()

 for (i in 1:dim_df[2]) {

   x - as.matrix(df[ ,i])
   x - na.omit(x)

   k - switch(breaks[1],
   'Sturges' = nclass.Sturges(x),
   'Scott'   = nclass.scott(x),
   'FD'  = nclass.FD(x),
   stop('breaks' must be 'Sturges', 'Scott' or 'FD'))

   tmp  - range(x)
   classIni - tmp[1] - tmp[2]/100
   classEnd - tmp[2] + tmp[2]/100
   R- classEnd-classIni
   h- R/k

   # Absolut frequency
   f - table(cut(x, br = seq(classIni, classEnd, h), right = right))

   # Relative frequency
   fr - f/length(x)

   # Relative frequency, %
   frP - 100*(f/length(x))

   # Cumulative frequency
   fac - cumsum(f)

   # Cumulative frequency, %
   facP - 100*(cumsum(f/length(x)))

   fi   - round(f, 2)
   fr   - round(as.numeric(fr), 2)
   frP  - round(as.numeric(frP), 2)
   fac  - round(as.numeric(fac), 2)
   facP - round(as.numeric(facP),2)

   # Table
   res - data.frame(fi, fr, frP, fac, facP)
   names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
   tmpList - c(tmpList, list(res))
 }
 names(tmpList) - names(df)
 return(tmpList)
}

To try the function:

#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)


#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)




Try this:

sapply(my.data.frame, is.numeric)

Also you might want to look up:

?match.arg
?stopifnot
?ncol
?sapply
?lapply



Thanks Gabor, you suggestion solve my basic problem.
I'm working is same basic (but I think useful) functions
for begginiers.

Below you can see the set of functions:

###
#   EasyeR - Package  #
###

# Common function---
er.make.table - function(x,
  classIni,
  classEnd,
  h,
  right) {
  # Absolut frequency
  f - table(cut(x, br = seq(classIni, classEnd, h), right = right))

  # Relative frequency
  fr - f/length(x)

  # Relative frequency, %
  frP - 100*(f/length(x))

  # Cumulative frequency
  fac - cumsum(f)

  # Cumulative frequency, %
  facP - 100*(cumsum(f/length(x)))

  fi   - round(f, 2)
  fr   - round(as.numeric(fr), 2)
  frP  - round(as.numeric(frP), 2)
  fac  - round(as.numeric(fac), 2)
  facP - round(as.numeric(facP),2)

  # Table
  res - data.frame(fi, fr, frP, fac, facP)
  names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
  return(res)
}


#1. Tables from vectors
#1.1---er.table.br (User define breaks and right)---
er.table.br - function(x,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {

  if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')

  x - na.omit(x)

  k - switch(breaks[1],
  'Sturges' = nclass.Sturges(x),
  'Scott'   = nclass.scott(x),
  'FD'  = nclass.FD(x),
  stop('breaks' must be 'Sturges', 'Scott' or 'FD'))

  tmp  - range(x)
  classIni - tmp[1] - abs(tmp[2])/100
  classEnd - tmp[2] + abs(tmp[2])/100
  R- classEnd-classIni
  h- R/k

  tbl - er.make.table(x, classIni, classEnd, h, right)
  return(tbl)
}


#1.2---er.table.kr (User define the class number (k) and right)-
er.table.kr - function(x,
k,
right = FALSE) {

  if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
  if ((k == '') || (k == ' ')) stop('k not defined')

  x - na.omit(x)

  tmp  - range(x)
  classIni - tmp[1] - abs(tmp[2])/100
  classEnd - tmp[2] + abs(tmp[2])/100
  R- classEnd-classIni
  h- R/k

  tbl - er.make.table(x, classIni, classEnd, h, right)
  

[R] Contingency tables from data.frames

2005-05-24 Thread Jose Claudio Faria

Dear list,

I'm trying to do a set of generic functions do make contingency tables from 
data.frames. It is just running nice (I'm learning R), but I think it can be 
better.


I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.

Below one of the my functions ('er' is a mention to EasieR, because I'm trying 
to do a package for myself and the my students):


#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)
er.table.df.br - function(df,
   breaks = c('Sturges', 'Scott', 'FD'),
   right = FALSE) {

  if (is.data.frame(df) != 'TRUE')
stop('need data.frame data')

  dim_df - dim(df)

  tmpList - list()

  for (i in 1:dim_df[2]) {

x - as.matrix(df[ ,i])
x - na.omit(x)

k - switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott'   = nclass.scott(x),
'FD'  = nclass.FD(x),
stop('breaks' must be 'Sturges', 'Scott' or 'FD'))

tmp  - range(x)
classIni - tmp[1] - tmp[2]/100
classEnd - tmp[2] + tmp[2]/100
R- classEnd-classIni
h- R/k

# Absolut frequency
f - table(cut(x, br = seq(classIni, classEnd, h), right = right))

# Relative frequency
fr - f/length(x)

# Relative frequency, %
frP - 100*(f/length(x))

# Cumulative frequency
fac - cumsum(f)

# Cumulative frequency, %
facP - 100*(cumsum(f/length(x)))

fi   - round(f, 2)
fr   - round(as.numeric(fr), 2)
frP  - round(as.numeric(frP), 2)
fac  - round(as.numeric(fac), 2)
facP - round(as.numeric(facP),2)

# Table
res - data.frame(fi, fr, frP, fac, facP)
names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
tmpList - c(tmpList, list(res))
  }
  names(tmpList) - names(df)
  return(tmpList)
}

To try the function:

#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)


#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)

Could anyone give me a hint how to work around this?

PS: Excuse my bad English ;-)
--
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
 [EMAIL PROTECTED]
 [EMAIL PROTECTED]
 [EMAIL PROTECTED]

__
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html


Re: [R] Contingency tables from data.frames

2005-05-24 Thread Gabor Grothendieck
On 5/24/05, Jose Claudio Faria [EMAIL PROTECTED] wrote:
 Dear list,
 
 I'm trying to do a set of generic functions do make contingency tables from
 data.frames. It is just running nice (I'm learning R), but I think it can be
 better.
 
 I would like to filter the data.frame, i.e, eliminate all not numeric 
 variables.
 And I don't know how to make it: please, help me.
 
 Below one of the my functions ('er' is a mention to EasieR, because I'm trying
 to do a package for myself and the my students):
 
 #2. Tables from data.frames
 #2.1---er.table.df.br (User define breaks and right)
 er.table.df.br - function(df,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
 
   if (is.data.frame(df) != 'TRUE')
 stop('need data.frame data')
 
   dim_df - dim(df)
 
   tmpList - list()
 
   for (i in 1:dim_df[2]) {
 
 x - as.matrix(df[ ,i])
 x - na.omit(x)
 
 k - switch(breaks[1],
 'Sturges' = nclass.Sturges(x),
 'Scott'   = nclass.scott(x),
 'FD'  = nclass.FD(x),
 stop('breaks' must be 'Sturges', 'Scott' or 'FD'))
 
 tmp  - range(x)
 classIni - tmp[1] - tmp[2]/100
 classEnd - tmp[2] + tmp[2]/100
 R- classEnd-classIni
 h- R/k
 
 # Absolut frequency
 f - table(cut(x, br = seq(classIni, classEnd, h), right = right))
 
 # Relative frequency
 fr - f/length(x)
 
 # Relative frequency, %
 frP - 100*(f/length(x))
 
 # Cumulative frequency
 fac - cumsum(f)
 
 # Cumulative frequency, %
 facP - 100*(cumsum(f/length(x)))
 
 fi   - round(f, 2)
 fr   - round(as.numeric(fr), 2)
 frP  - round(as.numeric(frP), 2)
 fac  - round(as.numeric(fac), 2)
 facP - round(as.numeric(facP),2)
 
 # Table
 res - data.frame(fi, fr, frP, fac, facP)
 names(res) - c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
 tmpList - c(tmpList, list(res))
   }
   names(tmpList) - names(df)
   return(tmpList)
 }
 
 To try the function:
 
 #a) runing nice
 y1=rnorm(100, 10, 1)
 y2=rnorm(100, 58, 4)
 y3=rnorm(100, 500, 10)
 mydf=data.frame(y1, y2, y3)
 #tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
 #tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
 tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
 print(tbdf)
 
 
 #b) One of the problems
 y1=rnorm(100, 10, 1)
 y2=rnorm(100, 58, 4)
 y3=rnorm(100, 500, 10)
 y4=rep(letters[1:10], 10)
 mydf=data.frame(y1, y2, y3, y4)
 tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
 print(tbdf)
 

Try this:

sapply(my.data.frame, is.numeric)

Also you might want to look up:

?match.arg
?stopifnot
?ncol
?sapply
?lapply

__
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html