Sorry, looks like my work e-mailer put the attachments in the body. Please e-mail [EMAIL PROTECTED] if interested and I'll send you a copy of the files.
I think it'll also work to grab the files from: ftp://mayoftp:''@ftp.mayo.edu/pub/weigand/writeForeignSAS7.R ftp://mayoftp:''@ftp.mayo.edu/pub/weigand/diff.txt Thank you, Stephen On Jul 13, 2006, at 1:48 PM, Stephen Weigand wrote: > Dear R-devel, > > I've made some potential extensions to writeForeignSAS > in 'foreign' that I wanted to pass along if anyone is > interested. I've attached the diff -u output against > the version found in foreign_0.8-15 and an .R file > with my changes. (In this .R file, the function is named > writeForeignSAS7 to simplify testing/comparisons.) > > I've tried to alter the current version as little as > possible while making the following changes: > > * Try to convert data.frame names to SAS-legal names and > allow the user to specify an 8- or 32-character limit. > > * For factors, try to convert the variable name to a > SAS-legal 8-character name not ending in a digit > > * Read in 'datafile' with DSD specified in the INFILE > statement. SAS says this "changes how SAS treats > delimiters when list input is used and sets the default > delimiter to a comma. When you specify DSD, SAS treats > two consecutive delimiters as a missing value and > removes quotation marks from character values." The > point of this is the added safety of using 'quote=TRUE' > when writing 'datafile' via write.table > > * Functionality to write out Dates and read them in with > an INFORMAT statement > > * Functionality to write out datetime variables > (assuming a class of POSIXct) and read them in with an > INFORMAT statement > > * In order to handle character variables a bit better, > use a LENGTH statement to tell SAS the maximum character > width of values in the variable. Without this, some > character values can be truncated. > > If it'd be helpful to make any changes or add anything, > I'd be happy try to do so. > > Finally, some testing code that works in SAS 6.12, 8.2, > and 9. > > d <- > structure(list(a.b = as.integer(c(1, 2)), > alphabetsoup = > structure(as.integer(c(1, 2)), > .Label = c("A", "B"), > class = "factor"), > datevar1 = structure(c(13342, 12977), > class = "Date"), > datetimevar1 = structure(c(1152802685, > 1152716285), > class = c("POSIXt", "POSIXct")), > charactervariable = c("L", > "Last, First")), > .Names = c("a.b", "alphabetsoup", > "datevar1", "datetimevar1", > "charactervariable"), > row.names = c("1", "2"), > class = "data.frame") > > require(foreign) > > ### adQuote here to (temporarily) avoid ':::' > adQuote <- function (x) paste("\"", x, "\"", sep = "") > > dfile <- file.path(tempdir(), "test.dat") > cfile <- file.path(tempdir(), "test.sas") > write.foreign(d, datafile = dfile, codefile = cfile, > package = "SAS7", validvarname = "V6") > file.show(dfile) > file.show(cfile) > > Sincerely, > > Stephen > > :::::::::::::::::::::::::::::::::: > Stephen Weigand > Division of Biostatistics > Mayo Clinic Rochester, Minn., USA > Phone (507) 266-1650, fax 284-9542 > --- writeForeignSAS.R Fri Feb 17 03:30:53 2006 > +++ /tmp/writeForeignSAS.R Thu Jul 13 12:24:24 2006 > @@ -1,21 +1,52 @@ > -writeForeignSAS<-function(df,datafile,codefile,dataname="rdata"){ > +make.SAS.names <- function(varnames, validvarname = c("V7", "V6")){ > + validvarname <- match.arg(validvarname) > + nmax <- if(validvarname == "V7") 32 else 8 > > + x <- sub("^([0-9])", "_\\1", varnames) > + x <- gsub("[^a-zA-Z0-9_]", "_", x) > + x <- abbreviate(x, minlength = nmax) > + > + if (any(nchar(x) > nmax) || any(duplicated(x))) > + stop("Cannot uniquely abbreviate the variable names to ", > + nmax, " or fewer characters") > + names(x) <- varnames > + x > +} > + > +make.SAS.formats <- function(varnames){ > + x <- sub("^([0-9])", "_\\1", varnames) > + x <- gsub("[^a-zA-Z0-9_]", "_", x) > + x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f' > + x <- abbreviate(x, minlength = 8) > + > + if(any(nchar(x) > 8) || any(duplicated(x))) > + stop("Cannot uniquely abbreviate format names to conform to ", > + " eight-character limit and not ending in a digit") > + names(x) <- varnames > + x > +} > + > +writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata", > + validvarname = c("V7", "V6")){ > factors <- sapply(df, is.factor) > strings <- sapply(df, is.character) > - > + dates <- sapply(df, FUN = function(x) inherits(x, "Date")) > + datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct")) > + > varlabels <- names(df) > - varnames <- abbreviate(names(df), 8) > - if (any(sapply(varnames, nchar) > 8)) > - stop("Cannot abbreviate the variable names to eight or fewer > letters") > - if (any(abbreviated <- (varnames != varlabels))) > - message("Some variable names were abbreviated.") > + varnames <- make.SAS.names(names(df), validvarname = validvarname) > + if (any(varnames != varlabels)) > + message("Some variable names were abbreviated or otherwise > altered.") > > > dfn<-df > if (any(factors)) > dfn[factors]<-lapply(dfn[factors], as.numeric) > + if (any(datetimes)) > + dfn[datetimes] <- lapply(dfn[datetimes], > + FUN = function(x) format(x, "%d%b%Y > %H:%M:%S")) > write.table(dfn, file = datafile, row = FALSE, col = FALSE, > - sep = ",", quote = FALSE, na = ".") > + sep = ",", quote = TRUE, na = "") > lrecl<-max(sapply(readLines(datafile),nchar))+4 > > cat("* Written by R;\n", file=codefile) > @@ -22,24 +53,50 @@ > cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE) > if (any(factors)){ > cat("PROC FORMAT;\n",file=codefile,append=TRUE) > - for(v in 1:ncol(df)){ > - if (factors[v]){ > - cat("value ",varnames[v],"\n",file=codefile,append=TRUE) > - values<-levels(df[[v]]) > + fmtnames <- make.SAS.formats(varnames[factors]) > + fmt.values <- lapply(df[, factors, drop = FALSE], levels) > + names(fmt.values) <- fmtnames > + for (f in fmtnames){ > + cat("value",f,"\n",file=codefile,append = TRUE) > + values<-fmt.values[[f]] > for(i in 1:length(values)){ > cat(" > ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE) > } > cat(";\n\n",file=codefile,append=TRUE) > - } > - } > + } > } > > cat("DATA ",dataname,";\n",file=codefile,append=TRUE) > + > + if (any(strings)){ > + cat("LENGTH", file = codefile, append = TRUE) > + lengths <- sapply(df[,strings, drop = FALSE], > + FUN = function(x) max(nchar(x))) > + names(lengths) <- varnames[strings] > + for(v in varnames[strings]) > + cat("\n", v, "$", lengths[v],file=codefile,append=TRUE) > + cat("\n;\n\n", file = codefile, append = TRUE) > + } > + > + if (any(dates)){ > + cat("INFORMAT", file = codefile, append = TRUE) > + for(v in varnames[dates]) > + cat("\n", v, file = codefile, append = TRUE) > + cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE) > + } > + > + if (any(datetimes)){ > + cat("INFORMAT", file = codefile, append = TRUE) > + for(v in varnames[datetimes]) > + cat("\n", v, file = codefile, append = TRUE) > + cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE) > + } > + > cat("INFILE ",adQuote(datafile), > - "\n DELIMITER=','", > + "\n DSD", > "\n LRECL=",lrecl,";\n", > file=codefile,append=TRUE) > - > + > cat("INPUT",file=codefile,append=TRUE) > for(v in 1:ncol(df)){ > cat("\n",varnames[v],file=codefile,append=TRUE) > @@ -49,16 +106,26 @@ > cat("\n;\n",file=codefile,append=TRUE) > > for(v in 1:ncol(df)){ > - if (abbreviated[v]) > + if (varnames[v] != names(varnames)[v]) > cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n", > file=codefile,append=TRUE) > - } > - > - for(v in 1:ncol(df)){ > - if(factors[v]) > - cat("FORMAT ",varnames[v],paste(varnames[v],".",sep=""),";\n", > + } > + > + if (any(factors)){ > + for (f in 1:length(fmtnames)) > + cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = > ""),";\n", > file=codefile,append=TRUE) > } > - > + > + if (any(dates)){ > + for(v in varnames[dates]) > + cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE) > + } > + > + if (any(datetimes)){ > + for(v in varnames[datetimes]) > + cat("FORMAT", v, "datetime18.;\n", file = codefile, append = > TRUE) > + } > + > cat("RUN;\n",file=codefile,append=TRUE) > } > make.SAS.names <- function(varnames, validvarname = c("V7", "V6")){ > validvarname <- match.arg(validvarname) > nmax <- if(validvarname == "V7") 32 else 8 > > x <- sub("^([0-9])", "_\\1", varnames) > x <- gsub("[^a-zA-Z0-9_]", "_", x) > x <- abbreviate(x, minlength = nmax) > > if (any(nchar(x) > nmax) || any(duplicated(x))) > stop("Cannot uniquely abbreviate the variable names to ", > nmax, " or fewer characters") > names(x) <- varnames > x > } > > make.SAS.formats <- function(varnames){ > x <- sub("^([0-9])", "_\\1", varnames) > x <- gsub("[^a-zA-Z0-9_]", "_", x) > x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f' > x <- abbreviate(x, minlength = 8) > > if(any(nchar(x) > 8) || any(duplicated(x))) > stop("Cannot uniquely abbreviate format names to conform to ", > " eight-character limit and not ending in a digit") > names(x) <- varnames > x > } > > writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata", > validvarname = c("V7", "V6")){ > factors <- sapply(df, is.factor) > strings <- sapply(df, is.character) > dates <- sapply(df, FUN = function(x) inherits(x, "Date")) > datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct")) > > varlabels <- names(df) > varnames <- make.SAS.names(names(df), validvarname = validvarname) > if (any(varnames != varlabels)) > message("Some variable names were abbreviated or otherwise > altered.") > > > dfn<-df > if (any(factors)) > dfn[factors]<-lapply(dfn[factors], as.numeric) > if (any(datetimes)) > dfn[datetimes] <- lapply(dfn[datetimes], > FUN = function(x) format(x, "%d%b%Y > %H:%M:%S")) > write.table(dfn, file = datafile, row = FALSE, col = FALSE, > sep = ",", quote = TRUE, na = "") > lrecl<-max(sapply(readLines(datafile),nchar))+4 > > cat("* Written by R;\n", file=codefile) > cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE) > if (any(factors)){ > cat("PROC FORMAT;\n",file=codefile,append=TRUE) > fmtnames <- make.SAS.formats(varnames[factors]) > fmt.values <- lapply(df[, factors, drop = FALSE], levels) > names(fmt.values) <- fmtnames > for (f in fmtnames){ > cat("value",f,"\n",file=codefile,append = TRUE) > values<-fmt.values[[f]] > for(i in 1:length(values)){ > cat(" > ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE) > } > cat(";\n\n",file=codefile,append=TRUE) > } > } > > cat("DATA ",dataname,";\n",file=codefile,append=TRUE) > > if (any(strings)){ > cat("LENGTH", file = codefile, append = TRUE) > lengths <- sapply(df[,strings, drop = FALSE], > FUN = function(x) max(nchar(x))) > names(lengths) <- varnames[strings] > for(v in varnames[strings]) > cat("\n", v, "$", lengths[v],file=codefile,append=TRUE) > cat("\n;\n\n", file = codefile, append = TRUE) > } > > if (any(dates)){ > cat("INFORMAT", file = codefile, append = TRUE) > for(v in varnames[dates]) > cat("\n", v, file = codefile, append = TRUE) > cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE) > } > > if (any(datetimes)){ > cat("INFORMAT", file = codefile, append = TRUE) > for(v in varnames[datetimes]) > cat("\n", v, file = codefile, append = TRUE) > cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE) > } > > cat("INFILE ",adQuote(datafile), > "\n DSD", > "\n LRECL=",lrecl,";\n", > file=codefile,append=TRUE) > > cat("INPUT",file=codefile,append=TRUE) > for(v in 1:ncol(df)){ > cat("\n",varnames[v],file=codefile,append=TRUE) > if(strings[v]) > cat(" $ ",file=codefile,append=TRUE) > } > cat("\n;\n",file=codefile,append=TRUE) > > for(v in 1:ncol(df)){ > if (varnames[v] != names(varnames)[v]) > cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n", > file=codefile,append=TRUE) > } > > if (any(factors)){ > for (f in 1:length(fmtnames)) > cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = > ""),";\n", > file=codefile,append=TRUE) > } > > if (any(dates)){ > for(v in varnames[dates]) > cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE) > } > > if (any(datetimes)){ > for(v in varnames[datetimes]) > cat("FORMAT", v, "datetime18.;\n", file = codefile, append = > TRUE) > } > > cat("RUN;\n",file=codefile,append=TRUE) > } > ______________________________________________ > R-devel@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-devel ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel