I am reading several hundred files, each anywhere from 50k to 400k in size. When I read these files with R 2.15.1, the process hangs or segfaults on the scan() call. This does not happen with R 2.14.1.
This is happening on the precise build of Ubuntu. I have included everything, but the issue appears to be when performing the scan in the method parseTickData. Below is the code. Hopefully this is the right place to post.

# Read every tick file in a directory, bucket each one onto a common
# per-second trading-day template, apply `fun` to each bucketed series and
# merge the results column-wise.
#
# Args:
#   tickerDir: directory containing one tick-data CSV file per ticker.
#   per:       `period` argument passed on to to.period().
#   subper:    `k` argument passed on to to.period().
#   fun:       function applied to each bucketed series before merging.
#
# Returns:
#   The merged object, with one column name per file taken from the file
#   names. Stops with an error if the directory contains no files.
parseTickDataFromDir <- function(tickerDir, per, subper, fun) {
  tickerAbsFilenames <- list.files(tickerDir, full.names = TRUE)
  tickerNames <- list.files(tickerDir, full.names = FALSE)
  tickerNames <- gsub("_[a-zA-Z0-9].csv", "", tickerNames)

  # txtProgressBar() rejects max == min, so fail early with a clear message.
  if (length(tickerAbsFilenames) == 0) {
    stop("no tick data files found in ", tickerDir, call. = FALSE)
  }

  pb <- txtProgressBar(min = 0, max = length(tickerAbsFilenames), style = 3)
  # Close the progress bar even if one of the files fails to parse.
  on.exit(close(pb), add = TRUE)

  for (i in seq_along(tickerAbsFilenames)) {
    # Grab raw tick data
    dat.i <- parseTickData(tickerAbsFilenames[i])

    # Create template: one timestamp per second, starting at 09:30:00 on
    # every date present in the data and running for 23400 seconds
    # (i.e. through 16:00:00).
    dates <- unique(substr(as.character(index(dat.i)), 1, 10))
    times <- rep("09:30:00", length(dates))
    openDateTimes <- strptime(paste(dates, times), "%F %H:%M:%S")
    # Build all per-day sequences first and concatenate once, instead of
    # growing the vector with c() on every iteration.
    templateTimes <- do.call(
      c,
      lapply(seq_along(openDateTimes), function(j) {
        openDateTimes[j] + 0:23400
      })
    )

    # Convert templateTimes to xts and merge with the data; seconds with no
    # tick become NA.
    templateTimes <- as.xts(templateTimes)
    dat.i <- merge(dat.i, templateTimes, all = TRUE)

    # If there is no data in the first print, we will have leading NA's.
    # Set the first one to -1 so the values are not removed by to.period.
    if (is.na(dat.i[1])) {
      dat.i[1] <- -1
    }

    # Fix remaining NA's by carrying the last observation forward
    dat.i <- na.locf(dat.i)

    # Convert to desired bucket size
    dat.i <- to.period(dat.i, period = per, k = subper, name = NULL)

    # Always use templated index, otherwise merge fails with other symbols
    index(dat.i) <- index(to.period(templateTimes, period = per, k = subper))

    # If there was missing data at open (the -1 sentinel), set close to NA
    valsToChange <- which(dat.i[, "Open"] == -1)
    if (length(valsToChange) != 0) {
      dat.i[valsToChange, "Close"] <- NA
    }

    if (i == 1) {
      DAT <- fun(dat.i)
    } else {
      DAT <- merge(DAT, fun(dat.i))
    }

    setTxtProgressBar(pb, i)
  }

  colnames(DAT) <- tickerNames
  DAT
}

# Parse one tick-data CSV file into an xts series of closing prices.
#
# The first line of the file is skipped; the remaining lines are read as
# four comma-separated fields: Date, Time, Close, Volume. Date and Time are
# parsed with the format "%m/%d/%Y %H:%M:%S". Only Close is kept.
#
# Args:
#   inputFile: path of the CSV file to read.
#
# Returns:
#   An xts object of Close values indexed by timestamp, passed through
#   make.index.unique().
parseTickData <- function(inputFile) {
  DAT.list <- scan(
    file = inputFile,
    sep = ",",
    skip = 1,
    what = list(Date = "", Time = "", Close = 0, Volume = 0),
    quiet = TRUE
  )
  # Named tickTimes rather than `index` to avoid shadowing index().
  tickTimes <- as.POSIXct(
    paste(DAT.list$Date, DAT.list$Time),
    format = "%m/%d/%Y %H:%M:%S"
  )
  DAT.xts <- xts(DAT.list$Close, tickTimes)
  make.index.unique(DAT.xts)
}

	[[alternative HTML version deleted]]

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel