It is still not clear what solution you would consider a success. On the one hand, you said you needed the NULLs, but on the other hand you also want one big data frame.
Does refill <- refill[ -which( sapply( refill, is.null ), arr.ind=TRUE ) ]; refill <- as.data.frame( refill ) do what you want? If you need to keep the nulls, perhaps don't overwrite the refill list? --------------------------------------------------------------------------- Jeff Newmiller The ..... ..... Go Live... DCN:<jdnew...@dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go... Live: OO#.. Dead: OO#.. Playing Research Engineer (Solar/Batteries O.O#. #.O#. with /Software/Embedded Controllers) .OO#. .OO#. rocks...1k --------------------------------------------------------------------------- Sent from my phone. Please excuse my brevity. jeff6868 <geoffrey_kl...@etu.u-bourgogne.fr> wrote: >Ok Jeff, but then it'll be a big one. I'm working on a list of files >and my >problem depends on different functions used previously. So it's very >hard >for me to summarize to reproduce my error. But here is the >reproducible >example with the error at the last line of the code (just copy and >paste >it). >You'll notice that the data.frame with only NAs is set to NULL in >"refill", >and I just want to have it unchanged in output (so the same as input). >The aim of the function is to fill the NAs of my data.frames. It'll not >work >in this example because there're only big NA gaps which are my problem >for >the moment. But maybe now you can have an idea where the problem is >(change >NULL for "only NA DF" in output to the same DF as in input). >For the example, we are just testing for "x1". >Hope you have understood my problem now :) >Thanks Jeff, Rui or everyone else! 
> ># my data for example >DF1 <- data.frame(x1=rnorm(1:20),x2=c(31:50)) >write.table(DF1,"ST001_2008.csv",sep=";") >DF2 <- >data.frame(x1=c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,rnorm(1:10)),x2=c(1:20)) >write.table(DF2,"ST002_2008.csv",sep=";") >DF3 <- data.frame(x1=rnorm(81:100),x2=NA) >write.table(DF3,"ST003_2008.csv",sep=";") >DF4 <- data.frame(x1=c(21:40),x2=rnorm(1:20)) >write.table(DF4,"ST004_2008.csv",sep=";") > > #list my data > filenames <- list.files(pattern="\\_2008.csv$") > > Sensors <- paste("x", 1:2,sep="") > > Stations <-substr(filenames,1,5) > > nsensors <- length(Sensors) > nstations <- length(Stations) > > nobs <- nrow(read.table(filenames[1], header=TRUE)) > > yr2008 <- array(NA, dim=c(nobs, nsensors, nstations)) > > for(i in seq_len(nstations)){ > tmp <- read.table(filenames[i], header=TRUE, sep=";") > yr2008[ , , i] <- as.matrix(tmp[, Sensors]) > } > > dimnames(yr2008) <- list(seq.int(nobs), Sensors, Stations) > > yr2008capt1hiver<-yr2008[1:10,1,] > yr2008capt1hiver <- as.data.frame(yr2008capt1hiver) > > #correlation between my data for x1 (for the example) > corhiver2008capt1 <- cor(yr2008capt1hiver,use="pairwise.complete.obs") > > capt1hiver <- c(1:length(yr2008capt1hiver)) > > for(i in 1:length(capt1hiver)) > { > >if(sum(!is.na(yr2008capt1hiver[,capt1hiver[i]]))<(length(yr2008capt1hiver[[capt1hiver[i]]])/2)) > { > corhiver2008capt1[i,]=NA > corhiver2008capt1[,i]=NA > } > } > > > lst <- lapply(list.files(pattern="\\_2008.csv$"), read.table,sep=";", >header=TRUE, stringsAsFactors=FALSE) > names(lst) <- Stations > > # searching the highest correlation for each data.Frame > get.max.cor <- function(station, mat){ > mat[row(mat) == col(mat)] <- -Inf > m <- max(mat[station, ],na.rm=TRUE) > if (is.finite(m)) {return(which( mat[station, ] == m ))} > else {return(NA)} > } > > # fill the data.frame with the data.frame which has the highest >correlation coefficient > na.fill <- function(x, y){ > if(all(!is.finite(y[1:10,1]))) return(y) > i <- is.na(x[1:10,1]) 
> xx <- y[1:10,1] > new <- data.frame(xx=xx) > x[1:10,1][i] <- predict(lm(x[1:10,1]~xx, na.action=na.exclude),new)[i] > x > } > > process.all <- function(df.list, mat){ > > f <- function(station) > na.fill(df.list[[ station ]], df.list[[ max.cor[station] ]]) > > g <- function(station){ > x <- df.list[[station]] > if(any(!is.finite(x[1:10,1]))){ > mat[row(mat) == col(mat)] <- -Inf > nas <- which(is.na(x[1:10,1])) > ord <- order(mat[station, ], decreasing = TRUE)[-c(1, >ncol(mat))] > for(y in ord){ > if(all(!is.na(df.list[[y]][1:10,1][nas]))){ > xx <- df.list[[y]][1:10,1] > new <- data.frame(xx=xx) > x[1:10,1][nas] <- predict(lm(x[1:10,1]~xx, >na.action=na.exclude), new)[nas] > break > } > } > } > x > } > > n <- length(df.list) > nms <- names(df.list) > max.cor <- sapply(seq.int(n), get.max.cor, corhiver2008capt1) > df.list <- lapply(seq.int(n), f) > df.list <- lapply(seq.int(n), g) > names(df.list) <- nms > df.list > } > > refill <- process.all(lst, corhiver2008capt1) >refill <- as.data.frame(refill) > >########## HERE IS THE PROBLEM ###### > head(refill) > >-- >View this message in context: >http://r.789695.n4.nabble.com/how-to-ignore-NA-with-NA-or-NULL-tp4632287p4632527.html >Sent from the R help mailing list archive at Nabble.com. > >______________________________________________ >R-help@r-project.org mailing list >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.