Re: [R] select .txt from .txt in a directory
Thanks, AK. The three codes worked as expected. Again, thanks so much for understanding my problem and proving the right solutions. Atem. On Saturday, November 9, 2013 6:27 PM, arun smartpink...@yahoo.com wrote: HI, The code could be shortened by using ?merge or ?join(). library(plyr) ##Using the output from `lst6` lst7 - lapply(lst6,function(x) {x1 - data.frame(Year=rep(1961:2005,each=12),Mo=rep(1:12,45)); x2 -join(x1,x,type=left,by=c(Year,Mo))}) ##rest are the same (only change in object names) sapply(lst7,nrow) lst8 -lapply(lst7,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) lst9- lapply(seq_along(lst8),function(i){ x- lst11[[i]] colnames(x)- lstf1[i] row.names(x)- 1:nrow(x) x }) sapply(lst9,nrow) res2New - do.call(cbind,lst9) dim(res2New) #[1] 16740 98 res2New[res2New ==-.9]-NA # change missing value identifier as in your data set which(res2New==-.9) #integer(0) dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day) dates2- as.character(dates1) sldat- split(dates2,list(gsub(-.*,,dates2))) lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) {x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)} else y} )) any(sapply(lst12,function(x) any(lapply(x,length)!=31))) #[1] FALSE lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE)) sapply(lst22,length) dates3-unlist(lst22,use.names=FALSE) length(dates3) res3New - data.frame(dates=dates3,res2New,stringsAsFactors=FALSE) str(res3New) res3New$dates-as.Date(res3New$dates) res4New - res3New[!is.na(res3New$dates),] res4New[1:3,1:3] dim(res4New) colnames(res4) - colnames(res4New) identical(res4,res4New) #[1] TRUE A.K. 
On Saturday, November 9, 2013 5:46 PM, arun smartpink...@yahoo.com wrote: Hi, Try: library(stringr) # Created the selected files (98) in a separate working folder (SubsetFiles1) (refer to my previous mail) filelst - list.files() #Sublst - filelst[1:2] res - lapply(filelst,function(x) {con - file(x) Lines1 - readLines(con) close(con) Lines2 - Lines1[-1] Lines3 - str_split(Lines2,-.9M) Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA paste(x,collapse= )}))) Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4) res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE) res1}) ##Created another folder Modified to store the res files lapply(seq_along(res),function(i) write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE)) lstf1 - list.files(path=/home/arunksa111/Zl/Modified) lst1 - lapply(lstf1,function(x) readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/))) which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 ) #[1] 7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83 #[26] 86 87 90 91 93 lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x)) #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})# #lapply(lst3,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern # lapply(lst3,function(x) x[grep(-.9,x)]) ###second check lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1])) #removed the additional header V1, V2, etc. 
#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number of columns that should be present lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33 lst6 - lapply(lst5,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE)) # head(lst6[[94]],3) lst7 - lapply(lst6,function(x) x[x$Year =1961 x$Year =2005,]) #head(lst7[[45]],3) lst8 - lapply(lst7,function(x) x[!is.na(x$Year),]) lst9 - lapply(lst8,function(x) { if((min(x$Year)1961)|(max(x$Year)2005)){ n1- (min(x$Year)-1961)*12 x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2- (2005-max(x$Year))*12 x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3- rbind(x1,x,x2) } else if((min(x$Year)==1961) (max(x$Year)==2005)) { if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){ n1 - min(x$Mo[x$Year==1961])-1 x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2 - (12-max(x$Mo[x$Year==2005])) x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3 - rbind(x1,x,x2) } else { x } } }) which(sapply(lst9,nrow)!=540) #[1] 45 46 54 64 65 66 70 75 97 lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),] hx1 - head(x1,1) tx1 - tail(x1,1) x2 - as.data.frame(matrix(NA, ncol=ncol(x),
Re: [R] select .txt from .txt in a directory
Hi, Try: library(stringr) # Created the selected files (98) in a separate working folder (SubsetFiles1) (refer to my previous mail) filelst - list.files() #Sublst - filelst[1:2] res - lapply(filelst,function(x) {con - file(x) Lines1 - readLines(con) close(con) Lines2 - Lines1[-1] Lines3 - str_split(Lines2,-.9M) Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA paste(x,collapse= )}))) Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4) res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE) res1}) ##Created another folder Modified to store the res files lapply(seq_along(res),function(i) write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE)) lstf1 - list.files(path=/home/arunksa111/Zl/Modified) lst1 - lapply(lstf1,function(x) readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/))) which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 ) #[1] 7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83 #[26] 86 87 90 91 93 lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x)) #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})# #lapply(lst3,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern # lapply(lst3,function(x) x[grep(-.9,x)]) ###second check lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1])) #removed the additional header V1, V2, etc. 
#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number of columns that should be present lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33 lst6 - lapply(lst5,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE)) # head(lst6[[94]],3) lst7 - lapply(lst6,function(x) x[x$Year =1961 x$Year =2005,]) #head(lst7[[45]],3) lst8 - lapply(lst7,function(x) x[!is.na(x$Year),]) lst9 - lapply(lst8,function(x) { if((min(x$Year)1961)|(max(x$Year)2005)){ n1- (min(x$Year)-1961)*12 x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2- (2005-max(x$Year))*12 x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3- rbind(x1,x,x2) } else if((min(x$Year)==1961) (max(x$Year)==2005)) { if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){ n1 - min(x$Mo[x$Year==1961])-1 x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2 - (12-max(x$Mo[x$Year==2005])) x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3 - rbind(x1,x,x2) } else { x } } }) which(sapply(lst9,nrow)!=540) #[1] 45 46 54 64 65 66 70 75 97 lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),] hx1 - head(x1,1) tx1 - tail(x1,1) x2 - as.data.frame(matrix(NA, ncol=ncol(x), nrow=hx1$Mo-1)) x3 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=12-tx1$Mo)) colnames(x2) - colnames(x) colnames(x3) - colnames(x) if(nrow(x) 540) rbind(x2,x,x3) else x }) which(sapply(lst10,nrow)!=540) #integer(0) lst11 -lapply(lst10,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) lst12- lapply(seq_along(lst10),function(i){ x- lst11[[i]] colnames(x)- lstf1[i] row.names(x)- 1:nrow(x) x }) res2 - do.call(cbind,lst11) dim(res2) #[1] 16740 98 res2[res2==-.9]-NA # change missing value identifier as in your data set which(res2==-.9) #integer(0) dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day) dates2- 
as.character(dates1) sldat- split(dates2,list(gsub(-.*,,dates2))) lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) {x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)} else y} )) any(sapply(lst12,function(x) any(lapply(x,length)!=31))) #[1] FALSE lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE)) sapply(lst22,length) dates3-unlist(lst22,use.names=FALSE) length(dates3) res3 - data.frame(dates=dates3,res2,stringsAsFactors=FALSE) str(res3) res3$dates-as.Date(res3$dates) res4 - res3[!is.na(res3$dates),] res4[1:3,1:3] dim(res4) #[1] 16436 99 A.K. On Friday, November 8, 2013 5:54 PM, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi Ak, I think I figured out how to do the sub-setting. All I needed was to use column 3 in Temperature_inventory and select matching .txt files in the .zip file. The final result would be a subset of files whose IDs are in column 3 of temp_inventory. * I also have this script which you developed
Re: [R] select .txt from .txt in a directory
HI, The code could be shortened by using ?merge or ?join(). library(plyr) ##Using the output from `lst6` lst7 - lapply(lst6,function(x) {x1 - data.frame(Year=rep(1961:2005,each=12),Mo=rep(1:12,45)); x2 -join(x1,x,type=left,by=c(Year,Mo))}) ##rest are the same (only change in object names) sapply(lst7,nrow) lst8 -lapply(lst7,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) lst9- lapply(seq_along(lst8),function(i){ x- lst11[[i]] colnames(x)- lstf1[i] row.names(x)- 1:nrow(x) x }) sapply(lst9,nrow) res2New - do.call(cbind,lst9) dim(res2New) #[1] 16740 98 res2New[res2New ==-.9]-NA # change missing value identifier as in your data set which(res2New==-.9) #integer(0) dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day) dates2- as.character(dates1) sldat- split(dates2,list(gsub(-.*,,dates2))) lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) {x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)} else y} )) any(sapply(lst12,function(x) any(lapply(x,length)!=31))) #[1] FALSE lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE)) sapply(lst22,length) dates3-unlist(lst22,use.names=FALSE) length(dates3) res3New - data.frame(dates=dates3,res2New,stringsAsFactors=FALSE) str(res3New) res3New$dates-as.Date(res3New$dates) res4New - res3New[!is.na(res3New$dates),] res4New[1:3,1:3] dim(res4New) colnames(res4) - colnames(res4New) identical(res4,res4New) #[1] TRUE A.K. 
On Saturday, November 9, 2013 5:46 PM, arun smartpink...@yahoo.com wrote: Hi, Try: library(stringr) # Created the selected files (98) in a separate working folder (SubsetFiles1) (refer to my previous mail) filelst - list.files() #Sublst - filelst[1:2] res - lapply(filelst,function(x) {con - file(x) Lines1 - readLines(con) close(con) Lines2 - Lines1[-1] Lines3 - str_split(Lines2,-.9M) Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA paste(x,collapse= )}))) Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4) res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE) res1}) ##Created another folder Modified to store the res files lapply(seq_along(res),function(i) write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE)) lstf1 - list.files(path=/home/arunksa111/Zl/Modified) lst1 - lapply(lstf1,function(x) readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/))) which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 ) #[1] 7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83 #[26] 86 87 90 91 93 lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x)) #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})# #lapply(lst3,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern # lapply(lst3,function(x) x[grep(-.9,x)]) ###second check lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1])) #removed the additional header V1, V2, etc. 
#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number of columns that should be present lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33 lst6 - lapply(lst5,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE)) # head(lst6[[94]],3) lst7 - lapply(lst6,function(x) x[x$Year =1961 x$Year =2005,]) #head(lst7[[45]],3) lst8 - lapply(lst7,function(x) x[!is.na(x$Year),]) lst9 - lapply(lst8,function(x) { if((min(x$Year)1961)|(max(x$Year)2005)){ n1- (min(x$Year)-1961)*12 x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2- (2005-max(x$Year))*12 x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3- rbind(x1,x,x2) } else if((min(x$Year)==1961) (max(x$Year)==2005)) { if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){ n1 - min(x$Mo[x$Year==1961])-1 x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2 - (12-max(x$Mo[x$Year==2005])) x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) colnames(x1) - colnames(x) colnames(x2) - colnames(x) x3 - rbind(x1,x,x2) } else { x } } }) which(sapply(lst9,nrow)!=540) #[1] 45 46 54 64 65 66 70 75 97 lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),] hx1 - head(x1,1) tx1 - tail(x1,1) x2 - as.data.frame(matrix(NA, ncol=ncol(x), nrow=hx1$Mo-1)) x3 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=12-tx1$Mo)) colnames(x2) - colnames(x) colnames(x3) - colnames(x) if(nrow(x) 540) rbind(x2,x,x3) else x })
Re: [R] select .txt from .txt in a directory
How do you decide which ones you need? Is there some pattern that lets you distinguish needing df.txt from not needing ds.txt? You say you have the names - how do you have them? In a text file? What are you trying to do with the text files? Sarah On Fri, Nov 8, 2013 at 12:33 PM, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. -- Sarah Goslee http://www.functionaldiversity.org __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
I do not understand the question. If you already know the names, what is the problem with selecting the files by name? If you have the names but not inside of R, you have to find a name pattern to avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
1. Please don't post in HTML (see posting guide). 2. What do you mean by "extract"? 3. Your question sounds very basic. Have you read "An Introduction to R" or another online R tutorial? If not, please do so before posting further. All of R's file input functions allow you to specify the directory path and/or filename, so if I understand you correctly, it's just a matter of giving them to the appropriate function in some sort of loop, e.g. something like: alldat <- lapply(filenameList, function(x) InputFunction(x, ...)) 4. If you need something fancier than is described in the tutorials, consult the "R Data Import/Export" manual, please. -- Bert On Fri, Nov 8, 2013 at 9:33 AM, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. -- Bert Gunter Genentech Nonclinical Biostatistics (650) 467-7374 __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
All files are text files. They are found in a folder on my computer. Assume that I know the names of some of the files I want to select from the 300 .txt files. How can I do this in R? Atem. On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de wrote: I do not understand the question. If you already know the names, what is the problem with selecting the files by name? If you have the names but not inside of R, you have to find a name pattern to avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
If you want to type in the names by hand, you can simply use read.table to load them into R ... I still don't get the aim of your text file handling. On 08 Nov 2013, at 18:51, Zilefac Elvis zilefacel...@yahoo.com wrote: All files are text files. They are found in a folder on my computer. Assume that I know the names of some of the files I want to select from the 300 .txt files. How can I do this in R? Atem. On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de wrote: I do not understand the question. If you already know the names, what is the problem with selecting the files by name? If you have the names but not inside of R, you have to find a name pattern to avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
Elvis, first, keep things on the list - so others can learn and comment. Second, as Sarah already commented: We do not like to open unsolicited binary attachments on the list. Sarah gives a good hint how to post data to the list. What I would do so far is use the matching columns to get the names you need from BTemperature: temp_inv <- read.table("Temperature Inventory", ...) (here I would change the .xlsx to a .csv and use read.csv instead of read.table) btemp <- read.table("BTemperature_Stations.txt", ...) (again, think about converting via Excel to .csv - it makes things far easier). Check ?read.table for options - you are going to need them. Then match: mynames <- btemp[(temp_inv[,3] %in% btemp[, 3]), 2] Now you have the names of the stations, and if your .txt files are named by the stations you can do something like: for (name in mynames) { tmp.table <- read.table(paste("path/to/your/Homog_daily_min_temp/", name, ".txt", sep = ""), ...) ... do things with the data } Best Simon On 08 Nov 2013, at 19:26, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi Simon, Attached are my data files. Btemperature_Stations is my main file. Temperature inventory is my 'wanted' file and is a subset of Btemperature_Stations. Using column 3 in both files, select the files in Temperature inventory from Btemperature_Stations. The .zip file contains the .txt files which you will extract to a folder and do the selection in R. Thanks, Atem. On Friday, November 8, 2013 11:54 AM, Simon Zehnder szehn...@uni-bonn.de wrote: If you want to type in the names by hand, you can simply use read.table to load them into R ... I still don't get the aim of your text file handling. On 08 Nov 2013, at 18:51, Zilefac Elvis zilefacel...@yahoo.com wrote: All files are text files. They are found in a folder on my computer. Assume that I know the names of some of the files I want to select from the 300 .txt files. How can I do this in R? Atem. 
On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de wrote: I do not understand the question. If you already know the names, what is the problem with selecting the files by name? If you have the names but not inside of R, you have to find a name pattern to avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi, I have 300 .txt files in a directory. Out of these 300, I need just 100 of the files. I have the names of the 100 .txt files which are also found in the 300 .txt files. How can I extract only the 100 .txt files from the 300 .txt files? e.g., given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of them with names known. Thanks for your great help. Atem. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Attachments: BTemperature_Stations.txt, Tempearture inventory.xlsx, Homog_daily_min_temp.zip __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] select .txt from .txt in a directory
Hi Atem, It is not clear what you wanted to do. If you want to transfer the subset of files from the main folder to a new location, then you may try: (make sure you create a copy of the original .txt folder before doing this) I created three sub folders and two files (BTemperature_Stations.txt and Tempearture inventory.csv) in my working directory. list.files() #[1] "BTemperature_Stations.txt" "Files1" ## Files1 folder contains all the .txt files; #SubsetFiles: created to subset the files that match the condition #[3] "FilesCopy" "SubsetFiles1" #FilesCopy. A copy of the Files1 folder #[5] "Tempearture inventory.csv" list.files(pattern="\\.") #[1] "BTemperature_Stations.txt" "Tempearture inventory.csv" fl1 <- list.files(pattern="\\.") dat1 <- read.table(fl1[1],header=TRUE,sep="",stringsAsFactors=FALSE,fill=TRUE,check.names=FALSE) dat2 <- read.csv(fl1[2],header=TRUE,sep=",",stringsAsFactors=FALSE,check.names=FALSE) vec1 <- dat1[,3][dat1[,3]%in% dat2[,3]] vec2 <- list.files(path="/home/arunksa111/Zl/Files1",recursive=TRUE) sum(gsub(".txt","",vec2) %in% vec1) #[1] 98 vec3 <- vec2[gsub(".txt","",vec2) %in% vec1] lapply(vec3, function(x) file.rename(paste("/home/arunksa111/Zl/Files1",x,sep="/"), paste("/home/arunksa111/Zl/SubsetFiles1",x,sep="/"))) #change the path accordingly. length(list.files(path="/home/arunksa111/Zl/SubsetFiles1")) #[1] 98 fileDim <- sapply(vec3,function(x) {x1 <- read.delim(paste("/home/arunksa111/Zl/SubsetFiles1",x,sep="/"),header=TRUE,stringsAsFactors=FALSE,sep=",",check.names=FALSE); dim(x1)}) fileDim[,1:3] # dn3011120.txt dn3011240.txt dn3011887.txt #[1,] 1151 791 1054 #[2,] 7 7 7 A.K. On Friday, November 8, 2013 1:41 PM, Zilefac Elvis zilefacel...@yahoo.com wrote: Hi AK, I want to select some files from a list of files. All are text files. The index for selection is found in column 3 of both files. Attached are my data files. Btemperature_Stations is my main file. Temperature inventory is my 'wanted' file and is a subset of Btemperature_Stations. 
Using column 3 in both files, select the files in Temperature inventory from Btemperature_Stations. The .zip file contains the .txt files which you will extract to a folder and do the selection in R. Thanks, Atem. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.