subject:"Re\: \[R\] select .txt from .txt in a directory"

Re: [R] select .txt from .txt in a directory

2013-11-11 Thread Zilefac Elvis

Thanks, AK.
The three codes worked as expected.
Again, thanks so much for understanding my problem and providing the right 
solutions.
Atem.



On Saturday, November 9, 2013 6:27 PM, arun smartpink...@yahoo.com wrote:
 
HI,

The code could be shortened by using ?merge or ?join().
library(plyr)
##Using the output from `lst6`


lst7 - lapply(lst6,function(x) {x1 - 
data.frame(Year=rep(1961:2005,each=12),Mo=rep(1:12,45)); x2 
-join(x1,x,type=left,by=c(Year,Mo))})

##rest are the same (only change in object names)

 sapply(lst7,nrow)
 lst8 -lapply(lst7,function(x) 
data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) 
  lst9- lapply(seq_along(lst8),function(i){
    x- lst11[[i]]
    colnames(x)- lstf1[i]
    row.names(x)- 1:nrow(x)
    x
  }) 
sapply(lst9,nrow)
res2New - do.call(cbind,lst9)
 dim(res2New)
#[1] 16740    98
res2New[res2New ==-.9]-NA # change missing value identifier as in your 
data set
which(res2New==-.9)
#integer(0)

dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day)
dates2- as.character(dates1)
sldat- split(dates2,list(gsub(-.*,,dates2)))
lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), 
function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) 
{x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)}
 else y} ))
any(sapply(lst12,function(x) any(lapply(x,length)!=31)))
#[1] FALSE

lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE))
sapply(lst22,length)
dates3-unlist(lst22,use.names=FALSE)
length(dates3)
res3New - data.frame(dates=dates3,res2New,stringsAsFactors=FALSE)
str(res3New)
res3New$dates-as.Date(res3New$dates)
res4New - res3New[!is.na(res3New$dates),]
res4New[1:3,1:3]
dim(res4New)
colnames(res4) - colnames(res4New)
 identical(res4,res4New)
#[1] TRUE

A.K.






On Saturday, November 9, 2013 5:46 PM, arun smartpink...@yahoo.com wrote:


Hi,
Try:
library(stringr)
# Created the selected files (98) in a separate working  folder 
(SubsetFiles1) (refer to my previous mail)
filelst - list.files()
#Sublst - filelst[1:2]
res - lapply(filelst,function(x) {con - file(x)
     Lines1 - readLines(con) close(con)
     Lines2 - Lines1[-1]
     Lines3 - str_split(Lines2,-.9M)
     Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA
     paste(x,collapse= )})))
     Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4)
     res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE)
     res1})

##Created another folder Modified to store the res files
lapply(seq_along(res),function(i) 
write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE))

 lstf1 - list.files(path=/home/arunksa111/Zl/Modified)  

lst1 - lapply(lstf1,function(x) 
readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/)))
 which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 )
 #[1]  7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83
#[26] 86 87 90 91 93

lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x))
 #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern

lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})#
#lapply(lst3,function(x) x[grep(\\d+-.9,x)])  ##checking for the pattern
# lapply(lst3,function(x) x[grep(-.9,x)]) ###second check
lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1]))  
#removed the additional header V1, V2, etc.

#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number 
of columns that should be present
lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33
lst6 - lapply(lst5,function(x) 
read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE))
# head(lst6[[94]],3)
lst7 - lapply(lst6,function(x) x[x$Year =1961  x$Year =2005,])
#head(lst7[[45]],3)
 lst8 - lapply(lst7,function(x) x[!is.na(x$Year),])


lst9 - lapply(lst8,function(x) {
    if((min(x$Year)1961)|(max(x$Year)2005)){
  n1- (min(x$Year)-1961)*12
  x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
  n2- (2005-max(x$Year))*12
  x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
   colnames(x1) - colnames(x)
   colnames(x2) - colnames(x)        
  x3- rbind(x1,x,x2)
    }
   else if((min(x$Year)==1961)  (max(x$Year)==2005)) {
      if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){
       n1 - min(x$Mo[x$Year==1961])-1
       x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
       n2 - (12-max(x$Mo[x$Year==2005]))      
       x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
       colnames(x1) - colnames(x)
       colnames(x2) - colnames(x)
       x3 - rbind(x1,x,x2)
      }
        else {    
        x
    }
 
    } })

which(sapply(lst9,nrow)!=540)
#[1] 45 46 54 64 65 66 70 75 97
lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),]
             hx1 - head(x1,1)
             tx1 - tail(x1,1)
             x2 - as.data.frame(matrix(NA, ncol=ncol(x),

Re: [R] select .txt from .txt in a directory

2013-11-09 Thread arun



Hi,
Try:
library(stringr)
# Created the selected files (98) in a separate working  folder 
(SubsetFiles1) (refer to my previous mail)
filelst - list.files()
#Sublst - filelst[1:2]
res - lapply(filelst,function(x) {con - file(x)
     Lines1 - readLines(con) close(con)
     Lines2 - Lines1[-1]
     Lines3 - str_split(Lines2,-.9M)
     Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA
     paste(x,collapse= )})))
     Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4)
     res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE)
     res1})

##Created another folder Modified to store the res files
lapply(seq_along(res),function(i) 
write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE))

 lstf1 - list.files(path=/home/arunksa111/Zl/Modified)  

lst1 - lapply(lstf1,function(x) 
readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/)))
 which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 )
 #[1]  7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83
#[26] 86 87 90 91 93

lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x))
 #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern

lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})#
#lapply(lst3,function(x) x[grep(\\d+-.9,x)])  ##checking for the pattern
# lapply(lst3,function(x) x[grep(-.9,x)]) ###second check
lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1]))  
#removed the additional header V1, V2, etc.

#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number 
of columns that should be present
lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33
lst6 - lapply(lst5,function(x) 
read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE))
# head(lst6[[94]],3)
lst7 - lapply(lst6,function(x) x[x$Year =1961  x$Year =2005,])
#head(lst7[[45]],3)
 lst8 - lapply(lst7,function(x) x[!is.na(x$Year),])


lst9 - lapply(lst8,function(x) {
    if((min(x$Year)1961)|(max(x$Year)2005)){
  n1- (min(x$Year)-1961)*12
  x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
  n2- (2005-max(x$Year))*12
  x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
   colnames(x1) - colnames(x)
   colnames(x2) - colnames(x)        
  x3- rbind(x1,x,x2)
    }
   else if((min(x$Year)==1961)  (max(x$Year)==2005)) {
      if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){
       n1 - min(x$Mo[x$Year==1961])-1
       x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
       n2 - (12-max(x$Mo[x$Year==2005]))      
       x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
       colnames(x1) - colnames(x)
       colnames(x2) - colnames(x)
       x3 - rbind(x1,x,x2)
      }
        else {    
        x
    }
 
    } })

which(sapply(lst9,nrow)!=540)
#[1] 45 46 54 64 65 66 70 75 97
lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),]
             hx1 - head(x1,1)
             tx1 - tail(x1,1)
             x2 - as.data.frame(matrix(NA, ncol=ncol(x), nrow=hx1$Mo-1))
             x3 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=12-tx1$Mo))
             colnames(x2) - colnames(x)
             colnames(x3) - colnames(x)
             if(nrow(x)  540) rbind(x2,x,x3) else x  })
which(sapply(lst10,nrow)!=540)
#integer(0)



lst11 -lapply(lst10,function(x) 
data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) 
  lst12- lapply(seq_along(lst10),function(i){
    x- lst11[[i]]
    colnames(x)- lstf1[i]
    row.names(x)- 1:nrow(x)
    x
  })
res2 -  do.call(cbind,lst11)
 dim(res2)
#[1] 16740    98
 
res2[res2==-.9]-NA # change missing value identifier as in your data set
which(res2==-.9)
#integer(0)

dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day)
dates2- as.character(dates1)
sldat- split(dates2,list(gsub(-.*,,dates2)))
lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), 
function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) 
{x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)}
 else y} ))
any(sapply(lst12,function(x) any(lapply(x,length)!=31)))
#[1] FALSE

lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE))
sapply(lst22,length)
dates3-unlist(lst22,use.names=FALSE)
length(dates3)
res3 - data.frame(dates=dates3,res2,stringsAsFactors=FALSE)
str(res3)
res3$dates-as.Date(res3$dates)
res4 - res3[!is.na(res3$dates),]
res4[1:3,1:3]
dim(res4)
 #[1] 16436    99


A.K.




On Friday, November 8, 2013 5:54 PM, Zilefac Elvis zilefacel...@yahoo.com 
wrote:

Hi Ak,

I think I figured out how to do the sub-setting. All I needed was to use column 
3 in Temperature_inventory and select matching .txt files in the .zip file. The 
final result would be a subset of files whose IDs are in column 3 of 
temp_inventory.
*
I also have this script which you developed

Re: [R] select .txt from .txt in a directory

2013-11-09 Thread arun

HI,

The code could be shortened by using ?merge or ?join().
library(plyr)
##Using the output from `lst6`


lst7 - lapply(lst6,function(x) {x1 - 
data.frame(Year=rep(1961:2005,each=12),Mo=rep(1:12,45)); x2 
-join(x1,x,type=left,by=c(Year,Mo))})

##rest are the same (only change in object names)

 sapply(lst7,nrow)
 lst8 -lapply(lst7,function(x) 
data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) 
  lst9- lapply(seq_along(lst8),function(i){
    x- lst11[[i]]
    colnames(x)- lstf1[i]
    row.names(x)- 1:nrow(x)
    x
  }) 
sapply(lst9,nrow)
res2New - do.call(cbind,lst9)
 dim(res2New)
#[1] 16740    98
res2New[res2New ==-.9]-NA # change missing value identifier as in your 
data set
which(res2New==-.9)
#integer(0)

dates1-seq.Date(as.Date('1Jan1961',format=%d%b%Y),as.Date('31Dec2005',format=%d%b%Y),by=day)
dates2- as.character(dates1)
sldat- split(dates2,list(gsub(-.*,,dates2)))
lst12-lapply(sldat,function(x) lapply(split(x,gsub(.*-(.*)-.*,\\1,x)), 
function(y){x1-as.numeric(gsub(.*-.*-(.*),\\1,y));if((31-max(x1))0) 
{x2-seq(max(x1)+1,31,1);x3-paste0(unique(gsub((.*-.*-).*,\\1,y)),x2);c(y,x3)}
 else y} ))
any(sapply(lst12,function(x) any(lapply(x,length)!=31)))
#[1] FALSE

lst22-lapply(lst12,function(x) unlist(x,use.names=FALSE))
sapply(lst22,length)
dates3-unlist(lst22,use.names=FALSE)
length(dates3)
res3New - data.frame(dates=dates3,res2New,stringsAsFactors=FALSE)
str(res3New)
res3New$dates-as.Date(res3New$dates)
res4New - res3New[!is.na(res3New$dates),]
res4New[1:3,1:3]
dim(res4New)
colnames(res4) - colnames(res4New)
 identical(res4,res4New)
#[1] TRUE

A.K.





On Saturday, November 9, 2013 5:46 PM, arun smartpink...@yahoo.com wrote:


Hi,
Try:
library(stringr)
# Created the selected files (98) in a separate working  folder 
(SubsetFiles1) (refer to my previous mail)
filelst - list.files()
#Sublst - filelst[1:2]
res - lapply(filelst,function(x) {con - file(x)
     Lines1 - readLines(con) close(con)
     Lines2 - Lines1[-1]
     Lines3 - str_split(Lines2,-.9M)
     Lines4 - str_trim(unlist(lapply(Lines3,function(x) {x[x==] - NA
     paste(x,collapse= )})))
     Lines5 - gsub((\\d+)[A-Za-z],\\1,Lines4)
     res1 - read.table(text=Lines5,sep=,header=FALSE,fill=TRUE)
     res1})

##Created another folder Modified to store the res files
lapply(seq_along(res),function(i) 
write.table(res[[i]],paste(/home/arunksa111/Zl/Modified,paste0(Mod_,filelst[i]),sep=/),row.names=FALSE,quote=FALSE))

 lstf1 - list.files(path=/home/arunksa111/Zl/Modified)  

lst1 - lapply(lstf1,function(x) 
readLines(paste(/home/arunksa111/Zl/Modified,x,sep=/)))
 which(lapply(lst1,function(x) length(grep(\\d+-.9,x)))0 )
 #[1]  7 11 14 15 30 32 39 40 42 45 46 53 60 65 66 68 69 70 73 74 75 78 80 82 83
#[26] 86 87 90 91 93

lst2 - lapply(lst1,function(x) gsub((\\d+)(-.9),\\1 \\2,x))
 #lapply(lst2,function(x) x[grep(\\d+-.9,x)]) ##checking for the pattern

lst3 - lapply(lst2,function(x) {x-gsub((-.9)(-.9),\\1 \\2,x)})#
#lapply(lst3,function(x) x[grep(\\d+-.9,x)])  ##checking for the pattern
# lapply(lst3,function(x) x[grep(-.9,x)]) ###second check
lst4 - lapply(lst3,function(x) gsub((Day) (\\d+),\\1_\\2, x[-1]))  
#removed the additional header V1, V2, etc.

#sapply(lst4,function(x) length(strsplit(x[1], )[[1]])) #checking the number 
of columns that should be present
lst5 - lapply(lst4,function(x) unlist(lapply(x, function(y) word(y,1,33
lst6 - lapply(lst5,function(x) 
read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep=,fill=TRUE))
# head(lst6[[94]],3)
lst7 - lapply(lst6,function(x) x[x$Year =1961  x$Year =2005,])
#head(lst7[[45]],3)
 lst8 - lapply(lst7,function(x) x[!is.na(x$Year),])


lst9 - lapply(lst8,function(x) {
    if((min(x$Year)1961)|(max(x$Year)2005)){
  n1- (min(x$Year)-1961)*12
  x1- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
  n2- (2005-max(x$Year))*12
  x2- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
   colnames(x1) - colnames(x)
   colnames(x2) - colnames(x)        
  x3- rbind(x1,x,x2)
    }
   else if((min(x$Year)==1961)  (max(x$Year)==2005)) {
      if((min(x$Mo[x$Year==1961])1)|(max(x$Mo[x$Year==2005])12)){
       n1 - min(x$Mo[x$Year==1961])-1
       x1 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
       n2 - (12-max(x$Mo[x$Year==2005]))      
       x2 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
       colnames(x1) - colnames(x)
       colnames(x2) - colnames(x)
       x3 - rbind(x1,x,x2)
      }
        else {    
        x
    }
 
    } })

which(sapply(lst9,nrow)!=540)
#[1] 45 46 54 64 65 66 70 75 97
lst10 - lapply(lst9,function(x) {x1 - x[!is.na(x$Year),]
             hx1 - head(x1,1)
             tx1 - tail(x1,1)
             x2 - as.data.frame(matrix(NA, ncol=ncol(x), nrow=hx1$Mo-1))
             x3 - as.data.frame(matrix(NA,ncol=ncol(x),nrow=12-tx1$Mo))
             colnames(x2) - colnames(x)
             colnames(x3) - colnames(x)
             if(nrow(x)  540) rbind(x2,x,x3) else x  })

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Sarah Goslee

How do you decide which ones you need?

Is there some pattern that lets you distinguish needing df.txt from
not needing ds.txt?

You say you have the names - how do you have them? In a text file?

What are you trying to do with the text files?

Sarah

On Fri, Nov 8, 2013 at 12:33 PM, Zilefac Elvis zilefacel...@yahoo.com wrote:
 Hi,
 I have 300 .txt files in a directory. Out of this 300, I need just 100 of the 
 files.
 I have the names of the 100 .txt files which are also found in the 300 .txt 
 files.
 How can I extract only the 100 .txt files from the 300 .txt files?

 e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only 
 d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of 
 them with names known.

 Thanks for your great help.
 Atem.

-- 
Sarah Goslee
http://www.functionaldiversity.org

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Simon Zehnder

I do not understand the question. If you already know the names, what is the 
problem with selecting the files by name? 

If you have the names but not inside of R you have to find a name pattern to 
avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? 


On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote:

 Hi,
 I have 300 .txt files in a directory. Out of this 300, I need just 100 of the 
 files.
 I have the names of the 100 .txt files which are also found in the 300 .txt 
 files.
 How can I extract only the 100 .txt files from the 300 .txt files?
 
 e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only 
 d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of 
 them with names known.
 
 Thanks for your great help.
 Atem.
   [[alternative HTML version deleted]]
 
 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Bert Gunter

1. Please don't post in HTML (see posting guide).

2. What do you mean by "extract"?

3. Your question sounds very basic. Have you read "An Introduction to
R" or another online R tutorial? If not, please do so before posting
further. All of R's file input functions allow you to specify the
directory path and/or filename, so if I understand you correctly, it's
just a matter of giving them to the appropriate function in some sort
of loop. e.g. something like

alldat - lapply(filenameList, function(x)InputFunction(x,...))

4. If you need something fancier than is described in the tutorials,
consult the R Data Import/Export manual, please.

-- Bert

On Fri, Nov 8, 2013 at 9:33 AM, Zilefac Elvis zilefacel...@yahoo.com wrote:
 Hi,
 I have 300 .txt files in a directory. Out of this 300, I need just 100 of the 
 files.
 I have the names of the 100 .txt files which are also found in the 300 .txt 
 files.
 How can I extract only the 100 .txt files from the 300 .txt files?

 e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only 
 d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of 
 them with names known.

 Thanks for your great help.
 Atem.
 [[alternative HTML version deleted]]

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.



-- 

Bert Gunter
Genentech Nonclinical Biostatistics

(650) 467-7374

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Zilefac Elvis

All files are text files. They are found in a folder on my computer. 
Assume that I know the names of some of the files I want to select from the 300 
txt files.
How can I do this in R?
Atem.



On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de 
wrote:
 
I do not understand the question. If you already know the names what is the 
problem to select the files by names? 

If you have the names but not inside of R you have to find a name pattern to 
avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? 



On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote:

 Hi,
 I have 300 .txt files in a directory. Out of this 300, I need just 100 of the 
 files.
 I have the names of the 100 .txt files which are also found in the 300 .txt 
 files.
 How can I extract only the 100 .txt files from the 300 .txt files?
 
 e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only 
 d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of 
 them with names known.
 
 Thanks for your great help.
 Atem.
     [[alternative HTML version deleted]]
 
 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.
[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Simon Zehnder

If you want to type in the names by hand, you can simply use read.table to load 
them into R … I still don’t get the aim of your text file handling


On 08 Nov 2013, at 18:51, Zilefac Elvis zilefacel...@yahoo.com wrote:

 All files are text files. They are found in a folder on my computer. 
 Assume that I know the names of some of the files I want to select from the 
 300 txt files.
 How can I do this in R?
 Atem.
 
 
 On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de 
 wrote:
 I do not understand the question. If you already know the names what is the 
 problem to select the files by names? 
 
 If you have the names but not inside of R you have to find a name pattern to 
 avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? 
 
 
 On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote:
 
  Hi,
  I have 300 .txt files in a directory. Out of this 300, I need just 100 of 
  the files.
  I have the names of the 100 .txt files which are also found in the 300 .txt 
  files.
  How can I extract only the 100 .txt files from the 300 .txt files?
  
  e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select only 
  d1.txt and df.txt? Remember, I have 300 of such and want to extract 100 of 
  them with names known.
  
  Thanks for your great help.
  Atem.
 
  [[alternative HTML version deleted]]
  
  __
  R-help@r-project.org mailing list
  https://stat.ethz.ch/mailman/listinfo/r-help
  PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
  and provide commented, minimal, self-contained, reproducible code.
 
 
 

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread Simon Zehnder

Elvis,

first, keep things on the list - so others can learn and comment. Second, as 
Sarah already commented: We do not like to open unsolicited binary attachments 
on the list. Sarah gives a good hint how to post data to the list.

What I would do so far is use the matching columns to get the names you need 
from BTemperature: 

temp_inv - read.table(Temperature Inventory, … ) (here I would change the 
.xlsx to a .csv and use read.csv instead of read.table)
btemp - read.table(“BTemperature_Stations.txt”, … ) (again think about 
converting via Excel to .csv - it makes things far more easy) 

Check ?read.table for options - you are going to need them.

Then match
mynames - btemp[(temp_inv[,3] %in% btemp[, 3]), 2]

Now you have the names of the stations and if your .txt files are named by the 
stations you can do something like:

for (name in mynames) {
tmp.table - read.table(paste(“path/to/your/Homog_daily_min_temp/“, name, 
“.txt”, sep = “”), … )
…. do things with the data
}



Best

Simon
 
On 08 Nov 2013, at 19:26, Zilefac Elvis zilefacel...@yahoo.com wrote:

 Hi Simon,
 Attached are my data files.
 Btemperature_Stations is my main file.
 Temperature inventory is my 'wanted' file and is a subset of 
 Btemperature_Stations.
 Using column 3 in both files, select the files in Temperature inventory from 
 Btemperature_Stations.
 The .zip file contains the .txt files which you will extract to a folder and 
 do the selection in R.
 
 Thanks,
 Atem.
  
 
 
 
 
 On Friday, November 8, 2013 11:54 AM, Simon Zehnder szehn...@uni-bonn.de 
 wrote:
 If you want to type in the names by hand, you can simply use read.table to 
 load them into R … I still don’t get the aim of your text file handling
 
 
 On 08 Nov 2013, at 18:51, Zilefac Elvis zilefacel...@yahoo.com wrote:
 
  All files are text files. They are found in a folder on my computer. 
  Assume that I know the names of some of the files I want to select from the 
  300 txt files.
  How can I do this in R?
  Atem.
  
  
  On Friday, November 8, 2013 11:44 AM, Simon Zehnder szehn...@uni-bonn.de 
  wrote:
  I do not understand the question. If you already know the names what is the 
  problem to select the files by names? 
  
  If you have the names but not inside of R you have to find a name pattern 
  to avoid typing them in. Is there a pattern, e.g. da.txt, db.txt, dc.txt? 
  
  
  On 08 Nov 2013, at 18:33, Zilefac Elvis zilefacel...@yahoo.com wrote:
  
   Hi,
   I have 300 .txt files in a directory. Out of this 300, I need just 100 of 
   the files.
   I have the names of the 100 .txt files which are also found in the 300 
   .txt files.
   How can I extract only the 100 .txt files from the 300 .txt files?
   
   e.g given d1.txt, ds.txt, dx.txt, df.txt...d300.txt, how can I select 
   only d1.txt and df.txt? Remember, I have 300 of such and want to extract 
   100 of them with names known.
   
   Thanks for your great help.
   Atem.
  
  [[alternative HTML version deleted]]
   
   __
   R-help@r-project.org mailing list
   https://stat.ethz.ch/mailman/listinfo/r-help
   PLEASE do read the posting guide 
   http://www.R-project.org/posting-guide.html
   and provide commented, minimal, self-contained, reproducible code.
  
  
  
 
 
 BTemperature_Stations.txtTempearture 
 inventory.xlsxHomog_daily_min_temp.zip

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

2013-11-08 Thread arun



Hi Atem,

It is not clear what you wanted to do.  If you want to transfer the subset of 
files from the main folder to a new location, then you may try: (make sure you 
create a copy of the original .txt folder before doing this)
I created three sub folders and two files (BTemperature_Stations.txt and 
Tempearture inventory.csv) in my working directory.


list.files()
#[1] BTemperature_Stations.txt Files1  ## Files1 folder contains 
all the .txt files; #SubsetFiles: created to subset the files that match the 
condition 
#[3] FilesCopy SubsetFiles1  #FilesCopy. A copy of 
the Files1 folder   
#[5] Tempearture inventory.csv


 

list.files(pattern=\\.)
#[1] BTemperature_Stations.txt Tempearture inventory.csv
fl1 - list.files(pattern=\\.)
 dat1 - 
read.table(fl1[1],header=TRUE,sep=,stringsAsFactors=FALSE,fill=TRUE,check.names=FALSE)
 dat2 - 
read.csv(fl1[2],header=TRUE,sep=,,stringsAsFactors=FALSE,check.names=FALSE)
vec1 - dat1[,3][dat1[,3]%in% dat2[,3]]
vec2 - list.files(path=/home/arunksa111/Zl/Files1,recursive=TRUE)
 sum(gsub(.txt,,vec2) %in% vec1)
#[1] 98
vec3 -  vec2[gsub(.txt,,vec2) %in% vec1]
lapply(vec3, function(x) 
file.rename(paste(/home/arunksa111/Zl/Files1,x,sep=/), 
paste(/home/arunksa111/Zl/SubsetFiles1,x,sep=/))) #change the path 
accordingly. 
length(list.files(path=/home/arunksa111/Zl/SubsetFiles1))
#[1] 98

fileDim - sapply(vec3,function(x) {x1 
-read.delim(paste(/home/arunksa111/Zl/SubsetFiles1,x,sep=/),header=TRUE,stringsAsFactors=FALSE,sep=,,check.names=FALSE);
 dim(x1)})
fileDim[,1:3]
# dn3011120.txt dn3011240.txt dn3011887.txt
#[1,]  1151   791  1054
#[2,] 7 7 7


A.K.





On Friday, November 8, 2013 1:41 PM, Zilefac Elvis zilefacel...@yahoo.com 
wrote:

Hi AK,


I want to select some files from a list of files. All are text files. The index 
for selection is found in column 3 of both files.


Attached are my data files.
Btemperature_Stations is my main file.
Temperature inventory is my 'wanted' file and is a subset of 
Btemperature_Stations.
Using column 3 in both files, select the files in Temperature inventory from 
Btemperature_Stations.
The .zip file contains the .txt files which you will extract to a folder and do 
the selection in R.

Thanks,
Atem.

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

Re: [R] select .txt from .txt in a directory

10 matches

Site Navigation

Mail list logo

Footer information