Re: [julia-users] Slow reading of file

Ford Ox Sat, 14 May 2016 05:31:54 -0700

# Holds the tokens obtained by splitting one string, together with an integer
# position that the constructor initialises to 0.
type Tokenizer
    tokens::Vector{ASCIIString}  # pieces produced by split(strip(s))
    index::Int                   # starts at 0

    # Build a tokenizer from `s`: strip it, split it, and set the index to 0.
    # NOTE(review): `split` presumably yields substrings that `new` has to
    # convert to ASCIIString to match the field type -- confirm whether that
    # per-token conversion is part of the remaining slowdown.
    function Tokenizer(s::ASCIIString)
        words = split(strip(s))
        return new(words, 0)
    end
end


Julia still takes 11 seconds to run...

Dne sobota 14. května 2016 14:08:48 UTC+2 Milan Bouchet-Valat napsal(a):
>
> Le samedi 14 mai 2016 à 05:01 -0700, Ford Ox a écrit : 
> > Fixed. Julia now takes 11 seconds to finish 
> > type Tokenizer 
> >     tokens::Array{AbstractString, 1} 
> >     index::Int 
> >     Tokenizer(s::AbstractString) = new(split(strip(s)), 0) 
> > end 
> > 
> > type Buffer 
> >     stream::IOStream 
> >     tokenizer::Tokenizer 
> >     Buffer(stream) = new(stream, Tokenizer("")) 
> > end 
> AbstractString is still not a concrete type. Use 
> UTF8String/ASCIIString, or do this instead: 
>
> type Tokenizer{T<:AbstractString} 
>      tokens::Array{T, 1} 
>      index::Int 
>      Tokenizer(s::AbstractString) = new(split(strip(s)), 0) 
> end 
>
> type Buffer{T<:AbstractString} 
>     stream::IOStream 
>     tokenizer::Tokenizer{T} 
>     Buffer(stream) = new(stream, Tokenizer("")) 
> end 
>
> (Note that "" will create an ASCIIString, use UTF8String("") if you need 
> to support non-ASCII chars.) 
>
>
> Regards 
>
> > 
> > 
> > > Your types have totally untyped fields – the compiler has to emit 
> > > very pessimistic code about this. Rule of thumb: locations (fields, 
> > > collections) should be as concretely typed as possible; parameters 
> > > don't need to be. 
> > > 
> > > On Sat, May 14, 2016 at 1:36 PM, Ford Ox <[email protected]> wrote: 
> > > > I have written the exact same code in Java and Julia for reading 
> > > > integers from a file.  
> > > > The Julia code was A LOT slower (12 seconds vs 1.16 seconds). 
> > > > 
> > > > import Base.isempty, Base.close 
> > > > 
> > > > ##    Tokenizer ## 
> > > > 
> > > > type Tokenizer 
> > > >     tokens 
> > > >     index 
> > > >     Tokenizer(s::AbstractString) = new(split(strip(s)), 0) 
> > > > end 
> > > > 
> > > > isempty(t::Tokenizer) = length(t.tokens) == t.index 
> > > > 
> > > > function next!(t::Tokenizer) 
> > > >     t.index += 1 
> > > >     t.tokens[t.index] 
> > > > end 
> > > > 
> > > > ## Buffer ## 
> > > > 
> > > > type Buffer 
> > > >     stream 
> > > >     tokenizer 
> > > >     Buffer(stream) = new(stream, []) 
> > > > end 
> > > > 
> > > > function next!(b::Buffer) 
> > > >     if isempty(b.tokenizer) 
> > > >         b.tokenizer = Tokenizer(readline(b.stream)) 
> > > >     end 
> > > >     next!(b.tokenizer) 
> > > > end 
> > > > 
> > > > close!(b::Buffer) = close(b.stream) 
> > > > nexttype!(t, b::Buffer) = parse(t, next!(b)) 
> > > > nextint!(b::Buffer) = nexttype!(Int, b) 
> > > > 
> > > > cd("pathToMyFile") 
> > > > b = Buffer(open("File")) 
> > > > 
> > > > function readall!(b::Buffer) 
> > > >     for _ in 1:nextint!(b) 
> > > >         nextint!(b) 
> > > >     end 
> > > >     close!(b) 
> > > > end 
> > > > 
> > > > @time readall!(b) 
> > > > 
> > > > 
> > > > > 12.314114 seconds (84.84 M allocations: 3.793 GB, 11.47% gc 
> > > > > time) 
> > > > package alg; 
> > > > 
> > > > import java.io.*; 
> > > > import java.util.StringTokenizer; 
> > > > 
> > > > public class Try { 
> > > >     StringTokenizer tokenizer; 
> > > >     BufferedReader reader; 
> > > > 
> > > >     public static void main(String[] args) throws IOException { 
> > > >         String name = "fileName"; 
> > > >         Try reader = new Try(new File(name)); 
> > > > 
> > > >         long itime = System.nanoTime(); 
> > > >         int N = reader.nextInt(); 
> > > >         for(int n=0; n < N; n++) 
> > > >             reader.nextInt(); 
> > > >         System.out.println((double) (System.nanoTime() - itime) / 
> > > > 1000000000); 
> > > > 
> > > >     } 
> > > > 
> > > >     Try(File f) throws FileNotFoundException { 
> > > >         tokenizer = new StringTokenizer(""); 
> > > >         reader = new BufferedReader(new FileReader(f)); 
> > > >     } 
> > > > 
> > > >     String next() throws IOException { 
> > > >         if(!tokenizer.hasMoreTokens()) tokenize(); 
> > > >         return tokenizer.nextToken(); 
> > > >     } 
> > > > 
> > > >     void tokenize() throws IOException { 
> > > >         tokenizer = new StringTokenizer(reader.readLine()); 
> > > >     } 
> > > > 
> > > >     int nextInt() throws IOException { 
> > > >         return Integer.parseInt(next()); 
> > > >     } 
> > > > } 
> > > > >  1.169884868 
> > > >   
> > > > The file has 7 068 650 lines. Each line contains a single integer 
> > > > that is no larger than 2^16. 
> > > > 
> > > 
>

Re: [julia-users] Slow reading of file

Reply via email to