Re: [julia-users] Slow reading of file

Ford Ox Sat, 14 May 2016 05:01:41 -0700

Fixed. Julia now takes 11 seconds to finish:
# Whitespace-separated tokens of a single string, together with an integer cursor.
#
# Fields:
#   tokens - the pieces produced by `split(strip(s))`
#   index  - cursor into `tokens`; a freshly built Tokenizer starts at 0
#
# NOTE(review): the element type `AbstractString` is abstract, so the container
# still holds boxed values; a concrete element type would presumably be faster,
# but the right one depends on the string type passed in - confirm before changing.
type Tokenizer
    tokens::Vector{AbstractString}
    index::Int

    # Trim surrounding whitespace from `s`, split it on whitespace, and start
    # the cursor at 0.
    function Tokenizer(s::AbstractString)
        pieces = split(strip(s))
        new(pieces, 0)
    end
end


# Couples an input stream with a Tokenizer.
#
# Fields:
#   stream    - the IOStream this buffer wraps
#   tokenizer - the current Tokenizer; presumably replaced as further input is
#               read from `stream` (verify against the code that uses Buffer)
type Buffer
    stream::IOStream
    tokenizer::Tokenizer

    # Wrap `stream`, starting with a Tokenizer built from the empty string,
    # i.e. one that holds no tokens.
    function Buffer(stream)
        no_tokens = Tokenizer("")
        new(stream, no_tokens)
    end
end



Dne sobota 14. května 2016 13:39:24 UTC+2 Stefan Karpinski napsal(a):
>
> Your types have totally untyped fields – the compiler has to emit very 
> pessimistic code about this. Rule of thumb: locations (fields, collections) 
> should be as concretely typed as possible; parameters don't need to be.
>
> On Sat, May 14, 2016 at 1:36 PM, Ford Ox <[email protected] <javascript:>> 
> wrote:
>
>> I have written the exact same code in Java and Julia for reading integers 
>> from a file. 
>> Julia code was A LOT slower. (12 seconds vs 1.16 seconds)
>>
>> import Base.isempty, Base.close
>>
>> ##    Tokenizer ##
>>
>> type Tokenizer
>>     tokens
>>     index
>>     Tokenizer(s::AbstractString) = new(split(strip(s)), 0)
>> end
>>
>> isempty(t::Tokenizer) = length(t.tokens) == t.index
>>
>> function next!(t::Tokenizer)
>>     t.index += 1
>>     t.tokens[t.index]
>> end
>>
>> ## Buffer ##
>>
>> type Buffer
>>     stream
>>     tokenizer
>>     Buffer(stream) = new(stream, [])
>> end
>>
>> function next!(b::Buffer)
>>     if isempty(b.tokenizer)
>>         b.tokenizer = Tokenizer(readline(b.stream))
>>     end
>>     next!(b.tokenizer)
>> end
>>
>> close!(b::Buffer) = close(b.stream)
>> nexttype!(t, b::Buffer) = parse(t, next!(b))
>> nextint!(b::Buffer) = nexttype!(Int, b)
>>
>> cd("pathToMyFile")
>> b = Buffer(open("File"))
>>
>> function readall!(b::Buffer)
>>     for _ in 1:nextint!(b)
>>         nextint!(b)
>>     end
>>     close!(b)
>> end
>>
>> @time readall!(b)
>>
>>
>> 12.314114 seconds (84.84 M allocations: 3.793 GB, 11.47% gc time)
>>>
>>
>> package alg;
>>
>> import java.io.*;
>> import java.util.StringTokenizer;
>>
>> public class Try {
>>     StringTokenizer tokenizer;
>>     BufferedReader reader;
>>
>>     public static void main(String[] args) throws IOException {
>>         String name = "fileName";
>>         Try reader = new Try(new File(name));
>>
>>         long itime = System.nanoTime();
>>         int N = reader.nextInt();
>>         for(int n=0; n < N; n++)
>>             reader.nextInt();
>>         System.out.println((double) (System.nanoTime() - itime) / 
>> 1000000000);
>>
>>     }
>>
>>     Try(File f) throws FileNotFoundException {
>>         tokenizer = new StringTokenizer("");
>>         reader = new BufferedReader(new FileReader(f));
>>     }
>>
>>     String next() throws IOException {
>>         if(!tokenizer.hasMoreTokens()) tokenize();
>>         return tokenizer.nextToken();
>>     }
>>
>>     void tokenize() throws IOException {
>>         tokenizer = new StringTokenizer(reader.readLine());
>>     }
>>
>>     int nextInt() throws IOException {
>>         return Integer.parseInt(next());
>>     }
>> }
>>
>>  1.169884868
>>
>>  
>> The file has 7 068 650 lines. On each line is one integer that is not 
>> bigger than 2^16.
>>
>
>

Re: [julia-users] Slow reading of file

Reply via email to