[julia-users] Slow reading of file

Ford Ox Sat, 14 May 2016 04:37:21 -0700

I have written the exact same code in Java and Julia for reading integers 
from a file. 
The Julia code was a lot slower (12 seconds vs. 1.16 seconds).


import Base.isempty, Base.close

##    Tokenizer ##

"""
    Tokenizer(s::AbstractString)

Whitespace-separated tokens of `s` together with a cursor.

`tokens` holds the pieces produced by `split(strip(s))`; `index` is the number
of tokens already handed out (0 for a fresh tokenizer).

The fields are concretely typed (the element type is a type parameter) so that
field accesses are type-stable instead of going through `Any`.
"""
type Tokenizer{T<:AbstractString}
    tokens::Vector{T}
    index::Int
end

# Outer constructor: keeps the original `Tokenizer(s)` entry point and lets the
# element type be inferred from whatever `split` returns for this string type.
Tokenizer(s::AbstractString) = Tokenizer(split(strip(s)), 0)

# A tokenizer is exhausted once its cursor equals the number of tokens it holds.
function isempty(t::Tokenizer)
    consumed = t.index
    return consumed == length(t.tokens)
end

# Advance the cursor of `t` by one and return the token it now points at.
# The cursor is stored before the lookup, so it has already moved even if the
# lookup fails.
function next!(t::Tokenizer)
    pos = t.index + 1
    t.index = pos
    return t.tokens[pos]
end

## Buffer ##

"""
    Buffer(stream)

Token reader over `stream`: holds the stream and the `Tokenizer` for the line
currently being consumed.

A fresh `Buffer` starts with an empty tokenizer (`Tokenizer("")`), so the first
request for a token triggers a read from `stream`. Using a real `Tokenizer`
rather than an untyped `[]` placeholder keeps the `tokenizer` field to a single
kind of value.
"""
type Buffer
    stream                  # left untyped so any stream-like object is accepted
    tokenizer::Tokenizer
    Buffer(stream) = new(stream, Tokenizer(""))
end

"""
    next!(b::Buffer)

Return the next whitespace-separated token from `b`, reading further lines from
the underlying stream as needed.

Lines that contain no tokens are skipped. Throws `EOFError` when the stream is
exhausted and no token remains.
"""
function next!(b::Buffer)
    # `while`, not `if`: a blank line yields an empty tokenizer, and asking that
    # for a token would fail and leave its cursor past the end.
    while isempty(b.tokenizer)
        eof(b.stream) && throw(EOFError())
        b.tokenizer = Tokenizer(readline(b.stream))
    end
    return next!(b.tokenizer)
end

# Close the stream wrapped by `b`.
function close!(b::Buffer)
    close(b.stream)
end
# Take the next token from `b` and parse it as a value of type `t`.
function nexttype!(t, b::Buffer)
    token = next!(b)
    return parse(t, token)
end
# Take the next token from `b` and parse it as an `Int`.
function nextint!(b::Buffer)
    return nexttype!(Int, b)
end

# Switch to the directory that holds the input, then wrap the opened file in a
# Buffer. "pathToMyFile" and "File" are placeholders from the original post.
# NOTE(review): `b` is a non-constant global; it is only passed as an argument
# to `readall!`, so the read loop itself runs inside a function.
cd("pathToMyFile")
b = Buffer(open("File"))

"""
    readall!(b::Buffer)

Read the leading integer `n` from `b`, then read and discard `n` further
integers. The buffer's stream is closed afterwards, including when reading or
parsing throws.
"""
function readall!(b::Buffer)
    try
        count = nextint!(b)
        for _ in 1:count
            nextint!(b)
        end
    finally
        # Release the stream even if a token is missing or malformed.
        close!(b)
    end
    return nothing
end

# Time one complete pass over the file.
# NOTE(review): if this is the first call of `readall!` in the session, the
# reported time may also include compilation — confirm by timing a second run
# on a freshly opened Buffer.
@time readall!(b)


12.314114 seconds (84.84 M allocations: 3.793 GB, 11.47% gc time)
>

package alg;

import java.io.*;
import java.util.StringTokenizer;

public class Try {
    StringTokenizer tokenizer;
    BufferedReader reader;

    public static void main(String[] args) throws IOException {
        String name = "fileName";
        Try reader = new Try(new File(name));

        long itime = System.nanoTime();
        int N = reader.nextInt();
        for(int n=0; n < N; n++)
            reader.nextInt();
        System.out.println((double) (System.nanoTime() - itime) / 1000000000);

    }

    Try(File f) throws FileNotFoundException {
        tokenizer = new StringTokenizer("");
        reader = new BufferedReader(new FileReader(f));
    }

    String next() throws IOException {
        if(!tokenizer.hasMoreTokens()) tokenize();
        return tokenizer.nextToken();
    }

    void tokenize() throws IOException {
        tokenizer = new StringTokenizer(reader.readLine());
    }

    int nextInt() throws IOException {
        return Integer.parseInt(next());
    }
}

 1.169884868

 
The file has 7 068 650 lines. Each line contains a single integer no larger 
than 2^16.

[julia-users] Slow reading of file

Reply via email to