# Holds the whitespace-separated tokens of a single string together with an
# integer cursor into them.
type Tokenizer
    # Tokens produced by splitting the stripped input on whitespace.
    tokens::Vector{ASCIIString}
    # Cursor into `tokens`; the constructor initialises it to 0.
    index::Int

    # Build a tokenizer from `s`: strip leading/trailing whitespace, split the
    # remainder on whitespace, and start the cursor at 0.
    function Tokenizer(s::ASCIIString)
        words = split(strip(s))
        # `new` converts `words` to the declared field type of `tokens`.
        return new(words, 0)
    end
end
Julia still takes 11 seconds to run...
Dne sobota 14. května 2016 14:08:48 UTC+2 Milan Bouchet-Valat napsal(a):
>
> Le samedi 14 mai 2016 à 05:01 -0700, Ford Ox a écrit :
> > Fixed. Julia now takes 11 seconds to finish
> > type Tokenizer
> > tokens::Array{AbstractString, 1}
> > index::Int
> > Tokenizer(s::AbstractString) = new(split(strip(s)), 0)
> > end
> >
> > type Buffer
> > stream::IOStream
> > tokenizer::Tokenizer
> > Buffer(stream) = new(stream, Tokenizer(""))
> > end
> AbstractString is still not a concrete type. Use
> UTF8String/ASCIIString, or do this instead:
>
> type Tokenizer{T<:AbstractString}
> tokens::Array{T, 1}
> index::Int
> Tokenizer(s::AbstractString) = new(split(strip(s)), 0)
> end
>
> type Buffer{T<:AbstractString}
> stream::IOStream
> tokenizer::Tokenizer{T}
> Buffer(stream) = new(stream, Tokenizer(""))
> end
>
> (Note that "" will create an ASCIIString, use UTF8String("") if you need
> to support non-ASCII chars.)
>
>
> Regards
>
> >
> >
> > > Your types have totally untyped fields – the compiler has to emit
> > > very pessimistic code about this. Rule of thumb: locations (fields,
> > > collections) should be as concretely typed as possible; parameters
> > > don't need to be.
> > >
> > > On Sat, May 14, 2016 at 1:36 PM, Ford Ox <[email protected]> wrote:
> > > > I have written exact same code in java and julia for reading
> > > > integers from file.
> > > > Julia code was A LOT slower. (12 seconds vs 1.16 seconds)
> > > >
> > > > import Base.isempty, Base.close
> > > >
> > > > ## Tokenizer ##
> > > >
> > > > type Tokenizer
> > > > tokens
> > > > index
> > > > Tokenizer(s::AbstractString) = new(split(strip(s)), 0)
> > > > end
> > > >
> > > > isempty(t::Tokenizer) = length(t.tokens) == t.index
> > > >
> > > > function next!(t::Tokenizer)
> > > > t.index += 1
> > > > t.tokens[t.index]
> > > > end
> > > >
> > > > ## Buffer ##
> > > >
> > > > type Buffer
> > > > stream
> > > > tokenizer
> > > > Buffer(stream) = new(stream, [])
> > > > end
> > > >
> > > > function next!(b::Buffer)
> > > > if isempty(b.tokenizer)
> > > > b.tokenizer = Tokenizer(readline(b.stream))
> > > > end
> > > > next!(b.tokenizer)
> > > > end
> > > >
> > > > close!(b::Buffer) = close(b.stream)
> > > > nexttype!(t, b::Buffer) = parse(t, next!(b))
> > > > nextint!(b::Buffer) = nexttype!(Int, b)
> > > >
> > > > cd("pathToMyFile")
> > > > b = Buffer(open("File"))
> > > >
> > > > function readall!(b::Buffer)
> > > > for _ in 1:nextint!(b)
> > > > nextint!(b)
> > > > end
> > > > close!(b)
> > > > end
> > > >
> > > > @time readall!(b)
> > > >
> > > >
> > > > > 12.314114 seconds (84.84 M allocations: 3.793 GB, 11.47% gc
> > > > > time)
> > > > package alg;
> > > >
> > > > import java.io.*;
> > > > import java.util.StringTokenizer;
> > > >
> > > > public class Try {
> > > > StringTokenizer tokenizer;
> > > > BufferedReader reader;
> > > >
> > > > public static void main(String[] args) throws IOException {
> > > > String name = "fileName";
> > > > Try reader = new Try(new File(name));
> > > >
> > > > long itime = System.nanoTime();
> > > > int N = reader.nextInt();
> > > > for(int n=0; n < N; n++)
> > > > reader.nextInt();
> > > > System.out.println((double) (System.nanoTime() - itime) /
> > > > 1000000000);
> > > >
> > > > }
> > > >
> > > > Try(File f) throws FileNotFoundException {
> > > > tokenizer = new StringTokenizer("");
> > > > reader = new BufferedReader(new FileReader(f));
> > > > }
> > > >
> > > > String next() throws IOException {
> > > > if(!tokenizer.hasMoreTokens()) tokenize();
> > > > return tokenizer.nextToken();
> > > > }
> > > >
> > > > void tokenize() throws IOException {
> > > > tokenizer = new StringTokenizer(reader.readLine());
> > > > }
> > > >
> > > > int nextInt() throws IOException {
> > > > return Integer.parseInt(next());
> > > > }
> > > > }
> > > > > 1.169884868
> > > >
> > > > The file has 7 068 650 lines. On each line is one integer that is
> > > > not bigger than 2^16.
> > > >
> > >
>