Github user t3rmin4t0r commented on a diff in the pull request:

https://github.com/apache/orc/pull/189#discussion_r149828230

--- Diff: java/bench/src/java/org/apache/orc/bench/floating/DoubleReadBenchmark.java ---
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench.floating;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcProto;
+import org.apache.orc.Reader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.impl.DoubleTreeReaderFlip;
+import org.apache.orc.impl.DoubleTreeReaderFpcV1;
+import org.apache.orc.impl.DoubleTreeReaderFpcV2;
+import org.apache.orc.impl.DoubleTreeReaderSplit;
+import org.apache.orc.impl.DoubleTreeReaderV2;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.SchemaEvolution;
+import org.apache.orc.impl.StreamName;
+import org.apache.orc.impl.TreeReaderFactory;
+import org.apache.orc.impl.WriterImpl;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations=1, time=1)
+@Measurement(iterations=1, time=1)
+@Fork(1)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class DoubleReadBenchmark {
+  private static final String DATA_DIRECTORY_ENV = "data.directory";
+  static final String DEFAULT_DATA_DIR = "bench/target/double-mini-orc/";
+  private static String dataDirectory = DEFAULT_DATA_DIR;
+  static {
+    if (System.getProperty(DATA_DIRECTORY_ENV) != null) {
+      dataDirectory = System.getProperty(DATA_DIRECTORY_ENV);
+    }
+  }
+
+  private static final int BATCH_SIZE = 1024;
+
+  public static TreeReaderFactory.TreeReader createReader(int column,
+                                                          TreeReaderFactory.Context context,
+                                                          DoubleWriteBenchmark.Algorithm algorithm
+                                                          ) throws IOException {
+    switch (algorithm) {
+      case PLAIN:
+        return new TreeReaderFactory.DoubleTreeReader(column);
+      case PLAIN_V2:
+        return new DoubleTreeReaderV2(column, null, context);
+      case FPC_V1:
+        return new DoubleTreeReaderFpcV1(column, null, context);
+      case FPC_V2:
+        return new DoubleTreeReaderFpcV2(column, null, context);
+      case FLIP:
+        return new DoubleTreeReaderFlip(column, null, context);
+      case SPLIT:
+        return new DoubleTreeReaderSplit(column, null, context);
+      default:
+        throw new IllegalArgumentException("Unknown algorithm " + algorithm);
+    }
+  }
+
+  @State(Scope.Benchmark)
+  public static class InputData {
+
+    @Param({"LIST_PRICE",
+        "DISCOUNT_AMT",
+        "IOT_METER",
+        "NYC_TAXI_DROP_LAT",
+        "NYC_TAXI_DROP_LONG",
+        "HIGGS",
+        "HEPMASS",
+        "PHONE"})
+    DoubleWriteBenchmark.DataSet dataSet;
+
+    @Param({"NONE", "ZLIB", "LZO"})
+    CompressionKind compression;
+
+    @Param({"PLAIN", "PLAIN_V2", "FPC_V1", "FPC_V2", "FLIP", "SPLIT"})
+    DoubleWriteBenchmark.Algorithm algorithm;
+
+    OrcProto.StripeFooter footer;
+    Map<StreamName, byte[]> streams = new HashMap<>();
+    CompressionCodec codec;
+
+    public InputData() {
+      // PASS
+    }
+
+    @Setup
+    public void setup() throws IOException {
+      String path = DoubleReadSetup.makeFilename(dataDirectory, algorithm,
+          dataSet, compression);
+      DataInputStream inStream = new DataInputStream(new FileInputStream(path));
+      int footerSize = inStream.readInt();
+      byte[] footerBuffer = new byte[footerSize];
+      inStream.readFully(footerBuffer);
+      footer = OrcProto.StripeFooter.parseFrom(footerBuffer);
+      streams.clear();
+      for(OrcProto.Stream stream: footer.getStreamsList()) {
+        StreamName name = new StreamName(stream.getColumn(), stream.getKind());
+        CompressionCodec codec = WriterImpl.createCodec(compression);
--- End diff --

Looks like this is using the old ZLIB codecs.

    CompressionCodec codec = getCustomizedCodec(OrcProto.Stream.Kind.DATA);

?
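For context, a minimal sketch of the shape of the suggested change, assuming the intent is to key codec selection on the stream kind rather than calling WriterImpl.createCodec fresh inside the per-stream loop. The helper below is hypothetical (named after the getCustomizedCodec the comment references); the real per-kind tuning lives in the ORC writer internals and is intentionally omitted here.

    import org.apache.orc.CompressionCodec;
    import org.apache.orc.CompressionKind;
    import org.apache.orc.OrcProto;
    import org.apache.orc.impl.WriterImpl;

    class CodecSelection {
      // Hypothetical stand-in for the writer-side getCustomizedCodec:
      // builds the base codec for the file's compression kind, then would
      // apply per-stream-kind tuning (e.g. the ZLIB settings used for DATA
      // streams) so the reader matches what the writer produced.
      static CompressionCodec getCustomizedCodec(CompressionKind compression,
                                                 OrcProto.Stream.Kind kind) {
        CompressionCodec base = WriterImpl.createCodec(compression);
        // Per-kind customization omitted; it is writer-internal.
        return base;
      }
    }

With a helper like that, the loop body in setup() would read something like:

    CompressionCodec codec = getCustomizedCodec(compression, stream.getKind());

so each stream is decompressed with the codec variant that wrote it, instead of the default one.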
---