Github user t3rmin4t0r commented on a diff in the pull request:
https://github.com/apache/orc/pull/189#discussion_r149828230
--- Diff: java/bench/src/java/org/apache/orc/bench/floating/DoubleReadBenchmark.java ---
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench.floating;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcProto;
+import org.apache.orc.Reader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.impl.DoubleTreeReaderFlip;
+import org.apache.orc.impl.DoubleTreeReaderFpcV1;
+import org.apache.orc.impl.DoubleTreeReaderFpcV2;
+import org.apache.orc.impl.DoubleTreeReaderSplit;
+import org.apache.orc.impl.DoubleTreeReaderV2;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.SchemaEvolution;
+import org.apache.orc.impl.StreamName;
+import org.apache.orc.impl.TreeReaderFactory;
+import org.apache.orc.impl.WriterImpl;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations=1, time=1)
+@Measurement(iterations=1, time=1)
+@Fork(1)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class DoubleReadBenchmark {
+ private static final String DATA_DIRECTORY_ENV = "data.directory";
+ static final String DEFAULT_DATA_DIR = "bench/target/double-mini-orc/";
+ private static String dataDirectory = DEFAULT_DATA_DIR;
+ static {
+ if (System.getProperty(DATA_DIRECTORY_ENV) != null) {
+ dataDirectory = System.getProperty(DATA_DIRECTORY_ENV);
+ }
+ }
+
+ private static final int BATCH_SIZE = 1024;
+
+ public static TreeReaderFactory.TreeReader createReader(int column,
+     TreeReaderFactory.Context context,
+     DoubleWriteBenchmark.Algorithm algorithm) throws IOException {
+ switch (algorithm) {
+ case PLAIN:
+ return new TreeReaderFactory.DoubleTreeReader(column);
+ case PLAIN_V2:
+ return new DoubleTreeReaderV2(column, null, context);
+ case FPC_V1:
+ return new DoubleTreeReaderFpcV1(column, null, context);
+ case FPC_V2:
+ return new DoubleTreeReaderFpcV2(column, null, context);
+ case FLIP:
+ return new DoubleTreeReaderFlip(column, null, context);
+ case SPLIT:
+ return new DoubleTreeReaderSplit(column, null, context);
+ default:
+ throw new IllegalArgumentException("Unknown algorithm " + algorithm);
+ }
+ }
+
+ @State(Scope.Benchmark)
+ public static class InputData {
+
+ @Param({"LIST_PRICE",
+ "DISCOUNT_AMT",
+ "IOT_METER",
+ "NYC_TAXI_DROP_LAT",
+ "NYC_TAXI_DROP_LONG",
+ "HIGGS",
+ "HEPMASS",
+ "PHONE"})
+ DoubleWriteBenchmark.DataSet dataSet;
+
+ @Param({"NONE", "ZLIB", "LZO"})
+ CompressionKind compression;
+
+ @Param({"PLAIN", "PLAIN_V2", "FPC_V1", "FPC_V2", "FLIP", "SPLIT"})
+ DoubleWriteBenchmark.Algorithm algorithm;
+
+ OrcProto.StripeFooter footer;
+ Map<StreamName, byte[]> streams = new HashMap<>();
+ CompressionCodec codec;
+
+ public InputData() {
+ // PASS
+ }
+
+ @Setup
+ public void setup() throws IOException {
+ String path = DoubleReadSetup.makeFilename(dataDirectory, algorithm,
+ dataSet, compression);
+ DataInputStream inStream = new DataInputStream(new FileInputStream(path));
+ int footerSize = inStream.readInt();
+ byte[] footerBuffer = new byte[footerSize];
+ inStream.readFully(footerBuffer);
+ footer = OrcProto.StripeFooter.parseFrom(footerBuffer);
+ streams.clear();
+ for(OrcProto.Stream stream: footer.getStreamsList()) {
+ StreamName name = new StreamName(stream.getColumn(), stream.getKind());
+ CompressionCodec codec = WriterImpl.createCodec(compression);
--- End diff ---
Looks like this is using the old ZLIB codecs.
CompressionCodec codec = getCustomizedCodec(OrcProto.Stream.Kind.DATA); ?
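
For concreteness, a minimal sketch of the change this comment suggests,
replacing the WriterImpl.createCodec(compression) call inside the stream
loop above. The getCustomizedCodec helper and its Kind-based argument are
taken from the comment itself; its exact signature is an assumption here,
not verified against the PR:

    // Sketch only: pick the codec per stream kind instead of going
    // through WriterImpl.createCodec(compression), the old ZLIB path.
    // getCustomizedCodec(...) is assumed to be available in scope.
    for (OrcProto.Stream stream : footer.getStreamsList()) {
      StreamName name = new StreamName(stream.getColumn(), stream.getKind());
      CompressionCodec codec = getCustomizedCodec(stream.getKind());
      // ... read and decompress the stream bytes with this codec as before ...
    }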
---