[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-05-03 Thread omalley
Github user omalley commented on a diff in the pull request:

https://github.com/apache/orc/pull/252#discussion_r185846011
  
--- Diff: java/bench/src/java/org/apache/orc/bench/DecimalBench.java ---
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench;
+
+import com.google.gson.JsonStreamParser;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.mapred.FsInput;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.TrackingLocalFileSystem;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.Reader;
+import org.apache.orc.RecordReader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.bench.convert.BatchReader;
+import org.apache.orc.bench.convert.GenerateVariants;
+import org.apache.orc.bench.convert.csv.CsvReader;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.openjdk.jmh.annotations.AuxCounters;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations=2, time=30, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations=10, time=30, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Thread)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(2)
+public class DecimalBench {
+
+  private static final String ROOT_ENVIRONMENT_NAME = "bench.root.dir";
+  private static final Path root;
+  static {
+    String value = System.getProperty(ROOT_ENVIRONMENT_NAME);
+    root = value == null ? null : new Path(value);
+  }
+
+  /**
+   * Abstract out whether we are writing short or long decimals
+   */
+  interface Loader {
+    /**
+     * Load the data from the values array into the ColumnVector.
+     * @param vector the output
+     * @param values the input
+     * @param offset the first input value
+     * @param length the number of values to copy
+     */
+    void loadData(ColumnVector vector, long[] values, int offset, int length);
+  }
+
+  static class Decimal64Loader implements Loader {
+final int 
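
The quoted diff is cut off above, but the Loader indirection is simple: the
short-decimal path can copy pre-scaled long values straight into a
Decimal64ColumnVector, while the long-decimal path has to materialize a
HiveDecimal per value. A minimal sketch of the short-decimal side, assuming
the loader carries the target precision and scale (the actual fields are
truncated above):

    static class Decimal64Loader implements Loader {
      // Assumed fields; the real ones are cut off in the diff above.
      final int precision;
      final int scale;

      Decimal64Loader(int precision, int scale) {
        this.precision = precision;
        this.scale = scale;
      }

      @Override
      public void loadData(ColumnVector vector, long[] values, int offset, int length) {
        Decimal64ColumnVector dec = (Decimal64ColumnVector) vector;
        dec.precision = (short) precision;
        dec.scale = (short) scale;
        // Decimal64ColumnVector stores unscaled longs, so loading is a plain copy.
        for (int r = 0; r < length; ++r) {
          dec.vector[r] = values[offset + r];
        }
      }
    }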

[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-05-03 Thread omalley
Github user omalley commented on a diff in the pull request:

https://github.com/apache/orc/pull/252#discussion_r185845484
  
--- Diff: java/bench/src/java/org/apache/orc/bench/DecimalBench.java ---

[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-05-03 Thread omalley
Github user omalley commented on a diff in the pull request:

https://github.com/apache/orc/pull/252#discussion_r185845008
  
--- Diff: java/bench/src/java/org/apache/orc/bench/DecimalBench.java ---

[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-05-02 Thread prasanthj
Github user prasanthj commented on a diff in the pull request:

https://github.com/apache/orc/pull/252#discussion_r185673866
  
--- Diff: java/bench/src/java/org/apache/orc/bench/DecimalBench.java ---

[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-05-02 Thread prasanthj
Github user prasanthj commented on a diff in the pull request:

https://github.com/apache/orc/pull/252#discussion_r185673722
  
--- Diff: java/bench/src/java/org/apache/orc/bench/DecimalBench.java ---

[GitHub] orc pull request #252: ORC-345 Create and use a new Decimal64StatisticsImpl ...

2018-04-18 Thread omalley
GitHub user omalley opened a pull request:

https://github.com/apache/orc/pull/252

ORC-345 Create and use a new Decimal64StatisticsImpl on the write path

Currently the DecimalTreeWriter spends a lot of time maintaining the 
statistics for decimal objects. For short decimals (p <= 18) we can use longs 
to track the min, max, and sum.
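
Since a decimal with precision at most 18 always fits in a signed 64-bit long
once scaled, the statistics can stay in primitive arithmetic end to end,
avoiding a BigDecimal/HiveDecimal allocation per value. A rough sketch of the
idea (illustrative only, not the patch's actual Decimal64StatisticsImpl; the
overflow handling is an assumption):

    // Illustrative sketch, not the Decimal64StatisticsImpl added by this PR.
    static class ShortDecimalStats {
      private long min = Long.MAX_VALUE;
      private long max = Long.MIN_VALUE;
      private long sum = 0;
      private boolean sumOverflowed = false;  // a sum of many longs can exceed 64 bits

      void update(long scaledValue) {
        if (scaledValue < min) { min = scaledValue; }
        if (scaledValue > max) { max = scaledValue; }
        if (!sumOverflowed) {
          try {
            sum = Math.addExact(sum, scaledValue);
          } catch (ArithmeticException e) {
            sumOverflowed = true;  // stop tracking rather than report a wrong sum
          }
        }
      }
    }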

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/omalley/orc orc-345

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/orc/pull/252.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #252


commit 12c37d717010e5b683074633b273b0bad0180e05
Author: Owen O'Malley 
Date:   2018-04-18T03:16:12Z

ORC-344. Support the new Decimal64ColumnVector.

Fixes #250

Signed-off-by: Owen O'Malley 

commit e7c395ed2e5a68e480e31c4ba6d74237e5025a33
Author: Owen O'Malley 
Date:   2018-04-18T21:30:35Z

ORC-345. Create Decimal64StatisticsImpl.




---