[07/15] orc git commit: trying to get it done.

2016-10-17 Thread omalley
trying to get it done.


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/5ae2d412
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/5ae2d412
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/5ae2d412

Branch: refs/heads/orc-72
Commit: 5ae2d4122b03bf356b5e8c48b9b05b2a4f12da38
Parents: 825a944
Author: Owen O'Malley 
Authored: Mon Sep 26 09:14:07 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 java/bench/pom.xml  | 17 +++-
 .../java/org/apache/orc/bench/SalesToOrc.java   |  2 +-
 java/pom.xml| 41 
 3 files changed, 47 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/5ae2d412/java/bench/pom.xml
--
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
index 019bdf0..738dfb3 100644
--- a/java/bench/pom.xml
+++ b/java/bench/pom.xml
@@ -19,7 +19,7 @@
   
 org.apache.orc
 orc
-1.2.0-SNAPSHOT
+1.3.0-SNAPSHOT
 ../pom.xml
   
 
@@ -34,19 +34,20 @@
 
   org.apache.orc
   orc-core
-  1.2.0-SNAPSHOT
+
+
+  org.apache.orc
+  orc-tools
 
 
 
 
   org.apache.avro
   avro
-  1.8.1
 
 
   org.apache.commons
   commons-csv
-  1.4
 
 
   org.apache.hadoop
@@ -58,27 +59,19 @@
 
 
   org.apache.hive
-  hive-exec
-  2.1.0
-
-
-  org.apache.hive
   hive-storage-api
 
 
   org.openjdk.jmh
   jmh-core
-  1.12
 
 
   org.openjdk.jmh
   jmh-generator-annprocess
-  1.12
 
 
   org.slf4j
   slf4j-simple
-  1.7.5
 
 
 

http://git-wip-us.apache.org/repos/asf/orc/blob/5ae2d412/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java 
b/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
index d570728..4e715b2 100644
--- a/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
+++ b/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
@@ -21,7 +21,7 @@ package org.apache.orc.bench;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
 

http://git-wip-us.apache.org/repos/asf/orc/blob/5ae2d412/java/pom.xml
--
diff --git a/java/pom.xml b/java/pom.xml
index 5236409..b894b2d 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -214,6 +214,11 @@
 orc-core
 1.3.0-SNAPSHOT
   
+  
+org.apache.orc
+orc-tools
+1.3.0-SNAPSHOT
+  
 
   
   
@@ -252,6 +257,16 @@
 0.3
   
   
+org.apache.commons
+commons-csv
+1.4
+  
+  
+org.apache.avro
+avro
+1.8.1
+  
+  
 org.apache.hadoop
 hadoop-common
 ${hadoop.version}
@@ -350,15 +365,41 @@
 ${storage-api.version}
   
   
+org.apache.hive
+hive-exec
+2.1.0
+
+  
+org.apache.calcite
+calcite-core
+  
+
+  
+  
 org.codehaus.jettison
 jettison
 1.1
   
   
+org.openjdk.jmh
+jmh-core
+1.12
+  
+  
+org.openjdk.jmh
+jmh-generator-annprocess
+1.12
+  
+  
 org.slf4j
 slf4j-api
 1.7.5
   
+  
+org.slf4j
+slf4j-simple
+1.7.5
+  
 
   
   



[orc] Git Push Summary

2016-10-17 Thread omalley
Repository: orc
Updated Branches:
  refs/heads/orc-72 [deleted] 7315a0145


[05/15] orc git commit: more updates

2016-10-17 Thread omalley
more updates


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/1752e172
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/1752e172
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/1752e172

Branch: refs/heads/orc-72
Commit: 1752e172a0f7b16fd193f75eb57d10ca05ab0b91
Parents: 5b37113
Author: Owen O'Malley 
Authored: Wed Oct 5 16:04:16 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 java/bench/pom.xml  |  13 +
 .../orc/bench/ColumnProjectionBenchmark.java|  10 +-
 .../org/apache/orc/bench/CompressionKind.java   |  56 
 .../java/org/apache/orc/bench/GithubToAvro.java |   4 +-
 .../java/org/apache/orc/bench/GithubToJson.java |  21 +-
 .../java/org/apache/orc/bench/GithubToOrc.java  |   6 +-
 .../org/apache/orc/bench/GithubToParquet.java   |   2 +-
 .../java/org/apache/orc/bench/SalesToJson.java  |  16 +-
 .../java/org/apache/orc/bench/SalesToOrc.java   |   2 +-
 .../java/org/apache/orc/bench/TaxiToJson.java   |  64 +---
 .../java/org/apache/orc/bench/TaxiToOrc.java|  63 +---
 .../java/org/apache/orc/bench/Utilities.java|  86 ++
 .../org/apache/orc/bench/avro/AvroWriter.java   |  23 --
 .../org/apache/orc/bench/json/JsonWriter.java   |  69 +
 .../orc/bench/parquet/ConverterParent.java  |  24 ++
 .../bench/parquet/DataWritableReadSupport.java  | 200 +
 .../parquet/DataWritableRecordConverter.java|  49 
 .../bench/parquet/DataWritableWriteSupport.java |  61 
 .../orc/bench/parquet/ETypeConverter.java   | 292 +++
 .../parquet/FilterPredicateLeafBuilder.java |  80 +
 .../bench/parquet/HiveCollectionConverter.java  | 196 +
 .../orc/bench/parquet/HiveGroupConverter.java   |  79 +
 .../orc/bench/parquet/HiveSchemaConverter.java  | 140 +
 .../orc/bench/parquet/HiveStructConverter.java  | 192 
 .../orc/bench/parquet/LeafFilterFactory.java| 200 +
 .../parquet/MapredParquetOutputFormat.java  | 129 
 .../org/apache/orc/bench/parquet/NanoTime.java  |  68 +
 .../apache/orc/bench/parquet/NanoTimeUtils.java | 113 +++
 .../ParquetFilterPredicateConverter.java| 143 +
 .../parquet/ParquetRecordReaderWrapper.java | 276 ++
 .../org/apache/orc/bench/parquet/Repeated.java  | 193 
 java/pom.xml|   5 +
 .../java/org/apache/orc/tools/PrintData.java|   8 +-
 33 files changed, 2699 insertions(+), 184 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/1752e172/java/bench/pom.xml
--
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
index f40f21b..caee888 100644
--- a/java/bench/pom.xml
+++ b/java/bench/pom.xml
@@ -37,6 +37,10 @@
 
 
   org.apache.orc
+  orc-mapreduce
+
+
+  org.apache.orc
   orc-tools
 
 
@@ -71,6 +75,10 @@
   parquet-hadoop
 
 
+  org.jodd
+  jodd-core
+
+
   org.openjdk.jmh
   jmh-core
 
@@ -89,6 +97,11 @@
   junit
   test
 
+
+  org.apache.orc
+  orc-mapreduce
+  1.3.0-SNAPSHOT
+
   
 
   

http://git-wip-us.apache.org/repos/asf/orc/blob/1752e172/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
--
diff --git 
a/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java 
b/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
index 4b17819..c53911f 100644
--- a/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
+++ b/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
@@ -29,8 +29,6 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.TrackingLocalFileSystem;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
@@ -40,6 +38,8 @@ import org.apache.orc.OrcFile;
 import org.apache.orc.Reader;
 import org.apache.orc.RecordReader;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.bench.parquet.DataWritableReadSupport;
+import org.apache.orc.bench.parquet.ParquetRecordReaderWrapper;
 import org.apache.parquet.hadoop.ParquetInputFormat;
 import org.openjdk.jmh.annotations.AuxCounters;
 import org.openjdk.jmh.annotations.Benchmark;
@@ -58,12 +58,9 @@ import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.ru

[14/15] orc git commit: fiddling with avro writer

2016-10-17 Thread omalley
fiddling with avro writer


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/0e8b4e46
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/0e8b4e46
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/0e8b4e46

Branch: refs/heads/orc-72
Commit: 0e8b4e46f16455cb54fea469bdd51bb9614c634f
Parents: 2246b1a
Author: Owen O'Malley 
Authored: Mon Oct 17 10:40:36 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 17 10:40:36 2016 -0700

--
 .../java/org/apache/orc/bench/GithubToAvro.java|  2 +-
 .../src/java/org/apache/orc/bench/SalesToAvro.java |  2 +-
 .../src/java/org/apache/orc/bench/TaxiToAvro.java  | 13 +
 .../java/org/apache/orc/bench/avro/AvroWriter.java | 17 +
 4 files changed, 16 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/0e8b4e46/java/bench/src/java/org/apache/orc/bench/GithubToAvro.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/GithubToAvro.java 
b/java/bench/src/java/org/apache/orc/bench/GithubToAvro.java
index eb94ff2..d31f1b6 100644
--- a/java/bench/src/java/org/apache/orc/bench/GithubToAvro.java
+++ b/java/bench/src/java/org/apache/orc/bench/GithubToAvro.java
@@ -31,7 +31,7 @@ public class GithubToAvro {
 TypeDescription schema = Utilities.loadSchema("github.schema");
 Configuration conf = new Configuration();
 AvroWriter writer = new AvroWriter(new Path(args[0]), schema, conf,
-TaxiToAvro.getCodec(args[1]));
+CompressionKind.valueOf(args[1]));
 VectorizedRowBatch batch = schema.createRowBatch();
 for(String inFile: Utilities.sliceArray(args, 2)) {
   JsonReader reader = new JsonReader(new Path(inFile), conf, schema);

http://git-wip-us.apache.org/repos/asf/orc/blob/0e8b4e46/java/bench/src/java/org/apache/orc/bench/SalesToAvro.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/SalesToAvro.java 
b/java/bench/src/java/org/apache/orc/bench/SalesToAvro.java
index 900be66..fcfe434 100644
--- a/java/bench/src/java/org/apache/orc/bench/SalesToAvro.java
+++ b/java/bench/src/java/org/apache/orc/bench/SalesToAvro.java
@@ -31,7 +31,7 @@ public class SalesToAvro {
 SalesGenerator sales = new SalesGenerator(Long.parseLong(args[2]));
 TypeDescription schema = sales.getSchema();
 AvroWriter writer = new AvroWriter(new Path(args[0]), schema, conf,
-TaxiToAvro.getCodec(args[1]));
+CompressionKind.valueOf(args[1]));
 VectorizedRowBatch batch = schema.createRowBatch();
 while (sales.nextBatch(batch)) {
   writer.writeBatch(batch);

http://git-wip-us.apache.org/repos/asf/orc/blob/0e8b4e46/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java 
b/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
index 9fd2f23..d5eb822 100644
--- a/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
+++ b/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
@@ -27,22 +27,11 @@ import org.apache.orc.bench.csv.CsvReader;
 
 public class TaxiToAvro {
 
-  public static String getCodec(String compression) {
-if ("none".equals(compression)) {
-  return "null";
-} else if ("zlib".equals(compression)) {
-  return "deflate";
-} else if ("snappy".equals(compression)) {
-  return "snappy";
-}
-throw new IllegalArgumentException("Unknown compression " + compression);
-  }
-
   public static void main(String[] args) throws Exception {
 TypeDescription schema = Utilities.loadSchema("nyc-taxi.schema");
 Configuration conf = new Configuration();
 AvroWriter writer = new AvroWriter(new Path(args[0]), schema, conf,
-getCodec(args[1]));
+CompressionKind.valueOf(args[1]));
 VectorizedRowBatch batch = schema.createRowBatch();
 for(String inFile: Utilities.sliceArray(args, 2)) {
   CsvReader reader = new CsvReader(new Path(inFile), conf, schema);

http://git-wip-us.apache.org/repos/asf/orc/blob/0e8b4e46/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java 
b/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
index 8cc9d06..eeb2fee 100644
--- a/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
+++ b/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
@@ -37,12 +37,12 @@ import 
org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql

[03/15] orc git commit: ORC-72. Add benchmarks to ORC.

2016-10-17 Thread omalley
ORC-72. Add benchmarks to ORC.


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/825a9441
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/825a9441
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/825a9441

Branch: refs/heads/orc-72
Commit: 825a9441fdcdb8ceb392ae80bed0324e93f7b07d
Parents: 37b939b
Author: Owen O'Malley 
Authored: Tue Jun 14 10:00:15 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 java/bench/.gitignore   |   5 +
 java/bench/fetch-data.sh|   6 +
 java/bench/pom.xml  | 138 
 .../hadoop/fs/TrackingLocalFileSystem.java  |  57 ++
 .../hadoop/hive/ql/io/orc/VectorToWritable.java |  70 ++
 .../src/java/org/apache/orc/bench/AvroScan.java |  48 ++
 .../java/org/apache/orc/bench/AvroWriter.java   | 368 ++
 .../orc/bench/ColumnProjectionBenchmark.java| 192 +
 .../java/org/apache/orc/bench/CsvReader.java| 175 +
 .../src/java/org/apache/orc/bench/CsvScan.java  |  40 ++
 .../org/apache/orc/bench/FullReadBenchmark.java | 222 ++
 .../java/org/apache/orc/bench/GithubToAvro.java |  42 ++
 .../java/org/apache/orc/bench/GithubToJson.java |  51 ++
 .../java/org/apache/orc/bench/GithubToOrc.java  |  48 ++
 .../org/apache/orc/bench/GithubToParquet.java   |  63 ++
 .../java/org/apache/orc/bench/JsonReader.java   | 278 
 .../src/java/org/apache/orc/bench/JsonScan.java |  61 ++
 .../src/java/org/apache/orc/bench/OrcScan.java  |  46 ++
 .../java/org/apache/orc/bench/ParquetScan.java  |  54 ++
 .../org/apache/orc/bench/RandomGenerator.java   | 523 ++
 .../org/apache/orc/bench/SalesGenerator.java| 200 ++
 .../java/org/apache/orc/bench/SalesToAvro.java  |  40 ++
 .../java/org/apache/orc/bench/SalesToJson.java  |  49 ++
 .../java/org/apache/orc/bench/SalesToOrc.java   |  42 ++
 .../org/apache/orc/bench/SalesToParquet.java|  61 ++
 .../java/org/apache/orc/bench/TaxiToAvro.java   |  53 ++
 .../java/org/apache/orc/bench/TaxiToJson.java   |  93 +++
 .../java/org/apache/orc/bench/TaxiToOrc.java| 108 +++
 .../org/apache/orc/bench/TaxiToParquet.java |  75 ++
 java/bench/src/main/resources/github.schema | 702 +++
 java/bench/src/main/resources/log4j.properties  |   6 +
 java/bench/src/main/resources/nyc-taxi.schema   |  21 +
 .../java/org/apache/orc/TypeDescription.java|   2 +-
 java/pom.xml|   1 +
 .../src/java/org/apache/orc/tools/FileDump.java |   1 -
 35 files changed, 3939 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/825a9441/java/bench/.gitignore
--
diff --git a/java/bench/.gitignore b/java/bench/.gitignore
new file mode 100644
index 000..babcae6
--- /dev/null
+++ b/java/bench/.gitignore
@@ -0,0 +1,5 @@
+.*.crc
+*.json.gz
+*.avro
+*.parquet
+*.orc

http://git-wip-us.apache.org/repos/asf/orc/blob/825a9441/java/bench/fetch-data.sh
--
diff --git a/java/bench/fetch-data.sh b/java/bench/fetch-data.sh
new file mode 100644
index 000..79e77ff
--- /dev/null
+++ b/java/bench/fetch-data.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/bash
+mkdir -p data/nyc
+(cd data/nyc; wget 
https://storage.googleapis.com/tlc-trip-data/2015/yellow_tripdata_2015-{11..12}.csv)
+(cd data/nyc; gzip *.csv)
+mkdir -p data/github
+(cd data/github; wget 
http://data.githubarchive.org/2015-11-{01..15}-{0..23}.json.gz)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/orc/blob/825a9441/java/bench/pom.xml
--
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
new file mode 100644
index 000..019bdf0
--- /dev/null
+++ b/java/bench/pom.xml
@@ -0,0 +1,138 @@
+
+
+http://maven.apache.org/POM/4.0.0";
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd";>
+  4.0.0
+  
+org.apache.orc
+orc
+1.2.0-SNAPSHOT
+../pom.xml
+  
+
+  orc-benchmarks
+  jar
+  ORC Benchmarks
+  
+Benchmarks for comparing ORC, Parquet, and Avro performance.
+  
+
+  
+
+  org.apache.orc
+  orc-core
+  1.2.0-SNAPSHOT
+
+
+
+
+  org.apache.avro
+  avro
+  1.8.1
+
+
+  org.apache.commons
+  commons-csv
+  1.4
+
+
+  org.apache.hadoop
+  hadoop-common
+
+
+  org.apache.hadoop
+  hadoop-mapreduce-client-core
+
+
+  org.apache.hive
+  hive-exec
+  2.1.0
+
+
+  org.apache.hive
+  hive-storage-api
+
+
+  org.openjdk.jmh
+

[02/15] orc git commit: ORC-72. Add benchmarks to ORC.

2016-10-17 Thread omalley
http://git-wip-us.apache.org/repos/asf/orc/blob/825a9441/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java 
b/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
new file mode 100644
index 000..fe8f85e
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
@@ -0,0 +1,523 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.TypeDescription;
+
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+public class RandomGenerator {
+  private final TypeDescription schema = TypeDescription.createStruct();
+  private final List fields = new ArrayList<>();
+  private final Random random;
+
+  public RandomGenerator(int seed) {
+random = new Random(seed);
+  }
+
+  private abstract class ValueGenerator {
+double nullProbability = 0;
+abstract void generate(ColumnVector vector, int valueCount);
+  }
+
+  private class RandomBoolean extends ValueGenerator {
+public void generate(ColumnVector v, int valueCount) {
+  LongColumnVector vector = (LongColumnVector) v;
+  for(int r=0; r < valueCount; ++r) {
+if (nullProbability != 0 && random.nextDouble() < nullProbability) {
+  v.noNulls = false;
+  v.isNull[r] = true;
+} else {
+  vector.vector[r] = random.nextInt(2);
+}
+  }
+}
+  }
+
+  private class RandomList extends ValueGenerator {
+private final int minSize;
+private final int sizeRange;
+private final Field child;
+
+public RandomList(int minSize, int maxSize, Field child) {
+  this.minSize = minSize;
+  this.sizeRange = maxSize - minSize + 1;
+  this.child = child;
+}
+
+public void generate(ColumnVector v, int valueCount) {
+  ListColumnVector vector = (ListColumnVector) v;
+  for(int r=0; r < valueCount; ++r) {
+if (nullProbability != 0 && random.nextDouble() < nullProbability) {
+  v.noNulls = false;
+  v.isNull[r] = true;
+} else {
+  vector.offsets[r] = vector.childCount;
+  vector.lengths[r] = random.nextInt(sizeRange) + minSize;
+  vector.childCount += vector.lengths[r];
+}
+  }
+  vector.child.ensureSize(vector.childCount, false);
+  child.generator.generate(vector.child, vector.childCount);
+}
+  }
+
+  private class RandomStruct extends ValueGenerator {
+private final Field[] children;
+
+public RandomStruct(Field[] children) {
+  this.children = children;
+}
+
+public void generate(ColumnVector v, int valueCount) {
+  StructColumnVector vector = (StructColumnVector) v;
+  for(int r=0; r < valueCount; ++r) {
+if (nullProbability != 0 && random.nextDouble() < nullProbability) {
+  v.noNulls = false;
+  v.isNull[r] = true;
+}
+  }
+  for(int c=0; c < children.length; ++c) {
+children[c].generator.generate(vector.fields[c], valueCount);
+  }
+}
+  }
+
+  private abstract class IntegerGenerator extends ValueGenerator {
+private final long sign;
+private final long mask;
+
+private IntegerGenerator(TypeDescription.Category kind) {
+  int bits = getIntegerLength(kind);
+  mask = bits == 64 ? 0 : -1L << bits;
+  sign = 1L << (bits - 1);
+}
+
+protected void normalize(LongColumnVector vector, int valueCount) {
+  // make sure the value stays in range by sign extending it
+  for(int r=0; r < valueCount; ++r) {
+if ((vector.vector[r] & sign) == 0) {
+  vector.vector[

[12/15] orc git commit: fix urls

2016-10-17 Thread omalley
fix urls


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/839d29d7
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/839d29d7
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/839d29d7

Branch: refs/heads/orc-72
Commit: 839d29d743263f7ce8c6464eb8536aaa4a1571bc
Parents: 611388b
Author: Owen O'Malley 
Authored: Fri Oct 14 11:16:50 2016 -0700
Committer: Owen O'Malley 
Committed: Fri Oct 14 11:16:50 2016 -0700

--
 java/bench/fetch-data.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/839d29d7/java/bench/fetch-data.sh
--
diff --git a/java/bench/fetch-data.sh b/java/bench/fetch-data.sh
old mode 100644
new mode 100755
index 79e77ff..e139117
--- a/java/bench/fetch-data.sh
+++ b/java/bench/fetch-data.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/bash
 mkdir -p data/nyc
-(cd data/nyc; wget 
https://storage.googleapis.com/tlc-trip-data/2015/yellow_tripdata_2015-{11..12}.csv)
+(cd data/nyc; wget 
https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-{11,12}.csv)
 (cd data/nyc; gzip *.csv)
 mkdir -p data/github
-(cd data/github; wget 
http://data.githubarchive.org/2015-11-{01..15}-{0..23}.json.gz)
\ No newline at end of file
+(cd data/github; wget 
http://data.githubarchive.org/2015-11-{01..15}-{0..23}.json.gz)



[15/15] orc git commit: Fiddling with orc writer

2016-10-17 Thread omalley
Fiddling with orc writer


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/7315a014
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/7315a014
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/7315a014

Branch: refs/heads/orc-72
Commit: 7315a0145efe92cc1e19c2b13a51b1b5d63779ff
Parents: 0e8b4e4
Author: Owen O'Malley 
Authored: Mon Oct 17 11:24:41 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 17 11:24:41 2016 -0700

--
 .../java/org/apache/orc/bench/GithubToOrc.java  |  2 +-
 .../java/org/apache/orc/bench/SalesToOrc.java   |  3 +--
 .../java/org/apache/orc/bench/TaxiToOrc.java|  7 +--
 .../java/org/apache/orc/bench/Utilities.java| 20 ++--
 4 files changed, 13 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/7315a014/java/bench/src/java/org/apache/orc/bench/GithubToOrc.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/GithubToOrc.java 
b/java/bench/src/java/org/apache/orc/bench/GithubToOrc.java
index a04b08e..9caca4a 100644
--- a/java/bench/src/java/org/apache/orc/bench/GithubToOrc.java
+++ b/java/bench/src/java/org/apache/orc/bench/GithubToOrc.java
@@ -34,7 +34,7 @@ public class GithubToOrc {
 Configuration conf = new Configuration();
 Writer writer = OrcFile.createWriter(new Path(args[0]),
 OrcFile.writerOptions(conf).setSchema(schema)
-.compress(Utilities.getCodec(args[1])));
+.compress(Utilities.getCodec(CompressionKind.valueOf(args[1];
 for(String inFile: Utilities.sliceArray(args, 2)) {
   JsonReader reader = new JsonReader(new Path(inFile), conf, schema);
   while (reader.nextBatch(batch)) {

http://git-wip-us.apache.org/repos/asf/orc/blob/7315a014/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java 
b/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
index d3b2615..062b863 100644
--- a/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
+++ b/java/bench/src/java/org/apache/orc/bench/SalesToOrc.java
@@ -22,7 +22,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
 
 public class SalesToOrc {
@@ -33,7 +32,7 @@ public class SalesToOrc {
 Writer writer = OrcFile.createWriter(new Path(args[0]),
 OrcFile.writerOptions(conf)
 .setSchema(sales.getSchema())
-.compress(Utilities.getCodec(args[1])));
+.compress(Utilities.getCodec(CompressionKind.valueOf(args[1];
 while (sales.nextBatch(batch)) {
   writer.addRowBatch(batch);
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/7315a014/java/bench/src/java/org/apache/orc/bench/TaxiToOrc.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/TaxiToOrc.java 
b/java/bench/src/java/org/apache/orc/bench/TaxiToOrc.java
index e95f794..89f0d0e 100644
--- a/java/bench/src/java/org/apache/orc/bench/TaxiToOrc.java
+++ b/java/bench/src/java/org/apache/orc/bench/TaxiToOrc.java
@@ -22,15 +22,10 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.orc.OrcFile;
-import org.apache.orc.CompressionKind;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
 import org.apache.orc.bench.csv.CsvReader;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-
 public class TaxiToOrc {
 
   public static void main(String[] args) throws Exception {
@@ -40,7 +35,7 @@ public class TaxiToOrc {
 Writer writer = OrcFile.createWriter(new Path(args[0]),
 OrcFile.writerOptions(conf)
 .setSchema(schema)
-.compress(Utilities.getCodec(args[1])));
+.compress(Utilities.getCodec(CompressionKind.valueOf(args[1];
 for(String inFile: Utilities.sliceArray(args, 2)) {
   CsvReader reader = new CsvReader(new Path(inFile), conf, schema);
   while (reader.nextBatch(batch)) {

http://git-wip-us.apache.org/repos/asf/orc/blob/7315a014/java/bench/src/java/org/apache/orc/bench/Utilities.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/Utilities.java 
b/java/bench/src/java/org/apache/orc/bench/Utilities.java
index 9a95ae9..d3e10de 100644
--- a/java/bench/src/java/org/apache/orc/bench/Utilities.java
+++ b/java/b

[13/15] orc git commit: minor debugging

2016-10-17 Thread omalley
minor debugging


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/2246b1a0
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/2246b1a0
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/2246b1a0

Branch: refs/heads/orc-72
Commit: 2246b1a01a26e01e72b31047f85a81c0cf64f08f
Parents: 839d29d
Author: Owen O'Malley 
Authored: Fri Oct 14 13:06:36 2016 -0700
Committer: Owen O'Malley 
Committed: Fri Oct 14 13:07:23 2016 -0700

--
 .../java/org/apache/orc/bench/avro/AvroSchemaUtils.java | 12 +++-
 .../src/java/org/apache/orc/bench/avro/AvroWriter.java  |  2 ++
 2 files changed, 9 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/2246b1a0/java/bench/src/java/org/apache/orc/bench/avro/AvroSchemaUtils.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/avro/AvroSchemaUtils.java 
b/java/bench/src/java/org/apache/orc/bench/avro/AvroSchemaUtils.java
index 5df7b70..893a4fd 100644
--- a/java/bench/src/java/org/apache/orc/bench/avro/AvroSchemaUtils.java
+++ b/java/bench/src/java/org/apache/orc/bench/avro/AvroSchemaUtils.java
@@ -110,7 +110,7 @@ public class AvroSchemaUtils {
 throw new UnsupportedOperationException(typeInfo + " is not 
supported.");
 }
 
-return wrapInUnionWithNull(schema);
+return schema;
   }
 
   private static Schema createAvroUnion(TypeDescription typeInfo) {
@@ -128,7 +128,7 @@ public class AvroSchemaUtils {
   }
 }
 
-return Schema.createUnion(childSchemas);
+return wrapInUnionWithNull(Schema.createUnion(childSchemas));
   }
 
   private static Schema createAvroRecord(TypeDescription typeInfo) {
@@ -140,7 +140,8 @@ public class AvroSchemaUtils {
 for (int i = 0; i < fieldNames.size(); ++i) {
   TypeDescription childTypeInfo = fieldTypes.get(i);
   Schema.Field field = new Schema.Field(fieldNames.get(i),
-  createAvroSchema(childTypeInfo), childTypeInfo.toString(),
+  wrapInUnionWithNull(createAvroSchema(childTypeInfo)),
+  childTypeInfo.toString(),
   (Object) null);
   childFields.add(field);
 }
@@ -158,14 +159,15 @@ public class AvroSchemaUtils {
   + typeInfo);
 }
 
-Schema valueSchema = createAvroSchema(typeInfo.getChildren().get(1));
+Schema valueSchema = wrapInUnionWithNull(createAvroSchema
+(typeInfo.getChildren().get(1)));
 
 return Schema.createMap(valueSchema);
   }
 
   private static Schema createAvroArray(TypeDescription typeInfo) {
 Schema child = createAvroSchema(typeInfo.getChildren().get(0));
-return Schema.createArray(child);
+return Schema.createArray(wrapInUnionWithNull(child));
   }
 
   private static Schema wrapInUnionWithNull(Schema schema) {

http://git-wip-us.apache.org/repos/asf/orc/blob/2246b1a0/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java 
b/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
index 2735a71..8cc9d06 100644
--- a/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
+++ b/java/bench/src/java/org/apache/orc/bench/avro/AvroWriter.java
@@ -308,6 +308,8 @@ public class AvroWriter {
 String compression) throws IOException {
 List childTypes = schema.getChildren();
 Schema avroSchema = AvroSchemaUtils.createAvroSchema(schema);
+System.out.println("Hive schema " + schema);
+System.out.println("Avro schema " + avroSchema);
 List avroFields = avroSchema.getFields();
 converters = new AvroConverter[childTypes.size()];
 for(int c=0; c < converters.length; ++c) {



[10/15] orc git commit: more updates

2016-10-17 Thread omalley
more updates


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/0f56aaad
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/0f56aaad
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/0f56aaad

Branch: refs/heads/orc-72
Commit: 0f56aaade0b33c5473d640fd70adb7ee59a694a3
Parents: 73cdb4c
Author: Owen O'Malley 
Authored: Tue Oct 11 15:25:10 2016 -0700
Committer: Owen O'Malley 
Committed: Tue Oct 11 15:25:10 2016 -0700

--
 .../java/org/apache/orc/bench/TaxiToAvro.java   |   4 +-
 .../bench/parquet/DataWritableWriteSupport.java |  18 +-
 .../orc/bench/parquet/DataWritableWriter.java   | 466 ---
 .../apache/orc/bench/parquet/ParquetScan.java   |   6 +-
 .../apache/orc/bench/parquet/RowInBatch.java|  65 ++-
 5 files changed, 283 insertions(+), 276 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/0f56aaad/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java 
b/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
index 2b14f50..9fd2f23 100644
--- a/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
+++ b/java/bench/src/java/org/apache/orc/bench/TaxiToAvro.java
@@ -39,12 +39,12 @@ public class TaxiToAvro {
   }
 
   public static void main(String[] args) throws Exception {
-TypeDescription schema = TaxiToOrc.loadSchema("nyc-taxi.schema");
+TypeDescription schema = Utilities.loadSchema("nyc-taxi.schema");
 Configuration conf = new Configuration();
 AvroWriter writer = new AvroWriter(new Path(args[0]), schema, conf,
 getCodec(args[1]));
 VectorizedRowBatch batch = schema.createRowBatch();
-for(String inFile: TaxiToOrc.sliceArray(args, 2)) {
+for(String inFile: Utilities.sliceArray(args, 2)) {
   CsvReader reader = new CsvReader(new Path(inFile), conf, schema);
   while (reader.nextBatch(batch)) {
 writer.writeBatch(batch);

http://git-wip-us.apache.org/repos/asf/orc/blob/0f56aaad/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriteSupport.java
--
diff --git 
a/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriteSupport.java
 
b/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriteSupport.java
index f4621e5..2b8a1d3 100644
--- 
a/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriteSupport.java
+++ 
b/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriteSupport.java
@@ -11,13 +11,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hive.ql.io.parquet.write;
+package org.apache.orc.bench.parquet;
 
 import java.util.HashMap;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
 
+import org.apache.orc.TypeDescription;
 import org.apache.parquet.hadoop.api.WriteSupport;
 import org.apache.parquet.io.api.RecordConsumer;
 import org.apache.parquet.schema.MessageType;
@@ -28,15 +28,20 @@ import org.apache.parquet.schema.MessageTypeParser;
  * DataWritableWriteSupport is a WriteSupport for the DataWritableWriter
  *
  */
-public class DataWritableWriteSupport extends WriteSupport {
+public class DataWritableWriteSupport extends WriteSupport {
 
   public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema";
+  public static final String HIVE_SCHEMA = "hive.schema";
 
   private DataWritableWriter writer;
   private MessageType schema;
+  private TypeDescription hiveType;
 
-  public static void setSchema(final MessageType schema, final Configuration 
configuration) {
+  public static void setSchema(final MessageType schema,
+   final TypeDescription hiveType,
+   final Configuration configuration) {
 configuration.set(PARQUET_HIVE_SCHEMA, schema.toString());
+configuration.set(HIVE_SCHEMA, hiveType.toString());
   }
 
   public static MessageType getSchema(final Configuration configuration) {
@@ -46,16 +51,17 @@ public class DataWritableWriteSupport extends 
WriteSupport {
   @Override
   public WriteContext init(final Configuration configuration) {
 schema = getSchema(configuration);
+hiveType = TypeDescription.fromString(configuration.get(HIVE_SCHEMA));
 return new WriteContext(schema, new HashMap());
   }
 
   @Override
   public void prepareForWrite(final RecordConsumer recordConsumer) {
-writer = new DataWritableWriter(recordConsumer, schema);
+writer = new DataWritableWriter(recordConsumer, schema, hiveType);
   }
 
   @Override
-  public void write(final ParquetHiveRecord record) {
+  public void write(fi

[01/15] orc git commit: more updates

2016-10-17 Thread omalley
Repository: orc
Updated Branches:
  refs/heads/orc-72 [created] 7315a0145


more updates


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/73cdb4c2
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/73cdb4c2
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/73cdb4c2

Branch: refs/heads/orc-72
Commit: 73cdb4c2de03e512d5be28daaf5d1f0f292535b7
Parents: 1752e17
Author: Owen O'Malley 
Authored: Mon Oct 10 09:30:20 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 .../orc/bench/parquet/DataWritableWriter.java   | 550 +++
 .../apache/orc/bench/parquet/RowInBatch.java|  33 ++
 2 files changed, 583 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/73cdb4c2/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriter.java
--
diff --git 
a/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriter.java 
b/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriter.java
new file mode 100644
index 000..220e452
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/parquet/DataWritableWriter.java
@@ -0,0 +1,550 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.bench.parquet;
+
+import org.apache.orc.TypeDescription;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.Type;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Map;
+
+/**
+ *
+ * DataWritableWriter sends a record to the Parquet API with the expected 
schema in order
+ * to be written to a file.
+ * This class is only used through DataWritableWriteSupport class.
+ */
+public class DataWritableWriter {
+  private static final Logger LOG = 
LoggerFactory.getLogger(DataWritableWriter.class);
+  protected final RecordConsumer recordConsumer;
+  private final GroupType schema;
+  private final TypeDescription hiveType;
+
+  /* This writer will be created when writing the first row in order to get
+  information about how to inspect the record data.  */
+  private final DataWriter messageWriter;
+
+  public DataWritableWriter(final RecordConsumer recordConsumer,
+final GroupType schema,
+TypeDescription hiveType) {
+this.recordConsumer = recordConsumer;
+this.schema = schema;
+this.hiveType = hiveType;
+messageWriter = createMessageWriter(hiveType, schema);
+  }
+
+  /**
+   * It writes a record to Parquet.
+   * @param record Contains the record that is going to be written.
+   */
+  public void write(final RowInBatch record) {
+  messageWriter.write(record);
+  }
+
+  private MessageDataWriter createMessageWriter(TypeDescription hiveType,
+GroupType schema) {
+return new MessageDataWriter(hiveType, schema);
+  }
+
+  /**
+   * Creates a writer for the specific object inspector. The returned writer 
will be used
+   * to call Parquet API for the specific data type.
+   * @param hiveType The type description used to get the correct value type.
+   * @param type Type that contains information about the type schema.
+   * @return A ParquetWriter object used to call the Parquet API for the 
specific data type.
+   */
+  private DataWriter createWriter(TypeDescription hiveType, Type type) {
+switch (hiveType.getCategory()) {
+  case BOOLEAN:
+return new BooleanDataWriter(hiveType);
+  case BYTE:
+return new ByteDataWriter(hiveType);
+  case SHORT:
+return new ShortDataWriter(hiveType);
+  case INT:
+return new IntDataWriter(hiveType);
+  case LONG:
+return new LongDataWriter(hiveType);
+  case FLOAT:
+return new FloatDataWriter(hiveType);
+  case DOUBLE:
+return new DoubleDataWriter(hiveType);
+  case STRING:
+return new StringDataWriter(hiveType);
+  case CHAR:
+return

[11/15] orc git commit: It compiles

2016-10-17 Thread omalley
It compiles


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/611388b5
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/611388b5
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/611388b5

Branch: refs/heads/orc-72
Commit: 611388b50f8f3e9deeaa0e5335f0afd7f0ab9422
Parents: 0f56aaa
Author: Owen O'Malley 
Authored: Wed Oct 12 16:06:29 2016 -0700
Committer: Owen O'Malley 
Committed: Wed Oct 12 16:06:29 2016 -0700

--
 .../org/apache/orc/bench/CompressionKind.java   |  17 ++-
 .../org/apache/orc/bench/FullReadBenchmark.java |  20 +--
 .../org/apache/orc/bench/GithubToParquet.java   |  34 ++---
 .../org/apache/orc/bench/SalesToParquet.java|  34 +
 .../org/apache/orc/bench/TaxiToParquet.java |  49 ++-
 .../java/org/apache/orc/bench/csv/CsvScan.java  |   4 +-
 .../bench/parquet/DataWritableReadSupport.java  |   2 -
 .../bench/parquet/HiveCollectionConverter.java  |   2 -
 .../parquet/MapredParquetOutputFormat.java  | 129 ---
 .../parquet/ParquetRecordReaderWrapper.java |   2 -
 .../apache/orc/bench/parquet/ParquetWriter.java |  72 +++
 11 files changed, 118 insertions(+), 247 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/611388b5/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/CompressionKind.java 
b/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
index 9fe9ba9..86ac476 100644
--- a/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
+++ b/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
@@ -20,11 +20,13 @@ package org.apache.orc.bench;
 
 import io.airlift.compress.snappy.SnappyCodec;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
 /**
- * Created by owen on 10/5/16.
+ * Enum for handling the compression codecs for the benchmark
  */
 public enum CompressionKind {
   NONE(""),
@@ -53,4 +55,17 @@ public enum CompressionKind {
 throw new IllegalArgumentException("Unhandled kind " + this);
 }
   }
+
+  public InputStream read(InputStream in) throws IOException {
+switch (this) {
+  case NONE:
+return in;
+  case ZLIB:
+return new GZIPInputStream(in);
+  case SNAPPY:
+return new SnappyCodec().createInputStream(in);
+  default:
+throw new IllegalArgumentException("Unhandled kind " + this);
+}
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/611388b5/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java 
b/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
index 917707d..849e030 100644
--- a/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
+++ b/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
@@ -29,19 +29,17 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.TrackingLocalFileSystem;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Reporter;
 import org.apache.orc.OrcFile;
 import org.apache.orc.Reader;
 import org.apache.orc.RecordReader;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.bench.parquet.DataWritableReadSupport;
+import org.apache.orc.bench.parquet.ParquetRecordReaderWrapper;
 import org.apache.parquet.hadoop.ParquetInputFormat;
-import io.airlift.compress.snappy.HadoopSnappyInputStream;
 import org.openjdk.jmh.annotations.AuxCounters;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -63,7 +61,6 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.URI;
 import java.util.concurrent.TimeUnit;
-import java.util.zip.GZIPInputStream;
 
 @BenchmarkMode(Mode.AverageTime)
 @Warmup(iterations=1, time=10, timeUnit = TimeUnit.SECONDS)
@@ -173,8 +170,7 @@ public class FullReadBenchmark {
 NullWritable nada = NullWritable.get();
 FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
 org.apache.hadoop.mapred.RecordReader 
recordReader =
-new ParquetRecordReaderWrapper(inputFormat, split, conf,
-Reporter.NULL);

[09/15] orc git commit: more updates

2016-10-17 Thread omalley
more updates


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/5b37113b
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/5b37113b
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/5b37113b

Branch: refs/heads/orc-72
Commit: 5b37113b73eb0e12744f2711326e11cd2ef6eaef
Parents: 86628bc
Author: Owen O'Malley 
Authored: Mon Oct 3 10:01:40 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 java/bench/pom.xml  |   4 +
 .../src/java/org/apache/orc/bench/AvroScan.java |  47 ---
 .../org/apache/orc/bench/AvroSchemaUtils.java   | 190 --
 .../java/org/apache/orc/bench/AvroWriter.java   | 375 ---
 .../java/org/apache/orc/bench/CsvReader.java| 175 -
 .../src/java/org/apache/orc/bench/CsvScan.java  |  40 --
 .../java/org/apache/orc/bench/GithubToAvro.java |   2 +
 .../java/org/apache/orc/bench/GithubToJson.java |   2 +-
 .../java/org/apache/orc/bench/GithubToOrc.java  |   4 +-
 .../org/apache/orc/bench/GithubToParquet.java   |   2 +
 .../java/org/apache/orc/bench/JsonReader.java   | 278 --
 .../src/java/org/apache/orc/bench/JsonScan.java |  61 ---
 .../src/java/org/apache/orc/bench/OrcScan.java  |  46 ---
 .../java/org/apache/orc/bench/ParquetScan.java  |  54 ---
 .../java/org/apache/orc/bench/SalesToAvro.java  |   1 +
 .../org/apache/orc/bench/SalesToParquet.java|   1 +
 .../java/org/apache/orc/bench/TaxiToAvro.java   |   2 +
 .../java/org/apache/orc/bench/TaxiToJson.java   |   1 +
 .../java/org/apache/orc/bench/TaxiToOrc.java|   1 +
 .../org/apache/orc/bench/TaxiToParquet.java |   2 +
 .../org/apache/orc/bench/avro/AvroScan.java |  47 +++
 .../apache/orc/bench/avro/AvroSchemaUtils.java  | 190 ++
 .../org/apache/orc/bench/avro/AvroWriter.java   | 375 +++
 .../org/apache/orc/bench/csv/CsvReader.java | 175 +
 .../java/org/apache/orc/bench/csv/CsvScan.java  |  41 ++
 .../org/apache/orc/bench/json/JsonReader.java   | 278 ++
 .../org/apache/orc/bench/json/JsonScan.java |  61 +++
 .../java/org/apache/orc/bench/orc/OrcScan.java  |  46 +++
 .../apache/orc/bench/parquet/ParquetScan.java   |  54 +++
 java/pom.xml|  15 +-
 30 files changed, 1295 insertions(+), 1275 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/5b37113b/java/bench/pom.xml
--
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
index f0bf55a..f40f21b 100644
--- a/java/bench/pom.xml
+++ b/java/bench/pom.xml
@@ -67,6 +67,10 @@
   hive-storage-api
 
 
+  org.apache.parquet
+  parquet-hadoop
+
+
   org.openjdk.jmh
   jmh-core
 

http://git-wip-us.apache.org/repos/asf/orc/blob/5b37113b/java/bench/src/java/org/apache/orc/bench/AvroScan.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/AvroScan.java 
b/java/bench/src/java/org/apache/orc/bench/AvroScan.java
deleted file mode 100644
index 61f6a62..000
--- a/java/bench/src/java/org/apache/orc/bench/AvroScan.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileReader;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.mapred.FsInput;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-public class AvroScan {
-  public static void main(String[] args) throws Exception {
-Configuration conf = new Configuration();
-long rowCount = 0;
-for(String filename: args) {
-  FsInput file = new FsInput(new Path(filename), conf);
-  DatumReader datumReader = new GenericDatumReader<>();
-  DataFileReader dataFileReader =
-  new DataFileReader<>(file, dat

[06/15] orc git commit: more updates

2016-10-17 Thread omalley
more updates


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/86628bcb
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/86628bcb
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/86628bcb

Branch: refs/heads/orc-72
Commit: 86628bcbffc1d19f8f2f1fe5c840ac9d429d3dc6
Parents: 5ae2d41
Author: Owen O'Malley 
Authored: Sat Oct 1 10:24:32 2016 -0700
Committer: Owen O'Malley 
Committed: Mon Oct 10 13:59:16 2016 -0700

--
 java/bench/pom.xml  |   5 +
 .../hadoop/hive/ql/io/orc/VectorToWritable.java |  70 ---
 .../src/java/org/apache/orc/bench/AvroScan.java |   1 -
 .../org/apache/orc/bench/AvroSchemaUtils.java   | 190 +++
 .../java/org/apache/orc/bench/AvroWriter.java   |  31 +--
 .../orc/bench/ColumnProjectionBenchmark.java|   1 -
 .../org/apache/orc/bench/FullReadBenchmark.java |   4 +-
 .../java/org/apache/orc/bench/GithubToOrc.java  |   2 +-
 .../java/org/apache/orc/bench/TaxiToOrc.java|   2 +-
 java/pom.xml|  17 +-
 10 files changed, 224 insertions(+), 99 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/86628bcb/java/bench/pom.xml
--
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
index 738dfb3..f0bf55a 100644
--- a/java/bench/pom.xml
+++ b/java/bench/pom.xml
@@ -46,6 +46,11 @@
   avro
 
 
+  org.apache.avro
+  avro-mapred
+  hadoop2
+
+
   org.apache.commons
   commons-csv
 

http://git-wip-us.apache.org/repos/asf/orc/blob/86628bcb/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/VectorToWritable.java
--
diff --git 
a/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/VectorToWritable.java 
b/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/VectorToWritable.java
deleted file mode 100644
index ae8e8da..000
--- a/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/VectorToWritable.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.orc.OrcProto;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.TypeDescription;
-
-import java.util.List;
-
-/**
- * This class is just here to provide a public API to some of the ORC internal
- * methods.
- */
-public class VectorToWritable {
-  public static ObjectInspector createObjectInspector(TypeDescription schema) {
-// convert the type descr to protobuf types
-List types = OrcUtils.getOrcTypes(schema);
-// convert the protobuf types to an ObjectInspector
-return OrcStruct.createObjectInspector(0, types);
-  }
-
-  public static Object createValue(VectorizedRowBatch batch,
-   int row,
-   TypeDescription schema,
-   Object previous) {
-if(schema.getCategory() == TypeDescription.Category.STRUCT) {
-  List children = schema.getChildren();
-  int numberOfChildren = children.size();
-  OrcStruct result;
-  if(previous != null && previous.getClass() == OrcStruct.class) {
-result = (OrcStruct)previous;
-if(result.getNumFields() != numberOfChildren) {
-  result.setNumFields(numberOfChildren);
-}
-  } else {
-result = new OrcStruct(numberOfChildren);
-previous = result;
-  }
-
-  for(int i = 0; i < numberOfChildren; ++i) {
-result.setFieldValue(i, RecordReaderImpl.nextValue(batch.cols[i], row,
-children.get(i), result.getFieldValue(i)));
-  }
-} else {
-  previous = RecordReaderImpl.nextValue(batch.cols[0], row, schema,
-  previous);
-}
-;
-return previous;
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/86628bcb/java/bench/sr

[08/15] orc git commit: more updates

2016-10-17 Thread omalley
http://git-wip-us.apache.org/repos/asf/orc/blob/5b37113b/java/bench/src/java/org/apache/orc/bench/csv/CsvScan.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/csv/CsvScan.java 
b/java/bench/src/java/org/apache/orc/bench/csv/CsvScan.java
new file mode 100644
index 000..ae78cc4
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/csv/CsvScan.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench.csv;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.bench.TaxiToOrc;
+
+public class CsvScan {
+  public static void main(String[] args) throws Exception {
+Configuration conf = new Configuration();
+long rowCount = 0;
+TypeDescription schema = TaxiToOrc.loadSchema("nyc-taxi.schema");
+for(String filename: args) {
+  CsvReader reader = new CsvReader(new Path(filename), conf, schema);
+  VectorizedRowBatch batch = schema.createRowBatch();
+  while (reader.nextBatch(batch)) {
+rowCount += batch.size;
+  }
+}
+System.out.println("Rows read: " + rowCount);
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/5b37113b/java/bench/src/java/org/apache/orc/bench/json/JsonReader.java
--
diff --git a/java/bench/src/java/org/apache/orc/bench/json/JsonReader.java 
b/java/bench/src/java/org/apache/orc/bench/json/JsonReader.java
new file mode 100644
index 000..a5057e4
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/json/JsonReader.java
@@ -0,0 +1,278 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.bench.json;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonStreamParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.TypeDescription;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.sql.Timestamp;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+public class JsonReader {
+  private final TypeDescription schema;
+  private final JsonStreamParser parser;
+  private final JsonConverter[] converters;
+
+  interface JsonConverter {
+void convert(JsonElement value, ColumnVector vect, int row);
+  }
+
+  static class BooleanColumnConverter implements JsonConverter {
+public void convert(JsonElement value, ColumnVec

[04/15] orc git commit: more updates

2016-10-17 Thread omalley
http://git-wip-us.apache.org/repos/asf/orc/blob/1752e172/java/bench/src/java/org/apache/orc/bench/parquet/LeafFilterFactory.java
--
diff --git 
a/java/bench/src/java/org/apache/orc/bench/parquet/LeafFilterFactory.java 
b/java/bench/src/java/org/apache/orc/bench/parquet/LeafFilterFactory.java
new file mode 100644
index 000..13a822a
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/parquet/LeafFilterFactory.java
@@ -0,0 +1,200 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.bench.parquet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
+
+import org.apache.parquet.filter2.predicate.FilterApi;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+
+import static org.apache.parquet.filter2.predicate.FilterApi.eq;
+import static org.apache.parquet.filter2.predicate.FilterApi.lt;
+import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
+
+public class LeafFilterFactory {
+  private static final Logger LOG = 
LoggerFactory.getLogger(LeafFilterFactory.class);
+
+  class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+/**
+ * @param op consists of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, 
LESS_THAN_EQUALS, IS_NULL
+ * @param literal
+ * @param columnName
+ * @return
+ */
+@Override
+public FilterPredicate buildPredict(Operator op, Object literal,
+String columnName) {
+  switch (op) {
+case LESS_THAN:
+  return lt(intColumn(columnName), ((Number) literal).intValue());
+case IS_NULL:
+case EQUALS:
+case NULL_SAFE_EQUALS:
+  return eq(intColumn(columnName),
+(literal == null) ? null : ((Number) literal).intValue());
+case LESS_THAN_EQUALS:
+  return ltEq(intColumn(columnName), ((Number) literal).intValue());
+default:
+  throw new RuntimeException("Unknown PredicateLeaf Operator type: " + 
op);
+  }
+}
+  }
+
+  class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+@Override
+public FilterPredicate buildPredict(Operator op, Object constant,
+String columnName) {
+  switch (op) {
+case LESS_THAN:
+  return lt(FilterApi.longColumn(columnName), ((Number) 
constant).longValue());
+case IS_NULL:
+case EQUALS:
+case NULL_SAFE_EQUALS:
+  return eq(FilterApi.longColumn(columnName),
+(constant == null) ? null : ((Number) constant).longValue());
+case LESS_THAN_EQUALS:
+  return ltEq(FilterApi.longColumn(columnName),
+((Number) constant).longValue());
+default:
+  throw new RuntimeException("Unknown PredicateLeaf Operator type: " + 
op);
+  }
+}
+  }
+
+  class FloatFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+@Override
+public FilterPredicate buildPredict(Operator op, Object constant, String 
columnName) {
+  switch (op) {
+  case LESS_THAN:
+return lt(floatColumn(columnName), ((Number) constant).floatValue());
+  case IS_NULL:
+  case EQUALS:
+  case NULL_SAFE_EQUALS:
+return eq(floatColumn(columnName),
+(constant == null) ? null : ((Number) constant).floatValue());
+  case LESS_THAN_EQUALS:
+return ltEq(FilterApi.floatColumn(columnName), ((Number) 
constant).floatValue());
+  default:
+throw new RuntimeException("Unknown PredicateLeaf Operator type: " + 
op);
+  }
+}
+  }
+
+  class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
+
+@Override
+public FilterPredicate buildPredict(Operator op, Object constant,
+